{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.024681201151789386,
  "eval_steps": 500,
  "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00020567667626491157,
      "grad_norm": 12.799294471740723,
      "learning_rate": 5e-06,
      "loss": 1.5984,
      "step": 1
    },
    {
      "epoch": 0.00041135335252982314,
      "grad_norm": 16.628677368164062,
      "learning_rate": 1e-05,
      "loss": 2.3592,
      "step": 2
    },
    {
      "epoch": 0.0006170300287947347,
      "grad_norm": 9.23403549194336,
      "learning_rate": 1.5e-05,
      "loss": 1.9703,
      "step": 3
    },
    {
      "epoch": 0.0008227067050596463,
      "grad_norm": 8.804163932800293,
      "learning_rate": 2e-05,
      "loss": 1.7174,
      "step": 4
    },
    {
      "epoch": 0.0010283833813245578,
      "grad_norm": 19.202816009521484,
      "learning_rate": 2.5e-05,
      "loss": 2.5644,
      "step": 5
    },
    {
      "epoch": 0.0012340600575894694,
      "grad_norm": Infinity,
      "learning_rate": 2.5e-05,
      "loss": 1.9342,
      "step": 6
    },
    {
      "epoch": 0.001439736733854381,
      "grad_norm": 7.568014621734619,
      "learning_rate": 3e-05,
      "loss": 1.8679,
      "step": 7
    },
    {
      "epoch": 0.0016454134101192926,
      "grad_norm": 14.93575382232666,
      "learning_rate": 3.5e-05,
      "loss": 1.6565,
      "step": 8
    },
    {
      "epoch": 0.001851090086384204,
      "grad_norm": 11.807939529418945,
      "learning_rate": 4e-05,
      "loss": 2.067,
      "step": 9
    },
    {
      "epoch": 0.0020567667626491155,
      "grad_norm": 9.383672714233398,
      "learning_rate": 4.5e-05,
      "loss": 2.2929,
      "step": 10
    },
    {
      "epoch": 0.0022624434389140274,
      "grad_norm": 12.13580322265625,
      "learning_rate": 5e-05,
      "loss": 1.902,
      "step": 11
    },
    {
      "epoch": 0.0024681201151789387,
      "grad_norm": 26.6066837310791,
      "learning_rate": 4.9545454545454553e-05,
      "loss": 2.2335,
      "step": 12
    },
    {
      "epoch": 0.00267379679144385,
      "grad_norm": 22.572980880737305,
      "learning_rate": 4.909090909090909e-05,
      "loss": 1.3673,
      "step": 13
    },
    {
      "epoch": 0.002879473467708762,
      "grad_norm": 14.39148235321045,
      "learning_rate": 4.863636363636364e-05,
      "loss": 1.8704,
      "step": 14
    },
    {
      "epoch": 0.0030851501439736733,
      "grad_norm": 10.3450288772583,
      "learning_rate": 4.8181818181818186e-05,
      "loss": 1.0219,
      "step": 15
    },
    {
      "epoch": 0.003290826820238585,
      "grad_norm": 11.662192344665527,
      "learning_rate": 4.772727272727273e-05,
      "loss": 1.9658,
      "step": 16
    },
    {
      "epoch": 0.0034965034965034965,
      "grad_norm": 9.669842720031738,
      "learning_rate": 4.7272727272727275e-05,
      "loss": 1.7769,
      "step": 17
    },
    {
      "epoch": 0.003702180172768408,
      "grad_norm": Infinity,
      "learning_rate": 4.7272727272727275e-05,
      "loss": 1.4503,
      "step": 18
    },
    {
      "epoch": 0.00390785684903332,
      "grad_norm": 24.309511184692383,
      "learning_rate": 4.681818181818182e-05,
      "loss": 1.1466,
      "step": 19
    },
    {
      "epoch": 0.004113533525298231,
      "grad_norm": 10.933331489562988,
      "learning_rate": 4.636363636363636e-05,
      "loss": 1.9125,
      "step": 20
    },
    {
      "epoch": 0.0043192102015631425,
      "grad_norm": 16.095943450927734,
      "learning_rate": 4.5909090909090914e-05,
      "loss": 2.1035,
      "step": 21
    },
    {
      "epoch": 0.004524886877828055,
      "grad_norm": 11.987975120544434,
      "learning_rate": 4.545454545454546e-05,
      "loss": 0.6702,
      "step": 22
    },
    {
      "epoch": 0.004730563554092966,
      "grad_norm": 8.843574523925781,
      "learning_rate": 4.5e-05,
      "loss": 0.5454,
      "step": 23
    },
    {
      "epoch": 0.0049362402303578775,
      "grad_norm": 8.445805549621582,
      "learning_rate": 4.454545454545455e-05,
      "loss": 1.7987,
      "step": 24
    },
    {
      "epoch": 0.005141916906622789,
      "grad_norm": 5.443497657775879,
      "learning_rate": 4.409090909090909e-05,
      "loss": 0.2675,
      "step": 25
    },
    {
      "epoch": 0.0053475935828877,
      "grad_norm": 5.370733737945557,
      "learning_rate": 4.3636363636363636e-05,
      "loss": 0.1773,
      "step": 26
    },
    {
      "epoch": 0.0055532702591526125,
      "grad_norm": 12.393692016601562,
      "learning_rate": 4.318181818181819e-05,
      "loss": 2.0755,
      "step": 27
    },
    {
      "epoch": 0.005758946935417524,
      "grad_norm": 11.817388534545898,
      "learning_rate": 4.2727272727272724e-05,
      "loss": 1.796,
      "step": 28
    },
    {
      "epoch": 0.005964623611682435,
      "grad_norm": 7.721453666687012,
      "learning_rate": 4.2272727272727275e-05,
      "loss": 1.461,
      "step": 29
    },
    {
      "epoch": 0.006170300287947347,
      "grad_norm": 8.523295402526855,
      "learning_rate": 4.181818181818182e-05,
      "loss": 1.6376,
      "step": 30
    },
    {
      "epoch": 0.006375976964212258,
      "grad_norm": 14.518152236938477,
      "learning_rate": 4.1363636363636364e-05,
      "loss": 2.036,
      "step": 31
    },
    {
      "epoch": 0.00658165364047717,
      "grad_norm": 12.425220489501953,
      "learning_rate": 4.0909090909090915e-05,
      "loss": 0.4348,
      "step": 32
    },
    {
      "epoch": 0.006787330316742082,
      "grad_norm": 26.095151901245117,
      "learning_rate": 4.045454545454546e-05,
      "loss": 1.2404,
      "step": 33
    },
    {
      "epoch": 0.006993006993006993,
      "grad_norm": 4.784183979034424,
      "learning_rate": 4e-05,
      "loss": 1.309,
      "step": 34
    },
    {
      "epoch": 0.007198683669271904,
      "grad_norm": 13.188830375671387,
      "learning_rate": 3.954545454545455e-05,
      "loss": 1.5275,
      "step": 35
    },
    {
      "epoch": 0.007404360345536816,
      "grad_norm": 9.305349349975586,
      "learning_rate": 3.909090909090909e-05,
      "loss": 1.8645,
      "step": 36
    },
    {
      "epoch": 0.007610037021801728,
      "grad_norm": 10.391180038452148,
      "learning_rate": 3.8636363636363636e-05,
      "loss": 0.2851,
      "step": 37
    },
    {
      "epoch": 0.00781571369806664,
      "grad_norm": 2.905449390411377,
      "learning_rate": 3.818181818181819e-05,
      "loss": 0.0632,
      "step": 38
    },
    {
      "epoch": 0.008021390374331552,
      "grad_norm": 15.784213066101074,
      "learning_rate": 3.7727272727272725e-05,
      "loss": 0.7361,
      "step": 39
    },
    {
      "epoch": 0.008227067050596462,
      "grad_norm": 4.363598346710205,
      "learning_rate": 3.7272727272727276e-05,
      "loss": 0.0486,
      "step": 40
    },
    {
      "epoch": 0.008432743726861374,
      "grad_norm": 17.239139556884766,
      "learning_rate": 3.681818181818182e-05,
      "loss": 1.3015,
      "step": 41
    },
    {
      "epoch": 0.008638420403126285,
      "grad_norm": 1.9617282152175903,
      "learning_rate": 3.6363636363636364e-05,
      "loss": 0.0413,
      "step": 42
    },
    {
      "epoch": 0.008844097079391197,
      "grad_norm": 5.427540302276611,
      "learning_rate": 3.590909090909091e-05,
      "loss": 1.733,
      "step": 43
    },
    {
      "epoch": 0.00904977375565611,
      "grad_norm": 41.895721435546875,
      "learning_rate": 3.545454545454546e-05,
      "loss": 1.7908,
      "step": 44
    },
    {
      "epoch": 0.00925545043192102,
      "grad_norm": 6.50022554397583,
      "learning_rate": 3.5e-05,
      "loss": 2.393,
      "step": 45
    },
    {
      "epoch": 0.009461127108185932,
      "grad_norm": 13.534425735473633,
      "learning_rate": 3.454545454545455e-05,
      "loss": 1.0052,
      "step": 46
    },
    {
      "epoch": 0.009666803784450843,
      "grad_norm": 1.7201191186904907,
      "learning_rate": 3.409090909090909e-05,
      "loss": 0.0428,
      "step": 47
    },
    {
      "epoch": 0.009872480460715755,
      "grad_norm": 18.76580810546875,
      "learning_rate": 3.3636363636363636e-05,
      "loss": 0.8643,
      "step": 48
    },
    {
      "epoch": 0.010078157136980667,
      "grad_norm": 18.51691246032715,
      "learning_rate": 3.318181818181819e-05,
      "loss": 1.0954,
      "step": 49
    },
    {
      "epoch": 0.010283833813245578,
      "grad_norm": 8.370597839355469,
      "learning_rate": 3.272727272727273e-05,
      "loss": 0.6081,
      "step": 50
    },
    {
      "epoch": 0.01048951048951049,
      "grad_norm": 6.726954936981201,
      "learning_rate": 3.2272727272727276e-05,
      "loss": 0.0419,
      "step": 51
    },
    {
      "epoch": 0.0106951871657754,
      "grad_norm": 6.499707221984863,
      "learning_rate": 3.181818181818182e-05,
      "loss": 1.7456,
      "step": 52
    },
    {
      "epoch": 0.010900863842040313,
      "grad_norm": 17.723892211914062,
      "learning_rate": 3.1363636363636365e-05,
      "loss": 1.2133,
      "step": 53
    },
    {
      "epoch": 0.011106540518305225,
      "grad_norm": 7.051292419433594,
      "learning_rate": 3.090909090909091e-05,
      "loss": 0.2732,
      "step": 54
    },
    {
      "epoch": 0.011312217194570135,
      "grad_norm": 12.115779876708984,
      "learning_rate": 3.0454545454545456e-05,
      "loss": 2.4646,
      "step": 55
    },
    {
      "epoch": 0.011517893870835048,
      "grad_norm": 5.81415319442749,
      "learning_rate": 3e-05,
      "loss": 1.6826,
      "step": 56
    },
    {
      "epoch": 0.011723570547099958,
      "grad_norm": 13.005535125732422,
      "learning_rate": 2.954545454545455e-05,
      "loss": 0.8454,
      "step": 57
    },
    {
      "epoch": 0.01192924722336487,
      "grad_norm": 13.358834266662598,
      "learning_rate": 2.909090909090909e-05,
      "loss": 0.6585,
      "step": 58
    },
    {
      "epoch": 0.012134923899629783,
      "grad_norm": 7.232337474822998,
      "learning_rate": 2.863636363636364e-05,
      "loss": 2.0886,
      "step": 59
    },
    {
      "epoch": 0.012340600575894693,
      "grad_norm": 5.909549713134766,
      "learning_rate": 2.818181818181818e-05,
      "loss": 2.04,
      "step": 60
    },
    {
      "epoch": 0.012546277252159605,
      "grad_norm": 5.2378621101379395,
      "learning_rate": 2.772727272727273e-05,
      "loss": 1.3154,
      "step": 61
    },
    {
      "epoch": 0.012751953928424516,
      "grad_norm": 7.897792816162109,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 1.7168,
      "step": 62
    },
    {
      "epoch": 0.012957630604689428,
      "grad_norm": 10.026203155517578,
      "learning_rate": 2.681818181818182e-05,
      "loss": 2.1631,
      "step": 63
    },
    {
      "epoch": 0.01316330728095434,
      "grad_norm": 7.848910808563232,
      "learning_rate": 2.636363636363636e-05,
      "loss": 0.895,
      "step": 64
    },
    {
      "epoch": 0.013368983957219251,
      "grad_norm": 8.935349464416504,
      "learning_rate": 2.590909090909091e-05,
      "loss": 1.3377,
      "step": 65
    },
    {
      "epoch": 0.013574660633484163,
      "grad_norm": 12.838030815124512,
      "learning_rate": 2.5454545454545454e-05,
      "loss": 1.0923,
      "step": 66
    },
    {
      "epoch": 0.013780337309749074,
      "grad_norm": 11.543920516967773,
      "learning_rate": 2.5e-05,
      "loss": 1.8728,
      "step": 67
    },
    {
      "epoch": 0.013986013986013986,
      "grad_norm": 5.111774444580078,
      "learning_rate": 2.4545454545454545e-05,
      "loss": 1.4745,
      "step": 68
    },
    {
      "epoch": 0.014191690662278898,
      "grad_norm": 9.102482795715332,
      "learning_rate": 2.4090909090909093e-05,
      "loss": 0.5785,
      "step": 69
    },
    {
      "epoch": 0.014397367338543809,
      "grad_norm": 10.797809600830078,
      "learning_rate": 2.3636363636363637e-05,
      "loss": 1.5844,
      "step": 70
    },
    {
      "epoch": 0.014603044014808721,
      "grad_norm": 6.701333999633789,
      "learning_rate": 2.318181818181818e-05,
      "loss": 1.6503,
      "step": 71
    },
    {
      "epoch": 0.014808720691073632,
      "grad_norm": 8.514144897460938,
      "learning_rate": 2.272727272727273e-05,
      "loss": 2.0404,
      "step": 72
    },
    {
      "epoch": 0.015014397367338544,
      "grad_norm": 4.390872001647949,
      "learning_rate": 2.2272727272727274e-05,
      "loss": 1.3714,
      "step": 73
    },
    {
      "epoch": 0.015220074043603456,
      "grad_norm": 11.0691556930542,
      "learning_rate": 2.1818181818181818e-05,
      "loss": 1.9498,
      "step": 74
    },
    {
      "epoch": 0.015425750719868367,
      "grad_norm": 4.954442024230957,
      "learning_rate": 2.1363636363636362e-05,
      "loss": 1.5526,
      "step": 75
    },
    {
      "epoch": 0.01563142739613328,
      "grad_norm": 3.523308038711548,
      "learning_rate": 2.090909090909091e-05,
      "loss": 0.0933,
      "step": 76
    },
    {
      "epoch": 0.01583710407239819,
      "grad_norm": 7.044577121734619,
      "learning_rate": 2.0454545454545457e-05,
      "loss": 1.8968,
      "step": 77
    },
    {
      "epoch": 0.016042780748663103,
      "grad_norm": 12.184310913085938,
      "learning_rate": 2e-05,
      "loss": 1.9779,
      "step": 78
    },
    {
      "epoch": 0.016248457424928014,
      "grad_norm": 7.611854076385498,
      "learning_rate": 1.9545454545454546e-05,
      "loss": 1.1828,
      "step": 79
    },
    {
      "epoch": 0.016454134101192924,
      "grad_norm": 2.3979077339172363,
      "learning_rate": 1.9090909090909094e-05,
      "loss": 0.0637,
      "step": 80
    },
    {
      "epoch": 0.016659810777457835,
      "grad_norm": 7.704205513000488,
      "learning_rate": 1.8636363636363638e-05,
      "loss": 0.8134,
      "step": 81
    },
    {
      "epoch": 0.01686548745372275,
      "grad_norm": 5.452297210693359,
      "learning_rate": 1.8181818181818182e-05,
      "loss": 1.4938,
      "step": 82
    },
    {
      "epoch": 0.01707116412998766,
      "grad_norm": 6.996687889099121,
      "learning_rate": 1.772727272727273e-05,
      "loss": 0.8019,
      "step": 83
    },
    {
      "epoch": 0.01727684080625257,
      "grad_norm": 7.0274271965026855,
      "learning_rate": 1.7272727272727274e-05,
      "loss": 0.7567,
      "step": 84
    },
    {
      "epoch": 0.017482517482517484,
      "grad_norm": 14.325960159301758,
      "learning_rate": 1.6818181818181818e-05,
      "loss": 1.861,
      "step": 85
    },
    {
      "epoch": 0.017688194158782394,
      "grad_norm": 8.082893371582031,
      "learning_rate": 1.6363636363636366e-05,
      "loss": 1.7144,
      "step": 86
    },
    {
      "epoch": 0.017893870835047305,
      "grad_norm": 18.079805374145508,
      "learning_rate": 1.590909090909091e-05,
      "loss": 1.3443,
      "step": 87
    },
    {
      "epoch": 0.01809954751131222,
      "grad_norm": 7.730350971221924,
      "learning_rate": 1.5454545454545454e-05,
      "loss": 0.6243,
      "step": 88
    },
    {
      "epoch": 0.01830522418757713,
      "grad_norm": 11.749229431152344,
      "learning_rate": 1.5e-05,
      "loss": 0.9159,
      "step": 89
    },
    {
      "epoch": 0.01851090086384204,
      "grad_norm": 1.573517918586731,
      "learning_rate": 1.4545454545454545e-05,
      "loss": 0.0361,
      "step": 90
    },
    {
      "epoch": 0.01871657754010695,
      "grad_norm": 12.70760440826416,
      "learning_rate": 1.409090909090909e-05,
      "loss": 0.6055,
      "step": 91
    },
    {
      "epoch": 0.018922254216371864,
      "grad_norm": 8.807103157043457,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 1.4647,
      "step": 92
    },
    {
      "epoch": 0.019127930892636775,
      "grad_norm": 4.610854625701904,
      "learning_rate": 1.318181818181818e-05,
      "loss": 0.1759,
      "step": 93
    },
    {
      "epoch": 0.019333607568901685,
      "grad_norm": 4.804567813873291,
      "learning_rate": 1.2727272727272727e-05,
      "loss": 1.5768,
      "step": 94
    },
    {
      "epoch": 0.0195392842451666,
      "grad_norm": 8.914559364318848,
      "learning_rate": 1.2272727272727273e-05,
      "loss": 0.9042,
      "step": 95
    },
    {
      "epoch": 0.01974496092143151,
      "grad_norm": 10.30044174194336,
      "learning_rate": 1.1818181818181819e-05,
      "loss": 1.4115,
      "step": 96
    },
    {
      "epoch": 0.01995063759769642,
      "grad_norm": 1.7506386041641235,
      "learning_rate": 1.1363636363636365e-05,
      "loss": 0.0564,
      "step": 97
    },
    {
      "epoch": 0.020156314273961334,
      "grad_norm": 15.82219123840332,
      "learning_rate": 1.0909090909090909e-05,
      "loss": 1.3599,
      "step": 98
    },
    {
      "epoch": 0.020361990950226245,
      "grad_norm": 13.379084587097168,
      "learning_rate": 1.0454545454545455e-05,
      "loss": 0.6752,
      "step": 99
    },
    {
      "epoch": 0.020567667626491155,
      "grad_norm": 13.464095115661621,
      "learning_rate": 1e-05,
      "loss": 0.71,
      "step": 100
    },
    {
      "epoch": 0.020773344302756066,
      "grad_norm": 11.258218765258789,
      "learning_rate": 9.545454545454547e-06,
      "loss": 1.5134,
      "step": 101
    },
    {
      "epoch": 0.02097902097902098,
      "grad_norm": 6.195601463317871,
      "learning_rate": 9.090909090909091e-06,
      "loss": 1.9024,
      "step": 102
    },
    {
      "epoch": 0.02118469765528589,
      "grad_norm": 14.71764087677002,
      "learning_rate": 8.636363636363637e-06,
      "loss": 1.8721,
      "step": 103
    },
    {
      "epoch": 0.0213903743315508,
      "grad_norm": 18.410600662231445,
      "learning_rate": 8.181818181818183e-06,
      "loss": 0.9641,
      "step": 104
    },
    {
      "epoch": 0.021596051007815715,
      "grad_norm": 14.327963829040527,
      "learning_rate": 7.727272727272727e-06,
      "loss": 1.3635,
      "step": 105
    },
    {
      "epoch": 0.021801727684080625,
      "grad_norm": 9.677972793579102,
      "learning_rate": 7.272727272727272e-06,
      "loss": 1.1723,
      "step": 106
    },
    {
      "epoch": 0.022007404360345536,
      "grad_norm": 8.000631332397461,
      "learning_rate": 6.818181818181818e-06,
      "loss": 0.6521,
      "step": 107
    },
    {
      "epoch": 0.02221308103661045,
      "grad_norm": 1.116436243057251,
      "learning_rate": 6.363636363636363e-06,
      "loss": 0.0357,
      "step": 108
    },
    {
      "epoch": 0.02241875771287536,
      "grad_norm": 1.7491281032562256,
      "learning_rate": 5.909090909090909e-06,
      "loss": 0.0469,
      "step": 109
    },
    {
      "epoch": 0.02262443438914027,
      "grad_norm": 1.2441469430923462,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 0.0386,
      "step": 110
    },
    {
      "epoch": 0.02283011106540518,
      "grad_norm": 18.10247039794922,
      "learning_rate": 5e-06,
      "loss": 1.1133,
      "step": 111
    },
    {
      "epoch": 0.023035787741670095,
      "grad_norm": 8.674224853515625,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 1.4932,
      "step": 112
    },
    {
      "epoch": 0.023241464417935006,
      "grad_norm": 10.565869331359863,
      "learning_rate": 4.0909090909090915e-06,
      "loss": 0.2315,
      "step": 113
    },
    {
      "epoch": 0.023447141094199916,
      "grad_norm": 7.652951240539551,
      "learning_rate": 3.636363636363636e-06,
      "loss": 1.8242,
      "step": 114
    },
    {
      "epoch": 0.02365281777046483,
      "grad_norm": 5.98936653137207,
      "learning_rate": 3.1818181818181817e-06,
      "loss": 1.9984,
      "step": 115
    },
    {
      "epoch": 0.02385849444672974,
      "grad_norm": 9.790857315063477,
      "learning_rate": 2.7272727272727272e-06,
      "loss": 1.9085,
      "step": 116
    },
    {
      "epoch": 0.02406417112299465,
      "grad_norm": 1.22812020778656,
      "learning_rate": 2.2727272727272728e-06,
      "loss": 0.0346,
      "step": 117
    },
    {
      "epoch": 0.024269847799259565,
      "grad_norm": 14.80505657196045,
      "learning_rate": 1.818181818181818e-06,
      "loss": 1.1611,
      "step": 118
    },
    {
      "epoch": 0.024475524475524476,
      "grad_norm": 15.46601390838623,
      "learning_rate": 1.3636363636363636e-06,
      "loss": 2.3691,
      "step": 119
    },
    {
      "epoch": 0.024681201151789386,
      "grad_norm": 17.55849266052246,
      "learning_rate": 9.09090909090909e-07,
      "loss": 0.8722,
      "step": 120
    }
  ],
  "logging_steps": 1,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3875365448736768.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}