{ "best_metric": 1.5489531755447388, "best_model_checkpoint": "miner_id_24/checkpoint-200", "epoch": 0.2178649237472767, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0010893246187363835, "grad_norm": 0.8468479514122009, "learning_rate": 1.013e-05, "loss": 2.3662, "step": 1 }, { "epoch": 0.0010893246187363835, "eval_loss": 2.2146637439727783, "eval_runtime": 8.8247, "eval_samples_per_second": 43.854, "eval_steps_per_second": 10.992, "step": 1 }, { "epoch": 0.002178649237472767, "grad_norm": 0.8245108723640442, "learning_rate": 2.026e-05, "loss": 2.3291, "step": 2 }, { "epoch": 0.0032679738562091504, "grad_norm": 0.75188148021698, "learning_rate": 3.039e-05, "loss": 2.3472, "step": 3 }, { "epoch": 0.004357298474945534, "grad_norm": 0.7816623449325562, "learning_rate": 4.052e-05, "loss": 2.1797, "step": 4 }, { "epoch": 0.0054466230936819175, "grad_norm": 0.8955801129341125, "learning_rate": 5.065e-05, "loss": 2.3615, "step": 5 }, { "epoch": 0.006535947712418301, "grad_norm": 0.9104874134063721, "learning_rate": 6.078e-05, "loss": 2.3277, "step": 6 }, { "epoch": 0.007625272331154684, "grad_norm": 0.7353428602218628, "learning_rate": 7.091e-05, "loss": 2.1145, "step": 7 }, { "epoch": 0.008714596949891068, "grad_norm": 0.715476393699646, "learning_rate": 8.104e-05, "loss": 2.1865, "step": 8 }, { "epoch": 0.00980392156862745, "grad_norm": 0.7417934536933899, "learning_rate": 9.117e-05, "loss": 2.1959, "step": 9 }, { "epoch": 0.010893246187363835, "grad_norm": 0.6690745949745178, "learning_rate": 0.0001013, "loss": 1.985, "step": 10 }, { "epoch": 0.011982570806100218, "grad_norm": 0.699161171913147, "learning_rate": 0.00010076684210526316, "loss": 2.0609, "step": 11 }, { "epoch": 0.013071895424836602, "grad_norm": 0.670540452003479, "learning_rate": 0.0001002336842105263, "loss": 2.1343, "step": 12 }, { "epoch": 0.014161220043572984, "grad_norm": 0.6809763312339783, "learning_rate": 9.970052631578946e-05, "loss": 2.154, "step": 13 }, { "epoch": 0.015250544662309368, "grad_norm": 0.737635612487793, "learning_rate": 9.916736842105263e-05, "loss": 2.0001, "step": 14 }, { "epoch": 0.016339869281045753, "grad_norm": 0.7703385949134827, "learning_rate": 9.863421052631579e-05, "loss": 2.0937, "step": 15 }, { "epoch": 0.017429193899782137, "grad_norm": 0.6536821126937866, "learning_rate": 9.810105263157895e-05, "loss": 2.0618, "step": 16 }, { "epoch": 0.018518518518518517, "grad_norm": 0.6938826441764832, "learning_rate": 9.756789473684211e-05, "loss": 1.7985, "step": 17 }, { "epoch": 0.0196078431372549, "grad_norm": 0.680492639541626, "learning_rate": 9.703473684210525e-05, "loss": 1.8766, "step": 18 }, { "epoch": 0.020697167755991286, "grad_norm": 0.6917757391929626, "learning_rate": 9.650157894736842e-05, "loss": 1.7646, "step": 19 }, { "epoch": 0.02178649237472767, "grad_norm": 0.7513395547866821, "learning_rate": 9.596842105263158e-05, "loss": 1.9604, "step": 20 }, { "epoch": 0.02287581699346405, "grad_norm": 0.878359854221344, "learning_rate": 9.543526315789474e-05, "loss": 2.0287, "step": 21 }, { "epoch": 0.023965141612200435, "grad_norm": 0.8792089223861694, "learning_rate": 9.49021052631579e-05, "loss": 1.7934, "step": 22 }, { "epoch": 0.02505446623093682, "grad_norm": 0.7932198643684387, "learning_rate": 9.436894736842105e-05, "loss": 1.9981, "step": 23 }, { "epoch": 0.026143790849673203, "grad_norm": 0.698189914226532, "learning_rate": 9.38357894736842e-05, "loss": 1.6964, "step": 24 }, { "epoch": 0.027233115468409588, "grad_norm": 0.7018634080886841, "learning_rate": 9.330263157894737e-05, "loss": 1.564, "step": 25 }, { "epoch": 0.02832244008714597, "grad_norm": 0.6993752717971802, "learning_rate": 9.276947368421051e-05, "loss": 1.7264, "step": 26 }, { "epoch": 0.029411764705882353, "grad_norm": 0.7661710977554321, "learning_rate": 9.223631578947369e-05, "loss": 1.9146, "step": 27 }, { "epoch": 0.030501089324618737, "grad_norm": 0.762957751750946, "learning_rate": 9.170315789473684e-05, "loss": 1.9995, "step": 28 }, { "epoch": 0.03159041394335512, "grad_norm": 0.7237067222595215, "learning_rate": 9.117e-05, "loss": 1.7293, "step": 29 }, { "epoch": 0.032679738562091505, "grad_norm": 0.7281014323234558, "learning_rate": 9.063684210526316e-05, "loss": 1.7677, "step": 30 }, { "epoch": 0.03376906318082789, "grad_norm": 0.6990227699279785, "learning_rate": 9.010368421052632e-05, "loss": 1.8624, "step": 31 }, { "epoch": 0.034858387799564274, "grad_norm": 0.7196266055107117, "learning_rate": 8.957052631578946e-05, "loss": 1.9624, "step": 32 }, { "epoch": 0.03594771241830065, "grad_norm": 0.7386839389801025, "learning_rate": 8.903736842105263e-05, "loss": 1.9444, "step": 33 }, { "epoch": 0.037037037037037035, "grad_norm": 0.7023281455039978, "learning_rate": 8.850421052631579e-05, "loss": 1.6355, "step": 34 }, { "epoch": 0.03812636165577342, "grad_norm": 0.6985993981361389, "learning_rate": 8.797105263157895e-05, "loss": 1.6566, "step": 35 }, { "epoch": 0.0392156862745098, "grad_norm": 0.7002662420272827, "learning_rate": 8.743789473684211e-05, "loss": 1.7767, "step": 36 }, { "epoch": 0.04030501089324619, "grad_norm": 0.7869582772254944, "learning_rate": 8.690473684210526e-05, "loss": 1.7869, "step": 37 }, { "epoch": 0.04139433551198257, "grad_norm": 0.6894192695617676, "learning_rate": 8.637157894736842e-05, "loss": 1.7435, "step": 38 }, { "epoch": 0.042483660130718956, "grad_norm": 0.6954795122146606, "learning_rate": 8.583842105263158e-05, "loss": 1.9205, "step": 39 }, { "epoch": 0.04357298474945534, "grad_norm": 0.7487967014312744, "learning_rate": 8.530526315789472e-05, "loss": 1.6169, "step": 40 }, { "epoch": 0.044662309368191724, "grad_norm": 0.7093148231506348, "learning_rate": 8.47721052631579e-05, "loss": 1.6094, "step": 41 }, { "epoch": 0.0457516339869281, "grad_norm": 0.7556299567222595, "learning_rate": 8.423894736842105e-05, "loss": 1.8219, "step": 42 }, { "epoch": 0.046840958605664486, "grad_norm": 0.7483210563659668, "learning_rate": 8.37057894736842e-05, "loss": 1.8317, "step": 43 }, { "epoch": 0.04793028322440087, "grad_norm": 0.7007085084915161, "learning_rate": 8.317263157894737e-05, "loss": 1.6541, "step": 44 }, { "epoch": 0.049019607843137254, "grad_norm": 0.718443751335144, "learning_rate": 8.263947368421053e-05, "loss": 1.6292, "step": 45 }, { "epoch": 0.05010893246187364, "grad_norm": 0.7827504277229309, "learning_rate": 8.210631578947368e-05, "loss": 1.8549, "step": 46 }, { "epoch": 0.05119825708061002, "grad_norm": 0.7425017952919006, "learning_rate": 8.157315789473684e-05, "loss": 1.7507, "step": 47 }, { "epoch": 0.05228758169934641, "grad_norm": 0.8017943501472473, "learning_rate": 8.104e-05, "loss": 1.5472, "step": 48 }, { "epoch": 0.05337690631808279, "grad_norm": 0.7863806486129761, "learning_rate": 8.050684210526316e-05, "loss": 1.8109, "step": 49 }, { "epoch": 0.054466230936819175, "grad_norm": 0.8114119172096252, "learning_rate": 7.997368421052632e-05, "loss": 1.9256, "step": 50 }, { "epoch": 0.054466230936819175, "eval_loss": 1.6874905824661255, "eval_runtime": 8.8711, "eval_samples_per_second": 43.625, "eval_steps_per_second": 10.934, "step": 50 }, { "epoch": 0.05555555555555555, "grad_norm": 0.7554138898849487, "learning_rate": 7.944052631578947e-05, "loss": 1.7389, "step": 51 }, { "epoch": 0.05664488017429194, "grad_norm": 0.7589044570922852, "learning_rate": 7.890736842105263e-05, "loss": 1.8938, "step": 52 }, { "epoch": 0.05773420479302832, "grad_norm": 0.7669321894645691, "learning_rate": 7.837421052631579e-05, "loss": 1.7453, "step": 53 }, { "epoch": 0.058823529411764705, "grad_norm": 0.7059975266456604, "learning_rate": 7.784105263157893e-05, "loss": 1.8318, "step": 54 }, { "epoch": 0.05991285403050109, "grad_norm": 0.6577880382537842, "learning_rate": 7.730789473684211e-05, "loss": 1.7475, "step": 55 }, { "epoch": 0.06100217864923747, "grad_norm": 0.66456538438797, "learning_rate": 7.677473684210526e-05, "loss": 1.4203, "step": 56 }, { "epoch": 0.06209150326797386, "grad_norm": 0.6937424540519714, "learning_rate": 7.624157894736842e-05, "loss": 1.7745, "step": 57 }, { "epoch": 0.06318082788671024, "grad_norm": 0.7721080183982849, "learning_rate": 7.570842105263158e-05, "loss": 1.6768, "step": 58 }, { "epoch": 0.06427015250544663, "grad_norm": 0.7601541876792908, "learning_rate": 7.517526315789474e-05, "loss": 1.8961, "step": 59 }, { "epoch": 0.06535947712418301, "grad_norm": 0.7114940881729126, "learning_rate": 7.464210526315789e-05, "loss": 1.6703, "step": 60 }, { "epoch": 0.0664488017429194, "grad_norm": 0.6319119334220886, "learning_rate": 7.410894736842106e-05, "loss": 1.6744, "step": 61 }, { "epoch": 0.06753812636165578, "grad_norm": 0.6452541947364807, "learning_rate": 7.35757894736842e-05, "loss": 1.5781, "step": 62 }, { "epoch": 0.06862745098039216, "grad_norm": 0.6736257672309875, "learning_rate": 7.304263157894737e-05, "loss": 1.6831, "step": 63 }, { "epoch": 0.06971677559912855, "grad_norm": 0.6553575992584229, "learning_rate": 7.250947368421053e-05, "loss": 1.5707, "step": 64 }, { "epoch": 0.07080610021786492, "grad_norm": 0.7237080335617065, "learning_rate": 7.197631578947368e-05, "loss": 1.6452, "step": 65 }, { "epoch": 0.0718954248366013, "grad_norm": 0.6965909004211426, "learning_rate": 7.144315789473684e-05, "loss": 1.647, "step": 66 }, { "epoch": 0.07298474945533769, "grad_norm": 0.7979607582092285, "learning_rate": 7.091e-05, "loss": 1.87, "step": 67 }, { "epoch": 0.07407407407407407, "grad_norm": 0.6893535256385803, "learning_rate": 7.037684210526316e-05, "loss": 1.7528, "step": 68 }, { "epoch": 0.07516339869281045, "grad_norm": 0.7276719808578491, "learning_rate": 6.984368421052632e-05, "loss": 1.8107, "step": 69 }, { "epoch": 0.07625272331154684, "grad_norm": 0.6999695301055908, "learning_rate": 6.931052631578947e-05, "loss": 1.6743, "step": 70 }, { "epoch": 0.07734204793028322, "grad_norm": 0.623289167881012, "learning_rate": 6.877736842105263e-05, "loss": 1.4726, "step": 71 }, { "epoch": 0.0784313725490196, "grad_norm": 0.6653074026107788, "learning_rate": 6.824421052631579e-05, "loss": 1.5267, "step": 72 }, { "epoch": 0.07952069716775599, "grad_norm": 0.7170076370239258, "learning_rate": 6.771105263157895e-05, "loss": 1.8484, "step": 73 }, { "epoch": 0.08061002178649238, "grad_norm": 0.7597705721855164, "learning_rate": 6.71778947368421e-05, "loss": 1.7838, "step": 74 }, { "epoch": 0.08169934640522876, "grad_norm": 0.6835247278213501, "learning_rate": 6.664473684210527e-05, "loss": 1.5718, "step": 75 }, { "epoch": 0.08278867102396514, "grad_norm": 0.6559690237045288, "learning_rate": 6.611157894736842e-05, "loss": 1.4839, "step": 76 }, { "epoch": 0.08387799564270153, "grad_norm": 0.7687884569168091, "learning_rate": 6.557842105263158e-05, "loss": 1.7473, "step": 77 }, { "epoch": 0.08496732026143791, "grad_norm": 0.6864319443702698, "learning_rate": 6.504526315789474e-05, "loss": 1.6012, "step": 78 }, { "epoch": 0.0860566448801743, "grad_norm": 0.7399160861968994, "learning_rate": 6.451210526315789e-05, "loss": 1.4372, "step": 79 }, { "epoch": 0.08714596949891068, "grad_norm": 0.6669185161590576, "learning_rate": 6.397894736842105e-05, "loss": 1.5907, "step": 80 }, { "epoch": 0.08823529411764706, "grad_norm": 0.7020127177238464, "learning_rate": 6.344578947368421e-05, "loss": 1.6716, "step": 81 }, { "epoch": 0.08932461873638345, "grad_norm": 0.7354124188423157, "learning_rate": 6.291263157894737e-05, "loss": 1.704, "step": 82 }, { "epoch": 0.09041394335511982, "grad_norm": 0.6661585569381714, "learning_rate": 6.237947368421053e-05, "loss": 1.6447, "step": 83 }, { "epoch": 0.0915032679738562, "grad_norm": 0.7644115686416626, "learning_rate": 6.184631578947368e-05, "loss": 1.5358, "step": 84 }, { "epoch": 0.09259259259259259, "grad_norm": 0.8438684344291687, "learning_rate": 6.131315789473684e-05, "loss": 1.5447, "step": 85 }, { "epoch": 0.09368191721132897, "grad_norm": 0.7569748759269714, "learning_rate": 6.078e-05, "loss": 1.8301, "step": 86 }, { "epoch": 0.09477124183006536, "grad_norm": 0.835098147392273, "learning_rate": 6.024684210526315e-05, "loss": 1.8157, "step": 87 }, { "epoch": 0.09586056644880174, "grad_norm": 0.6884233355522156, "learning_rate": 5.9713684210526305e-05, "loss": 1.671, "step": 88 }, { "epoch": 0.09694989106753812, "grad_norm": 0.7727047204971313, "learning_rate": 5.918052631578947e-05, "loss": 1.6081, "step": 89 }, { "epoch": 0.09803921568627451, "grad_norm": 0.7228268980979919, "learning_rate": 5.8647368421052634e-05, "loss": 1.7695, "step": 90 }, { "epoch": 0.09912854030501089, "grad_norm": 0.7163753509521484, "learning_rate": 5.811421052631579e-05, "loss": 1.5363, "step": 91 }, { "epoch": 0.10021786492374728, "grad_norm": 0.7386108040809631, "learning_rate": 5.758105263157894e-05, "loss": 1.6602, "step": 92 }, { "epoch": 0.10130718954248366, "grad_norm": 0.7105979919433594, "learning_rate": 5.70478947368421e-05, "loss": 1.6921, "step": 93 }, { "epoch": 0.10239651416122005, "grad_norm": 0.73846834897995, "learning_rate": 5.6514736842105256e-05, "loss": 1.6722, "step": 94 }, { "epoch": 0.10348583877995643, "grad_norm": 0.8133267164230347, "learning_rate": 5.5981578947368424e-05, "loss": 1.7563, "step": 95 }, { "epoch": 0.10457516339869281, "grad_norm": 0.7420879602432251, "learning_rate": 5.544842105263158e-05, "loss": 1.7493, "step": 96 }, { "epoch": 0.1056644880174292, "grad_norm": 0.7559931874275208, "learning_rate": 5.491526315789474e-05, "loss": 1.4191, "step": 97 }, { "epoch": 0.10675381263616558, "grad_norm": 0.8215981125831604, "learning_rate": 5.438210526315789e-05, "loss": 1.5849, "step": 98 }, { "epoch": 0.10784313725490197, "grad_norm": 0.8385877013206482, "learning_rate": 5.384894736842105e-05, "loss": 1.7516, "step": 99 }, { "epoch": 0.10893246187363835, "grad_norm": 0.8303121328353882, "learning_rate": 5.331578947368421e-05, "loss": 1.6459, "step": 100 }, { "epoch": 0.10893246187363835, "eval_loss": 1.5992034673690796, "eval_runtime": 8.8309, "eval_samples_per_second": 43.823, "eval_steps_per_second": 10.984, "step": 100 }, { "epoch": 0.11002178649237472, "grad_norm": 0.6450762152671814, "learning_rate": 5.278263157894736e-05, "loss": 1.5434, "step": 101 }, { "epoch": 0.1111111111111111, "grad_norm": 0.7612766623497009, "learning_rate": 5.224947368421053e-05, "loss": 1.5387, "step": 102 }, { "epoch": 0.11220043572984749, "grad_norm": 0.6882017254829407, "learning_rate": 5.171631578947368e-05, "loss": 1.7437, "step": 103 }, { "epoch": 0.11328976034858387, "grad_norm": 0.7027923464775085, "learning_rate": 5.1183157894736844e-05, "loss": 1.5566, "step": 104 }, { "epoch": 0.11437908496732026, "grad_norm": 0.675394594669342, "learning_rate": 5.065e-05, "loss": 1.6142, "step": 105 }, { "epoch": 0.11546840958605664, "grad_norm": 0.6687760949134827, "learning_rate": 5.011684210526315e-05, "loss": 1.614, "step": 106 }, { "epoch": 0.11655773420479303, "grad_norm": 0.6839330792427063, "learning_rate": 4.958368421052631e-05, "loss": 1.6976, "step": 107 }, { "epoch": 0.11764705882352941, "grad_norm": 0.7157800197601318, "learning_rate": 4.9050526315789473e-05, "loss": 1.6307, "step": 108 }, { "epoch": 0.1187363834422658, "grad_norm": 0.6894105672836304, "learning_rate": 4.851736842105263e-05, "loss": 1.6422, "step": 109 }, { "epoch": 0.11982570806100218, "grad_norm": 0.731292188167572, "learning_rate": 4.798421052631579e-05, "loss": 1.5619, "step": 110 }, { "epoch": 0.12091503267973856, "grad_norm": 0.6677466630935669, "learning_rate": 4.745105263157895e-05, "loss": 1.6484, "step": 111 }, { "epoch": 0.12200435729847495, "grad_norm": 0.6295744776725769, "learning_rate": 4.69178947368421e-05, "loss": 1.4522, "step": 112 }, { "epoch": 0.12309368191721133, "grad_norm": 0.6922392249107361, "learning_rate": 4.638473684210526e-05, "loss": 1.5718, "step": 113 }, { "epoch": 0.12418300653594772, "grad_norm": 0.6795622110366821, "learning_rate": 4.585157894736842e-05, "loss": 1.583, "step": 114 }, { "epoch": 0.12527233115468409, "grad_norm": 0.7639903426170349, "learning_rate": 4.531842105263158e-05, "loss": 1.6608, "step": 115 }, { "epoch": 0.12636165577342048, "grad_norm": 0.7626394629478455, "learning_rate": 4.478526315789473e-05, "loss": 1.5329, "step": 116 }, { "epoch": 0.12745098039215685, "grad_norm": 0.7293768525123596, "learning_rate": 4.425210526315789e-05, "loss": 1.8416, "step": 117 }, { "epoch": 0.12854030501089325, "grad_norm": 0.7011256217956543, "learning_rate": 4.3718947368421054e-05, "loss": 1.5256, "step": 118 }, { "epoch": 0.12962962962962962, "grad_norm": 0.6665348410606384, "learning_rate": 4.318578947368421e-05, "loss": 1.5852, "step": 119 }, { "epoch": 0.13071895424836602, "grad_norm": 0.8003459572792053, "learning_rate": 4.265263157894736e-05, "loss": 1.6383, "step": 120 }, { "epoch": 0.1318082788671024, "grad_norm": 0.6996583938598633, "learning_rate": 4.211947368421052e-05, "loss": 1.5461, "step": 121 }, { "epoch": 0.1328976034858388, "grad_norm": 0.6799657940864563, "learning_rate": 4.1586315789473684e-05, "loss": 1.3618, "step": 122 }, { "epoch": 0.13398692810457516, "grad_norm": 0.7959406971931458, "learning_rate": 4.105315789473684e-05, "loss": 1.7334, "step": 123 }, { "epoch": 0.13507625272331156, "grad_norm": 0.7717329859733582, "learning_rate": 4.052e-05, "loss": 1.6222, "step": 124 }, { "epoch": 0.13616557734204793, "grad_norm": 0.8085086941719055, "learning_rate": 3.998684210526316e-05, "loss": 1.5135, "step": 125 }, { "epoch": 0.13725490196078433, "grad_norm": 0.7387843132019043, "learning_rate": 3.945368421052631e-05, "loss": 1.6988, "step": 126 }, { "epoch": 0.1383442265795207, "grad_norm": 0.6821923851966858, "learning_rate": 3.892052631578947e-05, "loss": 1.4993, "step": 127 }, { "epoch": 0.1394335511982571, "grad_norm": 0.8134518265724182, "learning_rate": 3.838736842105263e-05, "loss": 1.7561, "step": 128 }, { "epoch": 0.14052287581699346, "grad_norm": 0.722003698348999, "learning_rate": 3.785421052631579e-05, "loss": 1.5946, "step": 129 }, { "epoch": 0.14161220043572983, "grad_norm": 0.7371602058410645, "learning_rate": 3.732105263157894e-05, "loss": 1.8073, "step": 130 }, { "epoch": 0.14270152505446623, "grad_norm": 0.6882696747779846, "learning_rate": 3.67878947368421e-05, "loss": 1.5664, "step": 131 }, { "epoch": 0.1437908496732026, "grad_norm": 0.6685984134674072, "learning_rate": 3.6254736842105264e-05, "loss": 1.5766, "step": 132 }, { "epoch": 0.144880174291939, "grad_norm": 0.7375728487968445, "learning_rate": 3.572157894736842e-05, "loss": 1.7014, "step": 133 }, { "epoch": 0.14596949891067537, "grad_norm": 0.7775611281394958, "learning_rate": 3.518842105263158e-05, "loss": 1.5063, "step": 134 }, { "epoch": 0.14705882352941177, "grad_norm": 0.7103753089904785, "learning_rate": 3.465526315789473e-05, "loss": 1.4402, "step": 135 }, { "epoch": 0.14814814814814814, "grad_norm": 0.6593254208564758, "learning_rate": 3.4122105263157894e-05, "loss": 1.577, "step": 136 }, { "epoch": 0.14923747276688454, "grad_norm": 0.7314425110816956, "learning_rate": 3.358894736842105e-05, "loss": 1.5287, "step": 137 }, { "epoch": 0.1503267973856209, "grad_norm": 0.6922585368156433, "learning_rate": 3.305578947368421e-05, "loss": 1.5804, "step": 138 }, { "epoch": 0.1514161220043573, "grad_norm": 0.7198253273963928, "learning_rate": 3.252263157894737e-05, "loss": 1.4271, "step": 139 }, { "epoch": 0.15250544662309368, "grad_norm": 0.7590131759643555, "learning_rate": 3.198947368421052e-05, "loss": 1.5776, "step": 140 }, { "epoch": 0.15359477124183007, "grad_norm": 0.7425817251205444, "learning_rate": 3.1456315789473684e-05, "loss": 1.6903, "step": 141 }, { "epoch": 0.15468409586056645, "grad_norm": 0.7869381308555603, "learning_rate": 3.092315789473684e-05, "loss": 1.6573, "step": 142 }, { "epoch": 0.15577342047930284, "grad_norm": 0.6935414671897888, "learning_rate": 3.039e-05, "loss": 1.411, "step": 143 }, { "epoch": 0.1568627450980392, "grad_norm": 0.7712908387184143, "learning_rate": 2.9856842105263153e-05, "loss": 1.4406, "step": 144 }, { "epoch": 0.1579520697167756, "grad_norm": 0.8041695952415466, "learning_rate": 2.9323684210526317e-05, "loss": 1.6472, "step": 145 }, { "epoch": 0.15904139433551198, "grad_norm": 0.7929959893226624, "learning_rate": 2.879052631578947e-05, "loss": 1.6822, "step": 146 }, { "epoch": 0.16013071895424835, "grad_norm": 0.7389121055603027, "learning_rate": 2.8257368421052628e-05, "loss": 1.4642, "step": 147 }, { "epoch": 0.16122004357298475, "grad_norm": 0.7483341693878174, "learning_rate": 2.772421052631579e-05, "loss": 1.7301, "step": 148 }, { "epoch": 0.16230936819172112, "grad_norm": 0.7628040313720703, "learning_rate": 2.7191052631578946e-05, "loss": 1.3729, "step": 149 }, { "epoch": 0.16339869281045752, "grad_norm": 1.0039489269256592, "learning_rate": 2.6657894736842104e-05, "loss": 1.8224, "step": 150 }, { "epoch": 0.16339869281045752, "eval_loss": 1.5608174800872803, "eval_runtime": 8.9952, "eval_samples_per_second": 43.023, "eval_steps_per_second": 10.783, "step": 150 }, { "epoch": 0.1644880174291939, "grad_norm": 0.7176492214202881, "learning_rate": 2.6124736842105265e-05, "loss": 1.5574, "step": 151 }, { "epoch": 0.1655773420479303, "grad_norm": 0.7583156228065491, "learning_rate": 2.5591578947368422e-05, "loss": 1.6524, "step": 152 }, { "epoch": 0.16666666666666666, "grad_norm": 0.6708285212516785, "learning_rate": 2.5058421052631576e-05, "loss": 1.5471, "step": 153 }, { "epoch": 0.16775599128540306, "grad_norm": 0.7624933123588562, "learning_rate": 2.4525263157894737e-05, "loss": 1.5342, "step": 154 }, { "epoch": 0.16884531590413943, "grad_norm": 0.7120580673217773, "learning_rate": 2.3992105263157894e-05, "loss": 1.6989, "step": 155 }, { "epoch": 0.16993464052287582, "grad_norm": 0.7226291298866272, "learning_rate": 2.345894736842105e-05, "loss": 1.6025, "step": 156 }, { "epoch": 0.1710239651416122, "grad_norm": 0.7568950653076172, "learning_rate": 2.292578947368421e-05, "loss": 1.655, "step": 157 }, { "epoch": 0.1721132897603486, "grad_norm": 0.7114288210868835, "learning_rate": 2.2392631578947366e-05, "loss": 1.3329, "step": 158 }, { "epoch": 0.17320261437908496, "grad_norm": 0.6590518355369568, "learning_rate": 2.1859473684210527e-05, "loss": 1.4937, "step": 159 }, { "epoch": 0.17429193899782136, "grad_norm": 0.7094108462333679, "learning_rate": 2.132631578947368e-05, "loss": 1.6263, "step": 160 }, { "epoch": 0.17538126361655773, "grad_norm": 0.6764444708824158, "learning_rate": 2.0793157894736842e-05, "loss": 1.4824, "step": 161 }, { "epoch": 0.17647058823529413, "grad_norm": 0.76336669921875, "learning_rate": 2.026e-05, "loss": 1.7773, "step": 162 }, { "epoch": 0.1775599128540305, "grad_norm": 0.6926636099815369, "learning_rate": 1.9726842105263157e-05, "loss": 1.408, "step": 163 }, { "epoch": 0.1786492374727669, "grad_norm": 0.727808952331543, "learning_rate": 1.9193684210526314e-05, "loss": 1.5192, "step": 164 }, { "epoch": 0.17973856209150327, "grad_norm": 0.7736932635307312, "learning_rate": 1.866052631578947e-05, "loss": 1.6246, "step": 165 }, { "epoch": 0.18082788671023964, "grad_norm": 0.7523664832115173, "learning_rate": 1.8127368421052632e-05, "loss": 1.7334, "step": 166 }, { "epoch": 0.18191721132897604, "grad_norm": 0.7081722021102905, "learning_rate": 1.759421052631579e-05, "loss": 1.5646, "step": 167 }, { "epoch": 0.1830065359477124, "grad_norm": 0.7327615022659302, "learning_rate": 1.7061052631578947e-05, "loss": 1.6238, "step": 168 }, { "epoch": 0.1840958605664488, "grad_norm": 0.7988473773002625, "learning_rate": 1.6527894736842104e-05, "loss": 1.9429, "step": 169 }, { "epoch": 0.18518518518518517, "grad_norm": 0.7013329267501831, "learning_rate": 1.599473684210526e-05, "loss": 1.5473, "step": 170 }, { "epoch": 0.18627450980392157, "grad_norm": 0.8148373961448669, "learning_rate": 1.546157894736842e-05, "loss": 1.6869, "step": 171 }, { "epoch": 0.18736383442265794, "grad_norm": 0.7735821604728699, "learning_rate": 1.4928421052631576e-05, "loss": 1.4732, "step": 172 }, { "epoch": 0.18845315904139434, "grad_norm": 0.7189416885375977, "learning_rate": 1.4395263157894735e-05, "loss": 1.5656, "step": 173 }, { "epoch": 0.1895424836601307, "grad_norm": 0.7622708082199097, "learning_rate": 1.3862105263157895e-05, "loss": 1.6252, "step": 174 }, { "epoch": 0.1906318082788671, "grad_norm": 0.7497777342796326, "learning_rate": 1.3328947368421052e-05, "loss": 1.4933, "step": 175 }, { "epoch": 0.19172113289760348, "grad_norm": 0.7765294313430786, "learning_rate": 1.2795789473684211e-05, "loss": 1.757, "step": 176 }, { "epoch": 0.19281045751633988, "grad_norm": 0.8528848886489868, "learning_rate": 1.2262631578947368e-05, "loss": 1.7598, "step": 177 }, { "epoch": 0.19389978213507625, "grad_norm": 0.7356722950935364, "learning_rate": 1.1729473684210526e-05, "loss": 1.4556, "step": 178 }, { "epoch": 0.19498910675381265, "grad_norm": 0.7138367295265198, "learning_rate": 1.1196315789473683e-05, "loss": 1.6086, "step": 179 }, { "epoch": 0.19607843137254902, "grad_norm": 0.6896017789840698, "learning_rate": 1.066315789473684e-05, "loss": 1.6123, "step": 180 }, { "epoch": 0.19716775599128541, "grad_norm": 0.7540486454963684, "learning_rate": 1.013e-05, "loss": 1.6241, "step": 181 }, { "epoch": 0.19825708061002179, "grad_norm": 0.7777424454689026, "learning_rate": 9.596842105263157e-06, "loss": 1.6396, "step": 182 }, { "epoch": 0.19934640522875818, "grad_norm": 0.7179080247879028, "learning_rate": 9.063684210526316e-06, "loss": 1.6244, "step": 183 }, { "epoch": 0.20043572984749455, "grad_norm": 0.7581405639648438, "learning_rate": 8.530526315789473e-06, "loss": 1.5821, "step": 184 }, { "epoch": 0.20152505446623092, "grad_norm": 0.7708899974822998, "learning_rate": 7.99736842105263e-06, "loss": 1.564, "step": 185 }, { "epoch": 0.20261437908496732, "grad_norm": 0.785279393196106, "learning_rate": 7.464210526315788e-06, "loss": 1.3969, "step": 186 }, { "epoch": 0.2037037037037037, "grad_norm": 0.8054214119911194, "learning_rate": 6.931052631578947e-06, "loss": 1.809, "step": 187 }, { "epoch": 0.2047930283224401, "grad_norm": 0.8063158392906189, "learning_rate": 6.3978947368421055e-06, "loss": 1.7511, "step": 188 }, { "epoch": 0.20588235294117646, "grad_norm": 0.779520571231842, "learning_rate": 5.864736842105263e-06, "loss": 1.5878, "step": 189 }, { "epoch": 0.20697167755991286, "grad_norm": 0.7568636536598206, "learning_rate": 5.33157894736842e-06, "loss": 1.4977, "step": 190 }, { "epoch": 0.20806100217864923, "grad_norm": 0.7863628268241882, "learning_rate": 4.7984210526315785e-06, "loss": 1.6876, "step": 191 }, { "epoch": 0.20915032679738563, "grad_norm": 0.746377170085907, "learning_rate": 4.265263157894737e-06, "loss": 1.611, "step": 192 }, { "epoch": 0.210239651416122, "grad_norm": 0.673345685005188, "learning_rate": 3.732105263157894e-06, "loss": 1.4457, "step": 193 }, { "epoch": 0.2113289760348584, "grad_norm": 0.818687379360199, "learning_rate": 3.1989473684210527e-06, "loss": 1.473, "step": 194 }, { "epoch": 0.21241830065359477, "grad_norm": 0.7401917576789856, "learning_rate": 2.66578947368421e-06, "loss": 1.337, "step": 195 }, { "epoch": 0.21350762527233116, "grad_norm": 0.8702475428581238, "learning_rate": 2.1326315789473684e-06, "loss": 1.599, "step": 196 }, { "epoch": 0.21459694989106753, "grad_norm": 0.9154536724090576, "learning_rate": 1.5994736842105264e-06, "loss": 1.4283, "step": 197 }, { "epoch": 0.21568627450980393, "grad_norm": 0.8432170152664185, "learning_rate": 1.0663157894736842e-06, "loss": 1.6127, "step": 198 }, { "epoch": 0.2167755991285403, "grad_norm": 0.8510313630104065, "learning_rate": 5.331578947368421e-07, "loss": 1.6107, "step": 199 }, { "epoch": 0.2178649237472767, "grad_norm": 0.9831571578979492, "learning_rate": 0.0, "loss": 1.6545, "step": 200 }, { "epoch": 0.2178649237472767, "eval_loss": 1.5489531755447388, "eval_runtime": 8.8028, "eval_samples_per_second": 43.964, "eval_steps_per_second": 11.019, "step": 200 } ], "logging_steps": 1, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1248448902856704.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }