{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 1, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 52.25, "learning_rate": 2.5e-05, "loss": 1.2424, "step": 1 }, { "epoch": 0.016, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5754985754985755, "eval_f1_m": 0.30537274516866364, "eval_loss": 1.8040205240249634, "eval_runtime": 12.5246, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 1 }, { "epoch": 0.032, "grad_norm": 74.5, "learning_rate": 5e-05, "loss": 1.5616, "step": 2 }, { "epoch": 0.032, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5754985754985755, "eval_f1_m": 0.30537274516866364, "eval_loss": 1.6737734079360962, "eval_runtime": 12.5202, "eval_samples_per_second": 19.968, "eval_steps_per_second": 2.556, "step": 2 }, { "epoch": 0.048, "grad_norm": 81.5, "learning_rate": 4.959016393442623e-05, "loss": 1.9327, "step": 3 }, { "epoch": 0.048, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5798816568047337, "eval_f1_m": 0.3293744237621789, "eval_loss": 1.0940678119659424, "eval_runtime": 12.4172, "eval_samples_per_second": 20.133, "eval_steps_per_second": 2.577, "step": 3 }, { "epoch": 0.064, "grad_norm": 24.5, "learning_rate": 4.918032786885246e-05, "loss": 0.7502, "step": 4 }, { "epoch": 0.064, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.326797385620915, "eval_f1_m": 0.40487871139131637, "eval_loss": 0.7598310708999634, "eval_runtime": 12.5191, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.556, "step": 4 }, { "epoch": 0.08, "grad_norm": 43.5, "learning_rate": 4.8770491803278687e-05, "loss": 1.2439, "step": 5 }, { "epoch": 0.08, "eval_exact_match": 0.0, "eval_f1_a": 0.11475409836065575, "eval_f1_m": 0.3452577201376722, "eval_loss": 0.8241621255874634, "eval_runtime": 12.412, "eval_samples_per_second": 20.142, "eval_steps_per_second": 2.578, "step": 5 }, { "epoch": 0.096, "grad_norm": 21.625, "learning_rate": 4.836065573770492e-05, "loss": 0.9487, "step": 6 }, { "epoch": 0.096, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.05555555555555556, "eval_f1_m": 0.35745697213284255, "eval_loss": 0.8299877643585205, "eval_runtime": 12.4102, "eval_samples_per_second": 20.145, "eval_steps_per_second": 2.579, "step": 6 }, { "epoch": 0.112, "grad_norm": 114.0, "learning_rate": 4.795081967213115e-05, "loss": 1.9374, "step": 7 }, { "epoch": 0.112, "eval_exact_match": 0.0, "eval_f1_a": 0.08620689655172413, "eval_f1_m": 0.3505331636484098, "eval_loss": 0.7635751962661743, "eval_runtime": 12.5057, "eval_samples_per_second": 19.991, "eval_steps_per_second": 2.559, "step": 7 }, { "epoch": 0.128, "grad_norm": 49.25, "learning_rate": 4.754098360655738e-05, "loss": 0.9383, "step": 8 }, { "epoch": 0.128, "eval_exact_match": 0.12244897959183673, "eval_f1_a": 0.37499999999999994, "eval_f1_m": 0.4638421102706817, "eval_loss": 0.7212109565734863, "eval_runtime": 12.5257, "eval_samples_per_second": 19.959, "eval_steps_per_second": 2.555, "step": 8 }, { "epoch": 0.144, "grad_norm": 10.375, "learning_rate": 4.713114754098361e-05, "loss": 0.6205, "step": 9 }, { "epoch": 0.144, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.46413502109704646, "eval_f1_m": 0.39599618069005815, "eval_loss": 0.7557714581489563, "eval_runtime": 12.5194, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.556, "step": 9 }, { "epoch": 0.16, "grad_norm": 11.8125, "learning_rate": 4.672131147540984e-05, "loss": 0.9441, "step": 10 }, { "epoch": 0.16, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5454545454545455, "eval_f1_m": 0.3639190741231557, "eval_loss": 0.8026367425918579, "eval_runtime": 12.4166, "eval_samples_per_second": 20.134, "eval_steps_per_second": 2.577, "step": 10 }, { "epoch": 0.176, "grad_norm": 32.25, "learning_rate": 4.631147540983607e-05, "loss": 0.9161, "step": 11 }, { "epoch": 0.176, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5608108108108107, "eval_f1_m": 0.36614026563006147, "eval_loss": 0.8101338148117065, "eval_runtime": 12.5325, "eval_samples_per_second": 19.948, "eval_steps_per_second": 2.553, "step": 11 }, { "epoch": 0.192, "grad_norm": 36.25, "learning_rate": 4.59016393442623e-05, "loss": 0.8093, "step": 12 }, { "epoch": 0.192, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5429553264604812, "eval_f1_m": 0.35214683275907754, "eval_loss": 0.7952831983566284, "eval_runtime": 12.4346, "eval_samples_per_second": 20.105, "eval_steps_per_second": 2.573, "step": 12 }, { "epoch": 0.208, "grad_norm": 11.0, "learning_rate": 4.549180327868853e-05, "loss": 0.6146, "step": 13 }, { "epoch": 0.208, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5441696113074205, "eval_f1_m": 0.3687700621374089, "eval_loss": 0.7707070112228394, "eval_runtime": 12.5246, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 13 }, { "epoch": 0.224, "grad_norm": 14.625, "learning_rate": 4.508196721311476e-05, "loss": 0.7804, "step": 14 }, { "epoch": 0.224, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5376344086021505, "eval_f1_m": 0.36071490867409234, "eval_loss": 0.7593163847923279, "eval_runtime": 12.5202, "eval_samples_per_second": 19.968, "eval_steps_per_second": 2.556, "step": 14 }, { "epoch": 0.24, "grad_norm": 12.6875, "learning_rate": 4.467213114754098e-05, "loss": 0.75, "step": 15 }, { "epoch": 0.24, "eval_exact_match": 0.0, "eval_f1_a": 0.5300353356890458, "eval_f1_m": 0.3416269105044615, "eval_loss": 0.7546816468238831, "eval_runtime": 12.53, "eval_samples_per_second": 19.952, "eval_steps_per_second": 2.554, "step": 15 }, { "epoch": 0.256, "grad_norm": 10.4375, "learning_rate": 4.426229508196721e-05, "loss": 0.9339, "step": 16 }, { "epoch": 0.256, "eval_exact_match": 0.0, "eval_f1_a": 0.5454545454545454, "eval_f1_m": 0.3630654492899391, "eval_loss": 0.7366230487823486, "eval_runtime": 12.5191, "eval_samples_per_second": 19.97, "eval_steps_per_second": 2.556, "step": 16 }, { "epoch": 0.272, "grad_norm": 36.75, "learning_rate": 4.3852459016393444e-05, "loss": 0.6286, "step": 17 }, { "epoch": 0.272, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.528, "eval_f1_m": 0.38775957602488215, "eval_loss": 0.712777316570282, "eval_runtime": 12.5294, "eval_samples_per_second": 19.953, "eval_steps_per_second": 2.554, "step": 17 }, { "epoch": 0.288, "grad_norm": 31.75, "learning_rate": 4.3442622950819674e-05, "loss": 0.7253, "step": 18 }, { "epoch": 0.288, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.5301204819277109, "eval_f1_m": 0.39053531502511096, "eval_loss": 0.7025195360183716, "eval_runtime": 12.5183, "eval_samples_per_second": 19.971, "eval_steps_per_second": 2.556, "step": 18 }, { "epoch": 0.304, "grad_norm": 22.0, "learning_rate": 4.3032786885245904e-05, "loss": 0.5984, "step": 19 }, { "epoch": 0.304, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.5045045045045046, "eval_f1_m": 0.4079482875401243, "eval_loss": 0.6883164048194885, "eval_runtime": 12.5119, "eval_samples_per_second": 19.981, "eval_steps_per_second": 2.558, "step": 19 }, { "epoch": 0.32, "grad_norm": 24.75, "learning_rate": 4.262295081967213e-05, "loss": 0.6453, "step": 20 }, { "epoch": 0.32, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4931506849315069, "eval_f1_m": 0.4012221905079048, "eval_loss": 0.6845566630363464, "eval_runtime": 12.5289, "eval_samples_per_second": 19.954, "eval_steps_per_second": 2.554, "step": 20 }, { "epoch": 0.336, "grad_norm": 8.3125, "learning_rate": 4.2213114754098365e-05, "loss": 0.58, "step": 21 }, { "epoch": 0.336, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4859813084112149, "eval_f1_m": 0.4041376424029486, "eval_loss": 0.6770976781845093, "eval_runtime": 12.4164, "eval_samples_per_second": 20.135, "eval_steps_per_second": 2.577, "step": 21 }, { "epoch": 0.352, "grad_norm": 28.625, "learning_rate": 4.1803278688524595e-05, "loss": 0.6812, "step": 22 }, { "epoch": 0.352, "eval_exact_match": 0.0, "eval_f1_a": 0.509090909090909, "eval_f1_m": 0.40605698156718567, "eval_loss": 0.6734863519668579, "eval_runtime": 12.5159, "eval_samples_per_second": 19.975, "eval_steps_per_second": 2.557, "step": 22 }, { "epoch": 0.368, "grad_norm": 18.25, "learning_rate": 4.1393442622950826e-05, "loss": 0.4762, "step": 23 }, { "epoch": 0.368, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.4976076555023924, "eval_f1_m": 0.4171717738044269, "eval_loss": 0.6683046817779541, "eval_runtime": 12.5214, "eval_samples_per_second": 19.966, "eval_steps_per_second": 2.556, "step": 23 }, { "epoch": 0.384, "grad_norm": 9.125, "learning_rate": 4.098360655737705e-05, "loss": 0.6075, "step": 24 }, { "epoch": 0.384, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.48756218905472637, "eval_f1_m": 0.41513432878979106, "eval_loss": 0.6622753739356995, "eval_runtime": 12.4157, "eval_samples_per_second": 20.136, "eval_steps_per_second": 2.577, "step": 24 }, { "epoch": 0.4, "grad_norm": 10.1875, "learning_rate": 4.057377049180328e-05, "loss": 0.6313, "step": 25 }, { "epoch": 0.4, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.48087431693989074, "eval_f1_m": 0.4350705716852176, "eval_loss": 0.6575761437416077, "eval_runtime": 12.5232, "eval_samples_per_second": 19.963, "eval_steps_per_second": 2.555, "step": 25 }, { "epoch": 0.416, "grad_norm": 14.0, "learning_rate": 4.016393442622951e-05, "loss": 0.6486, "step": 26 }, { "epoch": 0.416, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.4606741573033708, "eval_f1_m": 0.44082196315089484, "eval_loss": 0.6529648303985596, "eval_runtime": 12.5257, "eval_samples_per_second": 19.959, "eval_steps_per_second": 2.555, "step": 26 }, { "epoch": 0.432, "grad_norm": 14.9375, "learning_rate": 3.975409836065574e-05, "loss": 0.8356, "step": 27 }, { "epoch": 0.432, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.44311377245508976, "eval_f1_m": 0.4505972219057453, "eval_loss": 0.6500390768051147, "eval_runtime": 12.5271, "eval_samples_per_second": 19.957, "eval_steps_per_second": 2.554, "step": 27 }, { "epoch": 0.448, "grad_norm": 9.125, "learning_rate": 3.934426229508197e-05, "loss": 0.6429, "step": 28 }, { "epoch": 0.448, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.43636363636363634, "eval_f1_m": 0.4468071344421886, "eval_loss": 0.6493672132492065, "eval_runtime": 12.5212, "eval_samples_per_second": 19.966, "eval_steps_per_second": 2.556, "step": 28 }, { "epoch": 0.464, "grad_norm": 15.8125, "learning_rate": 3.89344262295082e-05, "loss": 0.6248, "step": 29 }, { "epoch": 0.464, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.38926174496644295, "eval_f1_m": 0.4801647632079804, "eval_loss": 0.6509453058242798, "eval_runtime": 12.535, "eval_samples_per_second": 19.944, "eval_steps_per_second": 2.553, "step": 29 }, { "epoch": 0.48, "grad_norm": 24.5, "learning_rate": 3.8524590163934424e-05, "loss": 0.4855, "step": 30 }, { "epoch": 0.48, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.3283582089552239, "eval_f1_m": 0.46286459638800564, "eval_loss": 0.6590214967727661, "eval_runtime": 12.5171, "eval_samples_per_second": 19.973, "eval_steps_per_second": 2.557, "step": 30 }, { "epoch": 0.496, "grad_norm": 31.375, "learning_rate": 3.8114754098360655e-05, "loss": 0.7308, "step": 31 }, { "epoch": 0.496, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.30534351145038163, "eval_f1_m": 0.4485788821022914, "eval_loss": 0.6624687314033508, "eval_runtime": 12.4214, "eval_samples_per_second": 20.127, "eval_steps_per_second": 2.576, "step": 31 }, { "epoch": 0.512, "grad_norm": 28.125, "learning_rate": 3.7704918032786885e-05, "loss": 0.7749, "step": 32 }, { "epoch": 0.512, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.31007751937984496, "eval_f1_m": 0.4561152299747737, "eval_loss": 0.6629375219345093, "eval_runtime": 12.5333, "eval_samples_per_second": 19.947, "eval_steps_per_second": 2.553, "step": 32 }, { "epoch": 0.528, "grad_norm": 25.125, "learning_rate": 3.729508196721312e-05, "loss": 0.7449, "step": 33 }, { "epoch": 0.528, "eval_exact_match": 0.04081632653061224, "eval_f1_a": 0.34782608695652173, "eval_f1_m": 0.45786965468838214, "eval_loss": 0.6554140448570251, "eval_runtime": 12.5362, "eval_samples_per_second": 19.942, "eval_steps_per_second": 2.553, "step": 33 }, { "epoch": 0.544, "grad_norm": 12.1875, "learning_rate": 3.6885245901639346e-05, "loss": 0.7871, "step": 34 }, { "epoch": 0.544, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3382352941176471, "eval_f1_m": 0.4546140667389167, "eval_loss": 0.6557744145393372, "eval_runtime": 12.5339, "eval_samples_per_second": 19.946, "eval_steps_per_second": 2.553, "step": 34 }, { "epoch": 0.56, "grad_norm": 10.5625, "learning_rate": 3.6475409836065576e-05, "loss": 0.4428, "step": 35 }, { "epoch": 0.56, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.2900763358778626, "eval_f1_m": 0.4422134787981126, "eval_loss": 0.6567177772521973, "eval_runtime": 12.5258, "eval_samples_per_second": 19.959, "eval_steps_per_second": 2.555, "step": 35 }, { "epoch": 0.576, "grad_norm": 37.75, "learning_rate": 3.6065573770491806e-05, "loss": 0.8155, "step": 36 }, { "epoch": 0.576, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3333333333333333, "eval_f1_m": 0.4582583816077213, "eval_loss": 0.6531835794448853, "eval_runtime": 12.5226, "eval_samples_per_second": 19.964, "eval_steps_per_second": 2.555, "step": 36 }, { "epoch": 0.592, "grad_norm": 6.53125, "learning_rate": 3.5655737704918037e-05, "loss": 0.6122, "step": 37 }, { "epoch": 0.592, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3230769230769231, "eval_f1_m": 0.45850133593230824, "eval_loss": 0.6510556936264038, "eval_runtime": 12.5321, "eval_samples_per_second": 19.949, "eval_steps_per_second": 2.553, "step": 37 }, { "epoch": 0.608, "grad_norm": 20.0, "learning_rate": 3.524590163934427e-05, "loss": 0.6843, "step": 38 }, { "epoch": 0.608, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.3382352941176471, "eval_f1_m": 0.4538476562786286, "eval_loss": 0.6473007798194885, "eval_runtime": 12.4172, "eval_samples_per_second": 20.133, "eval_steps_per_second": 2.577, "step": 38 }, { "epoch": 0.624, "grad_norm": 22.25, "learning_rate": 3.483606557377049e-05, "loss": 0.6606, "step": 39 }, { "epoch": 0.624, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.42580645161290326, "eval_f1_m": 0.48005761412123965, "eval_loss": 0.6421992182731628, "eval_runtime": 12.5309, "eval_samples_per_second": 19.951, "eval_steps_per_second": 2.554, "step": 39 }, { "epoch": 0.64, "grad_norm": 19.5, "learning_rate": 3.442622950819672e-05, "loss": 0.6284, "step": 40 }, { "epoch": 0.64, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.4331210191082803, "eval_f1_m": 0.4849652914778965, "eval_loss": 0.6393261551856995, "eval_runtime": 12.4078, "eval_samples_per_second": 20.149, "eval_steps_per_second": 2.579, "step": 40 }, { "epoch": 0.656, "grad_norm": 24.0, "learning_rate": 3.401639344262295e-05, "loss": 0.6525, "step": 41 }, { "epoch": 0.656, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.45977011494252873, "eval_f1_m": 0.4644901883497321, "eval_loss": 0.6380175948143005, "eval_runtime": 12.5392, "eval_samples_per_second": 19.938, "eval_steps_per_second": 2.552, "step": 41 }, { "epoch": 0.672, "grad_norm": 26.875, "learning_rate": 3.360655737704918e-05, "loss": 0.7537, "step": 42 }, { "epoch": 0.672, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.4816753926701571, "eval_f1_m": 0.4603846000604704, "eval_loss": 0.6373457312583923, "eval_runtime": 12.5124, "eval_samples_per_second": 19.98, "eval_steps_per_second": 2.557, "step": 42 }, { "epoch": 0.688, "grad_norm": 8.5625, "learning_rate": 3.319672131147541e-05, "loss": 0.6196, "step": 43 }, { "epoch": 0.688, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5118483412322276, "eval_f1_m": 0.4378736642842286, "eval_loss": 0.641525387763977, "eval_runtime": 12.5135, "eval_samples_per_second": 19.978, "eval_steps_per_second": 2.557, "step": 43 }, { "epoch": 0.704, "grad_norm": 11.5625, "learning_rate": 3.2786885245901635e-05, "loss": 0.7105, "step": 44 }, { "epoch": 0.704, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5407725321888414, "eval_f1_m": 0.42079032079032075, "eval_loss": 0.6511328220367432, "eval_runtime": 12.5196, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.556, "step": 44 }, { "epoch": 0.72, "grad_norm": 41.75, "learning_rate": 3.237704918032787e-05, "loss": 0.7902, "step": 45 }, { "epoch": 0.72, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5338983050847457, "eval_f1_m": 0.4120925559701069, "eval_loss": 0.6530937552452087, "eval_runtime": 12.5207, "eval_samples_per_second": 19.967, "eval_steps_per_second": 2.556, "step": 45 }, { "epoch": 0.736, "grad_norm": 30.25, "learning_rate": 3.19672131147541e-05, "loss": 0.621, "step": 46 }, { "epoch": 0.736, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5423728813559323, "eval_f1_m": 0.4205959573306511, "eval_loss": 0.6510214805603027, "eval_runtime": 12.5251, "eval_samples_per_second": 19.96, "eval_steps_per_second": 2.555, "step": 46 }, { "epoch": 0.752, "grad_norm": 13.0, "learning_rate": 3.155737704918033e-05, "loss": 0.8848, "step": 47 }, { "epoch": 0.752, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5485232067510549, "eval_f1_m": 0.42849197287972796, "eval_loss": 0.6499238014221191, "eval_runtime": 12.5214, "eval_samples_per_second": 19.966, "eval_steps_per_second": 2.556, "step": 47 }, { "epoch": 0.768, "grad_norm": 51.25, "learning_rate": 3.114754098360656e-05, "loss": 0.876, "step": 48 }, { "epoch": 0.768, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5391304347826087, "eval_f1_m": 0.4377896479937296, "eval_loss": 0.6470175981521606, "eval_runtime": 12.5243, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 48 }, { "epoch": 0.784, "grad_norm": 41.25, "learning_rate": 3.073770491803279e-05, "loss": 0.7048, "step": 49 }, { "epoch": 0.784, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5302325581395348, "eval_f1_m": 0.4584981911512524, "eval_loss": 0.6386250257492065, "eval_runtime": 12.5286, "eval_samples_per_second": 19.954, "eval_steps_per_second": 2.554, "step": 49 }, { "epoch": 0.8, "grad_norm": 17.625, "learning_rate": 3.0327868852459017e-05, "loss": 0.8217, "step": 50 }, { "epoch": 0.8, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5258215962441315, "eval_f1_m": 0.45543696666145644, "eval_loss": 0.6384491920471191, "eval_runtime": 12.5207, "eval_samples_per_second": 19.967, "eval_steps_per_second": 2.556, "step": 50 }, { "epoch": 0.816, "grad_norm": 24.0, "learning_rate": 2.9918032786885248e-05, "loss": 0.6093, "step": 51 }, { "epoch": 0.816, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5125628140703518, "eval_f1_m": 0.48412188718311155, "eval_loss": 0.6337217092514038, "eval_runtime": 12.519, "eval_samples_per_second": 19.97, "eval_steps_per_second": 2.556, "step": 51 }, { "epoch": 0.832, "grad_norm": 28.5, "learning_rate": 2.9508196721311478e-05, "loss": 0.5875, "step": 52 }, { "epoch": 0.832, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.4444444444444445, "eval_f1_m": 0.4678045310698371, "eval_loss": 0.6313378810882568, "eval_runtime": 12.5146, "eval_samples_per_second": 19.977, "eval_steps_per_second": 2.557, "step": 52 }, { "epoch": 0.848, "grad_norm": 46.25, "learning_rate": 2.9098360655737705e-05, "loss": 0.5525, "step": 53 }, { "epoch": 0.848, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.380952380952381, "eval_f1_m": 0.48287293938554443, "eval_loss": 0.6318368911743164, "eval_runtime": 12.5161, "eval_samples_per_second": 19.974, "eval_steps_per_second": 2.557, "step": 53 }, { "epoch": 0.864, "grad_norm": 17.625, "learning_rate": 2.8688524590163935e-05, "loss": 0.632, "step": 54 }, { "epoch": 0.864, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.2992125984251968, "eval_f1_m": 0.4569305710962373, "eval_loss": 0.6356464624404907, "eval_runtime": 12.5236, "eval_samples_per_second": 19.962, "eval_steps_per_second": 2.555, "step": 54 }, { "epoch": 0.88, "grad_norm": 8.875, "learning_rate": 2.8278688524590162e-05, "loss": 0.5346, "step": 55 }, { "epoch": 0.88, "eval_exact_match": 0.0, "eval_f1_a": 0.24999999999999997, "eval_f1_m": 0.4310802309601829, "eval_loss": 0.6434394717216492, "eval_runtime": 12.5233, "eval_samples_per_second": 19.963, "eval_steps_per_second": 2.555, "step": 55 }, { "epoch": 0.896, "grad_norm": 11.25, "learning_rate": 2.7868852459016392e-05, "loss": 0.4656, "step": 56 }, { "epoch": 0.896, "eval_exact_match": 0.0, "eval_f1_a": 0.11009174311926605, "eval_f1_m": 0.38233843987145305, "eval_loss": 0.655026376247406, "eval_runtime": 12.5245, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 56 }, { "epoch": 0.912, "grad_norm": 5.96875, "learning_rate": 2.7459016393442626e-05, "loss": 0.6506, "step": 57 }, { "epoch": 0.912, "eval_exact_match": 0.0, "eval_f1_a": 0.05714285714285714, "eval_f1_m": 0.3669373443683168, "eval_loss": 0.6712407469749451, "eval_runtime": 12.5217, "eval_samples_per_second": 19.965, "eval_steps_per_second": 2.556, "step": 57 }, { "epoch": 0.928, "grad_norm": 47.75, "learning_rate": 2.7049180327868856e-05, "loss": 0.9128, "step": 58 }, { "epoch": 0.928, "eval_exact_match": 0.0, "eval_f1_a": 0.038461538461538464, "eval_f1_m": 0.35922759380142344, "eval_loss": 0.6825835108757019, "eval_runtime": 12.5246, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 58 }, { "epoch": 0.944, "grad_norm": 47.25, "learning_rate": 2.6639344262295087e-05, "loss": 0.8041, "step": 59 }, { "epoch": 0.944, "eval_exact_match": 0.0, "eval_f1_a": 0.038461538461538464, "eval_f1_m": 0.35922759380142344, "eval_loss": 0.6874199509620667, "eval_runtime": 12.5257, "eval_samples_per_second": 19.959, "eval_steps_per_second": 2.555, "step": 59 }, { "epoch": 0.96, "grad_norm": 34.0, "learning_rate": 2.6229508196721314e-05, "loss": 0.7979, "step": 60 }, { "epoch": 0.96, "eval_exact_match": 0.0, "eval_f1_a": 0.038461538461538464, "eval_f1_m": 0.35922759380142344, "eval_loss": 0.6864975690841675, "eval_runtime": 12.5253, "eval_samples_per_second": 19.96, "eval_steps_per_second": 2.555, "step": 60 }, { "epoch": 0.976, "grad_norm": 27.0, "learning_rate": 2.5819672131147544e-05, "loss": 0.626, "step": 61 }, { "epoch": 0.976, "eval_exact_match": 0.0, "eval_f1_a": 0.038461538461538464, "eval_f1_m": 0.35922759380142344, "eval_loss": 0.6805967092514038, "eval_runtime": 12.5272, "eval_samples_per_second": 19.957, "eval_steps_per_second": 2.554, "step": 61 }, { "epoch": 0.992, "grad_norm": 13.1875, "learning_rate": 2.540983606557377e-05, "loss": 0.5989, "step": 62 }, { "epoch": 0.992, "eval_exact_match": 0.0, "eval_f1_a": 0.05714285714285714, "eval_f1_m": 0.3673908591075459, "eval_loss": 0.6689673066139221, "eval_runtime": 12.5192, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.556, "step": 62 }, { "epoch": 1.008, "grad_norm": 38.25, "learning_rate": 2.5e-05, "loss": 0.6907, "step": 63 }, { "epoch": 1.008, "eval_exact_match": 0.0, "eval_f1_a": 0.1272727272727273, "eval_f1_m": 0.3914323804779987, "eval_loss": 0.6558759808540344, "eval_runtime": 12.5213, "eval_samples_per_second": 19.966, "eval_steps_per_second": 2.556, "step": 63 }, { "epoch": 1.024, "grad_norm": 20.375, "learning_rate": 2.459016393442623e-05, "loss": 0.5757, "step": 64 }, { "epoch": 1.024, "eval_exact_match": 0.0, "eval_f1_a": 0.23333333333333334, "eval_f1_m": 0.426488394225489, "eval_loss": 0.6442788243293762, "eval_runtime": 12.5203, "eval_samples_per_second": 19.968, "eval_steps_per_second": 2.556, "step": 64 }, { "epoch": 1.04, "grad_norm": 36.5, "learning_rate": 2.418032786885246e-05, "loss": 0.5626, "step": 65 }, { "epoch": 1.04, "eval_exact_match": 0.02040816326530612, "eval_f1_a": 0.2677165354330709, "eval_f1_m": 0.4416973349446338, "eval_loss": 0.6316845417022705, "eval_runtime": 12.5217, "eval_samples_per_second": 19.965, "eval_steps_per_second": 2.556, "step": 65 }, { "epoch": 1.056, "grad_norm": 33.0, "learning_rate": 2.377049180327869e-05, "loss": 0.6962, "step": 66 }, { "epoch": 1.056, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.3973509933774835, "eval_f1_m": 0.46887548559217224, "eval_loss": 0.6250205039978027, "eval_runtime": 12.5299, "eval_samples_per_second": 19.952, "eval_steps_per_second": 2.554, "step": 66 }, { "epoch": 1.072, "grad_norm": 10.25, "learning_rate": 2.336065573770492e-05, "loss": 0.5543, "step": 67 }, { "epoch": 1.072, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5082872928176796, "eval_f1_m": 0.5018395736583011, "eval_loss": 0.6262060403823853, "eval_runtime": 12.5279, "eval_samples_per_second": 19.955, "eval_steps_per_second": 2.554, "step": 67 }, { "epoch": 1.088, "grad_norm": 32.25, "learning_rate": 2.295081967213115e-05, "loss": 0.5034, "step": 68 }, { "epoch": 1.088, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5471698113207547, "eval_f1_m": 0.4798306568714732, "eval_loss": 0.6319238543510437, "eval_runtime": 12.4294, "eval_samples_per_second": 20.114, "eval_steps_per_second": 2.575, "step": 68 }, { "epoch": 1.104, "grad_norm": 44.0, "learning_rate": 2.254098360655738e-05, "loss": 0.6685, "step": 69 }, { "epoch": 1.104, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5650224215246638, "eval_f1_m": 0.47513240727526446, "eval_loss": 0.6369560360908508, "eval_runtime": 12.5288, "eval_samples_per_second": 19.954, "eval_steps_per_second": 2.554, "step": 69 }, { "epoch": 1.12, "grad_norm": 20.5, "learning_rate": 2.2131147540983607e-05, "loss": 0.7279, "step": 70 }, { "epoch": 1.12, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5663716814159292, "eval_f1_m": 0.46904937466161956, "eval_loss": 0.6388349533081055, "eval_runtime": 12.5241, "eval_samples_per_second": 19.961, "eval_steps_per_second": 2.555, "step": 70 }, { "epoch": 1.1360000000000001, "grad_norm": 26.75, "learning_rate": 2.1721311475409837e-05, "loss": 0.6486, "step": 71 }, { "epoch": 1.1360000000000001, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5826086956521739, "eval_f1_m": 0.4729702836845694, "eval_loss": 0.6392431855201721, "eval_runtime": 12.5238, "eval_samples_per_second": 19.962, "eval_steps_per_second": 2.555, "step": 71 }, { "epoch": 1.152, "grad_norm": 4.875, "learning_rate": 2.1311475409836064e-05, "loss": 0.7069, "step": 72 }, { "epoch": 1.152, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5836909871244634, "eval_f1_m": 0.47516055373198224, "eval_loss": 0.6390937566757202, "eval_runtime": 12.4268, "eval_samples_per_second": 20.118, "eval_steps_per_second": 2.575, "step": 72 }, { "epoch": 1.168, "grad_norm": 7.375, "learning_rate": 2.0901639344262298e-05, "loss": 0.597, "step": 73 }, { "epoch": 1.168, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5872340425531916, "eval_f1_m": 0.47219651097202114, "eval_loss": 0.6382343769073486, "eval_runtime": 12.5204, "eval_samples_per_second": 19.967, "eval_steps_per_second": 2.556, "step": 73 }, { "epoch": 1.184, "grad_norm": 6.03125, "learning_rate": 2.0491803278688525e-05, "loss": 0.5629, "step": 74 }, { "epoch": 1.184, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.6050420168067226, "eval_f1_m": 0.4974637607290668, "eval_loss": 0.6368105411529541, "eval_runtime": 12.4204, "eval_samples_per_second": 20.128, "eval_steps_per_second": 2.576, "step": 74 }, { "epoch": 1.2, "grad_norm": 12.5625, "learning_rate": 2.0081967213114755e-05, "loss": 0.4962, "step": 75 }, { "epoch": 1.2, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5983606557377048, "eval_f1_m": 0.48530426489610157, "eval_loss": 0.6430624723434448, "eval_runtime": 12.5269, "eval_samples_per_second": 19.957, "eval_steps_per_second": 2.555, "step": 75 }, { "epoch": 1.216, "grad_norm": 33.5, "learning_rate": 1.9672131147540985e-05, "loss": 0.6281, "step": 76 }, { "epoch": 1.216, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.6040816326530611, "eval_f1_m": 0.4916509227733717, "eval_loss": 0.6421269774436951, "eval_runtime": 12.5178, "eval_samples_per_second": 19.972, "eval_steps_per_second": 2.556, "step": 76 }, { "epoch": 1.232, "grad_norm": 8.375, "learning_rate": 1.9262295081967212e-05, "loss": 0.5854, "step": 77 }, { "epoch": 1.232, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5901639344262296, "eval_f1_m": 0.4785052249337963, "eval_loss": 0.640457034111023, "eval_runtime": 12.5109, "eval_samples_per_second": 19.983, "eval_steps_per_second": 2.558, "step": 77 }, { "epoch": 1.248, "grad_norm": 14.25, "learning_rate": 1.8852459016393442e-05, "loss": 0.6745, "step": 78 }, { "epoch": 1.248, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5934959349593495, "eval_f1_m": 0.4793091828806114, "eval_loss": 0.6414687633514404, "eval_runtime": 12.5277, "eval_samples_per_second": 19.956, "eval_steps_per_second": 2.554, "step": 78 }, { "epoch": 1.264, "grad_norm": 15.1875, "learning_rate": 1.8442622950819673e-05, "loss": 0.5662, "step": 79 }, { "epoch": 1.264, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5967741935483871, "eval_f1_m": 0.48058251272536984, "eval_loss": 0.6460214853286743, "eval_runtime": 12.5159, "eval_samples_per_second": 19.975, "eval_steps_per_second": 2.557, "step": 79 }, { "epoch": 1.28, "grad_norm": 55.25, "learning_rate": 1.8032786885245903e-05, "loss": 0.679, "step": 80 }, { "epoch": 1.28, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.6007905138339921, "eval_f1_m": 0.4773567135812034, "eval_loss": 0.6468359231948853, "eval_runtime": 12.5291, "eval_samples_per_second": 19.953, "eval_steps_per_second": 2.554, "step": 80 }, { "epoch": 1.296, "grad_norm": 7.21875, "learning_rate": 1.7622950819672133e-05, "loss": 0.5867, "step": 81 }, { "epoch": 1.296, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.6007905138339921, "eval_f1_m": 0.4778103982185614, "eval_loss": 0.6476621031761169, "eval_runtime": 12.5295, "eval_samples_per_second": 19.953, "eval_steps_per_second": 2.554, "step": 81 }, { "epoch": 1.312, "grad_norm": 8.125, "learning_rate": 1.721311475409836e-05, "loss": 0.5129, "step": 82 }, { "epoch": 1.312, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.6007905138339921, "eval_f1_m": 0.47502746686420155, "eval_loss": 0.6478710770606995, "eval_runtime": 12.5269, "eval_samples_per_second": 19.957, "eval_steps_per_second": 2.554, "step": 82 }, { "epoch": 1.328, "grad_norm": 41.25, "learning_rate": 1.680327868852459e-05, "loss": 0.6594, "step": 83 }, { "epoch": 1.328, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5952380952380952, "eval_f1_m": 0.4711932058870834, "eval_loss": 0.6437314748764038, "eval_runtime": 12.4098, "eval_samples_per_second": 20.145, "eval_steps_per_second": 2.579, "step": 83 }, { "epoch": 1.3439999999999999, "grad_norm": 12.3125, "learning_rate": 1.6393442622950818e-05, "loss": 0.5754, "step": 84 }, { "epoch": 1.3439999999999999, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5910931174089069, "eval_f1_m": 0.4749946765252888, "eval_loss": 0.6412422060966492, "eval_runtime": 12.4226, "eval_samples_per_second": 20.125, "eval_steps_per_second": 2.576, "step": 84 }, { "epoch": 1.3599999999999999, "grad_norm": 12.0625, "learning_rate": 1.598360655737705e-05, "loss": 0.5544, "step": 85 }, { "epoch": 1.3599999999999999, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5887096774193548, "eval_f1_m": 0.47564113890644494, "eval_loss": 0.639517605304718, "eval_runtime": 12.5196, "eval_samples_per_second": 19.969, "eval_steps_per_second": 2.556, "step": 85 }, { "epoch": 1.376, "grad_norm": 13.1875, "learning_rate": 1.557377049180328e-05, "loss": 0.6295, "step": 86 }, { "epoch": 1.376, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5726141078838174, "eval_f1_m": 0.4691413461821626, "eval_loss": 0.6366972923278809, "eval_runtime": 12.5166, "eval_samples_per_second": 19.973, "eval_steps_per_second": 2.557, "step": 86 }, { "epoch": 1.392, "grad_norm": 5.875, "learning_rate": 1.5163934426229509e-05, "loss": 0.6123, "step": 87 }, { "epoch": 1.392, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5690376569037656, "eval_f1_m": 0.46367544020605245, "eval_loss": 0.6347695589065552, "eval_runtime": 12.5135, "eval_samples_per_second": 19.978, "eval_steps_per_second": 2.557, "step": 87 }, { "epoch": 1.408, "grad_norm": 41.75, "learning_rate": 1.4754098360655739e-05, "loss": 0.6, "step": 88 }, { "epoch": 1.408, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5702127659574467, "eval_f1_m": 0.4770136326258776, "eval_loss": 0.6304101347923279, "eval_runtime": 12.4096, "eval_samples_per_second": 20.146, "eval_steps_per_second": 2.579, "step": 88 }, { "epoch": 1.424, "grad_norm": 52.5, "learning_rate": 1.4344262295081968e-05, "loss": 0.6655, "step": 89 }, { "epoch": 1.424, "eval_exact_match": 0.061224489795918366, "eval_f1_a": 0.5663716814159292, "eval_f1_m": 0.4777696226675817, "eval_loss": 0.6252734661102295, "eval_runtime": 12.52, "eval_samples_per_second": 19.968, "eval_steps_per_second": 2.556, "step": 89 }, { "epoch": 1.44, "grad_norm": 7.5625, "learning_rate": 1.3934426229508196e-05, "loss": 0.5784, "step": 90 }, { "epoch": 1.44, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5610859728506787, "eval_f1_m": 0.4947867325418346, "eval_loss": 0.6227597594261169, "eval_runtime": 12.5093, "eval_samples_per_second": 19.985, "eval_steps_per_second": 2.558, "step": 90 }, { "epoch": 1.456, "grad_norm": 9.375, "learning_rate": 1.3524590163934428e-05, "loss": 0.5715, "step": 91 }, { "epoch": 1.456, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5781990521327014, "eval_f1_m": 0.520756964124311, "eval_loss": 0.6186211109161377, "eval_runtime": 12.7601, "eval_samples_per_second": 19.592, "eval_steps_per_second": 2.508, "step": 91 }, { "epoch": 1.472, "grad_norm": 7.21875, "learning_rate": 1.3114754098360657e-05, "loss": 0.6274, "step": 92 }, { "epoch": 1.472, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5603864734299516, "eval_f1_m": 0.51300422933076, "eval_loss": 0.6171249747276306, "eval_runtime": 12.9738, "eval_samples_per_second": 19.27, "eval_steps_per_second": 2.467, "step": 92 }, { "epoch": 1.488, "grad_norm": 30.0, "learning_rate": 1.2704918032786885e-05, "loss": 0.7038, "step": 93 }, { "epoch": 1.488, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5784313725490196, "eval_f1_m": 0.5305293346109673, "eval_loss": 0.6147861480712891, "eval_runtime": 13.3609, "eval_samples_per_second": 18.711, "eval_steps_per_second": 2.395, "step": 93 }, { "epoch": 1.504, "grad_norm": 6.6875, "learning_rate": 1.2295081967213116e-05, "loss": 0.5364, "step": 94 }, { "epoch": 1.504, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5771144278606964, "eval_f1_m": 0.5379805568581079, "eval_loss": 0.6129326224327087, "eval_runtime": 13.0415, "eval_samples_per_second": 19.17, "eval_steps_per_second": 2.454, "step": 94 }, { "epoch": 1.52, "grad_norm": 27.75, "learning_rate": 1.1885245901639344e-05, "loss": 0.4883, "step": 95 }, { "epoch": 1.52, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5612244897959184, "eval_f1_m": 0.53111536989088, "eval_loss": 0.6093867421150208, "eval_runtime": 12.5993, "eval_samples_per_second": 19.842, "eval_steps_per_second": 2.54, "step": 95 }, { "epoch": 1.536, "grad_norm": 23.0, "learning_rate": 1.1475409836065575e-05, "loss": 0.4918, "step": 96 }, { "epoch": 1.536, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5567010309278351, "eval_f1_m": 0.526410154471379, "eval_loss": 0.6084589958190918, "eval_runtime": 12.6824, "eval_samples_per_second": 19.712, "eval_steps_per_second": 2.523, "step": 96 }, { "epoch": 1.552, "grad_norm": 5.71875, "learning_rate": 1.1065573770491803e-05, "loss": 0.5881, "step": 97 }, { "epoch": 1.552, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5502645502645503, "eval_f1_m": 0.5260332071556562, "eval_loss": 0.6075829863548279, "eval_runtime": 12.5867, "eval_samples_per_second": 19.862, "eval_steps_per_second": 2.542, "step": 97 }, { "epoch": 1.568, "grad_norm": 18.0, "learning_rate": 1.0655737704918032e-05, "loss": 0.4614, "step": 98 }, { "epoch": 1.568, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5608465608465608, "eval_f1_m": 0.5355139475037434, "eval_loss": 0.6062441468238831, "eval_runtime": 12.567, "eval_samples_per_second": 19.893, "eval_steps_per_second": 2.546, "step": 98 }, { "epoch": 1.584, "grad_norm": 22.625, "learning_rate": 1.0245901639344262e-05, "loss": 0.5693, "step": 99 }, { "epoch": 1.584, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5608465608465608, "eval_f1_m": 0.5325499047437824, "eval_loss": 0.6053320169448853, "eval_runtime": 12.573, "eval_samples_per_second": 19.884, "eval_steps_per_second": 2.545, "step": 99 }, { "epoch": 1.6, "grad_norm": 22.5, "learning_rate": 9.836065573770493e-06, "loss": 0.5239, "step": 100 }, { "epoch": 1.6, "eval_exact_match": 0.08163265306122448, "eval_f1_a": 0.5531914893617021, "eval_f1_m": 0.5152986752476547, "eval_loss": 0.6034590005874634, "eval_runtime": 12.5763, "eval_samples_per_second": 19.879, "eval_steps_per_second": 2.544, "step": 100 }, { "epoch": 1.616, "grad_norm": 17.25, "learning_rate": 9.426229508196721e-06, "loss": 0.4434, "step": 101 }, { "epoch": 1.616, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5405405405405405, "eval_f1_m": 0.5147288369227145, "eval_loss": 0.602425754070282, "eval_runtime": 12.5712, "eval_samples_per_second": 19.887, "eval_steps_per_second": 2.546, "step": 101 }, { "epoch": 1.6320000000000001, "grad_norm": 31.875, "learning_rate": 9.016393442622952e-06, "loss": 0.6336, "step": 102 }, { "epoch": 1.6320000000000001, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.53551912568306, "eval_f1_m": 0.5188104695757757, "eval_loss": 0.6012207269668579, "eval_runtime": 12.5726, "eval_samples_per_second": 19.885, "eval_steps_per_second": 2.545, "step": 102 }, { "epoch": 1.6480000000000001, "grad_norm": 10.875, "learning_rate": 8.60655737704918e-06, "loss": 0.5441, "step": 103 }, { "epoch": 1.6480000000000001, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5363128491620112, "eval_f1_m": 0.5246980627082668, "eval_loss": 0.6005741953849792, "eval_runtime": 12.5894, "eval_samples_per_second": 19.858, "eval_steps_per_second": 2.542, "step": 103 }, { "epoch": 1.6640000000000001, "grad_norm": 16.25, "learning_rate": 8.196721311475409e-06, "loss": 0.4561, "step": 104 }, { "epoch": 1.6640000000000001, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5202312138728324, "eval_f1_m": 0.5224427233430835, "eval_loss": 0.5999111533164978, "eval_runtime": 12.4649, "eval_samples_per_second": 20.056, "eval_steps_per_second": 2.567, "step": 104 }, { "epoch": 1.6800000000000002, "grad_norm": 12.4375, "learning_rate": 7.78688524590164e-06, "loss": 0.6999, "step": 105 }, { "epoch": 1.6800000000000002, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5207100591715976, "eval_f1_m": 0.5320394191642691, "eval_loss": 0.5994404554367065, "eval_runtime": 12.5816, "eval_samples_per_second": 19.87, "eval_steps_per_second": 2.543, "step": 105 }, { "epoch": 1.696, "grad_norm": 4.53125, "learning_rate": 7.3770491803278695e-06, "loss": 0.4781, "step": 106 }, { "epoch": 1.696, "eval_exact_match": 0.12244897959183673, "eval_f1_a": 0.524390243902439, "eval_f1_m": 0.5555706498383567, "eval_loss": 0.5982089638710022, "eval_runtime": 12.4629, "eval_samples_per_second": 20.06, "eval_steps_per_second": 2.568, "step": 106 }, { "epoch": 1.712, "grad_norm": 9.3125, "learning_rate": 6.967213114754098e-06, "loss": 0.4955, "step": 107 }, { "epoch": 1.712, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5000000000000001, "eval_f1_m": 0.5406607298263958, "eval_loss": 0.5973047018051147, "eval_runtime": 12.4809, "eval_samples_per_second": 20.031, "eval_steps_per_second": 2.564, "step": 107 }, { "epoch": 1.728, "grad_norm": 15.9375, "learning_rate": 6.557377049180328e-06, "loss": 0.7169, "step": 108 }, { "epoch": 1.728, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5061728395061729, "eval_f1_m": 0.5354615072802346, "eval_loss": 0.5978144407272339, "eval_runtime": 12.4727, "eval_samples_per_second": 20.044, "eval_steps_per_second": 2.566, "step": 108 }, { "epoch": 1.744, "grad_norm": 8.3125, "learning_rate": 6.147540983606558e-06, "loss": 0.5559, "step": 109 }, { "epoch": 1.744, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5185185185185185, "eval_f1_m": 0.538548896081909, "eval_loss": 0.5964921712875366, "eval_runtime": 12.4816, "eval_samples_per_second": 20.029, "eval_steps_per_second": 2.564, "step": 109 }, { "epoch": 1.76, "grad_norm": 14.25, "learning_rate": 5.737704918032787e-06, "loss": 0.5697, "step": 110 }, { "epoch": 1.76, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.49382716049382713, "eval_f1_m": 0.5282943547049189, "eval_loss": 0.5966972708702087, "eval_runtime": 12.4667, "eval_samples_per_second": 20.053, "eval_steps_per_second": 2.567, "step": 110 }, { "epoch": 1.776, "grad_norm": 8.375, "learning_rate": 5.327868852459016e-06, "loss": 0.4283, "step": 111 }, { "epoch": 1.776, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.5031446540880504, "eval_f1_m": 0.5433442707752428, "eval_loss": 0.5960127115249634, "eval_runtime": 12.4773, "eval_samples_per_second": 20.036, "eval_steps_per_second": 2.565, "step": 111 }, { "epoch": 1.792, "grad_norm": 16.75, "learning_rate": 4.918032786885246e-06, "loss": 0.6258, "step": 112 }, { "epoch": 1.792, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4774193548387097, "eval_f1_m": 0.5284177007466322, "eval_loss": 0.5976787209510803, "eval_runtime": 12.4731, "eval_samples_per_second": 20.043, "eval_steps_per_second": 2.566, "step": 112 }, { "epoch": 1.808, "grad_norm": 7.96875, "learning_rate": 4.508196721311476e-06, "loss": 0.5256, "step": 113 }, { "epoch": 1.808, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4615384615384615, "eval_f1_m": 0.5200843674132989, "eval_loss": 0.5962343811988831, "eval_runtime": 12.5684, "eval_samples_per_second": 19.891, "eval_steps_per_second": 2.546, "step": 113 }, { "epoch": 1.8239999999999998, "grad_norm": 15.125, "learning_rate": 4.098360655737704e-06, "loss": 0.5736, "step": 114 }, { "epoch": 1.8239999999999998, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4675324675324675, "eval_f1_m": 0.5306153330463053, "eval_loss": 0.5951288938522339, "eval_runtime": 12.5788, "eval_samples_per_second": 19.875, "eval_steps_per_second": 2.544, "step": 114 }, { "epoch": 1.8399999999999999, "grad_norm": 20.25, "learning_rate": 3.6885245901639347e-06, "loss": 0.366, "step": 115 }, { "epoch": 1.8399999999999999, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4645161290322581, "eval_f1_m": 0.5248462721752035, "eval_loss": 0.594918966293335, "eval_runtime": 12.4723, "eval_samples_per_second": 20.044, "eval_steps_per_second": 2.566, "step": 115 }, { "epoch": 1.8559999999999999, "grad_norm": 21.125, "learning_rate": 3.278688524590164e-06, "loss": 0.5082, "step": 116 }, { "epoch": 1.8559999999999999, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.48051948051948046, "eval_f1_m": 0.5353904898622783, "eval_loss": 0.5939072370529175, "eval_runtime": 12.5855, "eval_samples_per_second": 19.864, "eval_steps_per_second": 2.543, "step": 116 }, { "epoch": 1.8719999999999999, "grad_norm": 11.75, "learning_rate": 2.8688524590163937e-06, "loss": 0.5421, "step": 117 }, { "epoch": 1.8719999999999999, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.47435897435897434, "eval_f1_m": 0.5267037684204547, "eval_loss": 0.5957451462745667, "eval_runtime": 12.479, "eval_samples_per_second": 20.034, "eval_steps_per_second": 2.564, "step": 117 }, { "epoch": 1.888, "grad_norm": 8.0, "learning_rate": 2.459016393442623e-06, "loss": 0.62, "step": 118 }, { "epoch": 1.888, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4705882352941176, "eval_f1_m": 0.5307986531275845, "eval_loss": 0.5949638485908508, "eval_runtime": 12.4749, "eval_samples_per_second": 20.04, "eval_steps_per_second": 2.565, "step": 118 }, { "epoch": 1.904, "grad_norm": 11.375, "learning_rate": 2.049180327868852e-06, "loss": 0.6126, "step": 119 }, { "epoch": 1.904, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4774193548387097, "eval_f1_m": 0.5340785365095088, "eval_loss": 0.5944355726242065, "eval_runtime": 12.5766, "eval_samples_per_second": 19.878, "eval_steps_per_second": 2.544, "step": 119 }, { "epoch": 1.92, "grad_norm": 13.3125, "learning_rate": 1.639344262295082e-06, "loss": 0.527, "step": 120 }, { "epoch": 1.92, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.484076433121019, "eval_f1_m": 0.5245061361207819, "eval_loss": 0.5951045155525208, "eval_runtime": 12.4733, "eval_samples_per_second": 20.043, "eval_steps_per_second": 2.565, "step": 120 }, { "epoch": 1.936, "grad_norm": 14.9375, "learning_rate": 1.2295081967213116e-06, "loss": 0.5308, "step": 121 }, { "epoch": 1.936, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.484076433121019, "eval_f1_m": 0.5301669718836584, "eval_loss": 0.5943300724029541, "eval_runtime": 12.5706, "eval_samples_per_second": 19.888, "eval_steps_per_second": 2.546, "step": 121 }, { "epoch": 1.952, "grad_norm": 5.1875, "learning_rate": 8.19672131147541e-07, "loss": 0.5002, "step": 122 }, { "epoch": 1.952, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4774193548387097, "eval_f1_m": 0.5352071697809991, "eval_loss": 0.5943408012390137, "eval_runtime": 12.4681, "eval_samples_per_second": 20.051, "eval_steps_per_second": 2.567, "step": 122 }, { "epoch": 1.968, "grad_norm": 10.5625, "learning_rate": 4.098360655737705e-07, "loss": 0.4733, "step": 123 }, { "epoch": 1.968, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.4903225806451614, "eval_f1_m": 0.5415129388418702, "eval_loss": 0.5952519774436951, "eval_runtime": 12.4602, "eval_samples_per_second": 20.064, "eval_steps_per_second": 2.568, "step": 123 }, { "epoch": 1.984, "grad_norm": 17.625, "learning_rate": 0.0, "loss": 0.4085, "step": 124 }, { "epoch": 1.984, "eval_exact_match": 0.10204081632653061, "eval_f1_a": 0.484076433121019, "eval_f1_m": 0.5301669718836584, "eval_loss": 0.5949286818504333, "eval_runtime": 12.5825, "eval_samples_per_second": 19.869, "eval_steps_per_second": 2.543, "step": 124 }, { "epoch": 1.984, "step": 124, "total_flos": 4.246810864386048e+16, "train_loss": 0.6719285288164693, "train_runtime": 1910.309, "train_samples_per_second": 1.047, "train_steps_per_second": 0.065 } ], "logging_steps": 1, "max_steps": 124, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.246810864386048e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }