{ "best_metric": null, "best_model_checkpoint": null, "epoch": 65.0, "global_step": 6760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "learning_rate": 4.9238165680473374e-05, "loss": 1.4565, "step": 103 }, { "epoch": 1.0, "eval_f1": 0.5061315988298739, "eval_loss": 1.226438045501709, "eval_runtime": 2.9031, "eval_samples_per_second": 190.487, "eval_steps_per_second": 12.056, "step": 104 }, { "epoch": 1.98, "learning_rate": 4.8483727810650895e-05, "loss": 1.08, "step": 206 }, { "epoch": 2.0, "eval_f1": 0.5585235698125446, "eval_loss": 1.1974396705627441, "eval_runtime": 2.9416, "eval_samples_per_second": 187.991, "eval_steps_per_second": 11.898, "step": 208 }, { "epoch": 2.97, "learning_rate": 4.772189349112427e-05, "loss": 0.8073, "step": 309 }, { "epoch": 3.0, "eval_f1": 0.5539323613883379, "eval_loss": 1.276489496231079, "eval_runtime": 2.969, "eval_samples_per_second": 186.259, "eval_steps_per_second": 11.789, "step": 312 }, { "epoch": 3.96, "learning_rate": 4.696005917159764e-05, "loss": 0.5577, "step": 412 }, { "epoch": 4.0, "eval_f1": 0.5553847052045314, "eval_loss": 1.427822232246399, "eval_runtime": 2.9361, "eval_samples_per_second": 188.346, "eval_steps_per_second": 11.921, "step": 416 }, { "epoch": 4.95, "learning_rate": 4.619822485207101e-05, "loss": 0.3941, "step": 515 }, { "epoch": 5.0, "eval_f1": 0.5570342860609194, "eval_loss": 1.6517128944396973, "eval_runtime": 2.9022, "eval_samples_per_second": 190.542, "eval_steps_per_second": 12.06, "step": 520 }, { "epoch": 5.94, "learning_rate": 4.543639053254438e-05, "loss": 0.2878, "step": 618 }, { "epoch": 6.0, "eval_f1": 0.5619826716090497, "eval_loss": 1.8180437088012695, "eval_runtime": 2.855, "eval_samples_per_second": 193.697, "eval_steps_per_second": 12.259, "step": 624 }, { "epoch": 6.93, "learning_rate": 4.468195266272189e-05, "loss": 0.2337, "step": 721 }, { "epoch": 7.0, "eval_f1": 0.5674708526030706, "eval_loss": 1.9061989784240723, "eval_runtime": 2.8641, "eval_samples_per_second": 193.077, "eval_steps_per_second": 12.22, "step": 728 }, { "epoch": 7.92, "learning_rate": 4.392011834319526e-05, "loss": 0.1743, "step": 824 }, { "epoch": 8.0, "eval_f1": 0.5571774381839604, "eval_loss": 2.166078805923462, "eval_runtime": 2.8885, "eval_samples_per_second": 191.449, "eval_steps_per_second": 12.117, "step": 832 }, { "epoch": 8.91, "learning_rate": 4.315828402366864e-05, "loss": 0.1324, "step": 927 }, { "epoch": 9.0, "eval_f1": 0.5586999359656486, "eval_loss": 2.1434192657470703, "eval_runtime": 2.8793, "eval_samples_per_second": 192.062, "eval_steps_per_second": 12.156, "step": 936 }, { "epoch": 9.9, "learning_rate": 4.239644970414201e-05, "loss": 0.1051, "step": 1030 }, { "epoch": 10.0, "eval_f1": 0.571861247626083, "eval_loss": 2.2514231204986572, "eval_runtime": 2.8876, "eval_samples_per_second": 191.511, "eval_steps_per_second": 12.121, "step": 1040 }, { "epoch": 10.89, "learning_rate": 4.163461538461539e-05, "loss": 0.1016, "step": 1133 }, { "epoch": 11.0, "eval_f1": 0.5608736700927537, "eval_loss": 2.452277898788452, "eval_runtime": 2.9662, "eval_samples_per_second": 186.434, "eval_steps_per_second": 11.8, "step": 1144 }, { "epoch": 11.88, "learning_rate": 4.0872781065088764e-05, "loss": 0.0814, "step": 1236 }, { "epoch": 12.0, "eval_f1": 0.5643677851728315, "eval_loss": 2.5340888500213623, "eval_runtime": 2.8457, "eval_samples_per_second": 194.329, "eval_steps_per_second": 12.299, "step": 1248 }, { "epoch": 12.88, "learning_rate": 4.0110946745562136e-05, "loss": 0.0673, "step": 1339 }, { "epoch": 13.0, "eval_f1": 0.5738915229311208, "eval_loss": 2.6217703819274902, "eval_runtime": 2.9035, "eval_samples_per_second": 190.46, "eval_steps_per_second": 12.054, "step": 1352 }, { "epoch": 13.87, "learning_rate": 3.934911242603551e-05, "loss": 0.0684, "step": 1442 }, { "epoch": 14.0, "eval_f1": 0.5366433281464598, "eval_loss": 2.9552414417266846, "eval_runtime": 2.9063, "eval_samples_per_second": 190.277, "eval_steps_per_second": 12.043, "step": 1456 }, { "epoch": 14.86, "learning_rate": 3.858727810650888e-05, "loss": 0.0466, "step": 1545 }, { "epoch": 15.0, "eval_f1": 0.5787084254032917, "eval_loss": 2.7240512371063232, "eval_runtime": 2.8887, "eval_samples_per_second": 191.438, "eval_steps_per_second": 12.116, "step": 1560 }, { "epoch": 15.85, "learning_rate": 3.782544378698225e-05, "loss": 0.0577, "step": 1648 }, { "epoch": 16.0, "eval_f1": 0.5666557248979172, "eval_loss": 2.821897506713867, "eval_runtime": 2.8994, "eval_samples_per_second": 190.727, "eval_steps_per_second": 12.071, "step": 1664 }, { "epoch": 16.84, "learning_rate": 3.706360946745562e-05, "loss": 0.042, "step": 1751 }, { "epoch": 17.0, "eval_f1": 0.56033452806457, "eval_loss": 2.9155900478363037, "eval_runtime": 2.9258, "eval_samples_per_second": 189.01, "eval_steps_per_second": 11.963, "step": 1768 }, { "epoch": 17.83, "learning_rate": 3.6301775147928995e-05, "loss": 0.0404, "step": 1854 }, { "epoch": 18.0, "eval_f1": 0.5621979513908701, "eval_loss": 2.893630266189575, "eval_runtime": 2.9319, "eval_samples_per_second": 188.613, "eval_steps_per_second": 11.938, "step": 1872 }, { "epoch": 18.82, "learning_rate": 3.553994082840237e-05, "loss": 0.0426, "step": 1957 }, { "epoch": 19.0, "eval_f1": 0.5766503161850353, "eval_loss": 3.00762939453125, "eval_runtime": 2.9064, "eval_samples_per_second": 190.271, "eval_steps_per_second": 12.042, "step": 1976 }, { "epoch": 19.81, "learning_rate": 3.477810650887574e-05, "loss": 0.0361, "step": 2060 }, { "epoch": 20.0, "eval_f1": 0.5565194377868121, "eval_loss": 3.043562173843384, "eval_runtime": 2.9099, "eval_samples_per_second": 190.043, "eval_steps_per_second": 12.028, "step": 2080 }, { "epoch": 20.8, "learning_rate": 3.401627218934911e-05, "loss": 0.039, "step": 2163 }, { "epoch": 21.0, "eval_f1": 0.5661283975776907, "eval_loss": 3.034050226211548, "eval_runtime": 2.8472, "eval_samples_per_second": 194.228, "eval_steps_per_second": 12.293, "step": 2184 }, { "epoch": 21.79, "learning_rate": 3.325443786982248e-05, "loss": 0.0311, "step": 2266 }, { "epoch": 22.0, "eval_f1": 0.5698598461896062, "eval_loss": 3.1546428203582764, "eval_runtime": 2.8965, "eval_samples_per_second": 190.921, "eval_steps_per_second": 12.084, "step": 2288 }, { "epoch": 22.78, "learning_rate": 3.2492603550295855e-05, "loss": 0.0296, "step": 2369 }, { "epoch": 23.0, "eval_f1": 0.5584145320343268, "eval_loss": 3.3160221576690674, "eval_runtime": 2.9004, "eval_samples_per_second": 190.664, "eval_steps_per_second": 12.067, "step": 2392 }, { "epoch": 23.77, "learning_rate": 3.1730769230769234e-05, "loss": 0.03, "step": 2472 }, { "epoch": 24.0, "eval_f1": 0.5765799312977243, "eval_loss": 3.2025678157806396, "eval_runtime": 2.837, "eval_samples_per_second": 194.921, "eval_steps_per_second": 12.337, "step": 2496 }, { "epoch": 24.76, "learning_rate": 3.0968934911242606e-05, "loss": 0.0333, "step": 2575 }, { "epoch": 25.0, "eval_f1": 0.5689553713820321, "eval_loss": 3.211634397506714, "eval_runtime": 2.9044, "eval_samples_per_second": 190.402, "eval_steps_per_second": 12.051, "step": 2600 }, { "epoch": 25.75, "learning_rate": 3.0207100591715974e-05, "loss": 0.0321, "step": 2678 }, { "epoch": 26.0, "eval_f1": 0.5756108062994573, "eval_loss": 3.2678425312042236, "eval_runtime": 2.8888, "eval_samples_per_second": 191.428, "eval_steps_per_second": 12.116, "step": 2704 }, { "epoch": 26.74, "learning_rate": 2.944526627218935e-05, "loss": 0.0263, "step": 2781 }, { "epoch": 27.0, "eval_f1": 0.5758065273285641, "eval_loss": 3.2969822883605957, "eval_runtime": 2.9527, "eval_samples_per_second": 187.286, "eval_steps_per_second": 11.854, "step": 2808 }, { "epoch": 27.73, "learning_rate": 2.8683431952662725e-05, "loss": 0.0281, "step": 2884 }, { "epoch": 28.0, "eval_f1": 0.5781354966097151, "eval_loss": 3.3730037212371826, "eval_runtime": 2.8614, "eval_samples_per_second": 193.264, "eval_steps_per_second": 12.232, "step": 2912 }, { "epoch": 28.72, "learning_rate": 2.7921597633136097e-05, "loss": 0.0282, "step": 2987 }, { "epoch": 29.0, "eval_f1": 0.5741866124789994, "eval_loss": 3.364117383956909, "eval_runtime": 2.8696, "eval_samples_per_second": 192.707, "eval_steps_per_second": 12.197, "step": 3016 }, { "epoch": 29.71, "learning_rate": 2.7159763313609472e-05, "loss": 0.0296, "step": 3090 }, { "epoch": 30.0, "eval_f1": 0.5771762774162508, "eval_loss": 3.3623032569885254, "eval_runtime": 2.9567, "eval_samples_per_second": 187.031, "eval_steps_per_second": 11.837, "step": 3120 }, { "epoch": 30.7, "learning_rate": 2.6397928994082844e-05, "loss": 0.0308, "step": 3193 }, { "epoch": 31.0, "eval_f1": 0.578537002980747, "eval_loss": 3.4039528369903564, "eval_runtime": 2.8263, "eval_samples_per_second": 195.66, "eval_steps_per_second": 12.384, "step": 3224 }, { "epoch": 31.69, "learning_rate": 2.5636094674556216e-05, "loss": 0.0308, "step": 3296 }, { "epoch": 32.0, "eval_f1": 0.575919412837488, "eval_loss": 3.392319679260254, "eval_runtime": 2.9375, "eval_samples_per_second": 188.254, "eval_steps_per_second": 11.915, "step": 3328 }, { "epoch": 32.68, "learning_rate": 2.4874260355029588e-05, "loss": 0.0262, "step": 3399 }, { "epoch": 33.0, "eval_f1": 0.5563772891428104, "eval_loss": 3.4757542610168457, "eval_runtime": 2.865, "eval_samples_per_second": 193.019, "eval_steps_per_second": 12.216, "step": 3432 }, { "epoch": 33.67, "learning_rate": 2.411242603550296e-05, "loss": 0.0319, "step": 3502 }, { "epoch": 34.0, "eval_f1": 0.5738865992034025, "eval_loss": 3.425334930419922, "eval_runtime": 2.9109, "eval_samples_per_second": 189.974, "eval_steps_per_second": 12.024, "step": 3536 }, { "epoch": 34.66, "learning_rate": 2.3350591715976332e-05, "loss": 0.0277, "step": 3605 }, { "epoch": 35.0, "eval_f1": 0.5785980513801816, "eval_loss": 3.4686436653137207, "eval_runtime": 2.93, "eval_samples_per_second": 188.738, "eval_steps_per_second": 11.945, "step": 3640 }, { "epoch": 35.65, "learning_rate": 2.2588757396449707e-05, "loss": 0.0289, "step": 3708 }, { "epoch": 36.0, "eval_f1": 0.5836924697871717, "eval_loss": 3.462078094482422, "eval_runtime": 2.8428, "eval_samples_per_second": 194.527, "eval_steps_per_second": 12.312, "step": 3744 }, { "epoch": 36.64, "learning_rate": 2.182692307692308e-05, "loss": 0.0247, "step": 3811 }, { "epoch": 37.0, "eval_f1": 0.5734707197245945, "eval_loss": 3.481998920440674, "eval_runtime": 3.0017, "eval_samples_per_second": 184.228, "eval_steps_per_second": 11.66, "step": 3848 }, { "epoch": 37.63, "learning_rate": 2.106508875739645e-05, "loss": 0.0303, "step": 3914 }, { "epoch": 38.0, "eval_f1": 0.5770262969511715, "eval_loss": 3.466510772705078, "eval_runtime": 2.8587, "eval_samples_per_second": 193.442, "eval_steps_per_second": 12.243, "step": 3952 }, { "epoch": 38.62, "learning_rate": 2.0303254437869823e-05, "loss": 0.0239, "step": 4017 }, { "epoch": 39.0, "eval_f1": 0.5666519467364683, "eval_loss": 3.5593807697296143, "eval_runtime": 2.8222, "eval_samples_per_second": 195.946, "eval_steps_per_second": 12.402, "step": 4056 }, { "epoch": 39.62, "learning_rate": 1.9541420118343195e-05, "loss": 0.0262, "step": 4120 }, { "epoch": 40.0, "eval_f1": 0.5808476343157906, "eval_loss": 3.5302422046661377, "eval_runtime": 2.8598, "eval_samples_per_second": 193.368, "eval_steps_per_second": 12.238, "step": 4160 }, { "epoch": 40.61, "learning_rate": 1.8779585798816567e-05, "loss": 0.0282, "step": 4223 }, { "epoch": 41.0, "eval_f1": 0.5835890408164021, "eval_loss": 3.4572339057922363, "eval_runtime": 2.8566, "eval_samples_per_second": 193.584, "eval_steps_per_second": 12.252, "step": 4264 }, { "epoch": 41.6, "learning_rate": 1.8025147928994084e-05, "loss": 0.0469, "step": 4326 }, { "epoch": 42.0, "eval_f1": 0.5685331156394952, "eval_loss": 3.609334707260132, "eval_runtime": 2.8251, "eval_samples_per_second": 195.747, "eval_steps_per_second": 12.389, "step": 4368 }, { "epoch": 42.59, "learning_rate": 1.7263313609467456e-05, "loss": 0.0302, "step": 4429 }, { "epoch": 43.0, "eval_f1": 0.5684067370608473, "eval_loss": 3.6115400791168213, "eval_runtime": 2.9194, "eval_samples_per_second": 189.42, "eval_steps_per_second": 11.989, "step": 4472 }, { "epoch": 43.58, "learning_rate": 1.650147928994083e-05, "loss": 0.0289, "step": 4532 }, { "epoch": 44.0, "eval_f1": 0.5757900647671246, "eval_loss": 3.629568099975586, "eval_runtime": 2.9036, "eval_samples_per_second": 190.453, "eval_steps_per_second": 12.054, "step": 4576 }, { "epoch": 44.57, "learning_rate": 1.5739644970414204e-05, "loss": 0.0254, "step": 4635 }, { "epoch": 45.0, "eval_f1": 0.5689505752768721, "eval_loss": 3.7250843048095703, "eval_runtime": 2.9726, "eval_samples_per_second": 186.035, "eval_steps_per_second": 11.774, "step": 4680 }, { "epoch": 45.56, "learning_rate": 1.4977810650887576e-05, "loss": 0.0283, "step": 4738 }, { "epoch": 46.0, "eval_f1": 0.5592198654774546, "eval_loss": 3.726353645324707, "eval_runtime": 2.9328, "eval_samples_per_second": 188.559, "eval_steps_per_second": 11.934, "step": 4784 }, { "epoch": 46.55, "learning_rate": 1.4215976331360948e-05, "loss": 0.0246, "step": 4841 }, { "epoch": 47.0, "eval_f1": 0.5650157110711802, "eval_loss": 3.7832093238830566, "eval_runtime": 2.9067, "eval_samples_per_second": 190.249, "eval_steps_per_second": 12.041, "step": 4888 }, { "epoch": 47.54, "learning_rate": 1.345414201183432e-05, "loss": 0.0311, "step": 4944 }, { "epoch": 48.0, "eval_f1": 0.5681512072556809, "eval_loss": 3.6964025497436523, "eval_runtime": 2.9008, "eval_samples_per_second": 190.634, "eval_steps_per_second": 12.065, "step": 4992 }, { "epoch": 48.53, "learning_rate": 1.2692307692307691e-05, "loss": 0.0268, "step": 5047 }, { "epoch": 49.0, "eval_f1": 0.5674808111122996, "eval_loss": 3.7195167541503906, "eval_runtime": 2.8604, "eval_samples_per_second": 193.33, "eval_steps_per_second": 12.236, "step": 5096 }, { "epoch": 49.52, "learning_rate": 1.1930473372781067e-05, "loss": 0.0293, "step": 5150 }, { "epoch": 50.0, "eval_f1": 0.5614419693521525, "eval_loss": 3.752530097961426, "eval_runtime": 2.8761, "eval_samples_per_second": 192.275, "eval_steps_per_second": 12.169, "step": 5200 }, { "epoch": 50.51, "learning_rate": 1.1168639053254439e-05, "loss": 0.0282, "step": 5253 }, { "epoch": 51.0, "eval_f1": 0.5655838635083059, "eval_loss": 3.7514984607696533, "eval_runtime": 2.8609, "eval_samples_per_second": 193.296, "eval_steps_per_second": 12.234, "step": 5304 }, { "epoch": 51.5, "learning_rate": 1.040680473372781e-05, "loss": 0.0248, "step": 5356 }, { "epoch": 52.0, "eval_f1": 0.5590951084274065, "eval_loss": 3.7639315128326416, "eval_runtime": 2.8211, "eval_samples_per_second": 196.025, "eval_steps_per_second": 12.407, "step": 5408 }, { "epoch": 52.49, "learning_rate": 9.644970414201183e-06, "loss": 0.0257, "step": 5459 }, { "epoch": 53.0, "eval_f1": 0.5480134247467852, "eval_loss": 3.824922800064087, "eval_runtime": 2.8475, "eval_samples_per_second": 194.205, "eval_steps_per_second": 12.291, "step": 5512 }, { "epoch": 53.48, "learning_rate": 8.883136094674558e-06, "loss": 0.0235, "step": 5562 }, { "epoch": 54.0, "eval_f1": 0.5565796472147394, "eval_loss": 3.7871253490448, "eval_runtime": 2.9817, "eval_samples_per_second": 185.462, "eval_steps_per_second": 11.738, "step": 5616 }, { "epoch": 54.47, "learning_rate": 8.12130177514793e-06, "loss": 0.0299, "step": 5665 }, { "epoch": 55.0, "eval_f1": 0.5574154263000176, "eval_loss": 3.788760185241699, "eval_runtime": 2.8852, "eval_samples_per_second": 191.665, "eval_steps_per_second": 12.131, "step": 5720 }, { "epoch": 55.46, "learning_rate": 7.359467455621302e-06, "loss": 0.0277, "step": 5768 }, { "epoch": 56.0, "eval_f1": 0.563024311843682, "eval_loss": 3.7907044887542725, "eval_runtime": 2.8658, "eval_samples_per_second": 192.962, "eval_steps_per_second": 12.213, "step": 5824 }, { "epoch": 56.45, "learning_rate": 6.597633136094675e-06, "loss": 0.0256, "step": 5871 }, { "epoch": 57.0, "eval_f1": 0.56153234588093, "eval_loss": 3.799422264099121, "eval_runtime": 2.8666, "eval_samples_per_second": 192.912, "eval_steps_per_second": 12.21, "step": 5928 }, { "epoch": 57.44, "learning_rate": 5.8357988165680474e-06, "loss": 0.0226, "step": 5974 }, { "epoch": 58.0, "eval_f1": 0.5555061070073688, "eval_loss": 3.811858892440796, "eval_runtime": 2.8683, "eval_samples_per_second": 192.797, "eval_steps_per_second": 12.202, "step": 6032 }, { "epoch": 58.43, "learning_rate": 5.07396449704142e-06, "loss": 0.0284, "step": 6077 }, { "epoch": 59.0, "eval_f1": 0.5597671150511061, "eval_loss": 3.8192451000213623, "eval_runtime": 2.8512, "eval_samples_per_second": 193.951, "eval_steps_per_second": 12.275, "step": 6136 }, { "epoch": 59.42, "learning_rate": 4.312130177514793e-06, "loss": 0.0233, "step": 6180 }, { "epoch": 60.0, "eval_f1": 0.5584681716027172, "eval_loss": 3.823091983795166, "eval_runtime": 2.9385, "eval_samples_per_second": 188.191, "eval_steps_per_second": 11.911, "step": 6240 }, { "epoch": 60.41, "learning_rate": 3.550295857988166e-06, "loss": 0.0266, "step": 6283 }, { "epoch": 61.0, "eval_f1": 0.5625000576804086, "eval_loss": 3.8085415363311768, "eval_runtime": 2.9015, "eval_samples_per_second": 190.588, "eval_steps_per_second": 12.063, "step": 6344 }, { "epoch": 61.4, "learning_rate": 2.7958579881656803e-06, "loss": 0.0267, "step": 6386 }, { "epoch": 62.0, "eval_f1": 0.5622167257088028, "eval_loss": 3.80642032623291, "eval_runtime": 2.8514, "eval_samples_per_second": 193.94, "eval_steps_per_second": 12.275, "step": 6448 }, { "epoch": 62.39, "learning_rate": 2.034023668639053e-06, "loss": 0.0281, "step": 6489 }, { "epoch": 63.0, "eval_f1": 0.564106811375439, "eval_loss": 3.8057875633239746, "eval_runtime": 2.8945, "eval_samples_per_second": 191.055, "eval_steps_per_second": 12.092, "step": 6552 }, { "epoch": 63.38, "learning_rate": 1.2721893491124261e-06, "loss": 0.025, "step": 6592 }, { "epoch": 64.0, "eval_f1": 0.5644375312998279, "eval_loss": 3.807055950164795, "eval_runtime": 2.8941, "eval_samples_per_second": 191.08, "eval_steps_per_second": 12.094, "step": 6656 }, { "epoch": 64.38, "learning_rate": 5.103550295857988e-07, "loss": 0.0226, "step": 6695 }, { "epoch": 65.0, "eval_f1": 0.5644375312998279, "eval_loss": 3.807528018951416, "eval_runtime": 2.8626, "eval_samples_per_second": 193.181, "eval_steps_per_second": 12.227, "step": 6760 } ], "max_steps": 6760, "num_train_epochs": 65, "total_flos": 1.4286659901696e+16, "trial_name": null, "trial_params": null }