{ "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "global_step": 9565, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "eval_loss": 2.4315900802612305, "eval_runtime": 4.6765, "eval_samples_per_second": 171.069, "eval_steps_per_second": 21.384, "step": 100 }, { "epoch": 0.1, "eval_loss": 2.3032939434051514, "eval_runtime": 4.6682, "eval_samples_per_second": 171.373, "eval_steps_per_second": 21.422, "step": 200 }, { "epoch": 0.16, "eval_loss": 2.217759609222412, "eval_runtime": 4.6679, "eval_samples_per_second": 171.381, "eval_steps_per_second": 21.423, "step": 300 }, { "epoch": 0.21, "eval_loss": 2.14911150932312, "eval_runtime": 4.6757, "eval_samples_per_second": 171.099, "eval_steps_per_second": 21.387, "step": 400 }, { "epoch": 0.26, "eval_loss": 2.155996322631836, "eval_runtime": 4.6665, "eval_samples_per_second": 171.433, "eval_steps_per_second": 21.429, "step": 500 }, { "epoch": 0.31, "eval_loss": 2.01924991607666, "eval_runtime": 4.6796, "eval_samples_per_second": 170.956, "eval_steps_per_second": 21.37, "step": 600 }, { "epoch": 0.37, "eval_loss": 2.0464463233947754, "eval_runtime": 4.6799, "eval_samples_per_second": 170.944, "eval_steps_per_second": 21.368, "step": 700 }, { "epoch": 0.42, "eval_loss": 1.977142333984375, "eval_runtime": 4.6707, "eval_samples_per_second": 171.279, "eval_steps_per_second": 21.41, "step": 800 }, { "epoch": 0.47, "eval_loss": 1.967787504196167, "eval_runtime": 4.6743, "eval_samples_per_second": 171.148, "eval_steps_per_second": 21.393, "step": 900 }, { "epoch": 0.52, "learning_rate": 8.954521693674857e-06, "loss": 2.2574, "step": 1000 }, { "epoch": 0.52, "eval_loss": 1.9178065061569214, "eval_runtime": 4.721, "eval_samples_per_second": 169.457, "eval_steps_per_second": 21.182, "step": 1000 }, { "epoch": 0.58, "eval_loss": 1.8966461420059204, "eval_runtime": 4.6821, "eval_samples_per_second": 170.864, "eval_steps_per_second": 21.358, "step": 1100 }, { "epoch": 0.63, "eval_loss": 1.818562626838684, "eval_runtime": 4.6827, "eval_samples_per_second": 170.841, "eval_steps_per_second": 21.355, "step": 1200 }, { "epoch": 0.68, "eval_loss": 1.8336358070373535, "eval_runtime": 4.6864, "eval_samples_per_second": 170.708, "eval_steps_per_second": 21.339, "step": 1300 }, { "epoch": 0.73, "eval_loss": 1.8185982704162598, "eval_runtime": 4.673, "eval_samples_per_second": 171.196, "eval_steps_per_second": 21.399, "step": 1400 }, { "epoch": 0.78, "eval_loss": 1.8159518241882324, "eval_runtime": 4.6851, "eval_samples_per_second": 170.755, "eval_steps_per_second": 21.344, "step": 1500 }, { "epoch": 0.84, "eval_loss": 1.806492567062378, "eval_runtime": 4.6846, "eval_samples_per_second": 170.771, "eval_steps_per_second": 21.346, "step": 1600 }, { "epoch": 0.89, "eval_loss": 1.7608861923217773, "eval_runtime": 4.6877, "eval_samples_per_second": 170.659, "eval_steps_per_second": 21.332, "step": 1700 }, { "epoch": 0.94, "eval_loss": 1.7618434429168701, "eval_runtime": 4.6769, "eval_samples_per_second": 171.055, "eval_steps_per_second": 21.382, "step": 1800 }, { "epoch": 0.99, "eval_loss": 1.7498806715011597, "eval_runtime": 4.6908, "eval_samples_per_second": 170.546, "eval_steps_per_second": 21.318, "step": 1900 }, { "epoch": 1.05, "learning_rate": 7.909043387349713e-06, "loss": 1.871, "step": 2000 }, { "epoch": 1.05, "eval_loss": 1.816186547279358, "eval_runtime": 4.7307, "eval_samples_per_second": 169.107, "eval_steps_per_second": 21.138, "step": 2000 }, { "epoch": 1.1, "eval_loss": 1.7303450107574463, "eval_runtime": 4.6832, "eval_samples_per_second": 170.822, "eval_steps_per_second": 21.353, "step": 2100 }, { "epoch": 1.15, "eval_loss": 1.6595067977905273, "eval_runtime": 4.6808, "eval_samples_per_second": 170.912, "eval_steps_per_second": 21.364, "step": 2200 }, { "epoch": 1.2, "eval_loss": 1.7004770040512085, "eval_runtime": 4.6859, "eval_samples_per_second": 170.726, "eval_steps_per_second": 21.341, "step": 2300 }, { "epoch": 1.25, "eval_loss": 1.6547716856002808, "eval_runtime": 4.6729, "eval_samples_per_second": 171.199, "eval_steps_per_second": 21.4, "step": 2400 }, { "epoch": 1.31, "eval_loss": 1.6392831802368164, "eval_runtime": 4.6758, "eval_samples_per_second": 171.094, "eval_steps_per_second": 21.387, "step": 2500 }, { "epoch": 1.36, "eval_loss": 1.6230372190475464, "eval_runtime": 4.6697, "eval_samples_per_second": 171.318, "eval_steps_per_second": 21.415, "step": 2600 }, { "epoch": 1.41, "eval_loss": 1.63216233253479, "eval_runtime": 4.6753, "eval_samples_per_second": 171.111, "eval_steps_per_second": 21.389, "step": 2700 }, { "epoch": 1.46, "eval_loss": 1.627321720123291, "eval_runtime": 4.6653, "eval_samples_per_second": 171.477, "eval_steps_per_second": 21.435, "step": 2800 }, { "epoch": 1.52, "eval_loss": 1.6078392267227173, "eval_runtime": 4.6673, "eval_samples_per_second": 171.404, "eval_steps_per_second": 21.425, "step": 2900 }, { "epoch": 1.57, "learning_rate": 6.863565081024569e-06, "loss": 1.7234, "step": 3000 }, { "epoch": 1.57, "eval_loss": 1.6030551195144653, "eval_runtime": 4.6964, "eval_samples_per_second": 170.344, "eval_steps_per_second": 21.293, "step": 3000 }, { "epoch": 1.62, "eval_loss": 1.553316593170166, "eval_runtime": 4.6663, "eval_samples_per_second": 171.442, "eval_steps_per_second": 21.43, "step": 3100 }, { "epoch": 1.67, "eval_loss": 1.6135989427566528, "eval_runtime": 4.6687, "eval_samples_per_second": 171.355, "eval_steps_per_second": 21.419, "step": 3200 }, { "epoch": 1.73, "eval_loss": 1.5696121454238892, "eval_runtime": 4.6764, "eval_samples_per_second": 171.072, "eval_steps_per_second": 21.384, "step": 3300 }, { "epoch": 1.78, "eval_loss": 1.565152883529663, "eval_runtime": 4.6699, "eval_samples_per_second": 171.31, "eval_steps_per_second": 21.414, "step": 3400 }, { "epoch": 1.83, "eval_loss": 1.522884488105774, "eval_runtime": 4.6578, "eval_samples_per_second": 171.755, "eval_steps_per_second": 21.469, "step": 3500 }, { "epoch": 1.88, "eval_loss": 1.5707228183746338, "eval_runtime": 4.6663, "eval_samples_per_second": 171.44, "eval_steps_per_second": 21.43, "step": 3600 }, { "epoch": 1.93, "eval_loss": 1.587827444076538, "eval_runtime": 4.6655, "eval_samples_per_second": 171.473, "eval_steps_per_second": 21.434, "step": 3700 }, { "epoch": 1.99, "eval_loss": 1.5495318174362183, "eval_runtime": 4.6751, "eval_samples_per_second": 171.121, "eval_steps_per_second": 21.39, "step": 3800 }, { "epoch": 2.04, "eval_loss": 1.5380765199661255, "eval_runtime": 4.6613, "eval_samples_per_second": 171.627, "eval_steps_per_second": 21.453, "step": 3900 }, { "epoch": 2.09, "learning_rate": 5.8180867746994255e-06, "loss": 1.611, "step": 4000 }, { "epoch": 2.09, "eval_loss": 1.52095627784729, "eval_runtime": 4.7023, "eval_samples_per_second": 170.13, "eval_steps_per_second": 21.266, "step": 4000 }, { "epoch": 2.14, "eval_loss": 1.511513113975525, "eval_runtime": 4.6667, "eval_samples_per_second": 171.428, "eval_steps_per_second": 21.428, "step": 4100 }, { "epoch": 2.2, "eval_loss": 1.511332392692566, "eval_runtime": 4.6592, "eval_samples_per_second": 171.703, "eval_steps_per_second": 21.463, "step": 4200 }, { "epoch": 2.25, "eval_loss": 1.4714492559432983, "eval_runtime": 4.6645, "eval_samples_per_second": 171.508, "eval_steps_per_second": 21.438, "step": 4300 }, { "epoch": 2.3, "eval_loss": 1.5099194049835205, "eval_runtime": 4.6633, "eval_samples_per_second": 171.551, "eval_steps_per_second": 21.444, "step": 4400 }, { "epoch": 2.35, "eval_loss": 1.4862964153289795, "eval_runtime": 4.6767, "eval_samples_per_second": 171.061, "eval_steps_per_second": 21.383, "step": 4500 }, { "epoch": 2.4, "eval_loss": 1.5304350852966309, "eval_runtime": 4.6709, "eval_samples_per_second": 171.275, "eval_steps_per_second": 21.409, "step": 4600 }, { "epoch": 2.46, "eval_loss": 1.4990843534469604, "eval_runtime": 4.6619, "eval_samples_per_second": 171.605, "eval_steps_per_second": 21.451, "step": 4700 }, { "epoch": 2.51, "eval_loss": 1.5017355680465698, "eval_runtime": 4.6598, "eval_samples_per_second": 171.682, "eval_steps_per_second": 21.46, "step": 4800 }, { "epoch": 2.56, "eval_loss": 1.4134238958358765, "eval_runtime": 4.6698, "eval_samples_per_second": 171.313, "eval_steps_per_second": 21.414, "step": 4900 }, { "epoch": 2.61, "learning_rate": 4.7726084683742815e-06, "loss": 1.5455, "step": 5000 }, { "epoch": 2.61, "eval_loss": 1.4892077445983887, "eval_runtime": 4.693, "eval_samples_per_second": 170.468, "eval_steps_per_second": 21.308, "step": 5000 }, { "epoch": 2.67, "eval_loss": 1.4631962776184082, "eval_runtime": 4.6718, "eval_samples_per_second": 171.242, "eval_steps_per_second": 21.405, "step": 5100 }, { "epoch": 2.72, "eval_loss": 1.4686871767044067, "eval_runtime": 4.6795, "eval_samples_per_second": 170.959, "eval_steps_per_second": 21.37, "step": 5200 }, { "epoch": 2.77, "eval_loss": 1.444468379020691, "eval_runtime": 4.6722, "eval_samples_per_second": 171.227, "eval_steps_per_second": 21.403, "step": 5300 }, { "epoch": 2.82, "eval_loss": 1.434273362159729, "eval_runtime": 4.6709, "eval_samples_per_second": 171.273, "eval_steps_per_second": 21.409, "step": 5400 }, { "epoch": 2.88, "eval_loss": 1.4033972024917603, "eval_runtime": 4.674, "eval_samples_per_second": 171.159, "eval_steps_per_second": 21.395, "step": 5500 }, { "epoch": 2.93, "eval_loss": 1.3725674152374268, "eval_runtime": 4.658, "eval_samples_per_second": 171.746, "eval_steps_per_second": 21.468, "step": 5600 }, { "epoch": 2.98, "eval_loss": 1.4471670389175415, "eval_runtime": 4.6706, "eval_samples_per_second": 171.284, "eval_steps_per_second": 21.411, "step": 5700 }, { "epoch": 3.03, "eval_loss": 1.3971011638641357, "eval_runtime": 4.6665, "eval_samples_per_second": 171.433, "eval_steps_per_second": 21.429, "step": 5800 }, { "epoch": 3.08, "eval_loss": 1.4093689918518066, "eval_runtime": 4.6837, "eval_samples_per_second": 170.804, "eval_steps_per_second": 21.351, "step": 5900 }, { "epoch": 3.14, "learning_rate": 3.727130162049138e-06, "loss": 1.4815, "step": 6000 }, { "epoch": 3.14, "eval_loss": 1.434360384941101, "eval_runtime": 4.7117, "eval_samples_per_second": 169.79, "eval_steps_per_second": 21.224, "step": 6000 }, { "epoch": 3.19, "eval_loss": 1.412831425666809, "eval_runtime": 4.678, "eval_samples_per_second": 171.013, "eval_steps_per_second": 21.377, "step": 6100 }, { "epoch": 3.24, "eval_loss": 1.4168850183486938, "eval_runtime": 4.6653, "eval_samples_per_second": 171.478, "eval_steps_per_second": 21.435, "step": 6200 }, { "epoch": 3.29, "eval_loss": 1.399338960647583, "eval_runtime": 4.6686, "eval_samples_per_second": 171.359, "eval_steps_per_second": 21.42, "step": 6300 }, { "epoch": 3.35, "eval_loss": 1.4034981727600098, "eval_runtime": 4.6638, "eval_samples_per_second": 171.535, "eval_steps_per_second": 21.442, "step": 6400 }, { "epoch": 3.4, "eval_loss": 1.3951754570007324, "eval_runtime": 4.6788, "eval_samples_per_second": 170.982, "eval_steps_per_second": 21.373, "step": 6500 }, { "epoch": 3.45, "eval_loss": 1.3877923488616943, "eval_runtime": 4.6855, "eval_samples_per_second": 170.739, "eval_steps_per_second": 21.342, "step": 6600 }, { "epoch": 3.5, "eval_loss": 1.3673046827316284, "eval_runtime": 4.6699, "eval_samples_per_second": 171.31, "eval_steps_per_second": 21.414, "step": 6700 }, { "epoch": 3.55, "eval_loss": 1.3622443675994873, "eval_runtime": 4.671, "eval_samples_per_second": 171.271, "eval_steps_per_second": 21.409, "step": 6800 }, { "epoch": 3.61, "eval_loss": 1.374872088432312, "eval_runtime": 4.6712, "eval_samples_per_second": 171.263, "eval_steps_per_second": 21.408, "step": 6900 }, { "epoch": 3.66, "learning_rate": 2.681651855723994e-06, "loss": 1.4492, "step": 7000 }, { "epoch": 3.66, "eval_loss": 1.4481711387634277, "eval_runtime": 4.6991, "eval_samples_per_second": 170.247, "eval_steps_per_second": 21.281, "step": 7000 }, { "epoch": 3.71, "eval_loss": 1.3453810214996338, "eval_runtime": 4.6891, "eval_samples_per_second": 170.607, "eval_steps_per_second": 21.326, "step": 7100 }, { "epoch": 3.76, "eval_loss": 1.3628772497177124, "eval_runtime": 4.691, "eval_samples_per_second": 170.54, "eval_steps_per_second": 21.317, "step": 7200 }, { "epoch": 3.82, "eval_loss": 1.4115179777145386, "eval_runtime": 4.6842, "eval_samples_per_second": 170.787, "eval_steps_per_second": 21.348, "step": 7300 }, { "epoch": 3.87, "eval_loss": 1.3432800769805908, "eval_runtime": 4.69, "eval_samples_per_second": 170.576, "eval_steps_per_second": 21.322, "step": 7400 }, { "epoch": 3.92, "eval_loss": 1.368696928024292, "eval_runtime": 4.6923, "eval_samples_per_second": 170.493, "eval_steps_per_second": 21.312, "step": 7500 }, { "epoch": 3.97, "eval_loss": 1.4239104986190796, "eval_runtime": 4.6873, "eval_samples_per_second": 170.674, "eval_steps_per_second": 21.334, "step": 7600 }, { "epoch": 4.03, "eval_loss": 1.3071486949920654, "eval_runtime": 4.6827, "eval_samples_per_second": 170.843, "eval_steps_per_second": 21.355, "step": 7700 }, { "epoch": 4.08, "eval_loss": 1.353852391242981, "eval_runtime": 4.6839, "eval_samples_per_second": 170.798, "eval_steps_per_second": 21.35, "step": 7800 }, { "epoch": 4.13, "eval_loss": 1.3432263135910034, "eval_runtime": 4.6946, "eval_samples_per_second": 170.409, "eval_steps_per_second": 21.301, "step": 7900 }, { "epoch": 4.18, "learning_rate": 1.63617354939885e-06, "loss": 1.4208, "step": 8000 }, { "epoch": 4.18, "eval_loss": 1.3502365350723267, "eval_runtime": 4.7172, "eval_samples_per_second": 169.592, "eval_steps_per_second": 21.199, "step": 8000 }, { "epoch": 4.23, "eval_loss": 1.363999605178833, "eval_runtime": 4.685, "eval_samples_per_second": 170.758, "eval_steps_per_second": 21.345, "step": 8100 }, { "epoch": 4.29, "eval_loss": 1.382441520690918, "eval_runtime": 4.6847, "eval_samples_per_second": 170.769, "eval_steps_per_second": 21.346, "step": 8200 }, { "epoch": 4.34, "eval_loss": 1.3818987607955933, "eval_runtime": 4.6986, "eval_samples_per_second": 170.265, "eval_steps_per_second": 21.283, "step": 8300 }, { "epoch": 4.39, "eval_loss": 1.340713620185852, "eval_runtime": 4.6878, "eval_samples_per_second": 170.655, "eval_steps_per_second": 21.332, "step": 8400 }, { "epoch": 4.44, "eval_loss": 1.3344806432724, "eval_runtime": 4.6958, "eval_samples_per_second": 170.364, "eval_steps_per_second": 21.295, "step": 8500 }, { "epoch": 4.5, "eval_loss": 1.3563097715377808, "eval_runtime": 4.6877, "eval_samples_per_second": 170.66, "eval_steps_per_second": 21.333, "step": 8600 }, { "epoch": 4.55, "eval_loss": 1.3303130865097046, "eval_runtime": 4.6903, "eval_samples_per_second": 170.565, "eval_steps_per_second": 21.321, "step": 8700 }, { "epoch": 4.6, "eval_loss": 1.3454128503799438, "eval_runtime": 4.6828, "eval_samples_per_second": 170.838, "eval_steps_per_second": 21.355, "step": 8800 }, { "epoch": 4.65, "eval_loss": 1.3808575868606567, "eval_runtime": 4.6966, "eval_samples_per_second": 170.335, "eval_steps_per_second": 21.292, "step": 8900 }, { "epoch": 4.7, "learning_rate": 5.906952430737063e-07, "loss": 1.4013, "step": 9000 }, { "epoch": 4.7, "eval_loss": 1.3647897243499756, "eval_runtime": 4.7177, "eval_samples_per_second": 169.575, "eval_steps_per_second": 21.197, "step": 9000 }, { "epoch": 4.76, "eval_loss": 1.34979248046875, "eval_runtime": 4.6998, "eval_samples_per_second": 170.221, "eval_steps_per_second": 21.278, "step": 9100 }, { "epoch": 4.81, "eval_loss": 1.3509832620620728, "eval_runtime": 4.685, "eval_samples_per_second": 170.758, "eval_steps_per_second": 21.345, "step": 9200 }, { "epoch": 4.86, "eval_loss": 1.3402700424194336, "eval_runtime": 4.6979, "eval_samples_per_second": 170.289, "eval_steps_per_second": 21.286, "step": 9300 }, { "epoch": 4.91, "eval_loss": 1.2718613147735596, "eval_runtime": 4.6876, "eval_samples_per_second": 170.662, "eval_steps_per_second": 21.333, "step": 9400 }, { "epoch": 4.97, "eval_loss": 1.3347008228302002, "eval_runtime": 4.6849, "eval_samples_per_second": 170.762, "eval_steps_per_second": 21.345, "step": 9500 }, { "epoch": 5.0, "step": 9565, "total_flos": 1.1622564417918384e+16, "train_loss": 1.626130136269521, "train_runtime": 5849.5407, "train_samples_per_second": 13.078, "train_steps_per_second": 1.635 } ], "max_steps": 9565, "num_train_epochs": 5, "total_flos": 1.1622564417918384e+16, "trial_name": null, "trial_params": null }