{ "best_metric": 2.4417428970336914, "best_model_checkpoint": "./model_tweets_2020_Q1_50/checkpoint-1888000", "epoch": 9.834292176820574, "eval_steps": 8000, "global_step": 2400000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "eval_loss": 2.89373779296875, "eval_runtime": 220.7068, "eval_samples_per_second": 931.145, "eval_steps_per_second": 58.199, "step": 8000 }, { "epoch": 0.07, "learning_rate": 4.0726666666666665e-07, "loss": 3.073, "step": 16000 }, { "epoch": 0.07, "eval_loss": 2.76598858833313, "eval_runtime": 221.0774, "eval_samples_per_second": 929.584, "eval_steps_per_second": 58.102, "step": 16000 }, { "epoch": 0.1, "eval_loss": 2.7232513427734375, "eval_runtime": 221.6288, "eval_samples_per_second": 927.271, "eval_steps_per_second": 57.957, "step": 24000 }, { "epoch": 0.13, "learning_rate": 4.0453333333333336e-07, "loss": 2.8244, "step": 32000 }, { "epoch": 0.13, "eval_loss": 2.687758207321167, "eval_runtime": 220.1262, "eval_samples_per_second": 933.601, "eval_steps_per_second": 58.353, "step": 32000 }, { "epoch": 0.16, "eval_loss": 2.6519503593444824, "eval_runtime": 220.495, "eval_samples_per_second": 932.039, "eval_steps_per_second": 58.255, "step": 40000 }, { "epoch": 0.2, "learning_rate": 4.018e-07, "loss": 2.7542, "step": 48000 }, { "epoch": 0.2, "eval_loss": 2.63004469871521, "eval_runtime": 220.2803, "eval_samples_per_second": 932.948, "eval_steps_per_second": 58.312, "step": 48000 }, { "epoch": 0.23, "eval_loss": 2.613522529602051, "eval_runtime": 221.2317, "eval_samples_per_second": 928.936, "eval_steps_per_second": 58.061, "step": 56000 }, { "epoch": 0.26, "learning_rate": 3.9906666666666667e-07, "loss": 2.7083, "step": 64000 }, { "epoch": 0.26, "eval_loss": 2.6067709922790527, "eval_runtime": 220.3177, "eval_samples_per_second": 932.789, "eval_steps_per_second": 58.302, "step": 64000 }, { "epoch": 0.3, "eval_loss": 2.5854294300079346, "eval_runtime": 220.8061, "eval_samples_per_second": 930.726, "eval_steps_per_second": 58.173, "step": 72000 }, { "epoch": 0.33, "learning_rate": 3.963333333333333e-07, "loss": 2.6752, "step": 80000 }, { "epoch": 0.33, "eval_loss": 2.575528860092163, "eval_runtime": 221.8521, "eval_samples_per_second": 926.338, "eval_steps_per_second": 57.899, "step": 80000 }, { "epoch": 0.36, "eval_loss": 2.5720720291137695, "eval_runtime": 221.4472, "eval_samples_per_second": 928.032, "eval_steps_per_second": 58.005, "step": 88000 }, { "epoch": 0.39, "learning_rate": 3.936e-07, "loss": 2.6657, "step": 96000 }, { "epoch": 0.39, "eval_loss": 2.5709290504455566, "eval_runtime": 220.9006, "eval_samples_per_second": 930.328, "eval_steps_per_second": 58.148, "step": 96000 }, { "epoch": 0.43, "eval_loss": 2.5656096935272217, "eval_runtime": 220.5433, "eval_samples_per_second": 931.835, "eval_steps_per_second": 58.243, "step": 104000 }, { "epoch": 0.46, "learning_rate": 3.908666666666667e-07, "loss": 2.6534, "step": 112000 }, { "epoch": 0.46, "eval_loss": 2.5558407306671143, "eval_runtime": 221.6371, "eval_samples_per_second": 927.236, "eval_steps_per_second": 57.955, "step": 112000 }, { "epoch": 0.49, "eval_loss": 2.5495829582214355, "eval_runtime": 220.7733, "eval_samples_per_second": 930.864, "eval_steps_per_second": 58.182, "step": 120000 }, { "epoch": 0.52, "learning_rate": 3.8813333333333334e-07, "loss": 2.646, "step": 128000 }, { "epoch": 0.52, "eval_loss": 2.547106981277466, "eval_runtime": 221.6448, "eval_samples_per_second": 927.204, "eval_steps_per_second": 57.953, "step": 128000 }, { "epoch": 0.56, "eval_loss": 2.5408244132995605, "eval_runtime": 221.6302, "eval_samples_per_second": 927.265, "eval_steps_per_second": 57.957, "step": 136000 }, { "epoch": 0.59, "learning_rate": 3.854e-07, "loss": 2.625, "step": 144000 }, { "epoch": 0.59, "eval_loss": 2.531517744064331, "eval_runtime": 223.6683, "eval_samples_per_second": 918.816, "eval_steps_per_second": 57.429, "step": 144000 }, { "epoch": 0.62, "eval_loss": 2.5364675521850586, "eval_runtime": 224.1465, "eval_samples_per_second": 916.856, "eval_steps_per_second": 57.306, "step": 152000 }, { "epoch": 0.66, "learning_rate": 3.8266666666666665e-07, "loss": 2.6222, "step": 160000 }, { "epoch": 0.66, "eval_loss": 2.5372273921966553, "eval_runtime": 221.7325, "eval_samples_per_second": 926.837, "eval_steps_per_second": 57.93, "step": 160000 }, { "epoch": 0.69, "eval_loss": 2.534186363220215, "eval_runtime": 222.3525, "eval_samples_per_second": 924.253, "eval_steps_per_second": 57.769, "step": 168000 }, { "epoch": 0.72, "learning_rate": 3.799333333333333e-07, "loss": 2.6256, "step": 176000 }, { "epoch": 0.72, "eval_loss": 2.5308265686035156, "eval_runtime": 221.538, "eval_samples_per_second": 927.651, "eval_steps_per_second": 57.981, "step": 176000 }, { "epoch": 0.75, "eval_loss": 2.5311617851257324, "eval_runtime": 224.2919, "eval_samples_per_second": 916.261, "eval_steps_per_second": 57.269, "step": 184000 }, { "epoch": 0.79, "learning_rate": 3.772e-07, "loss": 2.6074, "step": 192000 }, { "epoch": 0.79, "eval_loss": 2.522848129272461, "eval_runtime": 224.4507, "eval_samples_per_second": 915.613, "eval_steps_per_second": 57.229, "step": 192000 }, { "epoch": 0.82, "eval_loss": 2.529161214828491, "eval_runtime": 222.7477, "eval_samples_per_second": 922.613, "eval_steps_per_second": 57.666, "step": 200000 }, { "epoch": 0.85, "learning_rate": 3.7446666666666667e-07, "loss": 2.6071, "step": 208000 }, { "epoch": 0.85, "eval_loss": 2.5295047760009766, "eval_runtime": 223.0891, "eval_samples_per_second": 921.201, "eval_steps_per_second": 57.578, "step": 208000 }, { "epoch": 0.89, "eval_loss": 2.523491621017456, "eval_runtime": 221.4007, "eval_samples_per_second": 928.227, "eval_steps_per_second": 58.017, "step": 216000 }, { "epoch": 0.92, "learning_rate": 3.7173333333333333e-07, "loss": 2.5955, "step": 224000 }, { "epoch": 0.92, "eval_loss": 2.5219199657440186, "eval_runtime": 221.3605, "eval_samples_per_second": 928.395, "eval_steps_per_second": 58.028, "step": 224000 }, { "epoch": 0.95, "eval_loss": 2.5190882682800293, "eval_runtime": 221.1449, "eval_samples_per_second": 929.3, "eval_steps_per_second": 58.084, "step": 232000 }, { "epoch": 0.98, "learning_rate": 3.69e-07, "loss": 2.6036, "step": 240000 }, { "epoch": 0.98, "eval_loss": 2.517120361328125, "eval_runtime": 220.9198, "eval_samples_per_second": 930.247, "eval_steps_per_second": 58.143, "step": 240000 }, { "epoch": 1.02, "eval_loss": 2.5102434158325195, "eval_runtime": 221.7647, "eval_samples_per_second": 926.703, "eval_steps_per_second": 57.922, "step": 248000 }, { "epoch": 1.05, "learning_rate": 3.6626666666666664e-07, "loss": 2.6046, "step": 256000 }, { "epoch": 1.05, "eval_loss": 2.5070137977600098, "eval_runtime": 221.6584, "eval_samples_per_second": 927.147, "eval_steps_per_second": 57.95, "step": 256000 }, { "epoch": 1.08, "eval_loss": 2.5109376907348633, "eval_runtime": 221.3382, "eval_samples_per_second": 928.489, "eval_steps_per_second": 58.033, "step": 264000 }, { "epoch": 1.11, "learning_rate": 3.6353333333333335e-07, "loss": 2.5892, "step": 272000 }, { "epoch": 1.11, "eval_loss": 2.5104565620422363, "eval_runtime": 222.1683, "eval_samples_per_second": 925.019, "eval_steps_per_second": 57.817, "step": 272000 }, { "epoch": 1.15, "eval_loss": 2.508704423904419, "eval_runtime": 222.9629, "eval_samples_per_second": 921.723, "eval_steps_per_second": 57.61, "step": 280000 }, { "epoch": 1.18, "learning_rate": 3.608e-07, "loss": 2.5929, "step": 288000 }, { "epoch": 1.18, "eval_loss": 2.509392738342285, "eval_runtime": 223.3494, "eval_samples_per_second": 920.128, "eval_steps_per_second": 57.511, "step": 288000 }, { "epoch": 1.21, "eval_loss": 2.508585214614868, "eval_runtime": 222.7314, "eval_samples_per_second": 922.681, "eval_steps_per_second": 57.67, "step": 296000 }, { "epoch": 1.25, "learning_rate": 3.5806666666666666e-07, "loss": 2.5857, "step": 304000 }, { "epoch": 1.25, "eval_loss": 2.4991345405578613, "eval_runtime": 223.3332, "eval_samples_per_second": 920.195, "eval_steps_per_second": 57.515, "step": 304000 }, { "epoch": 1.28, "eval_loss": 2.508927822113037, "eval_runtime": 224.1404, "eval_samples_per_second": 916.881, "eval_steps_per_second": 57.308, "step": 312000 }, { "epoch": 1.31, "learning_rate": 3.553333333333333e-07, "loss": 2.5828, "step": 320000 }, { "epoch": 1.31, "eval_loss": 2.501734972000122, "eval_runtime": 223.1146, "eval_samples_per_second": 921.096, "eval_steps_per_second": 57.571, "step": 320000 }, { "epoch": 1.34, "eval_loss": 2.503918409347534, "eval_runtime": 223.3327, "eval_samples_per_second": 920.196, "eval_steps_per_second": 57.515, "step": 328000 }, { "epoch": 1.38, "learning_rate": 3.5259999999999997e-07, "loss": 2.5812, "step": 336000 }, { "epoch": 1.38, "eval_loss": 2.5064587593078613, "eval_runtime": 224.1587, "eval_samples_per_second": 916.806, "eval_steps_per_second": 57.303, "step": 336000 }, { "epoch": 1.41, "eval_loss": 2.508263111114502, "eval_runtime": 222.503, "eval_samples_per_second": 923.628, "eval_steps_per_second": 57.73, "step": 344000 }, { "epoch": 1.44, "learning_rate": 3.498666666666667e-07, "loss": 2.5775, "step": 352000 }, { "epoch": 1.44, "eval_loss": 2.509936571121216, "eval_runtime": 223.039, "eval_samples_per_second": 921.408, "eval_steps_per_second": 57.591, "step": 352000 }, { "epoch": 1.48, "eval_loss": 2.5078811645507812, "eval_runtime": 221.7646, "eval_samples_per_second": 926.703, "eval_steps_per_second": 57.922, "step": 360000 }, { "epoch": 1.51, "learning_rate": 3.4713333333333333e-07, "loss": 2.5711, "step": 368000 }, { "epoch": 1.51, "eval_loss": 2.4922046661376953, "eval_runtime": 223.0544, "eval_samples_per_second": 921.345, "eval_steps_per_second": 57.587, "step": 368000 }, { "epoch": 1.54, "eval_loss": 2.5012030601501465, "eval_runtime": 222.0392, "eval_samples_per_second": 925.557, "eval_steps_per_second": 57.85, "step": 376000 }, { "epoch": 1.57, "learning_rate": 3.444e-07, "loss": 2.5797, "step": 384000 }, { "epoch": 1.57, "eval_loss": 2.49989914894104, "eval_runtime": 223.8829, "eval_samples_per_second": 917.935, "eval_steps_per_second": 57.374, "step": 384000 }, { "epoch": 1.61, "eval_loss": 2.4881107807159424, "eval_runtime": 222.4413, "eval_samples_per_second": 923.884, "eval_steps_per_second": 57.746, "step": 392000 }, { "epoch": 1.64, "learning_rate": 3.416666666666667e-07, "loss": 2.5718, "step": 400000 }, { "epoch": 1.64, "eval_loss": 2.4960451126098633, "eval_runtime": 222.8741, "eval_samples_per_second": 922.09, "eval_steps_per_second": 57.633, "step": 400000 }, { "epoch": 1.67, "eval_loss": 2.490837574005127, "eval_runtime": 222.3679, "eval_samples_per_second": 924.189, "eval_steps_per_second": 57.765, "step": 408000 }, { "epoch": 1.7, "learning_rate": 3.3893333333333335e-07, "loss": 2.5627, "step": 416000 }, { "epoch": 1.7, "eval_loss": 2.4970648288726807, "eval_runtime": 223.472, "eval_samples_per_second": 919.623, "eval_steps_per_second": 57.479, "step": 416000 }, { "epoch": 1.74, "eval_loss": 2.4916465282440186, "eval_runtime": 222.5109, "eval_samples_per_second": 923.595, "eval_steps_per_second": 57.728, "step": 424000 }, { "epoch": 1.77, "learning_rate": 3.3619999999999995e-07, "loss": 2.5641, "step": 432000 }, { "epoch": 1.77, "eval_loss": 2.4971389770507812, "eval_runtime": 222.1533, "eval_samples_per_second": 925.082, "eval_steps_per_second": 57.82, "step": 432000 }, { "epoch": 1.8, "eval_loss": 2.495426654815674, "eval_runtime": 223.2728, "eval_samples_per_second": 920.444, "eval_steps_per_second": 57.531, "step": 440000 }, { "epoch": 1.84, "learning_rate": 3.3346666666666666e-07, "loss": 2.5633, "step": 448000 }, { "epoch": 1.84, "eval_loss": 2.485994815826416, "eval_runtime": 222.7264, "eval_samples_per_second": 922.702, "eval_steps_per_second": 57.672, "step": 448000 }, { "epoch": 1.87, "eval_loss": 2.4893651008605957, "eval_runtime": 223.4251, "eval_samples_per_second": 919.816, "eval_steps_per_second": 57.491, "step": 456000 }, { "epoch": 1.9, "learning_rate": 3.307333333333333e-07, "loss": 2.5676, "step": 464000 }, { "epoch": 1.9, "eval_loss": 2.489337205886841, "eval_runtime": 222.9423, "eval_samples_per_second": 921.808, "eval_steps_per_second": 57.616, "step": 464000 }, { "epoch": 1.93, "eval_loss": 2.4883553981781006, "eval_runtime": 223.2404, "eval_samples_per_second": 920.577, "eval_steps_per_second": 57.539, "step": 472000 }, { "epoch": 1.97, "learning_rate": 3.28e-07, "loss": 2.5687, "step": 480000 }, { "epoch": 1.97, "eval_loss": 2.4921038150787354, "eval_runtime": 223.8809, "eval_samples_per_second": 917.943, "eval_steps_per_second": 57.374, "step": 480000 }, { "epoch": 2.0, "eval_loss": 2.4873294830322266, "eval_runtime": 222.8771, "eval_samples_per_second": 922.078, "eval_steps_per_second": 57.633, "step": 488000 }, { "epoch": 2.03, "learning_rate": 3.252666666666667e-07, "loss": 2.5633, "step": 496000 }, { "epoch": 2.03, "eval_loss": 2.4919497966766357, "eval_runtime": 222.6439, "eval_samples_per_second": 923.043, "eval_steps_per_second": 57.693, "step": 496000 }, { "epoch": 2.07, "eval_loss": 2.482137441635132, "eval_runtime": 222.747, "eval_samples_per_second": 922.616, "eval_steps_per_second": 57.666, "step": 504000 }, { "epoch": 2.1, "learning_rate": 3.2253333333333334e-07, "loss": 2.5547, "step": 512000 }, { "epoch": 2.1, "eval_loss": 2.490872621536255, "eval_runtime": 222.6765, "eval_samples_per_second": 922.908, "eval_steps_per_second": 57.685, "step": 512000 }, { "epoch": 2.13, "eval_loss": 2.4818356037139893, "eval_runtime": 223.7166, "eval_samples_per_second": 918.617, "eval_steps_per_second": 57.416, "step": 520000 }, { "epoch": 2.16, "learning_rate": 3.198e-07, "loss": 2.5617, "step": 528000 }, { "epoch": 2.16, "eval_loss": 2.4854869842529297, "eval_runtime": 223.7715, "eval_samples_per_second": 918.392, "eval_steps_per_second": 57.402, "step": 528000 }, { "epoch": 2.2, "eval_loss": 2.48504638671875, "eval_runtime": 223.6654, "eval_samples_per_second": 918.828, "eval_steps_per_second": 57.43, "step": 536000 }, { "epoch": 2.23, "learning_rate": 3.1706666666666665e-07, "loss": 2.5569, "step": 544000 }, { "epoch": 2.23, "eval_loss": 2.480282783508301, "eval_runtime": 222.7744, "eval_samples_per_second": 922.503, "eval_steps_per_second": 57.659, "step": 544000 }, { "epoch": 2.26, "eval_loss": 2.4775896072387695, "eval_runtime": 223.0018, "eval_samples_per_second": 921.562, "eval_steps_per_second": 57.6, "step": 552000 }, { "epoch": 2.29, "learning_rate": 3.1433333333333336e-07, "loss": 2.5535, "step": 560000 }, { "epoch": 2.29, "eval_loss": 2.4824471473693848, "eval_runtime": 223.1733, "eval_samples_per_second": 920.854, "eval_steps_per_second": 57.556, "step": 560000 }, { "epoch": 2.33, "eval_loss": 2.4821510314941406, "eval_runtime": 224.1586, "eval_samples_per_second": 916.806, "eval_steps_per_second": 57.303, "step": 568000 }, { "epoch": 2.36, "learning_rate": 3.116e-07, "loss": 2.5534, "step": 576000 }, { "epoch": 2.36, "eval_loss": 2.476337432861328, "eval_runtime": 223.4733, "eval_samples_per_second": 919.618, "eval_steps_per_second": 57.479, "step": 576000 }, { "epoch": 2.39, "eval_loss": 2.47969388961792, "eval_runtime": 224.2217, "eval_samples_per_second": 916.548, "eval_steps_per_second": 57.287, "step": 584000 }, { "epoch": 2.43, "learning_rate": 3.0886666666666667e-07, "loss": 2.5583, "step": 592000 }, { "epoch": 2.43, "eval_loss": 2.4872305393218994, "eval_runtime": 224.237, "eval_samples_per_second": 916.486, "eval_steps_per_second": 57.283, "step": 592000 }, { "epoch": 2.46, "eval_loss": 2.4812192916870117, "eval_runtime": 222.6272, "eval_samples_per_second": 923.113, "eval_steps_per_second": 57.697, "step": 600000 }, { "epoch": 2.49, "learning_rate": 3.061333333333333e-07, "loss": 2.5545, "step": 608000 }, { "epoch": 2.49, "eval_loss": 2.474827527999878, "eval_runtime": 223.4042, "eval_samples_per_second": 919.902, "eval_steps_per_second": 57.497, "step": 608000 }, { "epoch": 2.52, "eval_loss": 2.4735865592956543, "eval_runtime": 224.1504, "eval_samples_per_second": 916.84, "eval_steps_per_second": 57.305, "step": 616000 }, { "epoch": 2.56, "learning_rate": 3.034e-07, "loss": 2.5561, "step": 624000 }, { "epoch": 2.56, "eval_loss": 2.4714128971099854, "eval_runtime": 223.2085, "eval_samples_per_second": 920.709, "eval_steps_per_second": 57.547, "step": 624000 }, { "epoch": 2.59, "eval_loss": 2.485759973526001, "eval_runtime": 222.8361, "eval_samples_per_second": 922.247, "eval_steps_per_second": 57.643, "step": 632000 }, { "epoch": 2.62, "learning_rate": 3.0066666666666663e-07, "loss": 2.5384, "step": 640000 }, { "epoch": 2.62, "eval_loss": 2.482938289642334, "eval_runtime": 223.4494, "eval_samples_per_second": 919.716, "eval_steps_per_second": 57.485, "step": 640000 }, { "epoch": 2.66, "eval_loss": 2.47662091255188, "eval_runtime": 222.9171, "eval_samples_per_second": 921.912, "eval_steps_per_second": 57.622, "step": 648000 }, { "epoch": 2.69, "learning_rate": 2.9793333333333334e-07, "loss": 2.541, "step": 656000 }, { "epoch": 2.69, "eval_loss": 2.4835963249206543, "eval_runtime": 223.4062, "eval_samples_per_second": 919.894, "eval_steps_per_second": 57.496, "step": 656000 }, { "epoch": 2.72, "eval_loss": 2.465118408203125, "eval_runtime": 226.1239, "eval_samples_per_second": 908.838, "eval_steps_per_second": 56.805, "step": 664000 }, { "epoch": 2.75, "learning_rate": 2.952e-07, "loss": 2.5439, "step": 672000 }, { "epoch": 2.75, "eval_loss": 2.4797005653381348, "eval_runtime": 224.1173, "eval_samples_per_second": 916.975, "eval_steps_per_second": 57.314, "step": 672000 }, { "epoch": 2.79, "eval_loss": 2.4702000617980957, "eval_runtime": 223.8532, "eval_samples_per_second": 918.057, "eval_steps_per_second": 57.381, "step": 680000 }, { "epoch": 2.82, "learning_rate": 2.9246666666666665e-07, "loss": 2.5597, "step": 688000 }, { "epoch": 2.82, "eval_loss": 2.475144386291504, "eval_runtime": 223.4589, "eval_samples_per_second": 919.677, "eval_steps_per_second": 57.483, "step": 688000 }, { "epoch": 2.85, "eval_loss": 2.474367618560791, "eval_runtime": 222.9092, "eval_samples_per_second": 921.945, "eval_steps_per_second": 57.624, "step": 696000 }, { "epoch": 2.88, "learning_rate": 2.897333333333333e-07, "loss": 2.5491, "step": 704000 }, { "epoch": 2.88, "eval_loss": 2.4756221771240234, "eval_runtime": 223.5443, "eval_samples_per_second": 919.325, "eval_steps_per_second": 57.461, "step": 704000 }, { "epoch": 2.92, "eval_loss": 2.4731247425079346, "eval_runtime": 223.5397, "eval_samples_per_second": 919.345, "eval_steps_per_second": 57.462, "step": 712000 }, { "epoch": 2.95, "learning_rate": 2.8699999999999996e-07, "loss": 2.5505, "step": 720000 }, { "epoch": 2.95, "eval_loss": 2.475615978240967, "eval_runtime": 223.941, "eval_samples_per_second": 917.697, "eval_steps_per_second": 57.359, "step": 720000 }, { "epoch": 2.98, "eval_loss": 2.4703986644744873, "eval_runtime": 224.1288, "eval_samples_per_second": 916.928, "eval_steps_per_second": 57.311, "step": 728000 }, { "epoch": 3.02, "learning_rate": 2.8426666666666667e-07, "loss": 2.5432, "step": 736000 }, { "epoch": 3.02, "eval_loss": 2.4762611389160156, "eval_runtime": 223.7009, "eval_samples_per_second": 918.682, "eval_steps_per_second": 57.42, "step": 736000 }, { "epoch": 3.05, "eval_loss": 2.4743261337280273, "eval_runtime": 224.407, "eval_samples_per_second": 915.791, "eval_steps_per_second": 57.24, "step": 744000 }, { "epoch": 3.08, "learning_rate": 2.815333333333333e-07, "loss": 2.5485, "step": 752000 }, { "epoch": 3.08, "eval_loss": 2.4626660346984863, "eval_runtime": 224.0612, "eval_samples_per_second": 917.205, "eval_steps_per_second": 57.328, "step": 752000 }, { "epoch": 3.11, "eval_loss": 2.471444606781006, "eval_runtime": 223.4318, "eval_samples_per_second": 919.788, "eval_steps_per_second": 57.49, "step": 760000 }, { "epoch": 3.15, "learning_rate": 2.7880000000000003e-07, "loss": 2.5482, "step": 768000 }, { "epoch": 3.15, "eval_loss": 2.4684672355651855, "eval_runtime": 224.1026, "eval_samples_per_second": 917.035, "eval_steps_per_second": 57.317, "step": 768000 }, { "epoch": 3.18, "eval_loss": 2.4672694206237793, "eval_runtime": 224.9545, "eval_samples_per_second": 913.562, "eval_steps_per_second": 57.1, "step": 776000 }, { "epoch": 3.21, "learning_rate": 2.7606666666666664e-07, "loss": 2.5411, "step": 784000 }, { "epoch": 3.21, "eval_loss": 2.4726006984710693, "eval_runtime": 224.2901, "eval_samples_per_second": 916.269, "eval_steps_per_second": 57.27, "step": 784000 }, { "epoch": 3.25, "eval_loss": 2.476133108139038, "eval_runtime": 224.2831, "eval_samples_per_second": 916.297, "eval_steps_per_second": 57.271, "step": 792000 }, { "epoch": 3.28, "learning_rate": 2.733333333333333e-07, "loss": 2.5407, "step": 800000 }, { "epoch": 3.28, "eval_loss": 2.4611737728118896, "eval_runtime": 223.8119, "eval_samples_per_second": 918.226, "eval_steps_per_second": 57.392, "step": 800000 }, { "epoch": 3.31, "eval_loss": 2.4742894172668457, "eval_runtime": 224.8606, "eval_samples_per_second": 913.944, "eval_steps_per_second": 57.124, "step": 808000 }, { "epoch": 3.34, "learning_rate": 2.706e-07, "loss": 2.5307, "step": 816000 }, { "epoch": 3.34, "eval_loss": 2.469853401184082, "eval_runtime": 224.177, "eval_samples_per_second": 916.731, "eval_steps_per_second": 57.298, "step": 816000 }, { "epoch": 3.38, "eval_loss": 2.4721498489379883, "eval_runtime": 223.5611, "eval_samples_per_second": 919.256, "eval_steps_per_second": 57.456, "step": 824000 }, { "epoch": 3.41, "learning_rate": 2.6786666666666666e-07, "loss": 2.5391, "step": 832000 }, { "epoch": 3.41, "eval_loss": 2.461381435394287, "eval_runtime": 224.3794, "eval_samples_per_second": 915.904, "eval_steps_per_second": 57.247, "step": 832000 }, { "epoch": 3.44, "eval_loss": 2.4641225337982178, "eval_runtime": 224.9887, "eval_samples_per_second": 913.423, "eval_steps_per_second": 57.092, "step": 840000 }, { "epoch": 3.47, "learning_rate": 2.651333333333333e-07, "loss": 2.5378, "step": 848000 }, { "epoch": 3.47, "eval_loss": 2.4652435779571533, "eval_runtime": 225.299, "eval_samples_per_second": 912.166, "eval_steps_per_second": 57.013, "step": 848000 }, { "epoch": 3.51, "eval_loss": 2.4640512466430664, "eval_runtime": 224.7841, "eval_samples_per_second": 914.255, "eval_steps_per_second": 57.144, "step": 856000 }, { "epoch": 3.54, "learning_rate": 2.624e-07, "loss": 2.5399, "step": 864000 }, { "epoch": 3.54, "eval_loss": 2.469067096710205, "eval_runtime": 225.0567, "eval_samples_per_second": 913.148, "eval_steps_per_second": 57.075, "step": 864000 }, { "epoch": 3.57, "eval_loss": 2.4611856937408447, "eval_runtime": 224.6227, "eval_samples_per_second": 914.912, "eval_steps_per_second": 57.185, "step": 872000 }, { "epoch": 3.61, "learning_rate": 2.596666666666667e-07, "loss": 2.5412, "step": 880000 }, { "epoch": 3.61, "eval_loss": 2.469621419906616, "eval_runtime": 225.3239, "eval_samples_per_second": 912.065, "eval_steps_per_second": 57.007, "step": 880000 }, { "epoch": 3.64, "eval_loss": 2.4638073444366455, "eval_runtime": 224.7878, "eval_samples_per_second": 914.24, "eval_steps_per_second": 57.143, "step": 888000 }, { "epoch": 3.67, "learning_rate": 2.5693333333333333e-07, "loss": 2.5389, "step": 896000 }, { "epoch": 3.67, "eval_loss": 2.4658303260803223, "eval_runtime": 224.4987, "eval_samples_per_second": 915.417, "eval_steps_per_second": 57.216, "step": 896000 }, { "epoch": 3.7, "eval_loss": 2.4725189208984375, "eval_runtime": 226.711, "eval_samples_per_second": 906.484, "eval_steps_per_second": 56.658, "step": 904000 }, { "epoch": 3.74, "learning_rate": 2.542e-07, "loss": 2.5325, "step": 912000 }, { "epoch": 3.74, "eval_loss": 2.46415114402771, "eval_runtime": 225.3655, "eval_samples_per_second": 911.896, "eval_steps_per_second": 56.996, "step": 912000 }, { "epoch": 3.77, "eval_loss": 2.4599404335021973, "eval_runtime": 224.7087, "eval_samples_per_second": 914.562, "eval_steps_per_second": 57.163, "step": 920000 }, { "epoch": 3.8, "learning_rate": 2.5146666666666664e-07, "loss": 2.5351, "step": 928000 }, { "epoch": 3.8, "eval_loss": 2.4616599082946777, "eval_runtime": 226.6966, "eval_samples_per_second": 906.542, "eval_steps_per_second": 56.662, "step": 928000 }, { "epoch": 3.84, "eval_loss": 2.464627265930176, "eval_runtime": 224.9933, "eval_samples_per_second": 913.405, "eval_steps_per_second": 57.091, "step": 936000 }, { "epoch": 3.87, "learning_rate": 2.4873333333333335e-07, "loss": 2.522, "step": 944000 }, { "epoch": 3.87, "eval_loss": 2.4665021896362305, "eval_runtime": 225.0978, "eval_samples_per_second": 912.981, "eval_steps_per_second": 57.064, "step": 944000 }, { "epoch": 3.9, "eval_loss": 2.4761972427368164, "eval_runtime": 224.2641, "eval_samples_per_second": 916.375, "eval_steps_per_second": 57.276, "step": 952000 }, { "epoch": 3.93, "learning_rate": 2.46e-07, "loss": 2.5331, "step": 960000 }, { "epoch": 3.93, "eval_loss": 2.4668779373168945, "eval_runtime": 225.069, "eval_samples_per_second": 913.098, "eval_steps_per_second": 57.071, "step": 960000 }, { "epoch": 3.97, "eval_loss": 2.4549825191497803, "eval_runtime": 224.677, "eval_samples_per_second": 914.691, "eval_steps_per_second": 57.171, "step": 968000 }, { "epoch": 4.0, "learning_rate": 2.4326666666666666e-07, "loss": 2.5276, "step": 976000 }, { "epoch": 4.0, "eval_loss": 2.466226577758789, "eval_runtime": 224.6009, "eval_samples_per_second": 915.001, "eval_steps_per_second": 57.19, "step": 976000 }, { "epoch": 4.03, "eval_loss": 2.464536190032959, "eval_runtime": 224.76, "eval_samples_per_second": 914.353, "eval_steps_per_second": 57.15, "step": 984000 }, { "epoch": 4.06, "learning_rate": 2.405333333333333e-07, "loss": 2.5206, "step": 992000 }, { "epoch": 4.06, "eval_loss": 2.4586591720581055, "eval_runtime": 225.6548, "eval_samples_per_second": 910.727, "eval_steps_per_second": 56.923, "step": 992000 }, { "epoch": 4.1, "eval_loss": 2.47253680229187, "eval_runtime": 225.9081, "eval_samples_per_second": 909.706, "eval_steps_per_second": 56.859, "step": 1000000 }, { "epoch": 4.13, "learning_rate": 2.3779999999999997e-07, "loss": 2.5294, "step": 1008000 }, { "epoch": 4.13, "eval_loss": 2.458824634552002, "eval_runtime": 224.8489, "eval_samples_per_second": 913.991, "eval_steps_per_second": 57.127, "step": 1008000 }, { "epoch": 4.16, "eval_loss": 2.4591076374053955, "eval_runtime": 225.8271, "eval_samples_per_second": 910.032, "eval_steps_per_second": 56.88, "step": 1016000 }, { "epoch": 4.2, "learning_rate": 2.3506666666666668e-07, "loss": 2.5312, "step": 1024000 }, { "epoch": 4.2, "eval_loss": 2.4680891036987305, "eval_runtime": 226.319, "eval_samples_per_second": 908.055, "eval_steps_per_second": 56.756, "step": 1024000 }, { "epoch": 4.23, "eval_loss": 2.4624712467193604, "eval_runtime": 226.6472, "eval_samples_per_second": 906.74, "eval_steps_per_second": 56.674, "step": 1032000 }, { "epoch": 4.26, "learning_rate": 2.3233333333333334e-07, "loss": 2.525, "step": 1040000 }, { "epoch": 4.26, "eval_loss": 2.4659371376037598, "eval_runtime": 225.5287, "eval_samples_per_second": 911.237, "eval_steps_per_second": 56.955, "step": 1040000 }, { "epoch": 4.29, "eval_loss": 2.460909843444824, "eval_runtime": 225.5374, "eval_samples_per_second": 911.201, "eval_steps_per_second": 56.953, "step": 1048000 }, { "epoch": 4.33, "learning_rate": 2.2960000000000002e-07, "loss": 2.5318, "step": 1056000 }, { "epoch": 4.33, "eval_loss": 2.4571011066436768, "eval_runtime": 225.1138, "eval_samples_per_second": 912.916, "eval_steps_per_second": 57.06, "step": 1056000 }, { "epoch": 4.36, "eval_loss": 2.4581968784332275, "eval_runtime": 226.7154, "eval_samples_per_second": 906.467, "eval_steps_per_second": 56.657, "step": 1064000 }, { "epoch": 4.39, "learning_rate": 2.2686666666666667e-07, "loss": 2.5332, "step": 1072000 }, { "epoch": 4.39, "eval_loss": 2.456618547439575, "eval_runtime": 225.422, "eval_samples_per_second": 911.668, "eval_steps_per_second": 56.982, "step": 1072000 }, { "epoch": 4.43, "eval_loss": 2.4587738513946533, "eval_runtime": 226.533, "eval_samples_per_second": 907.197, "eval_steps_per_second": 56.703, "step": 1080000 }, { "epoch": 4.46, "learning_rate": 2.2413333333333333e-07, "loss": 2.5168, "step": 1088000 }, { "epoch": 4.46, "eval_loss": 2.4606146812438965, "eval_runtime": 226.9924, "eval_samples_per_second": 905.361, "eval_steps_per_second": 56.588, "step": 1088000 }, { "epoch": 4.49, "eval_loss": 2.4597506523132324, "eval_runtime": 228.0241, "eval_samples_per_second": 901.264, "eval_steps_per_second": 56.332, "step": 1096000 }, { "epoch": 4.52, "learning_rate": 2.214e-07, "loss": 2.5181, "step": 1104000 }, { "epoch": 4.52, "eval_loss": 2.454252004623413, "eval_runtime": 228.6537, "eval_samples_per_second": 898.783, "eval_steps_per_second": 56.177, "step": 1104000 }, { "epoch": 4.56, "eval_loss": 2.4619953632354736, "eval_runtime": 226.293, "eval_samples_per_second": 908.159, "eval_steps_per_second": 56.763, "step": 1112000 }, { "epoch": 4.59, "learning_rate": 2.1866666666666667e-07, "loss": 2.5246, "step": 1120000 }, { "epoch": 4.59, "eval_loss": 2.4638657569885254, "eval_runtime": 228.163, "eval_samples_per_second": 900.716, "eval_steps_per_second": 56.297, "step": 1120000 }, { "epoch": 4.62, "eval_loss": 2.4556171894073486, "eval_runtime": 228.3656, "eval_samples_per_second": 899.917, "eval_steps_per_second": 56.248, "step": 1128000 }, { "epoch": 4.65, "learning_rate": 2.1593333333333332e-07, "loss": 2.5318, "step": 1136000 }, { "epoch": 4.65, "eval_loss": 2.457075595855713, "eval_runtime": 227.349, "eval_samples_per_second": 903.941, "eval_steps_per_second": 56.499, "step": 1136000 }, { "epoch": 4.69, "eval_loss": 2.4636013507843018, "eval_runtime": 228.1517, "eval_samples_per_second": 900.76, "eval_steps_per_second": 56.3, "step": 1144000 }, { "epoch": 4.72, "learning_rate": 2.132e-07, "loss": 2.512, "step": 1152000 }, { "epoch": 4.72, "eval_loss": 2.4567556381225586, "eval_runtime": 228.1066, "eval_samples_per_second": 900.938, "eval_steps_per_second": 56.311, "step": 1152000 }, { "epoch": 4.75, "eval_loss": 2.4644010066986084, "eval_runtime": 228.916, "eval_samples_per_second": 897.753, "eval_steps_per_second": 56.112, "step": 1160000 }, { "epoch": 4.79, "learning_rate": 2.1046666666666666e-07, "loss": 2.5174, "step": 1168000 }, { "epoch": 4.79, "eval_loss": 2.4528720378875732, "eval_runtime": 228.2634, "eval_samples_per_second": 900.32, "eval_steps_per_second": 56.273, "step": 1168000 }, { "epoch": 4.82, "eval_loss": 2.4613921642303467, "eval_runtime": 228.3765, "eval_samples_per_second": 899.874, "eval_steps_per_second": 56.245, "step": 1176000 }, { "epoch": 4.85, "learning_rate": 2.0773333333333334e-07, "loss": 2.5196, "step": 1184000 }, { "epoch": 4.85, "eval_loss": 2.463758707046509, "eval_runtime": 227.0091, "eval_samples_per_second": 905.294, "eval_steps_per_second": 56.584, "step": 1184000 }, { "epoch": 4.88, "eval_loss": 2.453406572341919, "eval_runtime": 227.3538, "eval_samples_per_second": 903.921, "eval_steps_per_second": 56.498, "step": 1192000 }, { "epoch": 4.92, "learning_rate": 2.05e-07, "loss": 2.5248, "step": 1200000 }, { "epoch": 4.92, "eval_loss": 2.4553115367889404, "eval_runtime": 227.0142, "eval_samples_per_second": 905.274, "eval_steps_per_second": 56.582, "step": 1200000 }, { "epoch": 4.95, "eval_loss": 2.453683853149414, "eval_runtime": 226.8684, "eval_samples_per_second": 905.855, "eval_steps_per_second": 56.619, "step": 1208000 }, { "epoch": 4.98, "learning_rate": 2.0226666666666668e-07, "loss": 2.5201, "step": 1216000 }, { "epoch": 4.98, "eval_loss": 2.4578709602355957, "eval_runtime": 226.9695, "eval_samples_per_second": 905.452, "eval_steps_per_second": 56.594, "step": 1216000 }, { "epoch": 5.02, "eval_loss": 2.4524765014648438, "eval_runtime": 226.5657, "eval_samples_per_second": 907.066, "eval_steps_per_second": 56.694, "step": 1224000 }, { "epoch": 5.05, "learning_rate": 1.9953333333333333e-07, "loss": 2.5164, "step": 1232000 }, { "epoch": 5.05, "eval_loss": 2.4645235538482666, "eval_runtime": 227.3689, "eval_samples_per_second": 903.861, "eval_steps_per_second": 56.494, "step": 1232000 }, { "epoch": 5.08, "eval_loss": 2.447993040084839, "eval_runtime": 228.8072, "eval_samples_per_second": 898.18, "eval_steps_per_second": 56.139, "step": 1240000 }, { "epoch": 5.11, "learning_rate": 1.968e-07, "loss": 2.5186, "step": 1248000 }, { "epoch": 5.11, "eval_loss": 2.4605581760406494, "eval_runtime": 229.2086, "eval_samples_per_second": 896.607, "eval_steps_per_second": 56.041, "step": 1248000 }, { "epoch": 5.15, "eval_loss": 2.4623043537139893, "eval_runtime": 229.8264, "eval_samples_per_second": 894.197, "eval_steps_per_second": 55.89, "step": 1256000 }, { "epoch": 5.18, "learning_rate": 1.9406666666666667e-07, "loss": 2.5123, "step": 1264000 }, { "epoch": 5.18, "eval_loss": 2.456583261489868, "eval_runtime": 230.427, "eval_samples_per_second": 891.866, "eval_steps_per_second": 55.744, "step": 1264000 }, { "epoch": 5.21, "eval_loss": 2.464402437210083, "eval_runtime": 229.1803, "eval_samples_per_second": 896.717, "eval_steps_per_second": 56.048, "step": 1272000 }, { "epoch": 5.24, "learning_rate": 1.9133333333333333e-07, "loss": 2.5233, "step": 1280000 }, { "epoch": 5.24, "eval_loss": 2.457606792449951, "eval_runtime": 227.9824, "eval_samples_per_second": 901.429, "eval_steps_per_second": 56.342, "step": 1280000 }, { "epoch": 5.28, "eval_loss": 2.451943874359131, "eval_runtime": 229.374, "eval_samples_per_second": 895.96, "eval_steps_per_second": 56.0, "step": 1288000 }, { "epoch": 5.31, "learning_rate": 1.886e-07, "loss": 2.513, "step": 1296000 }, { "epoch": 5.31, "eval_loss": 2.456979513168335, "eval_runtime": 228.051, "eval_samples_per_second": 901.158, "eval_steps_per_second": 56.325, "step": 1296000 }, { "epoch": 5.34, "eval_loss": 2.462719202041626, "eval_runtime": 228.489, "eval_samples_per_second": 899.43, "eval_steps_per_second": 56.217, "step": 1304000 }, { "epoch": 5.38, "learning_rate": 1.8586666666666666e-07, "loss": 2.5226, "step": 1312000 }, { "epoch": 5.38, "eval_loss": 2.449977397918701, "eval_runtime": 227.9952, "eval_samples_per_second": 901.379, "eval_steps_per_second": 56.339, "step": 1312000 }, { "epoch": 5.41, "eval_loss": 2.4563188552856445, "eval_runtime": 227.2759, "eval_samples_per_second": 904.231, "eval_steps_per_second": 56.517, "step": 1320000 }, { "epoch": 5.44, "learning_rate": 1.8313333333333332e-07, "loss": 2.5222, "step": 1328000 }, { "epoch": 5.44, "eval_loss": 2.4521265029907227, "eval_runtime": 226.8418, "eval_samples_per_second": 905.962, "eval_steps_per_second": 56.625, "step": 1328000 }, { "epoch": 5.47, "eval_loss": 2.4591453075408936, "eval_runtime": 226.8374, "eval_samples_per_second": 905.98, "eval_steps_per_second": 56.626, "step": 1336000 }, { "epoch": 5.51, "learning_rate": 1.804e-07, "loss": 2.5191, "step": 1344000 }, { "epoch": 5.51, "eval_loss": 2.4508602619171143, "eval_runtime": 228.6931, "eval_samples_per_second": 898.628, "eval_steps_per_second": 56.167, "step": 1344000 }, { "epoch": 5.54, "eval_loss": 2.455850124359131, "eval_runtime": 228.2295, "eval_samples_per_second": 900.453, "eval_steps_per_second": 56.281, "step": 1352000 }, { "epoch": 5.57, "learning_rate": 1.7766666666666666e-07, "loss": 2.5243, "step": 1360000 }, { "epoch": 5.57, "eval_loss": 2.4501898288726807, "eval_runtime": 228.7596, "eval_samples_per_second": 898.367, "eval_steps_per_second": 56.151, "step": 1360000 }, { "epoch": 5.61, "eval_loss": 2.4514639377593994, "eval_runtime": 227.2719, "eval_samples_per_second": 904.247, "eval_steps_per_second": 56.518, "step": 1368000 }, { "epoch": 5.64, "learning_rate": 1.7493333333333334e-07, "loss": 2.5157, "step": 1376000 }, { "epoch": 5.64, "eval_loss": 2.4562854766845703, "eval_runtime": 227.9532, "eval_samples_per_second": 901.545, "eval_steps_per_second": 56.349, "step": 1376000 }, { "epoch": 5.67, "eval_loss": 2.452606678009033, "eval_runtime": 227.4532, "eval_samples_per_second": 903.527, "eval_steps_per_second": 56.473, "step": 1384000 }, { "epoch": 5.7, "learning_rate": 1.722e-07, "loss": 2.5162, "step": 1392000 }, { "epoch": 5.7, "eval_loss": 2.458620071411133, "eval_runtime": 228.2374, "eval_samples_per_second": 900.422, "eval_steps_per_second": 56.279, "step": 1392000 }, { "epoch": 5.74, "eval_loss": 2.458387613296509, "eval_runtime": 228.0105, "eval_samples_per_second": 901.318, "eval_steps_per_second": 56.335, "step": 1400000 }, { "epoch": 5.77, "learning_rate": 1.6946666666666668e-07, "loss": 2.5169, "step": 1408000 }, { "epoch": 5.77, "eval_loss": 2.454158067703247, "eval_runtime": 227.4312, "eval_samples_per_second": 903.614, "eval_steps_per_second": 56.479, "step": 1408000 }, { "epoch": 5.8, "eval_loss": 2.460242986679077, "eval_runtime": 228.5958, "eval_samples_per_second": 899.01, "eval_steps_per_second": 56.191, "step": 1416000 }, { "epoch": 5.84, "learning_rate": 1.6673333333333333e-07, "loss": 2.5127, "step": 1424000 }, { "epoch": 5.84, "eval_loss": 2.458707809448242, "eval_runtime": 228.0452, "eval_samples_per_second": 901.181, "eval_steps_per_second": 56.327, "step": 1424000 }, { "epoch": 5.87, "eval_loss": 2.452913284301758, "eval_runtime": 227.4908, "eval_samples_per_second": 903.377, "eval_steps_per_second": 56.464, "step": 1432000 }, { "epoch": 5.9, "learning_rate": 1.64e-07, "loss": 2.5144, "step": 1440000 }, { "epoch": 5.9, "eval_loss": 2.462021827697754, "eval_runtime": 229.4885, "eval_samples_per_second": 895.513, "eval_steps_per_second": 55.972, "step": 1440000 }, { "epoch": 5.93, "eval_loss": 2.450927972793579, "eval_runtime": 227.9748, "eval_samples_per_second": 901.459, "eval_steps_per_second": 56.344, "step": 1448000 }, { "epoch": 5.97, "learning_rate": 1.6126666666666667e-07, "loss": 2.5175, "step": 1456000 }, { "epoch": 5.97, "eval_loss": 2.4503204822540283, "eval_runtime": 227.5178, "eval_samples_per_second": 903.27, "eval_steps_per_second": 56.457, "step": 1456000 }, { "epoch": 6.0, "eval_loss": 2.4545462131500244, "eval_runtime": 227.7963, "eval_samples_per_second": 902.165, "eval_steps_per_second": 56.388, "step": 1464000 }, { "epoch": 6.03, "learning_rate": 1.5853333333333332e-07, "loss": 2.5147, "step": 1472000 }, { "epoch": 6.03, "eval_loss": 2.4440090656280518, "eval_runtime": 227.8162, "eval_samples_per_second": 902.087, "eval_steps_per_second": 56.383, "step": 1472000 }, { "epoch": 6.06, "eval_loss": 2.457670211791992, "eval_runtime": 228.5245, "eval_samples_per_second": 899.291, "eval_steps_per_second": 56.208, "step": 1480000 }, { "epoch": 6.1, "learning_rate": 1.558e-07, "loss": 2.5128, "step": 1488000 }, { "epoch": 6.1, "eval_loss": 2.456602096557617, "eval_runtime": 230.1502, "eval_samples_per_second": 892.939, "eval_steps_per_second": 55.811, "step": 1488000 }, { "epoch": 6.13, "eval_loss": 2.449889659881592, "eval_runtime": 228.3041, "eval_samples_per_second": 900.159, "eval_steps_per_second": 56.263, "step": 1496000 }, { "epoch": 6.16, "learning_rate": 1.5306666666666666e-07, "loss": 2.5168, "step": 1504000 }, { "epoch": 6.16, "eval_loss": 2.4480044841766357, "eval_runtime": 228.2508, "eval_samples_per_second": 900.369, "eval_steps_per_second": 56.276, "step": 1504000 }, { "epoch": 6.2, "eval_loss": 2.4436299800872803, "eval_runtime": 229.3638, "eval_samples_per_second": 896.0, "eval_steps_per_second": 56.003, "step": 1512000 }, { "epoch": 6.23, "learning_rate": 1.5033333333333332e-07, "loss": 2.5225, "step": 1520000 }, { "epoch": 6.23, "eval_loss": 2.446739912033081, "eval_runtime": 228.4899, "eval_samples_per_second": 899.427, "eval_steps_per_second": 56.217, "step": 1520000 }, { "epoch": 6.26, "eval_loss": 2.4519920349121094, "eval_runtime": 228.2075, "eval_samples_per_second": 900.54, "eval_steps_per_second": 56.286, "step": 1528000 }, { "epoch": 6.29, "learning_rate": 1.476e-07, "loss": 2.5135, "step": 1536000 }, { "epoch": 6.29, "eval_loss": 2.4535210132598877, "eval_runtime": 228.7342, "eval_samples_per_second": 898.466, "eval_steps_per_second": 56.157, "step": 1536000 }, { "epoch": 6.33, "eval_loss": 2.4462831020355225, "eval_runtime": 229.9473, "eval_samples_per_second": 893.727, "eval_steps_per_second": 55.861, "step": 1544000 }, { "epoch": 6.36, "learning_rate": 1.4486666666666665e-07, "loss": 2.5161, "step": 1552000 }, { "epoch": 6.36, "eval_loss": 2.4556400775909424, "eval_runtime": 228.5872, "eval_samples_per_second": 899.044, "eval_steps_per_second": 56.193, "step": 1552000 }, { "epoch": 6.39, "eval_loss": 2.4604580402374268, "eval_runtime": 229.1233, "eval_samples_per_second": 896.941, "eval_steps_per_second": 56.062, "step": 1560000 }, { "epoch": 6.43, "learning_rate": 1.4213333333333334e-07, "loss": 2.5144, "step": 1568000 }, { "epoch": 6.43, "eval_loss": 2.4516451358795166, "eval_runtime": 229.9726, "eval_samples_per_second": 893.628, "eval_steps_per_second": 55.854, "step": 1568000 }, { "epoch": 6.46, "eval_loss": 2.4487648010253906, "eval_runtime": 229.4253, "eval_samples_per_second": 895.76, "eval_steps_per_second": 55.988, "step": 1576000 }, { "epoch": 6.49, "learning_rate": 1.3940000000000002e-07, "loss": 2.5209, "step": 1584000 }, { "epoch": 6.49, "eval_loss": 2.4525067806243896, "eval_runtime": 228.8527, "eval_samples_per_second": 898.001, "eval_steps_per_second": 56.128, "step": 1584000 }, { "epoch": 6.52, "eval_loss": 2.450185537338257, "eval_runtime": 230.8087, "eval_samples_per_second": 890.391, "eval_steps_per_second": 55.652, "step": 1592000 }, { "epoch": 6.56, "learning_rate": 1.3666666666666665e-07, "loss": 2.5102, "step": 1600000 }, { "epoch": 6.56, "eval_loss": 2.453780174255371, "eval_runtime": 229.4733, "eval_samples_per_second": 895.573, "eval_steps_per_second": 55.976, "step": 1600000 }, { "epoch": 6.59, "eval_loss": 2.4490787982940674, "eval_runtime": 229.059, "eval_samples_per_second": 897.192, "eval_steps_per_second": 56.077, "step": 1608000 }, { "epoch": 6.62, "learning_rate": 1.3393333333333333e-07, "loss": 2.5176, "step": 1616000 }, { "epoch": 6.62, "eval_loss": 2.452752113342285, "eval_runtime": 228.4962, "eval_samples_per_second": 899.402, "eval_steps_per_second": 56.215, "step": 1616000 }, { "epoch": 6.65, "eval_loss": 2.44599986076355, "eval_runtime": 228.5114, "eval_samples_per_second": 899.342, "eval_steps_per_second": 56.212, "step": 1624000 }, { "epoch": 6.69, "learning_rate": 1.312e-07, "loss": 2.5208, "step": 1632000 }, { "epoch": 6.69, "eval_loss": 2.4484992027282715, "eval_runtime": 230.1605, "eval_samples_per_second": 892.899, "eval_steps_per_second": 55.809, "step": 1632000 }, { "epoch": 6.72, "eval_loss": 2.451284646987915, "eval_runtime": 229.1401, "eval_samples_per_second": 896.875, "eval_steps_per_second": 56.057, "step": 1640000 }, { "epoch": 6.75, "learning_rate": 1.2846666666666667e-07, "loss": 2.5064, "step": 1648000 }, { "epoch": 6.75, "eval_loss": 2.451927900314331, "eval_runtime": 229.3071, "eval_samples_per_second": 896.222, "eval_steps_per_second": 56.017, "step": 1648000 }, { "epoch": 6.79, "eval_loss": 2.449305295944214, "eval_runtime": 231.2204, "eval_samples_per_second": 888.806, "eval_steps_per_second": 55.553, "step": 1656000 }, { "epoch": 6.82, "learning_rate": 1.2573333333333332e-07, "loss": 2.5111, "step": 1664000 }, { "epoch": 6.82, "eval_loss": 2.4505178928375244, "eval_runtime": 230.2462, "eval_samples_per_second": 892.566, "eval_steps_per_second": 55.788, "step": 1664000 }, { "epoch": 6.85, "eval_loss": 2.4501988887786865, "eval_runtime": 229.973, "eval_samples_per_second": 893.627, "eval_steps_per_second": 55.854, "step": 1672000 }, { "epoch": 6.88, "learning_rate": 1.23e-07, "loss": 2.5141, "step": 1680000 }, { "epoch": 6.88, "eval_loss": 2.4560253620147705, "eval_runtime": 229.6465, "eval_samples_per_second": 894.897, "eval_steps_per_second": 55.934, "step": 1680000 }, { "epoch": 6.92, "eval_loss": 2.4499940872192383, "eval_runtime": 229.0726, "eval_samples_per_second": 897.139, "eval_steps_per_second": 56.074, "step": 1688000 }, { "epoch": 6.95, "learning_rate": 1.2026666666666666e-07, "loss": 2.5089, "step": 1696000 }, { "epoch": 6.95, "eval_loss": 2.4512550830841064, "eval_runtime": 228.4897, "eval_samples_per_second": 899.428, "eval_steps_per_second": 56.217, "step": 1696000 }, { "epoch": 6.98, "eval_loss": 2.4418201446533203, "eval_runtime": 229.377, "eval_samples_per_second": 895.949, "eval_steps_per_second": 56.0, "step": 1704000 }, { "epoch": 7.02, "learning_rate": 1.1753333333333334e-07, "loss": 2.5174, "step": 1712000 }, { "epoch": 7.02, "eval_loss": 2.447690010070801, "eval_runtime": 231.0137, "eval_samples_per_second": 889.601, "eval_steps_per_second": 55.603, "step": 1712000 }, { "epoch": 7.05, "eval_loss": 2.450817584991455, "eval_runtime": 231.6212, "eval_samples_per_second": 887.268, "eval_steps_per_second": 55.457, "step": 1720000 }, { "epoch": 7.08, "learning_rate": 1.1480000000000001e-07, "loss": 2.5198, "step": 1728000 }, { "epoch": 7.08, "eval_loss": 2.448648691177368, "eval_runtime": 230.9308, "eval_samples_per_second": 889.92, "eval_steps_per_second": 55.623, "step": 1728000 }, { "epoch": 7.11, "eval_loss": 2.4577322006225586, "eval_runtime": 230.4865, "eval_samples_per_second": 891.636, "eval_steps_per_second": 55.73, "step": 1736000 }, { "epoch": 7.15, "learning_rate": 1.1206666666666666e-07, "loss": 2.4974, "step": 1744000 }, { "epoch": 7.15, "eval_loss": 2.4416255950927734, "eval_runtime": 229.8237, "eval_samples_per_second": 894.207, "eval_steps_per_second": 55.891, "step": 1744000 }, { "epoch": 7.18, "eval_loss": 2.4549336433410645, "eval_runtime": 229.7571, "eval_samples_per_second": 894.466, "eval_steps_per_second": 55.907, "step": 1752000 }, { "epoch": 7.21, "learning_rate": 1.0933333333333333e-07, "loss": 2.5016, "step": 1760000 }, { "epoch": 7.21, "eval_loss": 2.455679416656494, "eval_runtime": 230.1335, "eval_samples_per_second": 893.003, "eval_steps_per_second": 55.815, "step": 1760000 }, { "epoch": 7.24, "eval_loss": 2.4531571865081787, "eval_runtime": 231.3847, "eval_samples_per_second": 888.175, "eval_steps_per_second": 55.514, "step": 1768000 }, { "epoch": 7.28, "learning_rate": 1.066e-07, "loss": 2.5112, "step": 1776000 }, { "epoch": 7.28, "eval_loss": 2.445054531097412, "eval_runtime": 231.2999, "eval_samples_per_second": 888.5, "eval_steps_per_second": 55.534, "step": 1776000 }, { "epoch": 7.31, "eval_loss": 2.460723638534546, "eval_runtime": 230.196, "eval_samples_per_second": 892.761, "eval_steps_per_second": 55.8, "step": 1784000 }, { "epoch": 7.34, "learning_rate": 1.0386666666666667e-07, "loss": 2.5172, "step": 1792000 }, { "epoch": 7.34, "eval_loss": 2.4451537132263184, "eval_runtime": 231.1406, "eval_samples_per_second": 889.112, "eval_steps_per_second": 55.572, "step": 1792000 }, { "epoch": 7.38, "eval_loss": 2.4426777362823486, "eval_runtime": 230.7159, "eval_samples_per_second": 890.749, "eval_steps_per_second": 55.675, "step": 1800000 }, { "epoch": 7.41, "learning_rate": 1.0113333333333334e-07, "loss": 2.5089, "step": 1808000 }, { "epoch": 7.41, "eval_loss": 2.4511077404022217, "eval_runtime": 231.5975, "eval_samples_per_second": 887.359, "eval_steps_per_second": 55.463, "step": 1808000 }, { "epoch": 7.44, "eval_loss": 2.4440526962280273, "eval_runtime": 231.4447, "eval_samples_per_second": 887.944, "eval_steps_per_second": 55.499, "step": 1816000 }, { "epoch": 7.47, "learning_rate": 9.84e-08, "loss": 2.5136, "step": 1824000 }, { "epoch": 7.47, "eval_loss": 2.4492361545562744, "eval_runtime": 231.7181, "eval_samples_per_second": 886.896, "eval_steps_per_second": 55.434, "step": 1824000 }, { "epoch": 7.51, "eval_loss": 2.4523823261260986, "eval_runtime": 231.3659, "eval_samples_per_second": 888.247, "eval_steps_per_second": 55.518, "step": 1832000 }, { "epoch": 7.54, "learning_rate": 9.566666666666666e-08, "loss": 2.509, "step": 1840000 }, { "epoch": 7.54, "eval_loss": 2.451181411743164, "eval_runtime": 230.8127, "eval_samples_per_second": 890.376, "eval_steps_per_second": 55.651, "step": 1840000 }, { "epoch": 7.57, "eval_loss": 2.4528069496154785, "eval_runtime": 230.6096, "eval_samples_per_second": 891.16, "eval_steps_per_second": 55.7, "step": 1848000 }, { "epoch": 7.61, "learning_rate": 9.293333333333333e-08, "loss": 2.5157, "step": 1856000 }, { "epoch": 7.61, "eval_loss": 2.4439537525177, "eval_runtime": 233.382, "eval_samples_per_second": 880.573, "eval_steps_per_second": 55.039, "step": 1856000 }, { "epoch": 7.64, "eval_loss": 2.4401602745056152, "eval_runtime": 231.584, "eval_samples_per_second": 887.41, "eval_steps_per_second": 55.466, "step": 1864000 }, { "epoch": 7.67, "learning_rate": 9.02e-08, "loss": 2.5181, "step": 1872000 }, { "epoch": 7.67, "eval_loss": 2.4537830352783203, "eval_runtime": 230.4518, "eval_samples_per_second": 891.77, "eval_steps_per_second": 55.738, "step": 1872000 }, { "epoch": 7.7, "eval_loss": 2.4480724334716797, "eval_runtime": 229.9532, "eval_samples_per_second": 893.703, "eval_steps_per_second": 55.859, "step": 1880000 }, { "epoch": 7.74, "learning_rate": 8.746666666666667e-08, "loss": 2.5145, "step": 1888000 }, { "epoch": 7.74, "eval_loss": 2.4417428970336914, "eval_runtime": 231.464, "eval_samples_per_second": 887.87, "eval_steps_per_second": 55.495, "step": 1888000 }, { "epoch": 7.77, "eval_loss": 2.4512147903442383, "eval_runtime": 231.0711, "eval_samples_per_second": 889.38, "eval_steps_per_second": 55.589, "step": 1896000 }, { "epoch": 7.8, "learning_rate": 8.473333333333334e-08, "loss": 2.5013, "step": 1904000 }, { "epoch": 7.8, "eval_loss": 2.45603084564209, "eval_runtime": 231.877, "eval_samples_per_second": 886.289, "eval_steps_per_second": 55.396, "step": 1904000 }, { "epoch": 7.83, "eval_loss": 2.4508955478668213, "eval_runtime": 230.4147, "eval_samples_per_second": 891.913, "eval_steps_per_second": 55.747, "step": 1912000 }, { "epoch": 7.87, "learning_rate": 8.2e-08, "loss": 2.5064, "step": 1920000 }, { "epoch": 7.87, "eval_loss": 2.447256565093994, "eval_runtime": 231.4505, "eval_samples_per_second": 887.922, "eval_steps_per_second": 55.498, "step": 1920000 }, { "epoch": 7.9, "eval_loss": 2.457575559616089, "eval_runtime": 232.2387, "eval_samples_per_second": 884.908, "eval_steps_per_second": 55.309, "step": 1928000 }, { "epoch": 7.93, "learning_rate": 7.926666666666666e-08, "loss": 2.5068, "step": 1936000 }, { "epoch": 7.93, "eval_loss": 2.4460949897766113, "eval_runtime": 230.6448, "eval_samples_per_second": 891.024, "eval_steps_per_second": 55.692, "step": 1936000 }, { "epoch": 7.97, "eval_loss": 2.4451067447662354, "eval_runtime": 231.6713, "eval_samples_per_second": 887.076, "eval_steps_per_second": 55.445, "step": 1944000 }, { "epoch": 8.0, "learning_rate": 7.653333333333333e-08, "loss": 2.5152, "step": 1952000 }, { "epoch": 8.0, "eval_loss": 2.442117214202881, "eval_runtime": 231.4315, "eval_samples_per_second": 887.995, "eval_steps_per_second": 55.502, "step": 1952000 }, { "epoch": 8.03, "eval_loss": 2.4458179473876953, "eval_runtime": 230.6413, "eval_samples_per_second": 891.037, "eval_steps_per_second": 55.693, "step": 1960000 }, { "epoch": 8.06, "learning_rate": 7.38e-08, "loss": 2.5025, "step": 1968000 }, { "epoch": 8.06, "eval_loss": 2.4532368183135986, "eval_runtime": 230.9812, "eval_samples_per_second": 889.726, "eval_steps_per_second": 55.611, "step": 1968000 }, { "epoch": 8.1, "eval_loss": 2.4541139602661133, "eval_runtime": 231.1965, "eval_samples_per_second": 888.898, "eval_steps_per_second": 55.559, "step": 1976000 }, { "epoch": 8.13, "learning_rate": 7.106666666666667e-08, "loss": 2.5151, "step": 1984000 }, { "epoch": 8.13, "eval_loss": 2.4499058723449707, "eval_runtime": 231.2124, "eval_samples_per_second": 888.836, "eval_steps_per_second": 55.555, "step": 1984000 }, { "epoch": 8.16, "eval_loss": 2.4501264095306396, "eval_runtime": 231.2241, "eval_samples_per_second": 888.791, "eval_steps_per_second": 55.552, "step": 1992000 }, { "epoch": 8.2, "learning_rate": 6.833333333333332e-08, "loss": 2.5138, "step": 2000000 }, { "epoch": 8.2, "eval_loss": 2.444784641265869, "eval_runtime": 231.6831, "eval_samples_per_second": 887.031, "eval_steps_per_second": 55.442, "step": 2000000 }, { "epoch": 8.23, "eval_loss": 2.4562456607818604, "eval_runtime": 231.974, "eval_samples_per_second": 885.918, "eval_steps_per_second": 55.373, "step": 2008000 }, { "epoch": 8.26, "learning_rate": 6.56e-08, "loss": 2.5039, "step": 2016000 }, { "epoch": 8.26, "eval_loss": 2.4612646102905273, "eval_runtime": 234.4229, "eval_samples_per_second": 876.663, "eval_steps_per_second": 54.794, "step": 2016000 }, { "epoch": 8.29, "eval_loss": 2.4471163749694824, "eval_runtime": 233.3806, "eval_samples_per_second": 880.579, "eval_steps_per_second": 55.039, "step": 2024000 }, { "epoch": 8.33, "learning_rate": 6.286666666666666e-08, "loss": 2.5055, "step": 2032000 }, { "epoch": 8.33, "eval_loss": 2.445026159286499, "eval_runtime": 233.3418, "eval_samples_per_second": 880.725, "eval_steps_per_second": 55.048, "step": 2032000 }, { "epoch": 8.36, "eval_loss": 2.4492921829223633, "eval_runtime": 232.3875, "eval_samples_per_second": 884.342, "eval_steps_per_second": 55.274, "step": 2040000 }, { "epoch": 8.39, "learning_rate": 6.013333333333333e-08, "loss": 2.5085, "step": 2048000 }, { "epoch": 8.39, "eval_loss": 2.448164224624634, "eval_runtime": 233.4578, "eval_samples_per_second": 880.288, "eval_steps_per_second": 55.021, "step": 2048000 }, { "epoch": 8.42, "eval_loss": 2.4571895599365234, "eval_runtime": 235.4355, "eval_samples_per_second": 872.893, "eval_steps_per_second": 54.558, "step": 2056000 }, { "epoch": 8.46, "learning_rate": 5.7400000000000004e-08, "loss": 2.5114, "step": 2064000 }, { "epoch": 8.46, "eval_loss": 2.444307804107666, "eval_runtime": 234.4924, "eval_samples_per_second": 876.404, "eval_steps_per_second": 54.778, "step": 2064000 }, { "epoch": 8.49, "eval_loss": 2.445603132247925, "eval_runtime": 234.6223, "eval_samples_per_second": 875.919, "eval_steps_per_second": 54.748, "step": 2072000 }, { "epoch": 8.52, "learning_rate": 5.4666666666666666e-08, "loss": 2.5132, "step": 2080000 }, { "epoch": 8.52, "eval_loss": 2.4528441429138184, "eval_runtime": 234.3887, "eval_samples_per_second": 876.791, "eval_steps_per_second": 54.802, "step": 2080000 }, { "epoch": 8.56, "eval_loss": 2.449744939804077, "eval_runtime": 233.1003, "eval_samples_per_second": 881.638, "eval_steps_per_second": 55.105, "step": 2088000 }, { "epoch": 8.59, "learning_rate": 5.1933333333333335e-08, "loss": 2.5072, "step": 2096000 }, { "epoch": 8.59, "eval_loss": 2.4547877311706543, "eval_runtime": 232.2237, "eval_samples_per_second": 884.966, "eval_steps_per_second": 55.313, "step": 2096000 }, { "epoch": 8.62, "eval_loss": 2.4547617435455322, "eval_runtime": 232.0067, "eval_samples_per_second": 885.794, "eval_steps_per_second": 55.365, "step": 2104000 }, { "epoch": 8.65, "learning_rate": 4.92e-08, "loss": 2.504, "step": 2112000 }, { "epoch": 8.65, "eval_loss": 2.444261312484741, "eval_runtime": 232.4079, "eval_samples_per_second": 884.264, "eval_steps_per_second": 55.269, "step": 2112000 }, { "epoch": 8.69, "eval_loss": 2.445204734802246, "eval_runtime": 233.2645, "eval_samples_per_second": 881.017, "eval_steps_per_second": 55.066, "step": 2120000 }, { "epoch": 8.72, "learning_rate": 4.6466666666666666e-08, "loss": 2.5128, "step": 2128000 }, { "epoch": 8.72, "eval_loss": 2.4509565830230713, "eval_runtime": 233.1857, "eval_samples_per_second": 881.315, "eval_steps_per_second": 55.085, "step": 2128000 }, { "epoch": 8.75, "eval_loss": 2.447999954223633, "eval_runtime": 233.2452, "eval_samples_per_second": 881.09, "eval_steps_per_second": 55.071, "step": 2136000 }, { "epoch": 8.79, "learning_rate": 4.3733333333333335e-08, "loss": 2.5133, "step": 2144000 }, { "epoch": 8.79, "eval_loss": 2.4470479488372803, "eval_runtime": 234.9529, "eval_samples_per_second": 874.686, "eval_steps_per_second": 54.671, "step": 2144000 }, { "epoch": 8.82, "eval_loss": 2.4436631202697754, "eval_runtime": 234.9836, "eval_samples_per_second": 874.572, "eval_steps_per_second": 54.663, "step": 2152000 }, { "epoch": 8.85, "learning_rate": 4.1e-08, "loss": 2.5067, "step": 2160000 }, { "epoch": 8.85, "eval_loss": 2.444672107696533, "eval_runtime": 234.3233, "eval_samples_per_second": 877.036, "eval_steps_per_second": 54.817, "step": 2160000 }, { "epoch": 8.88, "eval_loss": 2.453118085861206, "eval_runtime": 233.5384, "eval_samples_per_second": 879.984, "eval_steps_per_second": 55.002, "step": 2168000 }, { "epoch": 8.92, "learning_rate": 3.8266666666666665e-08, "loss": 2.4996, "step": 2176000 }, { "epoch": 8.92, "eval_loss": 2.447479009628296, "eval_runtime": 235.7844, "eval_samples_per_second": 871.601, "eval_steps_per_second": 54.478, "step": 2176000 }, { "epoch": 8.95, "eval_loss": 2.4438347816467285, "eval_runtime": 233.6193, "eval_samples_per_second": 879.679, "eval_steps_per_second": 54.983, "step": 2184000 }, { "epoch": 8.98, "learning_rate": 3.5533333333333334e-08, "loss": 2.5123, "step": 2192000 }, { "epoch": 8.98, "eval_loss": 2.4552195072174072, "eval_runtime": 235.201, "eval_samples_per_second": 873.763, "eval_steps_per_second": 54.613, "step": 2192000 }, { "epoch": 9.01, "eval_loss": 2.4441311359405518, "eval_runtime": 234.6948, "eval_samples_per_second": 875.648, "eval_steps_per_second": 54.731, "step": 2200000 }, { "epoch": 9.05, "learning_rate": 3.28e-08, "loss": 2.5044, "step": 2208000 }, { "epoch": 9.05, "eval_loss": 2.4438366889953613, "eval_runtime": 233.1145, "eval_samples_per_second": 881.584, "eval_steps_per_second": 55.102, "step": 2208000 }, { "epoch": 9.08, "eval_loss": 2.453371286392212, "eval_runtime": 234.9783, "eval_samples_per_second": 874.592, "eval_steps_per_second": 54.665, "step": 2216000 }, { "epoch": 9.11, "learning_rate": 3.0066666666666665e-08, "loss": 2.5068, "step": 2224000 }, { "epoch": 9.11, "eval_loss": 2.449671745300293, "eval_runtime": 232.7881, "eval_samples_per_second": 882.82, "eval_steps_per_second": 55.179, "step": 2224000 }, { "epoch": 9.15, "eval_loss": 2.444044828414917, "eval_runtime": 233.2255, "eval_samples_per_second": 881.164, "eval_steps_per_second": 55.075, "step": 2232000 }, { "epoch": 9.18, "learning_rate": 2.7333333333333333e-08, "loss": 2.5165, "step": 2240000 }, { "epoch": 9.18, "eval_loss": 2.457695722579956, "eval_runtime": 234.2621, "eval_samples_per_second": 877.265, "eval_steps_per_second": 54.832, "step": 2240000 }, { "epoch": 9.21, "eval_loss": 2.4506990909576416, "eval_runtime": 232.4001, "eval_samples_per_second": 884.294, "eval_steps_per_second": 55.271, "step": 2248000 }, { "epoch": 9.24, "learning_rate": 2.46e-08, "loss": 2.5087, "step": 2256000 }, { "epoch": 9.24, "eval_loss": 2.4494166374206543, "eval_runtime": 233.0606, "eval_samples_per_second": 881.788, "eval_steps_per_second": 55.114, "step": 2256000 }, { "epoch": 9.28, "eval_loss": 2.4393150806427, "eval_runtime": 234.1923, "eval_samples_per_second": 877.527, "eval_steps_per_second": 54.848, "step": 2264000 }, { "epoch": 9.31, "learning_rate": 2.1866666666666667e-08, "loss": 2.5036, "step": 2272000 }, { "epoch": 9.31, "eval_loss": 2.4486756324768066, "eval_runtime": 233.5876, "eval_samples_per_second": 879.798, "eval_steps_per_second": 54.99, "step": 2272000 }, { "epoch": 9.34, "eval_loss": 2.442298173904419, "eval_runtime": 233.1053, "eval_samples_per_second": 881.619, "eval_steps_per_second": 55.104, "step": 2280000 }, { "epoch": 9.38, "learning_rate": 1.9133333333333333e-08, "loss": 2.5086, "step": 2288000 }, { "epoch": 9.38, "eval_loss": 2.4455623626708984, "eval_runtime": 232.7856, "eval_samples_per_second": 882.83, "eval_steps_per_second": 55.18, "step": 2288000 }, { "epoch": 9.41, "eval_loss": 2.449575185775757, "eval_runtime": 234.5471, "eval_samples_per_second": 876.199, "eval_steps_per_second": 54.765, "step": 2296000 }, { "epoch": 9.44, "learning_rate": 1.64e-08, "loss": 2.5034, "step": 2304000 }, { "epoch": 9.44, "eval_loss": 2.4498891830444336, "eval_runtime": 232.7935, "eval_samples_per_second": 882.8, "eval_steps_per_second": 55.178, "step": 2304000 }, { "epoch": 9.47, "eval_loss": 2.4432790279388428, "eval_runtime": 233.6332, "eval_samples_per_second": 879.627, "eval_steps_per_second": 54.979, "step": 2312000 }, { "epoch": 9.51, "learning_rate": 1.3666666666666667e-08, "loss": 2.5099, "step": 2320000 }, { "epoch": 9.51, "eval_loss": 2.4534084796905518, "eval_runtime": 233.6601, "eval_samples_per_second": 879.525, "eval_steps_per_second": 54.973, "step": 2320000 }, { "epoch": 9.54, "eval_loss": 2.4494857788085938, "eval_runtime": 233.4959, "eval_samples_per_second": 880.144, "eval_steps_per_second": 55.012, "step": 2328000 }, { "epoch": 9.57, "learning_rate": 1.0933333333333334e-08, "loss": 2.5065, "step": 2336000 }, { "epoch": 9.57, "eval_loss": 2.4510202407836914, "eval_runtime": 233.7041, "eval_samples_per_second": 879.36, "eval_steps_per_second": 54.963, "step": 2336000 }, { "epoch": 9.6, "eval_loss": 2.4512877464294434, "eval_runtime": 237.5888, "eval_samples_per_second": 864.982, "eval_steps_per_second": 54.064, "step": 2344000 }, { "epoch": 9.64, "learning_rate": 8.2e-09, "loss": 2.502, "step": 2352000 }, { "epoch": 9.64, "eval_loss": 2.451225996017456, "eval_runtime": 233.4207, "eval_samples_per_second": 880.427, "eval_steps_per_second": 55.029, "step": 2352000 }, { "epoch": 9.67, "eval_loss": 2.4469268321990967, "eval_runtime": 234.7938, "eval_samples_per_second": 875.279, "eval_steps_per_second": 54.708, "step": 2360000 }, { "epoch": 9.7, "learning_rate": 5.466666666666667e-09, "loss": 2.5043, "step": 2368000 }, { "epoch": 9.7, "eval_loss": 2.4544479846954346, "eval_runtime": 234.2869, "eval_samples_per_second": 877.173, "eval_steps_per_second": 54.826, "step": 2368000 }, { "epoch": 9.74, "eval_loss": 2.4492740631103516, "eval_runtime": 234.1805, "eval_samples_per_second": 877.571, "eval_steps_per_second": 54.851, "step": 2376000 }, { "epoch": 9.77, "learning_rate": 2.7333333333333334e-09, "loss": 2.5068, "step": 2384000 }, { "epoch": 9.77, "eval_loss": 2.453711748123169, "eval_runtime": 233.0608, "eval_samples_per_second": 881.787, "eval_steps_per_second": 55.114, "step": 2384000 }, { "epoch": 9.8, "eval_loss": 2.4386837482452393, "eval_runtime": 234.1662, "eval_samples_per_second": 877.625, "eval_steps_per_second": 54.854, "step": 2392000 }, { "epoch": 9.83, "learning_rate": 0.0, "loss": 2.5118, "step": 2400000 }, { "epoch": 9.83, "eval_loss": 2.4494030475616455, "eval_runtime": 234.0187, "eval_samples_per_second": 878.178, "eval_steps_per_second": 54.889, "step": 2400000 }, { "epoch": 9.83, "step": 2400000, "total_flos": 7.305293129309786e+17, "train_loss": 2.5438934391276042, "train_runtime": 220778.1092, "train_samples_per_second": 173.93, "train_steps_per_second": 10.871 } ], "logging_steps": 16000, "max_steps": 2400000, "num_train_epochs": 10, "save_steps": 32000, "total_flos": 7.305293129309786e+17, "trial_name": null, "trial_params": null }