{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.962131837307153, "global_step": 132, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 5e-06, "loss": 1.5608, "step": 1 }, { "epoch": 0.04, "learning_rate": 1e-05, "loss": 1.537, "step": 2 }, { "epoch": 0.07, "learning_rate": 1.5000000000000002e-05, "loss": 1.4795, "step": 3 }, { "epoch": 0.09, "learning_rate": 2e-05, "loss": 1.369, "step": 4 }, { "epoch": 0.11, "learning_rate": 1.9996988186962044e-05, "loss": 1.3342, "step": 5 }, { "epoch": 0.13, "learning_rate": 1.9987954562051724e-05, "loss": 1.3373, "step": 6 }, { "epoch": 0.16, "learning_rate": 1.9972904566786903e-05, "loss": 1.2752, "step": 7 }, { "epoch": 0.18, "learning_rate": 1.995184726672197e-05, "loss": 1.3176, "step": 8 }, { "epoch": 0.2, "learning_rate": 1.9924795345987103e-05, "loss": 1.3508, "step": 9 }, { "epoch": 0.22, "learning_rate": 1.989176509964781e-05, "loss": 1.3623, "step": 10 }, { "epoch": 0.25, "learning_rate": 1.9852776423889414e-05, "loss": 1.3363, "step": 11 }, { "epoch": 0.27, "learning_rate": 1.9807852804032306e-05, "loss": 1.2854, "step": 12 }, { "epoch": 0.29, "learning_rate": 1.9757021300385288e-05, "loss": 1.3665, "step": 13 }, { "epoch": 0.31, "learning_rate": 1.9700312531945444e-05, "loss": 1.3254, "step": 14 }, { "epoch": 0.34, "learning_rate": 1.96377606579544e-05, "loss": 1.3074, "step": 15 }, { "epoch": 0.36, "learning_rate": 1.956940335732209e-05, "loss": 1.2471, "step": 16 }, { "epoch": 0.38, "learning_rate": 1.949528180593037e-05, "loss": 1.295, "step": 17 }, { "epoch": 0.4, "learning_rate": 1.941544065183021e-05, "loss": 1.3434, "step": 18 }, { "epoch": 0.43, "learning_rate": 1.932992798834739e-05, "loss": 1.319, "step": 19 }, { "epoch": 0.45, "learning_rate": 1.9238795325112867e-05, "loss": 1.3529, "step": 20 }, { "epoch": 0.47, "learning_rate": 1.914209755703531e-05, "loss": 1.3155, "step": 21 }, { "epoch": 0.49, "learning_rate": 1.9039892931234434e-05, "loss": 1.3072, "step": 22 }, { "epoch": 0.52, "learning_rate": 1.8932243011955154e-05, "loss": 1.2897, "step": 23 }, { "epoch": 0.54, "learning_rate": 1.881921264348355e-05, "loss": 1.382, "step": 24 }, { "epoch": 0.56, "learning_rate": 1.8700869911087115e-05, "loss": 1.3016, "step": 25 }, { "epoch": 0.58, "learning_rate": 1.8577286100002723e-05, "loss": 1.2886, "step": 26 }, { "epoch": 0.61, "learning_rate": 1.8448535652497073e-05, "loss": 1.3313, "step": 27 }, { "epoch": 0.63, "learning_rate": 1.8314696123025456e-05, "loss": 1.301, "step": 28 }, { "epoch": 0.65, "learning_rate": 1.817584813151584e-05, "loss": 1.3474, "step": 29 }, { "epoch": 0.67, "learning_rate": 1.803207531480645e-05, "loss": 1.3523, "step": 30 }, { "epoch": 0.7, "learning_rate": 1.7883464276266064e-05, "loss": 1.3423, "step": 31 }, { "epoch": 0.72, "learning_rate": 1.773010453362737e-05, "loss": 1.2947, "step": 32 }, { "epoch": 0.74, "learning_rate": 1.7572088465064847e-05, "loss": 1.3074, "step": 33 }, { "epoch": 0.76, "learning_rate": 1.7409511253549592e-05, "loss": 1.3555, "step": 34 }, { "epoch": 0.79, "learning_rate": 1.7242470829514674e-05, "loss": 1.2528, "step": 35 }, { "epoch": 0.81, "learning_rate": 1.7071067811865477e-05, "loss": 1.3164, "step": 36 }, { "epoch": 0.83, "learning_rate": 1.689540544737067e-05, "loss": 1.3135, "step": 37 }, { "epoch": 0.85, "learning_rate": 1.6715589548470187e-05, "loss": 1.3569, "step": 38 }, { "epoch": 0.88, "learning_rate": 1.6531728429537766e-05, "loss": 1.2685, "step": 39 }, { "epoch": 0.9, "learning_rate": 1.6343932841636455e-05, "loss": 1.3216, "step": 40 }, { "epoch": 0.92, "learning_rate": 1.615231590580627e-05, "loss": 1.3005, "step": 41 }, { "epoch": 0.94, "learning_rate": 1.5956993044924334e-05, "loss": 1.2982, "step": 42 }, { "epoch": 0.96, "learning_rate": 1.5758081914178457e-05, "loss": 1.272, "step": 43 }, { "epoch": 0.99, "learning_rate": 1.5555702330196024e-05, "loss": 1.3246, "step": 44 }, { "epoch": 1.01, "learning_rate": 1.5349976198870974e-05, "loss": 1.1729, "step": 45 }, { "epoch": 1.03, "learning_rate": 1.5141027441932217e-05, "loss": 0.9817, "step": 46 }, { "epoch": 1.05, "learning_rate": 1.4928981922297842e-05, "loss": 0.9715, "step": 47 }, { "epoch": 1.08, "learning_rate": 1.4713967368259981e-05, "loss": 0.9656, "step": 48 }, { "epoch": 1.1, "learning_rate": 1.4496113296546068e-05, "loss": 0.9229, "step": 49 }, { "epoch": 1.12, "learning_rate": 1.4275550934302822e-05, "loss": 0.9596, "step": 50 }, { "epoch": 1.14, "learning_rate": 1.4052413140049898e-05, "loss": 0.8889, "step": 51 }, { "epoch": 1.17, "learning_rate": 1.3826834323650899e-05, "loss": 0.8685, "step": 52 }, { "epoch": 1.19, "learning_rate": 1.3598950365349884e-05, "loss": 0.939, "step": 53 }, { "epoch": 1.21, "learning_rate": 1.3368898533922202e-05, "loss": 0.9308, "step": 54 }, { "epoch": 1.23, "learning_rate": 1.3136817403988918e-05, "loss": 0.8947, "step": 55 }, { "epoch": 1.26, "learning_rate": 1.2902846772544625e-05, "loss": 0.8729, "step": 56 }, { "epoch": 1.28, "learning_rate": 1.2667127574748985e-05, "loss": 0.9327, "step": 57 }, { "epoch": 1.3, "learning_rate": 1.242980179903264e-05, "loss": 0.8962, "step": 58 }, { "epoch": 1.32, "learning_rate": 1.2191012401568698e-05, "loss": 0.8523, "step": 59 }, { "epoch": 1.35, "learning_rate": 1.1950903220161286e-05, "loss": 0.871, "step": 60 }, { "epoch": 1.37, "learning_rate": 1.1709618887603013e-05, "loss": 0.8349, "step": 61 }, { "epoch": 1.39, "learning_rate": 1.1467304744553618e-05, "loss": 0.8938, "step": 62 }, { "epoch": 1.41, "learning_rate": 1.1224106751992164e-05, "loss": 0.8923, "step": 63 }, { "epoch": 1.44, "learning_rate": 1.098017140329561e-05, "loss": 0.8612, "step": 64 }, { "epoch": 1.46, "learning_rate": 1.0735645635996676e-05, "loss": 0.8725, "step": 65 }, { "epoch": 1.48, "learning_rate": 1.0490676743274181e-05, "loss": 0.8748, "step": 66 }, { "epoch": 1.5, "learning_rate": 1.0245412285229124e-05, "loss": 0.9522, "step": 67 }, { "epoch": 1.53, "learning_rate": 1e-05, "loss": 0.8638, "step": 68 }, { "epoch": 1.55, "learning_rate": 9.75458771477088e-06, "loss": 0.9168, "step": 69 }, { "epoch": 1.57, "learning_rate": 9.50932325672582e-06, "loss": 0.8194, "step": 70 }, { "epoch": 1.59, "learning_rate": 9.264354364003327e-06, "loss": 0.8645, "step": 71 }, { "epoch": 1.62, "learning_rate": 9.019828596704394e-06, "loss": 0.8697, "step": 72 }, { "epoch": 1.64, "learning_rate": 8.77589324800784e-06, "loss": 0.8475, "step": 73 }, { "epoch": 1.66, "learning_rate": 8.532695255446384e-06, "loss": 0.8619, "step": 74 }, { "epoch": 1.68, "learning_rate": 8.290381112396989e-06, "loss": 0.8899, "step": 75 }, { "epoch": 1.71, "learning_rate": 8.04909677983872e-06, "loss": 0.8624, "step": 76 }, { "epoch": 1.73, "learning_rate": 7.808987598431303e-06, "loss": 0.8264, "step": 77 }, { "epoch": 1.75, "learning_rate": 7.570198200967363e-06, "loss": 0.8601, "step": 78 }, { "epoch": 1.77, "learning_rate": 7.332872425251017e-06, "loss": 0.8805, "step": 79 }, { "epoch": 1.8, "learning_rate": 7.097153227455379e-06, "loss": 0.8609, "step": 80 }, { "epoch": 1.82, "learning_rate": 6.8631825960110866e-06, "loss": 0.8157, "step": 81 }, { "epoch": 1.84, "learning_rate": 6.631101466077801e-06, "loss": 0.8573, "step": 82 }, { "epoch": 1.86, "learning_rate": 6.401049634650119e-06, "loss": 0.8475, "step": 83 }, { "epoch": 1.88, "learning_rate": 6.173165676349103e-06, "loss": 0.9107, "step": 84 }, { "epoch": 1.91, "learning_rate": 5.947586859950103e-06, "loss": 0.8895, "step": 85 }, { "epoch": 1.93, "learning_rate": 5.724449065697182e-06, "loss": 0.9007, "step": 86 }, { "epoch": 1.95, "learning_rate": 5.503886703453933e-06, "loss": 0.8571, "step": 87 }, { "epoch": 1.97, "learning_rate": 5.286032631740023e-06, "loss": 0.8509, "step": 88 }, { "epoch": 2.0, "learning_rate": 5.071018077702161e-06, "loss": 0.8653, "step": 89 }, { "epoch": 2.02, "learning_rate": 4.858972558067784e-06, "loss": 0.712, "step": 90 }, { "epoch": 2.04, "learning_rate": 4.65002380112903e-06, "loss": 0.67, "step": 91 }, { "epoch": 2.06, "learning_rate": 4.444297669803981e-06, "loss": 0.6964, "step": 92 }, { "epoch": 2.09, "learning_rate": 4.241918085821547e-06, "loss": 0.6407, "step": 93 }, { "epoch": 2.11, "learning_rate": 4.043006955075667e-06, "loss": 0.6147, "step": 94 }, { "epoch": 2.13, "learning_rate": 3.847684094193733e-06, "loss": 0.6482, "step": 95 }, { "epoch": 2.15, "learning_rate": 3.6560671583635467e-06, "loss": 0.642, "step": 96 }, { "epoch": 2.18, "learning_rate": 3.468271570462235e-06, "loss": 0.6269, "step": 97 }, { "epoch": 2.2, "learning_rate": 3.284410451529816e-06, "loss": 0.595, "step": 98 }, { "epoch": 2.22, "learning_rate": 3.1045945526293307e-06, "loss": 0.6096, "step": 99 }, { "epoch": 2.24, "learning_rate": 2.9289321881345257e-06, "loss": 0.63, "step": 100 }, { "epoch": 2.27, "learning_rate": 2.7575291704853325e-06, "loss": 0.628, "step": 101 }, { "epoch": 2.29, "learning_rate": 2.5904887464504115e-06, "loss": 0.6283, "step": 102 }, { "epoch": 2.31, "learning_rate": 2.4279115349351546e-06, "loss": 0.6003, "step": 103 }, { "epoch": 2.33, "learning_rate": 2.26989546637263e-06, "loss": 0.6256, "step": 104 }, { "epoch": 2.36, "learning_rate": 2.116535723733938e-06, "loss": 0.5855, "step": 105 }, { "epoch": 2.38, "learning_rate": 1.967924685193552e-06, "loss": 0.6104, "step": 106 }, { "epoch": 2.4, "learning_rate": 1.8241518684841642e-06, "loss": 0.611, "step": 107 }, { "epoch": 2.42, "learning_rate": 1.6853038769745466e-06, "loss": 0.6471, "step": 108 }, { "epoch": 2.45, "learning_rate": 1.551464347502929e-06, "loss": 0.5739, "step": 109 }, { "epoch": 2.47, "learning_rate": 1.4227138999972801e-06, "loss": 0.5972, "step": 110 }, { "epoch": 2.49, "learning_rate": 1.2991300889128867e-06, "loss": 0.635, "step": 111 }, { "epoch": 2.51, "learning_rate": 1.1807873565164507e-06, "loss": 0.6071, "step": 112 }, { "epoch": 2.54, "learning_rate": 1.0677569880448479e-06, "loss": 0.5941, "step": 113 }, { "epoch": 2.56, "learning_rate": 9.601070687655667e-07, "loss": 0.6395, "step": 114 }, { "epoch": 2.58, "learning_rate": 8.579024429646932e-07, "loss": 0.625, "step": 115 }, { "epoch": 2.6, "learning_rate": 7.612046748871327e-07, "loss": 0.6235, "step": 116 }, { "epoch": 2.63, "learning_rate": 6.700720116526116e-07, "loss": 0.6185, "step": 117 }, { "epoch": 2.65, "learning_rate": 5.845593481697931e-07, "loss": 0.5909, "step": 118 }, { "epoch": 2.67, "learning_rate": 5.047181940696333e-07, "loss": 0.6195, "step": 119 }, { "epoch": 2.69, "learning_rate": 4.305966426779118e-07, "loss": 0.5715, "step": 120 }, { "epoch": 2.72, "learning_rate": 3.6223934204560165e-07, "loss": 0.6129, "step": 121 }, { "epoch": 2.74, "learning_rate": 2.996874680545603e-07, "loss": 0.601, "step": 122 }, { "epoch": 2.76, "learning_rate": 2.4297869961471544e-07, "loss": 0.5971, "step": 123 }, { "epoch": 2.78, "learning_rate": 1.921471959676957e-07, "loss": 0.6276, "step": 124 }, { "epoch": 2.81, "learning_rate": 1.472235761105878e-07, "loss": 0.6267, "step": 125 }, { "epoch": 2.83, "learning_rate": 1.0823490035218986e-07, "loss": 0.5968, "step": 126 }, { "epoch": 2.85, "learning_rate": 7.520465401290033e-08, "loss": 0.64, "step": 127 }, { "epoch": 2.87, "learning_rate": 4.815273327803183e-08, "loss": 0.6304, "step": 128 }, { "epoch": 2.89, "learning_rate": 2.7095433213097933e-08, "loss": 0.5567, "step": 129 }, { "epoch": 2.92, "learning_rate": 1.2045437948275952e-08, "loss": 0.6299, "step": 130 }, { "epoch": 2.94, "learning_rate": 3.0118130379575005e-09, "loss": 0.6133, "step": 131 }, { "epoch": 2.96, "learning_rate": 0.0, "loss": 0.6049, "step": 132 }, { "epoch": 2.96, "step": 132, "total_flos": 8.052252031844352e+16, "train_loss": 0.9501514579310562, "train_runtime": 19803.7183, "train_samples_per_second": 0.864, "train_steps_per_second": 0.007 } ], "max_steps": 132, "num_train_epochs": 3, "total_flos": 8.052252031844352e+16, "trial_name": null, "trial_params": null }