|
{ |
|
"best_metric": 0.5939880609512329, |
|
"best_model_checkpoint": "/mnt/data1/sheshuaijie/Output/CoT/Trained/vicuna-13b_english-cot+auto-cot_0.0002/lora/checkpoint-1036", |
|
"epoch": 9.929896907216495, |
|
"global_step": 1505, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.05, |
|
"eval_loss": 1.7208857536315918, |
|
"eval_runtime": 39.046, |
|
"eval_samples_per_second": 76.832, |
|
"eval_steps_per_second": 2.407, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"eval_loss": 1.3302656412124634, |
|
"eval_runtime": 39.1446, |
|
"eval_samples_per_second": 76.639, |
|
"eval_steps_per_second": 2.401, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019933110367892977, |
|
"loss": 1.607, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 1.0993696451187134, |
|
"eval_runtime": 39.2624, |
|
"eval_samples_per_second": 76.409, |
|
"eval_steps_per_second": 2.394, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"eval_loss": 0.9883869886398315, |
|
"eval_runtime": 39.2607, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"eval_loss": 0.9121341109275818, |
|
"eval_runtime": 39.2818, |
|
"eval_samples_per_second": 76.371, |
|
"eval_steps_per_second": 2.393, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019665551839464883, |
|
"loss": 1.0077, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.8665392398834229, |
|
"eval_runtime": 39.261, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_loss": 0.8299428820610046, |
|
"eval_runtime": 39.2723, |
|
"eval_samples_per_second": 76.39, |
|
"eval_steps_per_second": 2.394, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 0.7965301275253296, |
|
"eval_runtime": 39.2718, |
|
"eval_samples_per_second": 76.391, |
|
"eval_steps_per_second": 2.394, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001939799331103679, |
|
"loss": 0.8626, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.7661889791488647, |
|
"eval_runtime": 39.2752, |
|
"eval_samples_per_second": 76.384, |
|
"eval_steps_per_second": 2.393, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"eval_loss": 0.744417130947113, |
|
"eval_runtime": 39.2899, |
|
"eval_samples_per_second": 76.355, |
|
"eval_steps_per_second": 2.392, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 0.728394627571106, |
|
"eval_runtime": 39.298, |
|
"eval_samples_per_second": 76.34, |
|
"eval_steps_per_second": 2.392, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019130434782608697, |
|
"loss": 0.7683, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.7151542901992798, |
|
"eval_runtime": 39.272, |
|
"eval_samples_per_second": 76.39, |
|
"eval_steps_per_second": 2.394, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_loss": 0.7049417495727539, |
|
"eval_runtime": 39.2657, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_loss": 0.6961150765419006, |
|
"eval_runtime": 39.2274, |
|
"eval_samples_per_second": 76.477, |
|
"eval_steps_per_second": 2.396, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00018862876254180605, |
|
"loss": 0.7346, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.6891586780548096, |
|
"eval_runtime": 39.2698, |
|
"eval_samples_per_second": 76.395, |
|
"eval_steps_per_second": 2.394, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 0.6833620667457581, |
|
"eval_runtime": 39.2474, |
|
"eval_samples_per_second": 76.438, |
|
"eval_steps_per_second": 2.395, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_loss": 0.678981602191925, |
|
"eval_runtime": 39.2363, |
|
"eval_samples_per_second": 76.46, |
|
"eval_steps_per_second": 2.396, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001859531772575251, |
|
"loss": 0.7095, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 0.6739740967750549, |
|
"eval_runtime": 39.2467, |
|
"eval_samples_per_second": 76.439, |
|
"eval_steps_per_second": 2.395, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 0.6704814434051514, |
|
"eval_runtime": 39.2828, |
|
"eval_samples_per_second": 76.369, |
|
"eval_steps_per_second": 2.393, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00018327759197324413, |
|
"loss": 0.6989, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"eval_loss": 0.6668062210083008, |
|
"eval_runtime": 39.1861, |
|
"eval_samples_per_second": 76.558, |
|
"eval_steps_per_second": 2.399, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"eval_loss": 0.6635003089904785, |
|
"eval_runtime": 39.2627, |
|
"eval_samples_per_second": 76.408, |
|
"eval_steps_per_second": 2.394, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 0.6594184637069702, |
|
"eval_runtime": 39.2634, |
|
"eval_samples_per_second": 76.407, |
|
"eval_steps_per_second": 2.394, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00018060200668896322, |
|
"loss": 0.6753, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"eval_loss": 0.656818151473999, |
|
"eval_runtime": 39.2093, |
|
"eval_samples_per_second": 76.513, |
|
"eval_steps_per_second": 2.397, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 0.6542237401008606, |
|
"eval_runtime": 39.2619, |
|
"eval_samples_per_second": 76.41, |
|
"eval_steps_per_second": 2.394, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 0.6509793400764465, |
|
"eval_runtime": 39.2795, |
|
"eval_samples_per_second": 76.376, |
|
"eval_steps_per_second": 2.393, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00017792642140468227, |
|
"loss": 0.6742, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_loss": 0.6501123905181885, |
|
"eval_runtime": 39.2889, |
|
"eval_samples_per_second": 76.357, |
|
"eval_steps_per_second": 2.393, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 0.6488311290740967, |
|
"eval_runtime": 39.2821, |
|
"eval_samples_per_second": 76.371, |
|
"eval_steps_per_second": 2.393, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"eval_loss": 0.6458473205566406, |
|
"eval_runtime": 39.2749, |
|
"eval_samples_per_second": 76.385, |
|
"eval_steps_per_second": 2.393, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00017525083612040135, |
|
"loss": 0.6727, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"eval_loss": 0.6445983648300171, |
|
"eval_runtime": 39.2655, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"eval_loss": 0.6414983868598938, |
|
"eval_runtime": 39.2575, |
|
"eval_samples_per_second": 76.418, |
|
"eval_steps_per_second": 2.394, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"eval_loss": 0.6403743624687195, |
|
"eval_runtime": 39.2601, |
|
"eval_samples_per_second": 76.413, |
|
"eval_steps_per_second": 2.394, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001725752508361204, |
|
"loss": 0.6651, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.6375772953033447, |
|
"eval_runtime": 39.2616, |
|
"eval_samples_per_second": 76.411, |
|
"eval_steps_per_second": 2.394, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 0.6363030076026917, |
|
"eval_runtime": 39.2685, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 2.394, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"eval_loss": 0.6365154981613159, |
|
"eval_runtime": 39.2615, |
|
"eval_samples_per_second": 76.411, |
|
"eval_steps_per_second": 2.394, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00016989966555183946, |
|
"loss": 0.6569, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 0.6351213455200195, |
|
"eval_runtime": 39.2374, |
|
"eval_samples_per_second": 76.458, |
|
"eval_steps_per_second": 2.396, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"eval_loss": 0.633696436882019, |
|
"eval_runtime": 39.2576, |
|
"eval_samples_per_second": 76.418, |
|
"eval_steps_per_second": 2.394, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"eval_loss": 0.6320024132728577, |
|
"eval_runtime": 39.2456, |
|
"eval_samples_per_second": 76.442, |
|
"eval_steps_per_second": 2.395, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00016722408026755855, |
|
"loss": 0.6535, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"eval_loss": 0.6302981972694397, |
|
"eval_runtime": 39.2723, |
|
"eval_samples_per_second": 76.39, |
|
"eval_steps_per_second": 2.394, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_loss": 0.6285908818244934, |
|
"eval_runtime": 39.2745, |
|
"eval_samples_per_second": 76.385, |
|
"eval_steps_per_second": 2.393, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001645484949832776, |
|
"loss": 0.6504, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 0.6279519200325012, |
|
"eval_runtime": 39.1978, |
|
"eval_samples_per_second": 76.535, |
|
"eval_steps_per_second": 2.398, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 0.6275761723518372, |
|
"eval_runtime": 39.2574, |
|
"eval_samples_per_second": 76.419, |
|
"eval_steps_per_second": 2.394, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"eval_loss": 0.6262693405151367, |
|
"eval_runtime": 39.2587, |
|
"eval_samples_per_second": 76.416, |
|
"eval_steps_per_second": 2.394, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00016187290969899666, |
|
"loss": 0.6447, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 0.6255723237991333, |
|
"eval_runtime": 39.2593, |
|
"eval_samples_per_second": 76.415, |
|
"eval_steps_per_second": 2.394, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"eval_loss": 0.624893307685852, |
|
"eval_runtime": 39.2732, |
|
"eval_samples_per_second": 76.388, |
|
"eval_steps_per_second": 2.393, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_loss": 0.6238787174224854, |
|
"eval_runtime": 39.2648, |
|
"eval_samples_per_second": 76.404, |
|
"eval_steps_per_second": 2.394, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00015919732441471574, |
|
"loss": 0.6418, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 0.6227560043334961, |
|
"eval_runtime": 39.2517, |
|
"eval_samples_per_second": 76.43, |
|
"eval_steps_per_second": 2.395, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_loss": 0.621408998966217, |
|
"eval_runtime": 39.2673, |
|
"eval_samples_per_second": 76.4, |
|
"eval_steps_per_second": 2.394, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 0.6207154989242554, |
|
"eval_runtime": 39.2707, |
|
"eval_samples_per_second": 76.393, |
|
"eval_steps_per_second": 2.394, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0001565217391304348, |
|
"loss": 0.6294, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 0.6207785606384277, |
|
"eval_runtime": 39.2687, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 2.394, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"eval_loss": 0.619699239730835, |
|
"eval_runtime": 39.2528, |
|
"eval_samples_per_second": 76.428, |
|
"eval_steps_per_second": 2.395, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 0.6189907789230347, |
|
"eval_runtime": 39.2383, |
|
"eval_samples_per_second": 76.456, |
|
"eval_steps_per_second": 2.396, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00015384615384615385, |
|
"loss": 0.6323, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 0.6188793182373047, |
|
"eval_runtime": 39.2507, |
|
"eval_samples_per_second": 76.432, |
|
"eval_steps_per_second": 2.395, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"eval_loss": 0.6180170774459839, |
|
"eval_runtime": 39.2506, |
|
"eval_samples_per_second": 76.432, |
|
"eval_steps_per_second": 2.395, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 0.6175986528396606, |
|
"eval_runtime": 39.2493, |
|
"eval_samples_per_second": 76.434, |
|
"eval_steps_per_second": 2.395, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00015117056856187293, |
|
"loss": 0.6194, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_loss": 0.6155608296394348, |
|
"eval_runtime": 39.2549, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 2.395, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 0.6149768829345703, |
|
"eval_runtime": 39.2507, |
|
"eval_samples_per_second": 76.432, |
|
"eval_steps_per_second": 2.395, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 0.614321768283844, |
|
"eval_runtime": 39.2607, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00014849498327759196, |
|
"loss": 0.6165, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.6136913299560547, |
|
"eval_runtime": 39.256, |
|
"eval_samples_per_second": 76.422, |
|
"eval_steps_per_second": 2.395, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"eval_loss": 0.6127980351448059, |
|
"eval_runtime": 39.2695, |
|
"eval_samples_per_second": 76.395, |
|
"eval_steps_per_second": 2.394, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00014581939799331104, |
|
"loss": 0.6202, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 0.6126558780670166, |
|
"eval_runtime": 39.2344, |
|
"eval_samples_per_second": 76.464, |
|
"eval_steps_per_second": 2.396, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_loss": 0.6126319766044617, |
|
"eval_runtime": 39.2692, |
|
"eval_samples_per_second": 76.396, |
|
"eval_steps_per_second": 2.394, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 0.6124591827392578, |
|
"eval_runtime": 39.2771, |
|
"eval_samples_per_second": 76.38, |
|
"eval_steps_per_second": 2.393, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.0001431438127090301, |
|
"loss": 0.6186, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"eval_loss": 0.6117784976959229, |
|
"eval_runtime": 39.2494, |
|
"eval_samples_per_second": 76.434, |
|
"eval_steps_per_second": 2.395, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.6105948090553284, |
|
"eval_runtime": 39.2716, |
|
"eval_samples_per_second": 76.391, |
|
"eval_steps_per_second": 2.394, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 0.6107361912727356, |
|
"eval_runtime": 39.2828, |
|
"eval_samples_per_second": 76.369, |
|
"eval_steps_per_second": 2.393, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 0.00014046822742474916, |
|
"loss": 0.6165, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"eval_loss": 0.6106633543968201, |
|
"eval_runtime": 39.2701, |
|
"eval_samples_per_second": 76.394, |
|
"eval_steps_per_second": 2.394, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"eval_loss": 0.6104211807250977, |
|
"eval_runtime": 39.2794, |
|
"eval_samples_per_second": 76.376, |
|
"eval_steps_per_second": 2.393, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"eval_loss": 0.611173152923584, |
|
"eval_runtime": 39.2596, |
|
"eval_samples_per_second": 76.415, |
|
"eval_steps_per_second": 2.394, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 0.00013779264214046824, |
|
"loss": 0.6021, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"eval_loss": 0.6094884276390076, |
|
"eval_runtime": 39.2429, |
|
"eval_samples_per_second": 76.447, |
|
"eval_steps_per_second": 2.395, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 0.6093204617500305, |
|
"eval_runtime": 39.278, |
|
"eval_samples_per_second": 76.379, |
|
"eval_steps_per_second": 2.393, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 3.28, |
|
"eval_loss": 0.60869961977005, |
|
"eval_runtime": 39.269, |
|
"eval_samples_per_second": 76.396, |
|
"eval_steps_per_second": 2.394, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 0.0001351170568561873, |
|
"loss": 0.6057, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 0.6093556880950928, |
|
"eval_runtime": 39.2597, |
|
"eval_samples_per_second": 76.414, |
|
"eval_steps_per_second": 2.394, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 0.6078519821166992, |
|
"eval_runtime": 39.2561, |
|
"eval_samples_per_second": 76.421, |
|
"eval_steps_per_second": 2.395, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 3.42, |
|
"eval_loss": 0.6079010367393494, |
|
"eval_runtime": 39.2536, |
|
"eval_samples_per_second": 76.426, |
|
"eval_steps_per_second": 2.395, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 0.00013244147157190635, |
|
"loss": 0.598, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 3.46, |
|
"eval_loss": 0.6074483394622803, |
|
"eval_runtime": 39.2832, |
|
"eval_samples_per_second": 76.369, |
|
"eval_steps_per_second": 2.393, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 0.6073596477508545, |
|
"eval_runtime": 39.2701, |
|
"eval_samples_per_second": 76.394, |
|
"eval_steps_per_second": 2.394, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"eval_loss": 0.606430172920227, |
|
"eval_runtime": 39.2546, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 2.395, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 0.00012976588628762543, |
|
"loss": 0.5948, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 0.6060574650764465, |
|
"eval_runtime": 39.2503, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 2.395, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 3.65, |
|
"eval_loss": 0.6067923307418823, |
|
"eval_runtime": 39.2654, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 0.0001270903010033445, |
|
"loss": 0.5962, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"eval_loss": 0.6042212843894958, |
|
"eval_runtime": 39.2032, |
|
"eval_samples_per_second": 76.524, |
|
"eval_steps_per_second": 2.398, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 0.6041299700737, |
|
"eval_runtime": 39.2396, |
|
"eval_samples_per_second": 76.453, |
|
"eval_steps_per_second": 2.396, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"eval_loss": 0.6047356128692627, |
|
"eval_runtime": 39.274, |
|
"eval_samples_per_second": 76.386, |
|
"eval_steps_per_second": 2.393, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"learning_rate": 0.00012441471571906357, |
|
"loss": 0.5977, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.83, |
|
"eval_loss": 0.6040154099464417, |
|
"eval_runtime": 39.2677, |
|
"eval_samples_per_second": 76.399, |
|
"eval_steps_per_second": 2.394, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 3.88, |
|
"eval_loss": 0.603416383266449, |
|
"eval_runtime": 39.2621, |
|
"eval_samples_per_second": 76.41, |
|
"eval_steps_per_second": 2.394, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"eval_loss": 0.6036480069160461, |
|
"eval_runtime": 39.2609, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.96, |
|
"learning_rate": 0.00012173913043478263, |
|
"loss": 0.5903, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 0.6035267114639282, |
|
"eval_runtime": 39.2828, |
|
"eval_samples_per_second": 76.369, |
|
"eval_steps_per_second": 2.393, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 4.02, |
|
"eval_loss": 0.6025964617729187, |
|
"eval_runtime": 39.2634, |
|
"eval_samples_per_second": 76.407, |
|
"eval_steps_per_second": 2.394, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 4.06, |
|
"eval_loss": 0.6028868556022644, |
|
"eval_runtime": 39.2591, |
|
"eval_samples_per_second": 76.415, |
|
"eval_steps_per_second": 2.394, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 4.09, |
|
"learning_rate": 0.0001190635451505017, |
|
"loss": 0.5927, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 0.6027114391326904, |
|
"eval_runtime": 39.2648, |
|
"eval_samples_per_second": 76.404, |
|
"eval_steps_per_second": 2.394, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"eval_loss": 0.6030986905097961, |
|
"eval_runtime": 39.2746, |
|
"eval_samples_per_second": 76.385, |
|
"eval_steps_per_second": 2.393, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 4.2, |
|
"eval_loss": 0.6026434898376465, |
|
"eval_runtime": 39.2646, |
|
"eval_samples_per_second": 76.405, |
|
"eval_steps_per_second": 2.394, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 4.22, |
|
"learning_rate": 0.00011638795986622074, |
|
"loss": 0.581, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 4.25, |
|
"eval_loss": 0.6008206009864807, |
|
"eval_runtime": 39.2718, |
|
"eval_samples_per_second": 76.391, |
|
"eval_steps_per_second": 2.394, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 4.3, |
|
"eval_loss": 0.6018855571746826, |
|
"eval_runtime": 39.2587, |
|
"eval_samples_per_second": 76.416, |
|
"eval_steps_per_second": 2.394, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 4.34, |
|
"eval_loss": 0.6018174886703491, |
|
"eval_runtime": 39.2445, |
|
"eval_samples_per_second": 76.444, |
|
"eval_steps_per_second": 2.395, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"learning_rate": 0.00011371237458193979, |
|
"loss": 0.5965, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"eval_loss": 0.6006762981414795, |
|
"eval_runtime": 39.2498, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 2.395, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 4.43, |
|
"eval_loss": 0.6006374359130859, |
|
"eval_runtime": 39.2758, |
|
"eval_samples_per_second": 76.383, |
|
"eval_steps_per_second": 2.393, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 0.5997828245162964, |
|
"eval_runtime": 39.2794, |
|
"eval_samples_per_second": 76.376, |
|
"eval_steps_per_second": 2.393, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 4.49, |
|
"learning_rate": 0.00011103678929765886, |
|
"loss": 0.5896, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 4.53, |
|
"eval_loss": 0.6000981330871582, |
|
"eval_runtime": 39.2629, |
|
"eval_samples_per_second": 76.408, |
|
"eval_steps_per_second": 2.394, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"eval_loss": 0.5991115570068359, |
|
"eval_runtime": 39.2774, |
|
"eval_samples_per_second": 76.38, |
|
"eval_steps_per_second": 2.393, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 0.00010836120401337793, |
|
"loss": 0.5854, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"eval_loss": 0.6001954674720764, |
|
"eval_runtime": 39.2333, |
|
"eval_samples_per_second": 76.466, |
|
"eval_steps_per_second": 2.396, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"eval_loss": 0.6007575988769531, |
|
"eval_runtime": 39.2801, |
|
"eval_samples_per_second": 76.374, |
|
"eval_steps_per_second": 2.393, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 4.71, |
|
"eval_loss": 0.5983864068984985, |
|
"eval_runtime": 39.2469, |
|
"eval_samples_per_second": 76.439, |
|
"eval_steps_per_second": 2.395, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"learning_rate": 0.00010568561872909698, |
|
"loss": 0.5844, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 4.76, |
|
"eval_loss": 0.5985506772994995, |
|
"eval_runtime": 39.2426, |
|
"eval_samples_per_second": 76.448, |
|
"eval_steps_per_second": 2.395, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_loss": 0.5978309512138367, |
|
"eval_runtime": 39.2604, |
|
"eval_samples_per_second": 76.413, |
|
"eval_steps_per_second": 2.394, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 0.5981310606002808, |
|
"eval_runtime": 39.2686, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 2.394, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"learning_rate": 0.00010301003344481605, |
|
"loss": 0.5784, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 4.9, |
|
"eval_loss": 0.5985335111618042, |
|
"eval_runtime": 39.2557, |
|
"eval_samples_per_second": 76.422, |
|
"eval_steps_per_second": 2.395, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 4.94, |
|
"eval_loss": 0.5975944995880127, |
|
"eval_runtime": 39.2644, |
|
"eval_samples_per_second": 76.405, |
|
"eval_steps_per_second": 2.394, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 0.596754252910614, |
|
"eval_runtime": 39.2365, |
|
"eval_samples_per_second": 76.459, |
|
"eval_steps_per_second": 2.396, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 0.00010033444816053512, |
|
"loss": 0.5825, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 5.03, |
|
"eval_loss": 0.5977214574813843, |
|
"eval_runtime": 39.235, |
|
"eval_samples_per_second": 76.462, |
|
"eval_steps_per_second": 2.396, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"eval_loss": 0.5982287526130676, |
|
"eval_runtime": 39.2483, |
|
"eval_samples_per_second": 76.436, |
|
"eval_steps_per_second": 2.395, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 5.13, |
|
"eval_loss": 0.5973477959632874, |
|
"eval_runtime": 39.2692, |
|
"eval_samples_per_second": 76.396, |
|
"eval_steps_per_second": 2.394, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"learning_rate": 9.765886287625419e-05, |
|
"loss": 0.5724, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 5.17, |
|
"eval_loss": 0.598833441734314, |
|
"eval_runtime": 39.2608, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 0.5973609089851379, |
|
"eval_runtime": 39.2557, |
|
"eval_samples_per_second": 76.422, |
|
"eval_steps_per_second": 2.395, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 5.27, |
|
"eval_loss": 0.5983055233955383, |
|
"eval_runtime": 39.2613, |
|
"eval_samples_per_second": 76.411, |
|
"eval_steps_per_second": 2.394, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 5.28, |
|
"learning_rate": 9.498327759197325e-05, |
|
"loss": 0.5765, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"eval_loss": 0.597219705581665, |
|
"eval_runtime": 39.2532, |
|
"eval_samples_per_second": 76.427, |
|
"eval_steps_per_second": 2.395, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 0.5974920392036438, |
|
"eval_runtime": 39.2428, |
|
"eval_samples_per_second": 76.447, |
|
"eval_steps_per_second": 2.395, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 5.4, |
|
"eval_loss": 0.5970295667648315, |
|
"eval_runtime": 39.2255, |
|
"eval_samples_per_second": 76.481, |
|
"eval_steps_per_second": 2.396, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 5.41, |
|
"learning_rate": 9.230769230769232e-05, |
|
"loss": 0.5662, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 5.45, |
|
"eval_loss": 0.5995200872421265, |
|
"eval_runtime": 39.2763, |
|
"eval_samples_per_second": 76.382, |
|
"eval_steps_per_second": 2.393, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 5.5, |
|
"eval_loss": 0.5961365699768066, |
|
"eval_runtime": 39.2442, |
|
"eval_samples_per_second": 76.444, |
|
"eval_steps_per_second": 2.395, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 8.963210702341137e-05, |
|
"loss": 0.5594, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"eval_loss": 0.5958811640739441, |
|
"eval_runtime": 39.224, |
|
"eval_samples_per_second": 76.484, |
|
"eval_steps_per_second": 2.396, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 0.5974062085151672, |
|
"eval_runtime": 39.2479, |
|
"eval_samples_per_second": 76.437, |
|
"eval_steps_per_second": 2.395, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 0.5959305167198181, |
|
"eval_runtime": 39.1122, |
|
"eval_samples_per_second": 76.702, |
|
"eval_steps_per_second": 2.403, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 5.67, |
|
"learning_rate": 8.695652173913044e-05, |
|
"loss": 0.5569, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 5.68, |
|
"eval_loss": 0.597082257270813, |
|
"eval_runtime": 39.2419, |
|
"eval_samples_per_second": 76.449, |
|
"eval_steps_per_second": 2.395, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 0.5964935421943665, |
|
"eval_runtime": 39.2482, |
|
"eval_samples_per_second": 76.437, |
|
"eval_steps_per_second": 2.395, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 5.77, |
|
"eval_loss": 0.596628725528717, |
|
"eval_runtime": 39.2684, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 2.394, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 5.81, |
|
"learning_rate": 8.42809364548495e-05, |
|
"loss": 0.5711, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 5.82, |
|
"eval_loss": 0.596688449382782, |
|
"eval_runtime": 39.262, |
|
"eval_samples_per_second": 76.41, |
|
"eval_steps_per_second": 2.394, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 5.87, |
|
"eval_loss": 0.5974501967430115, |
|
"eval_runtime": 39.2621, |
|
"eval_samples_per_second": 76.409, |
|
"eval_steps_per_second": 2.394, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 5.91, |
|
"eval_loss": 0.5951861143112183, |
|
"eval_runtime": 39.2622, |
|
"eval_samples_per_second": 76.409, |
|
"eval_steps_per_second": 2.394, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 5.94, |
|
"learning_rate": 8.160535117056857e-05, |
|
"loss": 0.5703, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 0.5963322520256042, |
|
"eval_runtime": 39.2656, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 0.5958115458488464, |
|
"eval_runtime": 39.2804, |
|
"eval_samples_per_second": 76.374, |
|
"eval_steps_per_second": 2.393, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 6.05, |
|
"eval_loss": 0.5968443155288696, |
|
"eval_runtime": 39.2618, |
|
"eval_samples_per_second": 76.41, |
|
"eval_steps_per_second": 2.394, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 6.07, |
|
"learning_rate": 7.892976588628763e-05, |
|
"loss": 0.5551, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 0.5958288311958313, |
|
"eval_runtime": 39.2648, |
|
"eval_samples_per_second": 76.404, |
|
"eval_steps_per_second": 2.394, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"eval_loss": 0.5968209505081177, |
|
"eval_runtime": 39.2563, |
|
"eval_samples_per_second": 76.421, |
|
"eval_steps_per_second": 2.395, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 6.19, |
|
"eval_loss": 0.5957658886909485, |
|
"eval_runtime": 39.2499, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 2.395, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 7.62541806020067e-05, |
|
"loss": 0.5636, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"eval_loss": 0.5955784916877747, |
|
"eval_runtime": 39.279, |
|
"eval_samples_per_second": 76.377, |
|
"eval_steps_per_second": 2.393, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 6.28, |
|
"eval_loss": 0.5963084101676941, |
|
"eval_runtime": 39.2656, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 0.595792829990387, |
|
"eval_runtime": 39.2577, |
|
"eval_samples_per_second": 76.418, |
|
"eval_steps_per_second": 2.394, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 7.357859531772575e-05, |
|
"loss": 0.5676, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 0.5953949093818665, |
|
"eval_runtime": 39.2554, |
|
"eval_samples_per_second": 76.423, |
|
"eval_steps_per_second": 2.395, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 6.42, |
|
"eval_loss": 0.595146894454956, |
|
"eval_runtime": 39.2386, |
|
"eval_samples_per_second": 76.455, |
|
"eval_steps_per_second": 2.396, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 7.090301003344481e-05, |
|
"loss": 0.5551, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 0.5957517027854919, |
|
"eval_runtime": 39.197, |
|
"eval_samples_per_second": 76.536, |
|
"eval_steps_per_second": 2.398, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 6.51, |
|
"eval_loss": 0.596603512763977, |
|
"eval_runtime": 39.2315, |
|
"eval_samples_per_second": 76.469, |
|
"eval_steps_per_second": 2.396, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 6.56, |
|
"eval_loss": 0.5952173471450806, |
|
"eval_runtime": 39.2393, |
|
"eval_samples_per_second": 76.454, |
|
"eval_steps_per_second": 2.396, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 6.822742474916388e-05, |
|
"loss": 0.5539, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 0.5954132676124573, |
|
"eval_runtime": 39.2213, |
|
"eval_samples_per_second": 76.489, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 6.65, |
|
"eval_loss": 0.5956953167915344, |
|
"eval_runtime": 39.2503, |
|
"eval_samples_per_second": 76.432, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 0.5959665775299072, |
|
"eval_runtime": 39.2657, |
|
"eval_samples_per_second": 76.403, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 6.73, |
|
"learning_rate": 6.555183946488295e-05, |
|
"loss": 0.5607, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 0.5952425003051758, |
|
"eval_runtime": 39.2705, |
|
"eval_samples_per_second": 76.393, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 6.79, |
|
"eval_loss": 0.5953785181045532, |
|
"eval_runtime": 39.2403, |
|
"eval_samples_per_second": 76.452, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 0.5939880609512329, |
|
"eval_runtime": 39.2586, |
|
"eval_samples_per_second": 76.416, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 6.86, |
|
"learning_rate": 6.287625418060201e-05, |
|
"loss": 0.5535, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 6.88, |
|
"eval_loss": 0.5965318083763123, |
|
"eval_runtime": 39.2698, |
|
"eval_samples_per_second": 76.395, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"eval_loss": 0.5945996642112732, |
|
"eval_runtime": 39.2883, |
|
"eval_samples_per_second": 76.359, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 0.5955180525779724, |
|
"eval_runtime": 39.2482, |
|
"eval_samples_per_second": 76.437, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 6.0200668896321076e-05, |
|
"loss": 0.5488, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_loss": 0.5957381725311279, |
|
"eval_runtime": 39.2638, |
|
"eval_samples_per_second": 76.406, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 0.595982551574707, |
|
"eval_runtime": 39.2492, |
|
"eval_samples_per_second": 76.435, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 0.5966017842292786, |
|
"eval_runtime": 39.2599, |
|
"eval_samples_per_second": 76.414, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 7.13, |
|
"learning_rate": 5.752508361204013e-05, |
|
"loss": 0.5397, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"eval_loss": 0.5957372784614563, |
|
"eval_runtime": 39.2429, |
|
"eval_samples_per_second": 76.447, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_loss": 0.5966220498085022, |
|
"eval_runtime": 39.2131, |
|
"eval_samples_per_second": 76.505, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 7.25, |
|
"eval_loss": 0.5961087346076965, |
|
"eval_runtime": 39.2548, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 7.26, |
|
"learning_rate": 5.4849498327759194e-05, |
|
"loss": 0.5478, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 7.3, |
|
"eval_loss": 0.5953994393348694, |
|
"eval_runtime": 39.2497, |
|
"eval_samples_per_second": 76.434, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 0.5952059626579285, |
|
"eval_runtime": 39.2593, |
|
"eval_samples_per_second": 76.415, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 5.217391304347826e-05, |
|
"loss": 0.5443, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"eval_loss": 0.5956901907920837, |
|
"eval_runtime": 39.2077, |
|
"eval_samples_per_second": 76.516, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 0.595130980014801, |
|
"eval_runtime": 39.2681, |
|
"eval_samples_per_second": 76.398, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 0.5952557921409607, |
|
"eval_runtime": 39.2635, |
|
"eval_samples_per_second": 76.407, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 7.52, |
|
"learning_rate": 4.9498327759197325e-05, |
|
"loss": 0.5482, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 7.53, |
|
"eval_loss": 0.5957027077674866, |
|
"eval_runtime": 39.2636, |
|
"eval_samples_per_second": 76.407, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 7.57, |
|
"eval_loss": 0.5959120988845825, |
|
"eval_runtime": 39.2726, |
|
"eval_samples_per_second": 76.389, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"eval_loss": 0.5966920852661133, |
|
"eval_runtime": 39.2671, |
|
"eval_samples_per_second": 76.4, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"learning_rate": 4.6822742474916394e-05, |
|
"loss": 0.5398, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 7.67, |
|
"eval_loss": 0.5955999493598938, |
|
"eval_runtime": 39.2344, |
|
"eval_samples_per_second": 76.464, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 0.5959904193878174, |
|
"eval_runtime": 39.2449, |
|
"eval_samples_per_second": 76.443, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 7.76, |
|
"eval_loss": 0.595309853553772, |
|
"eval_runtime": 39.2603, |
|
"eval_samples_per_second": 76.413, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 7.79, |
|
"learning_rate": 4.414715719063545e-05, |
|
"loss": 0.5405, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 0.5953694581985474, |
|
"eval_runtime": 39.25, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 0.5948453545570374, |
|
"eval_runtime": 39.2551, |
|
"eval_samples_per_second": 76.423, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 7.9, |
|
"eval_loss": 0.596403956413269, |
|
"eval_runtime": 39.2502, |
|
"eval_samples_per_second": 76.433, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"learning_rate": 4.147157190635452e-05, |
|
"loss": 0.5454, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 7.94, |
|
"eval_loss": 0.594862699508667, |
|
"eval_runtime": 39.2569, |
|
"eval_samples_per_second": 76.42, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 7.99, |
|
"eval_loss": 0.5970881581306458, |
|
"eval_runtime": 39.2602, |
|
"eval_samples_per_second": 76.413, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 8.04, |
|
"eval_loss": 0.595177173614502, |
|
"eval_runtime": 39.2663, |
|
"eval_samples_per_second": 76.401, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 3.879598662207358e-05, |
|
"loss": 0.5407, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 0.5961460471153259, |
|
"eval_runtime": 39.2651, |
|
"eval_samples_per_second": 76.404, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 8.13, |
|
"eval_loss": 0.5952489972114563, |
|
"eval_runtime": 39.2417, |
|
"eval_samples_per_second": 76.449, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 8.17, |
|
"eval_loss": 0.5965322852134705, |
|
"eval_runtime": 39.244, |
|
"eval_samples_per_second": 76.445, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 8.18, |
|
"learning_rate": 3.612040133779264e-05, |
|
"loss": 0.5397, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 0.5953331589698792, |
|
"eval_runtime": 39.242, |
|
"eval_samples_per_second": 76.449, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 8.27, |
|
"eval_loss": 0.5962971448898315, |
|
"eval_runtime": 39.2251, |
|
"eval_samples_per_second": 76.482, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"learning_rate": 3.3444816053511705e-05, |
|
"loss": 0.5456, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 8.31, |
|
"eval_loss": 0.5961341261863708, |
|
"eval_runtime": 39.2121, |
|
"eval_samples_per_second": 76.507, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 8.36, |
|
"eval_loss": 0.595777153968811, |
|
"eval_runtime": 39.2544, |
|
"eval_samples_per_second": 76.425, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 8.41, |
|
"eval_loss": 0.5961927771568298, |
|
"eval_runtime": 39.2566, |
|
"eval_samples_per_second": 76.42, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"learning_rate": 3.0769230769230774e-05, |
|
"loss": 0.5361, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 0.5967215895652771, |
|
"eval_runtime": 39.251, |
|
"eval_samples_per_second": 76.431, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 8.5, |
|
"eval_loss": 0.5971426367759705, |
|
"eval_runtime": 39.1956, |
|
"eval_samples_per_second": 76.539, |
|
"eval_steps_per_second": 2.398, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 8.54, |
|
"eval_loss": 0.5957850217819214, |
|
"eval_runtime": 39.1348, |
|
"eval_samples_per_second": 76.658, |
|
"eval_steps_per_second": 2.402, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 8.58, |
|
"learning_rate": 2.8093645484949833e-05, |
|
"loss": 0.5299, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 0.5976316928863525, |
|
"eval_runtime": 39.248, |
|
"eval_samples_per_second": 76.437, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 8.64, |
|
"eval_loss": 0.5961042642593384, |
|
"eval_runtime": 39.2812, |
|
"eval_samples_per_second": 76.372, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 8.68, |
|
"eval_loss": 0.5960709452629089, |
|
"eval_runtime": 39.1595, |
|
"eval_samples_per_second": 76.61, |
|
"eval_steps_per_second": 2.4, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 8.71, |
|
"learning_rate": 2.54180602006689e-05, |
|
"loss": 0.5352, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 8.73, |
|
"eval_loss": 0.5965536236763, |
|
"eval_runtime": 39.2514, |
|
"eval_samples_per_second": 76.43, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1323 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"eval_loss": 0.5955998301506042, |
|
"eval_runtime": 39.2608, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 8.82, |
|
"eval_loss": 0.5966908931732178, |
|
"eval_runtime": 39.2659, |
|
"eval_samples_per_second": 76.402, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1337 |
|
}, |
|
{ |
|
"epoch": 8.84, |
|
"learning_rate": 2.274247491638796e-05, |
|
"loss": 0.5287, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 8.87, |
|
"eval_loss": 0.5965719223022461, |
|
"eval_runtime": 39.2743, |
|
"eval_samples_per_second": 76.386, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1344 |
|
}, |
|
{ |
|
"epoch": 8.91, |
|
"eval_loss": 0.5965744853019714, |
|
"eval_runtime": 39.2692, |
|
"eval_samples_per_second": 76.396, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1351 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 0.5964776277542114, |
|
"eval_runtime": 39.2476, |
|
"eval_samples_per_second": 76.438, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1358 |
|
}, |
|
{ |
|
"epoch": 8.97, |
|
"learning_rate": 2.0066889632107023e-05, |
|
"loss": 0.5349, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"eval_loss": 0.5952075719833374, |
|
"eval_runtime": 39.2647, |
|
"eval_samples_per_second": 76.404, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 9.05, |
|
"eval_loss": 0.5961645841598511, |
|
"eval_runtime": 39.268, |
|
"eval_samples_per_second": 76.398, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1372 |
|
}, |
|
{ |
|
"epoch": 9.1, |
|
"eval_loss": 0.5963994264602661, |
|
"eval_runtime": 39.2348, |
|
"eval_samples_per_second": 76.463, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1379 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 1.739130434782609e-05, |
|
"loss": 0.5325, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 9.14, |
|
"eval_loss": 0.5961940884590149, |
|
"eval_runtime": 39.2561, |
|
"eval_samples_per_second": 76.421, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1386 |
|
}, |
|
{ |
|
"epoch": 9.19, |
|
"eval_loss": 0.5965219140052795, |
|
"eval_runtime": 39.211, |
|
"eval_samples_per_second": 76.509, |
|
"eval_steps_per_second": 2.397, |
|
"step": 1393 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 1.4715719063545153e-05, |
|
"loss": 0.5337, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"eval_loss": 0.5960851311683655, |
|
"eval_runtime": 39.1812, |
|
"eval_samples_per_second": 76.567, |
|
"eval_steps_per_second": 2.399, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 9.28, |
|
"eval_loss": 0.5964084267616272, |
|
"eval_runtime": 39.2326, |
|
"eval_samples_per_second": 76.467, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1407 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 0.596020519733429, |
|
"eval_runtime": 39.2491, |
|
"eval_samples_per_second": 76.435, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1414 |
|
}, |
|
{ |
|
"epoch": 9.37, |
|
"learning_rate": 1.2040133779264215e-05, |
|
"loss": 0.5304, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 9.38, |
|
"eval_loss": 0.5962061285972595, |
|
"eval_runtime": 39.2549, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1421 |
|
}, |
|
{ |
|
"epoch": 9.42, |
|
"eval_loss": 0.5967465043067932, |
|
"eval_runtime": 39.2538, |
|
"eval_samples_per_second": 76.426, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1428 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 0.5967243909835815, |
|
"eval_runtime": 39.2606, |
|
"eval_samples_per_second": 76.412, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 9.364548494983277e-06, |
|
"loss": 0.5326, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 9.51, |
|
"eval_loss": 0.5967350602149963, |
|
"eval_runtime": 39.2525, |
|
"eval_samples_per_second": 76.428, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1442 |
|
}, |
|
{ |
|
"epoch": 9.56, |
|
"eval_loss": 0.5968228578567505, |
|
"eval_runtime": 39.236, |
|
"eval_samples_per_second": 76.46, |
|
"eval_steps_per_second": 2.396, |
|
"step": 1449 |
|
}, |
|
{ |
|
"epoch": 9.61, |
|
"eval_loss": 0.5971869230270386, |
|
"eval_runtime": 39.2598, |
|
"eval_samples_per_second": 76.414, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1456 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 6.688963210702341e-06, |
|
"loss": 0.527, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 9.65, |
|
"eval_loss": 0.5972290635108948, |
|
"eval_runtime": 39.2414, |
|
"eval_samples_per_second": 76.45, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1463 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 0.5970906019210815, |
|
"eval_runtime": 39.2764, |
|
"eval_samples_per_second": 76.382, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"eval_loss": 0.5970170497894287, |
|
"eval_runtime": 39.2685, |
|
"eval_samples_per_second": 76.397, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1477 |
|
}, |
|
{ |
|
"epoch": 9.76, |
|
"learning_rate": 4.013377926421405e-06, |
|
"loss": 0.5276, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 9.79, |
|
"eval_loss": 0.5967093110084534, |
|
"eval_runtime": 39.2546, |
|
"eval_samples_per_second": 76.424, |
|
"eval_steps_per_second": 2.395, |
|
"step": 1484 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 0.5967251658439636, |
|
"eval_runtime": 39.276, |
|
"eval_samples_per_second": 76.382, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1491 |
|
}, |
|
{ |
|
"epoch": 9.88, |
|
"eval_loss": 0.5968044996261597, |
|
"eval_runtime": 39.2838, |
|
"eval_samples_per_second": 76.367, |
|
"eval_steps_per_second": 2.393, |
|
"step": 1498 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 1.3377926421404683e-06, |
|
"loss": 0.5222, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"eval_loss": 0.5967151522636414, |
|
"eval_runtime": 39.2673, |
|
"eval_samples_per_second": 76.399, |
|
"eval_steps_per_second": 2.394, |
|
"step": 1505 |
|
} |
|
], |
|
"max_steps": 1510, |
|
"num_train_epochs": 10, |
|
"total_flos": 1.3117762030029242e+19, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|