|
{ |
|
"best_metric": 5.340388298034668, |
|
"best_model_checkpoint": "./results/models/checkpoint-9325", |
|
"epoch": 40.99982412944073, |
|
"eval_steps": 500, |
|
"global_step": 9325, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 6.240943908691406, |
|
"eval_runtime": 1306.4602, |
|
"eval_samples_per_second": 2228.206, |
|
"eval_steps_per_second": 2.176, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 5.840852737426758, |
|
"eval_runtime": 1315.4788, |
|
"eval_samples_per_second": 2212.93, |
|
"eval_steps_per_second": 2.161, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00047797356828193835, |
|
"loss": 6.2671, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 5.713865756988525, |
|
"eval_runtime": 1312.4705, |
|
"eval_samples_per_second": 2218.002, |
|
"eval_steps_per_second": 2.166, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 5.635574817657471, |
|
"eval_runtime": 1313.1826, |
|
"eval_samples_per_second": 2216.799, |
|
"eval_steps_per_second": 2.165, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"learning_rate": 0.00045594713656387664, |
|
"loss": 5.7007, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 5.646896839141846, |
|
"eval_runtime": 1315.5043, |
|
"eval_samples_per_second": 2212.887, |
|
"eval_steps_per_second": 2.161, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 5.588204383850098, |
|
"eval_runtime": 1308.1379, |
|
"eval_samples_per_second": 2225.348, |
|
"eval_steps_per_second": 2.173, |
|
"step": 1364 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 0.000433920704845815, |
|
"loss": 5.577, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 5.526153564453125, |
|
"eval_runtime": 1313.5897, |
|
"eval_samples_per_second": 2216.112, |
|
"eval_steps_per_second": 2.164, |
|
"step": 1592 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 5.503279685974121, |
|
"eval_runtime": 1319.9731, |
|
"eval_samples_per_second": 2205.395, |
|
"eval_steps_per_second": 2.154, |
|
"step": 1819 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"learning_rate": 0.00041189427312775327, |
|
"loss": 5.521, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 5.491898536682129, |
|
"eval_runtime": 1312.4959, |
|
"eval_samples_per_second": 2217.959, |
|
"eval_steps_per_second": 2.166, |
|
"step": 2046 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 5.471615314483643, |
|
"eval_runtime": 1334.8552, |
|
"eval_samples_per_second": 2180.807, |
|
"eval_steps_per_second": 2.13, |
|
"step": 2274 |
|
}, |
|
{ |
|
"epoch": 10.99, |
|
"learning_rate": 0.0003898678414096916, |
|
"loss": 5.4662, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 5.465639114379883, |
|
"eval_runtime": 1311.5138, |
|
"eval_samples_per_second": 2219.62, |
|
"eval_steps_per_second": 2.168, |
|
"step": 2501 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 5.454081058502197, |
|
"eval_runtime": 1331.8262, |
|
"eval_samples_per_second": 2185.767, |
|
"eval_steps_per_second": 2.135, |
|
"step": 2729 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 5.4356560707092285, |
|
"eval_runtime": 1314.9113, |
|
"eval_samples_per_second": 2213.885, |
|
"eval_steps_per_second": 2.162, |
|
"step": 2956 |
|
}, |
|
{ |
|
"epoch": 13.19, |
|
"learning_rate": 0.00036784140969163, |
|
"loss": 5.4335, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 5.438130855560303, |
|
"eval_runtime": 1321.1785, |
|
"eval_samples_per_second": 2203.383, |
|
"eval_steps_per_second": 2.152, |
|
"step": 3184 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_loss": 5.445527076721191, |
|
"eval_runtime": 1373.6525, |
|
"eval_samples_per_second": 2119.213, |
|
"eval_steps_per_second": 2.07, |
|
"step": 3411 |
|
}, |
|
{ |
|
"epoch": 15.39, |
|
"learning_rate": 0.0003458149779735683, |
|
"loss": 5.4113, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_loss": 5.419914722442627, |
|
"eval_runtime": 1306.18, |
|
"eval_samples_per_second": 2228.684, |
|
"eval_steps_per_second": 2.177, |
|
"step": 3639 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_loss": 5.416874885559082, |
|
"eval_runtime": 1324.9112, |
|
"eval_samples_per_second": 2197.175, |
|
"eval_steps_per_second": 2.146, |
|
"step": 3866 |
|
}, |
|
{ |
|
"epoch": 17.59, |
|
"learning_rate": 0.00032378854625550663, |
|
"loss": 5.3891, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_loss": 5.415682315826416, |
|
"eval_runtime": 1305.4824, |
|
"eval_samples_per_second": 2229.875, |
|
"eval_steps_per_second": 2.178, |
|
"step": 4093 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_loss": 5.391342639923096, |
|
"eval_runtime": 1304.433, |
|
"eval_samples_per_second": 2231.669, |
|
"eval_steps_per_second": 2.179, |
|
"step": 4321 |
|
}, |
|
{ |
|
"epoch": 19.79, |
|
"learning_rate": 0.000301762114537445, |
|
"loss": 5.3728, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_loss": 5.3856120109558105, |
|
"eval_runtime": 1316.8368, |
|
"eval_samples_per_second": 2210.648, |
|
"eval_steps_per_second": 2.159, |
|
"step": 4548 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_loss": 5.4083051681518555, |
|
"eval_runtime": 1315.5537, |
|
"eval_samples_per_second": 2212.804, |
|
"eval_steps_per_second": 2.161, |
|
"step": 4776 |
|
}, |
|
{ |
|
"epoch": 21.98, |
|
"learning_rate": 0.00027973568281938326, |
|
"loss": 5.3561, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_loss": 5.3793134689331055, |
|
"eval_runtime": 1313.7832, |
|
"eval_samples_per_second": 2215.786, |
|
"eval_steps_per_second": 2.164, |
|
"step": 5003 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_loss": 5.421042442321777, |
|
"eval_runtime": 1337.1348, |
|
"eval_samples_per_second": 2177.089, |
|
"eval_steps_per_second": 2.126, |
|
"step": 5231 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_loss": 5.371836185455322, |
|
"eval_runtime": 1305.6324, |
|
"eval_samples_per_second": 2229.618, |
|
"eval_steps_per_second": 2.177, |
|
"step": 5458 |
|
}, |
|
{ |
|
"epoch": 24.18, |
|
"learning_rate": 0.0002577092511013216, |
|
"loss": 5.3458, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_loss": 5.3682403564453125, |
|
"eval_runtime": 1309.0037, |
|
"eval_samples_per_second": 2223.876, |
|
"eval_steps_per_second": 2.172, |
|
"step": 5686 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_loss": 5.368312358856201, |
|
"eval_runtime": 1303.9762, |
|
"eval_samples_per_second": 2232.45, |
|
"eval_steps_per_second": 2.18, |
|
"step": 5913 |
|
}, |
|
{ |
|
"epoch": 26.38, |
|
"learning_rate": 0.00023568281938325992, |
|
"loss": 5.3355, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_loss": 5.381237983703613, |
|
"eval_runtime": 1308.1689, |
|
"eval_samples_per_second": 2225.295, |
|
"eval_steps_per_second": 2.173, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_loss": 5.359594821929932, |
|
"eval_runtime": 1327.0099, |
|
"eval_samples_per_second": 2193.7, |
|
"eval_steps_per_second": 2.142, |
|
"step": 6368 |
|
}, |
|
{ |
|
"epoch": 28.58, |
|
"learning_rate": 0.00021365638766519823, |
|
"loss": 5.325, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_loss": 5.356747627258301, |
|
"eval_runtime": 1307.1198, |
|
"eval_samples_per_second": 2227.081, |
|
"eval_steps_per_second": 2.175, |
|
"step": 6595 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_loss": 5.362793922424316, |
|
"eval_runtime": 1308.9269, |
|
"eval_samples_per_second": 2224.007, |
|
"eval_steps_per_second": 2.172, |
|
"step": 6823 |
|
}, |
|
{ |
|
"epoch": 30.78, |
|
"learning_rate": 0.00019162995594713657, |
|
"loss": 5.3152, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 31.0, |
|
"eval_loss": 5.352448463439941, |
|
"eval_runtime": 1309.5201, |
|
"eval_samples_per_second": 2222.999, |
|
"eval_steps_per_second": 2.171, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_loss": 5.350729942321777, |
|
"eval_runtime": 1330.3256, |
|
"eval_samples_per_second": 2188.233, |
|
"eval_steps_per_second": 2.137, |
|
"step": 7278 |
|
}, |
|
{ |
|
"epoch": 32.98, |
|
"learning_rate": 0.0001696035242290749, |
|
"loss": 5.308, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 33.0, |
|
"eval_loss": 5.350089073181152, |
|
"eval_runtime": 1308.4994, |
|
"eval_samples_per_second": 2224.733, |
|
"eval_steps_per_second": 2.173, |
|
"step": 7505 |
|
}, |
|
{ |
|
"epoch": 34.0, |
|
"eval_loss": 5.350754261016846, |
|
"eval_runtime": 1309.6607, |
|
"eval_samples_per_second": 2222.76, |
|
"eval_steps_per_second": 2.171, |
|
"step": 7732 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_loss": 5.346126556396484, |
|
"eval_runtime": 1313.4913, |
|
"eval_samples_per_second": 2216.278, |
|
"eval_steps_per_second": 2.164, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 35.17, |
|
"learning_rate": 0.00014757709251101323, |
|
"loss": 5.2995, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_loss": 5.345256805419922, |
|
"eval_runtime": 1308.5486, |
|
"eval_samples_per_second": 2224.65, |
|
"eval_steps_per_second": 2.173, |
|
"step": 8187 |
|
}, |
|
{ |
|
"epoch": 37.0, |
|
"eval_loss": 5.342759132385254, |
|
"eval_runtime": 1310.0897, |
|
"eval_samples_per_second": 2222.033, |
|
"eval_steps_per_second": 2.17, |
|
"step": 8415 |
|
}, |
|
{ |
|
"epoch": 37.37, |
|
"learning_rate": 0.00012555066079295154, |
|
"loss": 5.2925, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 38.0, |
|
"eval_loss": 5.342674255371094, |
|
"eval_runtime": 1309.6934, |
|
"eval_samples_per_second": 2222.705, |
|
"eval_steps_per_second": 2.171, |
|
"step": 8642 |
|
}, |
|
{ |
|
"epoch": 39.0, |
|
"eval_loss": 5.3418426513671875, |
|
"eval_runtime": 1305.0501, |
|
"eval_samples_per_second": 2230.613, |
|
"eval_steps_per_second": 2.178, |
|
"step": 8870 |
|
}, |
|
{ |
|
"epoch": 39.57, |
|
"learning_rate": 0.00010352422907488987, |
|
"loss": 5.2885, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_loss": 5.341084957122803, |
|
"eval_runtime": 1304.7409, |
|
"eval_samples_per_second": 2231.142, |
|
"eval_steps_per_second": 2.179, |
|
"step": 9097 |
|
}, |
|
{ |
|
"epoch": 41.0, |
|
"eval_loss": 5.340388298034668, |
|
"eval_runtime": 1304.8663, |
|
"eval_samples_per_second": 2230.927, |
|
"eval_steps_per_second": 2.179, |
|
"step": 9325 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 11350, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 50, |
|
"save_steps": 500, |
|
"total_flos": 1.6473372597391884e+19, |
|
"train_batch_size": 1024, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|