|
{ |
|
"best_metric": 2.555936574935913, |
|
"best_model_checkpoint": "./model_tweets_2020_Q2_50/checkpoint-160000", |
|
"epoch": 10.105263157894736, |
|
"eval_steps": 8000, |
|
"global_step": 2400000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.03, |
|
"eval_loss": 2.664475917816162, |
|
"eval_runtime": 220.2299, |
|
"eval_samples_per_second": 908.142, |
|
"eval_steps_per_second": 56.759, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.939131159843243e-06, |
|
"loss": 2.8656, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.6464931964874268, |
|
"eval_runtime": 221.9882, |
|
"eval_samples_per_second": 900.949, |
|
"eval_steps_per_second": 56.309, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_loss": 2.6185896396636963, |
|
"eval_runtime": 220.2227, |
|
"eval_samples_per_second": 908.172, |
|
"eval_steps_per_second": 56.761, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 9.872425581589261e-06, |
|
"loss": 2.7946, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_loss": 2.6234936714172363, |
|
"eval_runtime": 220.6677, |
|
"eval_samples_per_second": 906.34, |
|
"eval_steps_per_second": 56.646, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_loss": 2.615138292312622, |
|
"eval_runtime": 221.7491, |
|
"eval_samples_per_second": 901.92, |
|
"eval_steps_per_second": 56.37, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 9.80572000333528e-06, |
|
"loss": 2.7911, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_loss": 2.6128268241882324, |
|
"eval_runtime": 221.3475, |
|
"eval_samples_per_second": 903.557, |
|
"eval_steps_per_second": 56.472, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_loss": 2.6009654998779297, |
|
"eval_runtime": 221.2039, |
|
"eval_samples_per_second": 904.143, |
|
"eval_steps_per_second": 56.509, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 9.739014425081299e-06, |
|
"loss": 2.7898, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_loss": 2.614436149597168, |
|
"eval_runtime": 221.9246, |
|
"eval_samples_per_second": 901.207, |
|
"eval_steps_per_second": 56.325, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"eval_loss": 2.597571611404419, |
|
"eval_runtime": 222.8659, |
|
"eval_samples_per_second": 897.401, |
|
"eval_steps_per_second": 56.088, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 9.672308846827316e-06, |
|
"loss": 2.7791, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"eval_loss": 2.6006274223327637, |
|
"eval_runtime": 221.9145, |
|
"eval_samples_per_second": 901.248, |
|
"eval_steps_per_second": 56.328, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_loss": 2.5888915061950684, |
|
"eval_runtime": 223.9796, |
|
"eval_samples_per_second": 892.938, |
|
"eval_steps_per_second": 55.809, |
|
"step": 88000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 9.605603268573334e-06, |
|
"loss": 2.7776, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_loss": 2.5888023376464844, |
|
"eval_runtime": 221.625, |
|
"eval_samples_per_second": 902.425, |
|
"eval_steps_per_second": 56.402, |
|
"step": 96000 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 2.584191083908081, |
|
"eval_runtime": 222.5849, |
|
"eval_samples_per_second": 898.533, |
|
"eval_steps_per_second": 56.158, |
|
"step": 104000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 9.538897690319354e-06, |
|
"loss": 2.7702, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_loss": 2.5760483741760254, |
|
"eval_runtime": 222.9149, |
|
"eval_samples_per_second": 897.203, |
|
"eval_steps_per_second": 56.075, |
|
"step": 112000 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 2.5719943046569824, |
|
"eval_runtime": 220.2346, |
|
"eval_samples_per_second": 908.123, |
|
"eval_steps_per_second": 56.758, |
|
"step": 120000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 9.472192112065373e-06, |
|
"loss": 2.7661, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_loss": 2.5709779262542725, |
|
"eval_runtime": 221.9288, |
|
"eval_samples_per_second": 901.19, |
|
"eval_steps_per_second": 56.324, |
|
"step": 128000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_loss": 2.56732177734375, |
|
"eval_runtime": 222.0037, |
|
"eval_samples_per_second": 900.886, |
|
"eval_steps_per_second": 56.305, |
|
"step": 136000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 9.405486533811392e-06, |
|
"loss": 2.7609, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_loss": 2.5692689418792725, |
|
"eval_runtime": 221.4862, |
|
"eval_samples_per_second": 902.991, |
|
"eval_steps_per_second": 56.437, |
|
"step": 144000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_loss": 2.5623199939727783, |
|
"eval_runtime": 222.634, |
|
"eval_samples_per_second": 898.335, |
|
"eval_steps_per_second": 56.146, |
|
"step": 152000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 9.338780955557409e-06, |
|
"loss": 2.7557, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_loss": 2.555936574935913, |
|
"eval_runtime": 222.9827, |
|
"eval_samples_per_second": 896.93, |
|
"eval_steps_per_second": 56.058, |
|
"step": 160000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_loss": 2.564979314804077, |
|
"eval_runtime": 223.4268, |
|
"eval_samples_per_second": 895.148, |
|
"eval_steps_per_second": 55.947, |
|
"step": 168000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.272075377303427e-06, |
|
"loss": 2.7584, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_loss": 2.558361053466797, |
|
"eval_runtime": 223.2855, |
|
"eval_samples_per_second": 895.714, |
|
"eval_steps_per_second": 55.982, |
|
"step": 176000 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_loss": 2.5590734481811523, |
|
"eval_runtime": 224.766, |
|
"eval_samples_per_second": 889.814, |
|
"eval_steps_per_second": 55.613, |
|
"step": 184000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 9.205369799049446e-06, |
|
"loss": 2.7619, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_loss": 2.5597450733184814, |
|
"eval_runtime": 223.3408, |
|
"eval_samples_per_second": 895.492, |
|
"eval_steps_per_second": 55.968, |
|
"step": 192000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"eval_loss": 2.564985990524292, |
|
"eval_runtime": 222.3245, |
|
"eval_samples_per_second": 899.586, |
|
"eval_steps_per_second": 56.224, |
|
"step": 200000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 9.138664220795464e-06, |
|
"loss": 2.7678, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_loss": 2.572838306427002, |
|
"eval_runtime": 222.9177, |
|
"eval_samples_per_second": 897.192, |
|
"eval_steps_per_second": 56.074, |
|
"step": 208000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_loss": 2.571180820465088, |
|
"eval_runtime": 222.4106, |
|
"eval_samples_per_second": 899.238, |
|
"eval_steps_per_second": 56.202, |
|
"step": 216000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 9.071958642541483e-06, |
|
"loss": 2.7735, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_loss": 2.5728507041931152, |
|
"eval_runtime": 221.0881, |
|
"eval_samples_per_second": 904.617, |
|
"eval_steps_per_second": 56.539, |
|
"step": 224000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_loss": 2.5754916667938232, |
|
"eval_runtime": 224.4187, |
|
"eval_samples_per_second": 891.191, |
|
"eval_steps_per_second": 55.699, |
|
"step": 232000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 9.005253064287502e-06, |
|
"loss": 2.777, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"eval_loss": 2.571467876434326, |
|
"eval_runtime": 223.3627, |
|
"eval_samples_per_second": 895.405, |
|
"eval_steps_per_second": 55.963, |
|
"step": 240000 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_loss": 2.5747482776641846, |
|
"eval_runtime": 223.2929, |
|
"eval_samples_per_second": 895.685, |
|
"eval_steps_per_second": 55.98, |
|
"step": 248000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 8.93854748603352e-06, |
|
"loss": 2.7692, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_loss": 2.5781774520874023, |
|
"eval_runtime": 225.2908, |
|
"eval_samples_per_second": 887.742, |
|
"eval_steps_per_second": 55.484, |
|
"step": 256000 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"eval_loss": 2.58413028717041, |
|
"eval_runtime": 223.7883, |
|
"eval_samples_per_second": 893.702, |
|
"eval_steps_per_second": 55.856, |
|
"step": 264000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.871841907779539e-06, |
|
"loss": 2.7826, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_loss": 2.573080539703369, |
|
"eval_runtime": 222.4765, |
|
"eval_samples_per_second": 898.971, |
|
"eval_steps_per_second": 56.186, |
|
"step": 272000 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"eval_loss": 2.5836124420166016, |
|
"eval_runtime": 222.4727, |
|
"eval_samples_per_second": 898.987, |
|
"eval_steps_per_second": 56.187, |
|
"step": 280000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 8.805136329525557e-06, |
|
"loss": 2.7845, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"eval_loss": 2.5840952396392822, |
|
"eval_runtime": 223.8774, |
|
"eval_samples_per_second": 893.346, |
|
"eval_steps_per_second": 55.834, |
|
"step": 288000 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_loss": 2.5810587406158447, |
|
"eval_runtime": 224.0492, |
|
"eval_samples_per_second": 892.661, |
|
"eval_steps_per_second": 55.791, |
|
"step": 296000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 8.738430751271576e-06, |
|
"loss": 2.7909, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"eval_loss": 2.592771530151367, |
|
"eval_runtime": 224.5453, |
|
"eval_samples_per_second": 890.689, |
|
"eval_steps_per_second": 55.668, |
|
"step": 304000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 2.597700834274292, |
|
"eval_runtime": 222.8877, |
|
"eval_samples_per_second": 897.313, |
|
"eval_steps_per_second": 56.082, |
|
"step": 312000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 8.671725173017595e-06, |
|
"loss": 2.7993, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"eval_loss": 2.6025121212005615, |
|
"eval_runtime": 223.4062, |
|
"eval_samples_per_second": 895.23, |
|
"eval_steps_per_second": 55.952, |
|
"step": 320000 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 2.6072068214416504, |
|
"eval_runtime": 222.1596, |
|
"eval_samples_per_second": 900.254, |
|
"eval_steps_per_second": 56.266, |
|
"step": 328000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 8.605019594763613e-06, |
|
"loss": 2.8107, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_loss": 2.6110291481018066, |
|
"eval_runtime": 221.489, |
|
"eval_samples_per_second": 902.979, |
|
"eval_steps_per_second": 56.436, |
|
"step": 336000 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"eval_loss": 2.6020007133483887, |
|
"eval_runtime": 221.5356, |
|
"eval_samples_per_second": 902.79, |
|
"eval_steps_per_second": 56.424, |
|
"step": 344000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 8.538314016509632e-06, |
|
"loss": 2.8102, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 2.606468677520752, |
|
"eval_runtime": 221.9142, |
|
"eval_samples_per_second": 901.249, |
|
"eval_steps_per_second": 56.328, |
|
"step": 352000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_loss": 2.620694637298584, |
|
"eval_runtime": 223.9159, |
|
"eval_samples_per_second": 893.193, |
|
"eval_steps_per_second": 55.825, |
|
"step": 360000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 8.471608438255649e-06, |
|
"loss": 2.8247, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"eval_loss": 2.6191916465759277, |
|
"eval_runtime": 224.0761, |
|
"eval_samples_per_second": 892.554, |
|
"eval_steps_per_second": 55.785, |
|
"step": 368000 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"eval_loss": 2.6223857402801514, |
|
"eval_runtime": 224.2988, |
|
"eval_samples_per_second": 891.668, |
|
"eval_steps_per_second": 55.729, |
|
"step": 376000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 8.404902860001667e-06, |
|
"loss": 2.8271, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"eval_loss": 2.6205480098724365, |
|
"eval_runtime": 224.6631, |
|
"eval_samples_per_second": 890.222, |
|
"eval_steps_per_second": 55.639, |
|
"step": 384000 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 2.62916898727417, |
|
"eval_runtime": 221.5526, |
|
"eval_samples_per_second": 902.72, |
|
"eval_steps_per_second": 56.42, |
|
"step": 392000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 8.338197281747686e-06, |
|
"loss": 2.8415, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 2.6347849369049072, |
|
"eval_runtime": 222.2459, |
|
"eval_samples_per_second": 899.904, |
|
"eval_steps_per_second": 56.244, |
|
"step": 400000 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"eval_loss": 2.6518216133117676, |
|
"eval_runtime": 222.7257, |
|
"eval_samples_per_second": 897.966, |
|
"eval_steps_per_second": 56.123, |
|
"step": 408000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 8.271491703493705e-06, |
|
"loss": 2.842, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 2.6465137004852295, |
|
"eval_runtime": 221.6934, |
|
"eval_samples_per_second": 902.147, |
|
"eval_steps_per_second": 56.384, |
|
"step": 416000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"eval_loss": 2.6434342861175537, |
|
"eval_runtime": 222.3028, |
|
"eval_samples_per_second": 899.674, |
|
"eval_steps_per_second": 56.23, |
|
"step": 424000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 8.204786125239725e-06, |
|
"loss": 2.8431, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 2.641423225402832, |
|
"eval_runtime": 222.8849, |
|
"eval_samples_per_second": 897.324, |
|
"eval_steps_per_second": 56.083, |
|
"step": 432000 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"eval_loss": 2.6531593799591064, |
|
"eval_runtime": 223.101, |
|
"eval_samples_per_second": 896.455, |
|
"eval_steps_per_second": 56.028, |
|
"step": 440000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 8.138080546985743e-06, |
|
"loss": 2.8599, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 2.6645281314849854, |
|
"eval_runtime": 222.7835, |
|
"eval_samples_per_second": 897.732, |
|
"eval_steps_per_second": 56.108, |
|
"step": 448000 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"eval_loss": 2.6651265621185303, |
|
"eval_runtime": 222.493, |
|
"eval_samples_per_second": 898.905, |
|
"eval_steps_per_second": 56.182, |
|
"step": 456000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 8.07137496873176e-06, |
|
"loss": 2.8567, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_loss": 2.6693992614746094, |
|
"eval_runtime": 221.9941, |
|
"eval_samples_per_second": 900.925, |
|
"eval_steps_per_second": 56.308, |
|
"step": 464000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"eval_loss": 2.66097354888916, |
|
"eval_runtime": 222.2278, |
|
"eval_samples_per_second": 899.977, |
|
"eval_steps_per_second": 56.249, |
|
"step": 472000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 8.004669390477779e-06, |
|
"loss": 2.8682, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"eval_loss": 2.687664747238159, |
|
"eval_runtime": 222.351, |
|
"eval_samples_per_second": 899.479, |
|
"eval_steps_per_second": 56.217, |
|
"step": 480000 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"eval_loss": 2.6723899841308594, |
|
"eval_runtime": 224.037, |
|
"eval_samples_per_second": 892.71, |
|
"eval_steps_per_second": 55.794, |
|
"step": 488000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 7.937963812223798e-06, |
|
"loss": 2.8693, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"eval_loss": 2.683910608291626, |
|
"eval_runtime": 223.5884, |
|
"eval_samples_per_second": 894.501, |
|
"eval_steps_per_second": 55.906, |
|
"step": 496000 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"eval_loss": 2.692282199859619, |
|
"eval_runtime": 222.8054, |
|
"eval_samples_per_second": 897.644, |
|
"eval_steps_per_second": 56.103, |
|
"step": 504000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 7.871258233969816e-06, |
|
"loss": 2.8881, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"eval_loss": 2.696408987045288, |
|
"eval_runtime": 223.0143, |
|
"eval_samples_per_second": 896.803, |
|
"eval_steps_per_second": 56.05, |
|
"step": 512000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 2.698155403137207, |
|
"eval_runtime": 223.8418, |
|
"eval_samples_per_second": 893.488, |
|
"eval_steps_per_second": 55.843, |
|
"step": 520000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 7.804552655715835e-06, |
|
"loss": 2.8874, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"eval_loss": 2.6960911750793457, |
|
"eval_runtime": 224.8442, |
|
"eval_samples_per_second": 889.505, |
|
"eval_steps_per_second": 55.594, |
|
"step": 528000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 2.6883530616760254, |
|
"eval_runtime": 223.4198, |
|
"eval_samples_per_second": 895.176, |
|
"eval_steps_per_second": 55.948, |
|
"step": 536000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 7.737847077461853e-06, |
|
"loss": 2.8899, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"eval_loss": 2.7055277824401855, |
|
"eval_runtime": 222.7527, |
|
"eval_samples_per_second": 897.857, |
|
"eval_steps_per_second": 56.116, |
|
"step": 544000 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"eval_loss": 2.6987791061401367, |
|
"eval_runtime": 226.517, |
|
"eval_samples_per_second": 882.936, |
|
"eval_steps_per_second": 55.183, |
|
"step": 552000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 7.671141499207872e-06, |
|
"loss": 2.8966, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"eval_loss": 2.7103066444396973, |
|
"eval_runtime": 226.9023, |
|
"eval_samples_per_second": 881.437, |
|
"eval_steps_per_second": 55.09, |
|
"step": 560000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_loss": 2.709984302520752, |
|
"eval_runtime": 226.5608, |
|
"eval_samples_per_second": 882.765, |
|
"eval_steps_per_second": 55.173, |
|
"step": 568000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 7.604435920953891e-06, |
|
"loss": 2.9, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"eval_loss": 2.716878652572632, |
|
"eval_runtime": 227.2343, |
|
"eval_samples_per_second": 880.149, |
|
"eval_steps_per_second": 55.009, |
|
"step": 576000 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"eval_loss": 2.718041181564331, |
|
"eval_runtime": 224.0002, |
|
"eval_samples_per_second": 892.856, |
|
"eval_steps_per_second": 55.804, |
|
"step": 584000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 7.537730342699909e-06, |
|
"loss": 2.9237, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"eval_loss": 2.7270028591156006, |
|
"eval_runtime": 223.2886, |
|
"eval_samples_per_second": 895.702, |
|
"eval_steps_per_second": 55.981, |
|
"step": 592000 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"eval_loss": 2.726536989212036, |
|
"eval_runtime": 222.779, |
|
"eval_samples_per_second": 897.751, |
|
"eval_steps_per_second": 56.109, |
|
"step": 600000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 7.471024764445928e-06, |
|
"loss": 2.9236, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"eval_loss": 2.732328176498413, |
|
"eval_runtime": 223.8713, |
|
"eval_samples_per_second": 893.37, |
|
"eval_steps_per_second": 55.836, |
|
"step": 608000 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"eval_loss": 2.73500394821167, |
|
"eval_runtime": 225.493, |
|
"eval_samples_per_second": 886.945, |
|
"eval_steps_per_second": 55.434, |
|
"step": 616000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 7.4043191861919465e-06, |
|
"loss": 2.9276, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"eval_loss": 2.7333498001098633, |
|
"eval_runtime": 224.8806, |
|
"eval_samples_per_second": 889.361, |
|
"eval_steps_per_second": 55.585, |
|
"step": 624000 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"eval_loss": 2.734511613845825, |
|
"eval_runtime": 225.2251, |
|
"eval_samples_per_second": 888.001, |
|
"eval_steps_per_second": 55.5, |
|
"step": 632000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 7.337613607937964e-06, |
|
"loss": 2.9252, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"eval_loss": 2.749704360961914, |
|
"eval_runtime": 225.1054, |
|
"eval_samples_per_second": 888.473, |
|
"eval_steps_per_second": 55.53, |
|
"step": 640000 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"eval_loss": 2.74284029006958, |
|
"eval_runtime": 224.0229, |
|
"eval_samples_per_second": 892.766, |
|
"eval_steps_per_second": 55.798, |
|
"step": 648000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 7.270908029683983e-06, |
|
"loss": 2.9364, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"eval_loss": 2.7391881942749023, |
|
"eval_runtime": 224.6028, |
|
"eval_samples_per_second": 890.461, |
|
"eval_steps_per_second": 55.654, |
|
"step": 656000 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_loss": 2.750549077987671, |
|
"eval_runtime": 223.6418, |
|
"eval_samples_per_second": 894.287, |
|
"eval_steps_per_second": 55.893, |
|
"step": 664000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 7.2042024514300015e-06, |
|
"loss": 2.9366, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"eval_loss": 2.7392961978912354, |
|
"eval_runtime": 223.5241, |
|
"eval_samples_per_second": 894.758, |
|
"eval_steps_per_second": 55.922, |
|
"step": 672000 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_loss": 2.7371537685394287, |
|
"eval_runtime": 223.9923, |
|
"eval_samples_per_second": 892.888, |
|
"eval_steps_per_second": 55.805, |
|
"step": 680000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 7.13749687317602e-06, |
|
"loss": 2.9437, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"eval_loss": 2.7450687885284424, |
|
"eval_runtime": 223.0769, |
|
"eval_samples_per_second": 896.552, |
|
"eval_steps_per_second": 56.034, |
|
"step": 688000 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"eval_loss": 2.748831033706665, |
|
"eval_runtime": 222.9228, |
|
"eval_samples_per_second": 897.172, |
|
"eval_steps_per_second": 56.073, |
|
"step": 696000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 7.070791294922038e-06, |
|
"loss": 2.9483, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 2.7586183547973633, |
|
"eval_runtime": 223.3142, |
|
"eval_samples_per_second": 895.599, |
|
"eval_steps_per_second": 55.975, |
|
"step": 704000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 2.7612552642822266, |
|
"eval_runtime": 222.226, |
|
"eval_samples_per_second": 899.985, |
|
"eval_steps_per_second": 56.249, |
|
"step": 712000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"learning_rate": 7.0040857166680564e-06, |
|
"loss": 2.9588, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"eval_loss": 2.76190447807312, |
|
"eval_runtime": 222.4583, |
|
"eval_samples_per_second": 899.045, |
|
"eval_steps_per_second": 56.19, |
|
"step": 720000 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"eval_loss": 2.7680461406707764, |
|
"eval_runtime": 221.9857, |
|
"eval_samples_per_second": 900.959, |
|
"eval_steps_per_second": 56.31, |
|
"step": 728000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 6.937380138414076e-06, |
|
"loss": 2.9422, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 2.754580020904541, |
|
"eval_runtime": 221.9355, |
|
"eval_samples_per_second": 901.163, |
|
"eval_steps_per_second": 56.323, |
|
"step": 736000 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"eval_loss": 2.762883424758911, |
|
"eval_runtime": 221.6295, |
|
"eval_samples_per_second": 902.407, |
|
"eval_steps_per_second": 56.4, |
|
"step": 744000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.8706745601600945e-06, |
|
"loss": 2.965, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"eval_loss": 2.759537696838379, |
|
"eval_runtime": 221.2415, |
|
"eval_samples_per_second": 903.99, |
|
"eval_steps_per_second": 56.499, |
|
"step": 752000 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_loss": 2.776278018951416, |
|
"eval_runtime": 221.0108, |
|
"eval_samples_per_second": 904.933, |
|
"eval_steps_per_second": 56.558, |
|
"step": 760000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 6.803968981906113e-06, |
|
"loss": 2.959, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"eval_loss": 2.7738993167877197, |
|
"eval_runtime": 221.3449, |
|
"eval_samples_per_second": 903.567, |
|
"eval_steps_per_second": 56.473, |
|
"step": 768000 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"eval_loss": 2.7838892936706543, |
|
"eval_runtime": 223.8916, |
|
"eval_samples_per_second": 893.29, |
|
"eval_steps_per_second": 55.831, |
|
"step": 776000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 6.737263403652131e-06, |
|
"loss": 2.9604, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 2.7680771350860596, |
|
"eval_runtime": 223.8457, |
|
"eval_samples_per_second": 893.473, |
|
"eval_steps_per_second": 55.842, |
|
"step": 784000 |
|
}, |
|
{ |
|
"epoch": 3.33, |
|
"eval_loss": 2.7816002368927, |
|
"eval_runtime": 224.025, |
|
"eval_samples_per_second": 892.757, |
|
"eval_steps_per_second": 55.797, |
|
"step": 792000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"learning_rate": 6.6705578253981495e-06, |
|
"loss": 2.9638, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.37, |
|
"eval_loss": 2.7812399864196777, |
|
"eval_runtime": 224.4231, |
|
"eval_samples_per_second": 891.174, |
|
"eval_steps_per_second": 55.698, |
|
"step": 800000 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"eval_loss": 2.7845778465270996, |
|
"eval_runtime": 223.1998, |
|
"eval_samples_per_second": 896.058, |
|
"eval_steps_per_second": 56.004, |
|
"step": 808000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"learning_rate": 6.603852247144168e-06, |
|
"loss": 2.9704, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.44, |
|
"eval_loss": 2.7766318321228027, |
|
"eval_runtime": 222.3046, |
|
"eval_samples_per_second": 899.667, |
|
"eval_steps_per_second": 56.229, |
|
"step": 816000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"eval_loss": 2.786909580230713, |
|
"eval_runtime": 221.8638, |
|
"eval_samples_per_second": 901.454, |
|
"eval_steps_per_second": 56.341, |
|
"step": 824000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"learning_rate": 6.537146668890187e-06, |
|
"loss": 2.9684, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.5, |
|
"eval_loss": 2.7741353511810303, |
|
"eval_runtime": 222.0395, |
|
"eval_samples_per_second": 900.741, |
|
"eval_steps_per_second": 56.296, |
|
"step": 832000 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"eval_loss": 2.773477077484131, |
|
"eval_runtime": 225.8502, |
|
"eval_samples_per_second": 885.543, |
|
"eval_steps_per_second": 55.346, |
|
"step": 840000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"learning_rate": 6.4704410906362044e-06, |
|
"loss": 2.9723, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.57, |
|
"eval_loss": 2.7700908184051514, |
|
"eval_runtime": 226.2294, |
|
"eval_samples_per_second": 884.058, |
|
"eval_steps_per_second": 55.254, |
|
"step": 848000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_loss": 2.7779886722564697, |
|
"eval_runtime": 224.2673, |
|
"eval_samples_per_second": 891.793, |
|
"eval_steps_per_second": 55.737, |
|
"step": 856000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 6.403735512382223e-06, |
|
"loss": 2.9734, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_loss": 2.7833447456359863, |
|
"eval_runtime": 223.9605, |
|
"eval_samples_per_second": 893.014, |
|
"eval_steps_per_second": 55.813, |
|
"step": 864000 |
|
}, |
|
{ |
|
"epoch": 3.67, |
|
"eval_loss": 2.790961503982544, |
|
"eval_runtime": 223.0622, |
|
"eval_samples_per_second": 896.611, |
|
"eval_steps_per_second": 56.038, |
|
"step": 872000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 6.337029934128242e-06, |
|
"loss": 2.9806, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"eval_loss": 2.794116258621216, |
|
"eval_runtime": 222.8246, |
|
"eval_samples_per_second": 897.567, |
|
"eval_steps_per_second": 56.098, |
|
"step": 880000 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"eval_loss": 2.7997074127197266, |
|
"eval_runtime": 223.2842, |
|
"eval_samples_per_second": 895.719, |
|
"eval_steps_per_second": 55.982, |
|
"step": 888000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 6.270324355874261e-06, |
|
"loss": 2.9808, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"eval_loss": 2.802687406539917, |
|
"eval_runtime": 223.8034, |
|
"eval_samples_per_second": 893.641, |
|
"eval_steps_per_second": 55.853, |
|
"step": 896000 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"eval_loss": 2.797201156616211, |
|
"eval_runtime": 221.8286, |
|
"eval_samples_per_second": 901.597, |
|
"eval_steps_per_second": 56.35, |
|
"step": 904000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 6.20361877762028e-06, |
|
"loss": 3.0008, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"eval_loss": 2.8025898933410645, |
|
"eval_runtime": 222.2117, |
|
"eval_samples_per_second": 900.042, |
|
"eval_steps_per_second": 56.253, |
|
"step": 912000 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"eval_loss": 2.7974584102630615, |
|
"eval_runtime": 222.1337, |
|
"eval_samples_per_second": 900.358, |
|
"eval_steps_per_second": 56.272, |
|
"step": 920000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"learning_rate": 6.1369131993662975e-06, |
|
"loss": 2.9934, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.91, |
|
"eval_loss": 2.797086000442505, |
|
"eval_runtime": 221.5435, |
|
"eval_samples_per_second": 902.757, |
|
"eval_steps_per_second": 56.422, |
|
"step": 928000 |
|
}, |
|
{ |
|
"epoch": 3.94, |
|
"eval_loss": 2.8030388355255127, |
|
"eval_runtime": 226.5332, |
|
"eval_samples_per_second": 882.873, |
|
"eval_steps_per_second": 55.18, |
|
"step": 936000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.070207621112316e-06, |
|
"loss": 2.9927, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"eval_loss": 2.8082187175750732, |
|
"eval_runtime": 224.5948, |
|
"eval_samples_per_second": 890.492, |
|
"eval_steps_per_second": 55.656, |
|
"step": 944000 |
|
}, |
|
{ |
|
"epoch": 4.01, |
|
"eval_loss": 2.820798397064209, |
|
"eval_runtime": 224.7429, |
|
"eval_samples_per_second": 889.906, |
|
"eval_steps_per_second": 55.619, |
|
"step": 952000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"learning_rate": 6.003502042858335e-06, |
|
"loss": 3.0013, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.04, |
|
"eval_loss": 2.8129076957702637, |
|
"eval_runtime": 224.0828, |
|
"eval_samples_per_second": 892.527, |
|
"eval_steps_per_second": 55.783, |
|
"step": 960000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"eval_loss": 2.823551893234253, |
|
"eval_runtime": 222.6379, |
|
"eval_samples_per_second": 898.32, |
|
"eval_steps_per_second": 56.145, |
|
"step": 968000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"learning_rate": 5.936796464604353e-06, |
|
"loss": 2.9996, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.11, |
|
"eval_loss": 2.8225581645965576, |
|
"eval_runtime": 223.2923, |
|
"eval_samples_per_second": 895.687, |
|
"eval_steps_per_second": 55.98, |
|
"step": 976000 |
|
}, |
|
{ |
|
"epoch": 4.14, |
|
"eval_loss": 2.827303409576416, |
|
"eval_runtime": 223.5156, |
|
"eval_samples_per_second": 894.792, |
|
"eval_steps_per_second": 55.925, |
|
"step": 984000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"learning_rate": 5.870090886350371e-06, |
|
"loss": 3.0125, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.18, |
|
"eval_loss": 2.8161449432373047, |
|
"eval_runtime": 222.4898, |
|
"eval_samples_per_second": 898.917, |
|
"eval_steps_per_second": 56.182, |
|
"step": 992000 |
|
}, |
|
{ |
|
"epoch": 4.21, |
|
"eval_loss": 2.8249175548553467, |
|
"eval_runtime": 224.1746, |
|
"eval_samples_per_second": 892.162, |
|
"eval_steps_per_second": 55.76, |
|
"step": 1000000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 5.80338530809639e-06, |
|
"loss": 3.0086, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"eval_loss": 2.832012414932251, |
|
"eval_runtime": 224.9255, |
|
"eval_samples_per_second": 889.184, |
|
"eval_steps_per_second": 55.574, |
|
"step": 1008000 |
|
}, |
|
{ |
|
"epoch": 4.28, |
|
"eval_loss": 2.831321954727173, |
|
"eval_runtime": 225.1137, |
|
"eval_samples_per_second": 888.44, |
|
"eval_steps_per_second": 55.528, |
|
"step": 1016000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 5.736679729842408e-06, |
|
"loss": 3.0077, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"eval_loss": 2.83213210105896, |
|
"eval_runtime": 224.7924, |
|
"eval_samples_per_second": 889.71, |
|
"eval_steps_per_second": 55.607, |
|
"step": 1024000 |
|
}, |
|
{ |
|
"epoch": 4.35, |
|
"eval_loss": 2.833178758621216, |
|
"eval_runtime": 225.4632, |
|
"eval_samples_per_second": 887.063, |
|
"eval_steps_per_second": 55.441, |
|
"step": 1032000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"learning_rate": 5.669974151588427e-06, |
|
"loss": 3.0186, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.38, |
|
"eval_loss": 2.8288471698760986, |
|
"eval_runtime": 225.9333, |
|
"eval_samples_per_second": 885.217, |
|
"eval_steps_per_second": 55.326, |
|
"step": 1040000 |
|
}, |
|
{ |
|
"epoch": 4.41, |
|
"eval_loss": 2.839233160018921, |
|
"eval_runtime": 225.2383, |
|
"eval_samples_per_second": 887.949, |
|
"eval_steps_per_second": 55.497, |
|
"step": 1048000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 5.603268573334446e-06, |
|
"loss": 3.0311, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"eval_loss": 2.824310302734375, |
|
"eval_runtime": 223.8873, |
|
"eval_samples_per_second": 893.307, |
|
"eval_steps_per_second": 55.832, |
|
"step": 1056000 |
|
}, |
|
{ |
|
"epoch": 4.48, |
|
"eval_loss": 2.852445602416992, |
|
"eval_runtime": 226.2506, |
|
"eval_samples_per_second": 883.976, |
|
"eval_steps_per_second": 55.248, |
|
"step": 1064000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"learning_rate": 5.536562995080464e-06, |
|
"loss": 3.0199, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.51, |
|
"eval_loss": 2.834698438644409, |
|
"eval_runtime": 224.6576, |
|
"eval_samples_per_second": 890.244, |
|
"eval_steps_per_second": 55.64, |
|
"step": 1072000 |
|
}, |
|
{ |
|
"epoch": 4.55, |
|
"eval_loss": 2.8437862396240234, |
|
"eval_runtime": 224.6897, |
|
"eval_samples_per_second": 890.116, |
|
"eval_steps_per_second": 55.632, |
|
"step": 1080000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"learning_rate": 5.469857416826483e-06, |
|
"loss": 3.0198, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.58, |
|
"eval_loss": 2.8415181636810303, |
|
"eval_runtime": 223.938, |
|
"eval_samples_per_second": 893.104, |
|
"eval_steps_per_second": 55.819, |
|
"step": 1088000 |
|
}, |
|
{ |
|
"epoch": 4.61, |
|
"eval_loss": 2.84600567817688, |
|
"eval_runtime": 222.512, |
|
"eval_samples_per_second": 898.828, |
|
"eval_steps_per_second": 56.177, |
|
"step": 1096000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"learning_rate": 5.403151838572501e-06, |
|
"loss": 3.0279, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.65, |
|
"eval_loss": 2.855103015899658, |
|
"eval_runtime": 224.3844, |
|
"eval_samples_per_second": 891.328, |
|
"eval_steps_per_second": 55.708, |
|
"step": 1104000 |
|
}, |
|
{ |
|
"epoch": 4.68, |
|
"eval_loss": 2.8528149127960205, |
|
"eval_runtime": 222.2925, |
|
"eval_samples_per_second": 899.715, |
|
"eval_steps_per_second": 56.232, |
|
"step": 1112000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"learning_rate": 5.33644626031852e-06, |
|
"loss": 3.0319, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.72, |
|
"eval_loss": 2.8601133823394775, |
|
"eval_runtime": 225.9192, |
|
"eval_samples_per_second": 885.272, |
|
"eval_steps_per_second": 55.33, |
|
"step": 1120000 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 2.8543853759765625, |
|
"eval_runtime": 228.4752, |
|
"eval_samples_per_second": 875.369, |
|
"eval_steps_per_second": 54.711, |
|
"step": 1128000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"learning_rate": 5.269740682064538e-06, |
|
"loss": 3.0371, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.78, |
|
"eval_loss": 2.855318069458008, |
|
"eval_runtime": 229.1947, |
|
"eval_samples_per_second": 872.621, |
|
"eval_steps_per_second": 54.539, |
|
"step": 1136000 |
|
}, |
|
{ |
|
"epoch": 4.82, |
|
"eval_loss": 2.8596949577331543, |
|
"eval_runtime": 228.9063, |
|
"eval_samples_per_second": 873.72, |
|
"eval_steps_per_second": 54.607, |
|
"step": 1144000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 5.203035103810556e-06, |
|
"loss": 3.038, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"eval_loss": 2.865326166152954, |
|
"eval_runtime": 228.6229, |
|
"eval_samples_per_second": 874.803, |
|
"eval_steps_per_second": 54.675, |
|
"step": 1152000 |
|
}, |
|
{ |
|
"epoch": 4.88, |
|
"eval_loss": 2.856044292449951, |
|
"eval_runtime": 224.6889, |
|
"eval_samples_per_second": 890.12, |
|
"eval_steps_per_second": 55.632, |
|
"step": 1160000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"learning_rate": 5.136329525556575e-06, |
|
"loss": 3.0318, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.92, |
|
"eval_loss": 2.860161542892456, |
|
"eval_runtime": 223.8596, |
|
"eval_samples_per_second": 893.417, |
|
"eval_steps_per_second": 55.839, |
|
"step": 1168000 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 2.8483996391296387, |
|
"eval_runtime": 223.3074, |
|
"eval_samples_per_second": 895.627, |
|
"eval_steps_per_second": 55.977, |
|
"step": 1176000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"learning_rate": 5.0696239473025935e-06, |
|
"loss": 3.0449, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"eval_loss": 2.861185073852539, |
|
"eval_runtime": 223.8763, |
|
"eval_samples_per_second": 893.35, |
|
"eval_steps_per_second": 55.834, |
|
"step": 1184000 |
|
}, |
|
{ |
|
"epoch": 5.02, |
|
"eval_loss": 2.8597800731658936, |
|
"eval_runtime": 223.5703, |
|
"eval_samples_per_second": 894.573, |
|
"eval_steps_per_second": 55.911, |
|
"step": 1192000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"learning_rate": 5.002918369048611e-06, |
|
"loss": 3.0384, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.05, |
|
"eval_loss": 2.8580985069274902, |
|
"eval_runtime": 223.5118, |
|
"eval_samples_per_second": 894.807, |
|
"eval_steps_per_second": 55.925, |
|
"step": 1200000 |
|
}, |
|
{ |
|
"epoch": 5.09, |
|
"eval_loss": 2.8481242656707764, |
|
"eval_runtime": 222.9723, |
|
"eval_samples_per_second": 896.972, |
|
"eval_steps_per_second": 56.061, |
|
"step": 1208000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"learning_rate": 4.936212790794631e-06, |
|
"loss": 3.0243, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.12, |
|
"eval_loss": 2.845810651779175, |
|
"eval_runtime": 223.4767, |
|
"eval_samples_per_second": 894.948, |
|
"eval_steps_per_second": 55.934, |
|
"step": 1216000 |
|
}, |
|
{ |
|
"epoch": 5.15, |
|
"eval_loss": 2.849405527114868, |
|
"eval_runtime": 224.1558, |
|
"eval_samples_per_second": 892.237, |
|
"eval_steps_per_second": 55.765, |
|
"step": 1224000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"learning_rate": 4.869507212540649e-06, |
|
"loss": 3.0345, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.19, |
|
"eval_loss": 2.854433536529541, |
|
"eval_runtime": 223.6399, |
|
"eval_samples_per_second": 894.295, |
|
"eval_steps_per_second": 55.893, |
|
"step": 1232000 |
|
}, |
|
{ |
|
"epoch": 5.22, |
|
"eval_loss": 2.8487536907196045, |
|
"eval_runtime": 223.5008, |
|
"eval_samples_per_second": 894.851, |
|
"eval_steps_per_second": 55.928, |
|
"step": 1240000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"learning_rate": 4.802801634286667e-06, |
|
"loss": 3.0251, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.25, |
|
"eval_loss": 2.845292568206787, |
|
"eval_runtime": 224.0031, |
|
"eval_samples_per_second": 892.845, |
|
"eval_steps_per_second": 55.803, |
|
"step": 1248000 |
|
}, |
|
{ |
|
"epoch": 5.29, |
|
"eval_loss": 2.8464181423187256, |
|
"eval_runtime": 225.2034, |
|
"eval_samples_per_second": 888.086, |
|
"eval_steps_per_second": 55.505, |
|
"step": 1256000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"learning_rate": 4.7360960560326865e-06, |
|
"loss": 3.0234, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.32, |
|
"eval_loss": 2.848585367202759, |
|
"eval_runtime": 223.6745, |
|
"eval_samples_per_second": 894.156, |
|
"eval_steps_per_second": 55.885, |
|
"step": 1264000 |
|
}, |
|
{ |
|
"epoch": 5.36, |
|
"eval_loss": 2.8435869216918945, |
|
"eval_runtime": 223.3913, |
|
"eval_samples_per_second": 895.29, |
|
"eval_steps_per_second": 55.956, |
|
"step": 1272000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 4.669390477778704e-06, |
|
"loss": 3.0205, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"eval_loss": 2.8476340770721436, |
|
"eval_runtime": 223.5929, |
|
"eval_samples_per_second": 894.483, |
|
"eval_steps_per_second": 55.905, |
|
"step": 1280000 |
|
}, |
|
{ |
|
"epoch": 5.42, |
|
"eval_loss": 2.8326635360717773, |
|
"eval_runtime": 224.8548, |
|
"eval_samples_per_second": 889.463, |
|
"eval_steps_per_second": 55.591, |
|
"step": 1288000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"learning_rate": 4.602684899524723e-06, |
|
"loss": 3.0228, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.46, |
|
"eval_loss": 2.8452436923980713, |
|
"eval_runtime": 223.4053, |
|
"eval_samples_per_second": 895.234, |
|
"eval_steps_per_second": 55.952, |
|
"step": 1296000 |
|
}, |
|
{ |
|
"epoch": 5.49, |
|
"eval_loss": 2.837240695953369, |
|
"eval_runtime": 225.2471, |
|
"eval_samples_per_second": 887.914, |
|
"eval_steps_per_second": 55.495, |
|
"step": 1304000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"learning_rate": 4.5359793212707415e-06, |
|
"loss": 3.0063, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.52, |
|
"eval_loss": 2.830629348754883, |
|
"eval_runtime": 224.2293, |
|
"eval_samples_per_second": 891.944, |
|
"eval_steps_per_second": 55.746, |
|
"step": 1312000 |
|
}, |
|
{ |
|
"epoch": 5.56, |
|
"eval_loss": 2.841139078140259, |
|
"eval_runtime": 226.2486, |
|
"eval_samples_per_second": 883.983, |
|
"eval_steps_per_second": 55.249, |
|
"step": 1320000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"learning_rate": 4.46927374301676e-06, |
|
"loss": 3.0068, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.59, |
|
"eval_loss": 2.827270030975342, |
|
"eval_runtime": 226.3948, |
|
"eval_samples_per_second": 883.413, |
|
"eval_steps_per_second": 55.213, |
|
"step": 1328000 |
|
}, |
|
{ |
|
"epoch": 5.63, |
|
"eval_loss": 2.834273338317871, |
|
"eval_runtime": 226.3338, |
|
"eval_samples_per_second": 883.651, |
|
"eval_steps_per_second": 55.228, |
|
"step": 1336000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"learning_rate": 4.402568164762779e-06, |
|
"loss": 3.0109, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.66, |
|
"eval_loss": 2.8328187465667725, |
|
"eval_runtime": 225.4917, |
|
"eval_samples_per_second": 886.95, |
|
"eval_steps_per_second": 55.434, |
|
"step": 1344000 |
|
}, |
|
{ |
|
"epoch": 5.69, |
|
"eval_loss": 2.843144655227661, |
|
"eval_runtime": 224.1529, |
|
"eval_samples_per_second": 892.248, |
|
"eval_steps_per_second": 55.766, |
|
"step": 1352000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"learning_rate": 4.335862586508797e-06, |
|
"loss": 3.0068, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.73, |
|
"eval_loss": 2.8331680297851562, |
|
"eval_runtime": 224.2839, |
|
"eval_samples_per_second": 891.727, |
|
"eval_steps_per_second": 55.733, |
|
"step": 1360000 |
|
}, |
|
{ |
|
"epoch": 5.76, |
|
"eval_loss": 2.827512502670288, |
|
"eval_runtime": 223.7519, |
|
"eval_samples_per_second": 893.847, |
|
"eval_steps_per_second": 55.865, |
|
"step": 1368000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"learning_rate": 4.269157008254816e-06, |
|
"loss": 3.002, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.79, |
|
"eval_loss": 2.8313817977905273, |
|
"eval_runtime": 224.094, |
|
"eval_samples_per_second": 892.483, |
|
"eval_steps_per_second": 55.78, |
|
"step": 1376000 |
|
}, |
|
{ |
|
"epoch": 5.83, |
|
"eval_loss": 2.8324134349823, |
|
"eval_runtime": 226.0373, |
|
"eval_samples_per_second": 884.81, |
|
"eval_steps_per_second": 55.301, |
|
"step": 1384000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"learning_rate": 4.202451430000834e-06, |
|
"loss": 3.0037, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.86, |
|
"eval_loss": 2.839409351348877, |
|
"eval_runtime": 223.5509, |
|
"eval_samples_per_second": 894.651, |
|
"eval_steps_per_second": 55.916, |
|
"step": 1392000 |
|
}, |
|
{ |
|
"epoch": 5.89, |
|
"eval_loss": 2.8337831497192383, |
|
"eval_runtime": 223.4898, |
|
"eval_samples_per_second": 894.895, |
|
"eval_steps_per_second": 55.931, |
|
"step": 1400000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 4.135745851746852e-06, |
|
"loss": 3.0086, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"eval_loss": 2.8447976112365723, |
|
"eval_runtime": 223.3032, |
|
"eval_samples_per_second": 895.643, |
|
"eval_steps_per_second": 55.978, |
|
"step": 1408000 |
|
}, |
|
{ |
|
"epoch": 5.96, |
|
"eval_loss": 2.8326330184936523, |
|
"eval_runtime": 225.1553, |
|
"eval_samples_per_second": 888.276, |
|
"eval_steps_per_second": 55.517, |
|
"step": 1416000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"learning_rate": 4.069040273492872e-06, |
|
"loss": 2.9977, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 2.8310978412628174, |
|
"eval_runtime": 224.0959, |
|
"eval_samples_per_second": 892.475, |
|
"eval_steps_per_second": 55.78, |
|
"step": 1424000 |
|
}, |
|
{ |
|
"epoch": 6.03, |
|
"eval_loss": 2.8410427570343018, |
|
"eval_runtime": 223.4994, |
|
"eval_samples_per_second": 894.857, |
|
"eval_steps_per_second": 55.929, |
|
"step": 1432000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"learning_rate": 4.0023346952388895e-06, |
|
"loss": 2.9984, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.06, |
|
"eval_loss": 2.8358559608459473, |
|
"eval_runtime": 223.5492, |
|
"eval_samples_per_second": 894.658, |
|
"eval_steps_per_second": 55.916, |
|
"step": 1440000 |
|
}, |
|
{ |
|
"epoch": 6.1, |
|
"eval_loss": 2.839256763458252, |
|
"eval_runtime": 228.1513, |
|
"eval_samples_per_second": 876.611, |
|
"eval_steps_per_second": 54.788, |
|
"step": 1448000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"learning_rate": 3.935629116984908e-06, |
|
"loss": 3.0095, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.13, |
|
"eval_loss": 2.838825225830078, |
|
"eval_runtime": 226.4727, |
|
"eval_samples_per_second": 883.109, |
|
"eval_steps_per_second": 55.194, |
|
"step": 1456000 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"eval_loss": 2.844802141189575, |
|
"eval_runtime": 226.1587, |
|
"eval_samples_per_second": 884.335, |
|
"eval_steps_per_second": 55.271, |
|
"step": 1464000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"learning_rate": 3.868923538730927e-06, |
|
"loss": 3.0051, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.2, |
|
"eval_loss": 2.8472418785095215, |
|
"eval_runtime": 228.0091, |
|
"eval_samples_per_second": 877.158, |
|
"eval_steps_per_second": 54.822, |
|
"step": 1472000 |
|
}, |
|
{ |
|
"epoch": 6.23, |
|
"eval_loss": 2.842092514038086, |
|
"eval_runtime": 224.1053, |
|
"eval_samples_per_second": 892.438, |
|
"eval_steps_per_second": 55.777, |
|
"step": 1480000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"learning_rate": 3.8022179604769453e-06, |
|
"loss": 3.0142, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.27, |
|
"eval_loss": 2.842365264892578, |
|
"eval_runtime": 223.8921, |
|
"eval_samples_per_second": 893.287, |
|
"eval_steps_per_second": 55.83, |
|
"step": 1488000 |
|
}, |
|
{ |
|
"epoch": 6.3, |
|
"eval_loss": 2.847733974456787, |
|
"eval_runtime": 223.983, |
|
"eval_samples_per_second": 892.925, |
|
"eval_steps_per_second": 55.808, |
|
"step": 1496000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"learning_rate": 3.735512382222964e-06, |
|
"loss": 3.0149, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.33, |
|
"eval_loss": 2.842820405960083, |
|
"eval_runtime": 224.541, |
|
"eval_samples_per_second": 890.706, |
|
"eval_steps_per_second": 55.669, |
|
"step": 1504000 |
|
}, |
|
{ |
|
"epoch": 6.37, |
|
"eval_loss": 2.8529434204101562, |
|
"eval_runtime": 229.172, |
|
"eval_samples_per_second": 872.707, |
|
"eval_steps_per_second": 54.544, |
|
"step": 1512000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"learning_rate": 3.668806803968982e-06, |
|
"loss": 3.0147, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_loss": 2.854137420654297, |
|
"eval_runtime": 228.0077, |
|
"eval_samples_per_second": 877.163, |
|
"eval_steps_per_second": 54.823, |
|
"step": 1520000 |
|
}, |
|
{ |
|
"epoch": 6.43, |
|
"eval_loss": 2.8518521785736084, |
|
"eval_runtime": 227.8943, |
|
"eval_samples_per_second": 877.6, |
|
"eval_steps_per_second": 54.85, |
|
"step": 1528000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 3.6021012257150007e-06, |
|
"loss": 3.0205, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"eval_loss": 2.852667808532715, |
|
"eval_runtime": 227.5511, |
|
"eval_samples_per_second": 878.924, |
|
"eval_steps_per_second": 54.933, |
|
"step": 1536000 |
|
}, |
|
{ |
|
"epoch": 6.5, |
|
"eval_loss": 2.8470675945281982, |
|
"eval_runtime": 223.3626, |
|
"eval_samples_per_second": 895.405, |
|
"eval_steps_per_second": 55.963, |
|
"step": 1544000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"learning_rate": 3.535395647461019e-06, |
|
"loss": 3.029, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.53, |
|
"eval_loss": 2.8583133220672607, |
|
"eval_runtime": 224.8105, |
|
"eval_samples_per_second": 889.638, |
|
"eval_steps_per_second": 55.602, |
|
"step": 1552000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"eval_loss": 2.84967303276062, |
|
"eval_runtime": 223.9635, |
|
"eval_samples_per_second": 893.002, |
|
"eval_steps_per_second": 55.813, |
|
"step": 1560000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"learning_rate": 3.468690069207038e-06, |
|
"loss": 3.024, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.6, |
|
"eval_loss": 2.865325689315796, |
|
"eval_runtime": 223.4985, |
|
"eval_samples_per_second": 894.86, |
|
"eval_steps_per_second": 55.929, |
|
"step": 1568000 |
|
}, |
|
{ |
|
"epoch": 6.64, |
|
"eval_loss": 2.855334997177124, |
|
"eval_runtime": 225.852, |
|
"eval_samples_per_second": 885.536, |
|
"eval_steps_per_second": 55.346, |
|
"step": 1576000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"learning_rate": 3.4019844909530565e-06, |
|
"loss": 3.0371, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.67, |
|
"eval_loss": 2.865299940109253, |
|
"eval_runtime": 224.5284, |
|
"eval_samples_per_second": 890.756, |
|
"eval_steps_per_second": 55.672, |
|
"step": 1584000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"eval_loss": 2.860386848449707, |
|
"eval_runtime": 223.6209, |
|
"eval_samples_per_second": 894.371, |
|
"eval_steps_per_second": 55.898, |
|
"step": 1592000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"learning_rate": 3.3352789126990747e-06, |
|
"loss": 3.0319, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.74, |
|
"eval_loss": 2.862384080886841, |
|
"eval_runtime": 223.9592, |
|
"eval_samples_per_second": 893.02, |
|
"eval_steps_per_second": 55.814, |
|
"step": 1600000 |
|
}, |
|
{ |
|
"epoch": 6.77, |
|
"eval_loss": 2.8657121658325195, |
|
"eval_runtime": 226.5681, |
|
"eval_samples_per_second": 882.737, |
|
"eval_steps_per_second": 55.171, |
|
"step": 1608000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"learning_rate": 3.2685733344450933e-06, |
|
"loss": 3.0369, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_loss": 2.861598491668701, |
|
"eval_runtime": 224.0599, |
|
"eval_samples_per_second": 892.618, |
|
"eval_steps_per_second": 55.789, |
|
"step": 1616000 |
|
}, |
|
{ |
|
"epoch": 6.84, |
|
"eval_loss": 2.8666746616363525, |
|
"eval_runtime": 224.0122, |
|
"eval_samples_per_second": 892.808, |
|
"eval_steps_per_second": 55.801, |
|
"step": 1624000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"learning_rate": 3.2018677561911115e-06, |
|
"loss": 3.0357, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.87, |
|
"eval_loss": 2.86602783203125, |
|
"eval_runtime": 223.9311, |
|
"eval_samples_per_second": 893.132, |
|
"eval_steps_per_second": 55.821, |
|
"step": 1632000 |
|
}, |
|
{ |
|
"epoch": 6.91, |
|
"eval_loss": 2.868190050125122, |
|
"eval_runtime": 224.6108, |
|
"eval_samples_per_second": 890.429, |
|
"eval_steps_per_second": 55.652, |
|
"step": 1640000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"learning_rate": 3.1351621779371306e-06, |
|
"loss": 3.0342, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.94, |
|
"eval_loss": 2.867553472518921, |
|
"eval_runtime": 224.9283, |
|
"eval_samples_per_second": 889.172, |
|
"eval_steps_per_second": 55.573, |
|
"step": 1648000 |
|
}, |
|
{ |
|
"epoch": 6.97, |
|
"eval_loss": 2.881544589996338, |
|
"eval_runtime": 225.5949, |
|
"eval_samples_per_second": 886.545, |
|
"eval_steps_per_second": 55.409, |
|
"step": 1656000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 3.0684565996831487e-06, |
|
"loss": 3.0375, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"eval_loss": 2.8667211532592773, |
|
"eval_runtime": 224.8671, |
|
"eval_samples_per_second": 889.414, |
|
"eval_steps_per_second": 55.588, |
|
"step": 1664000 |
|
}, |
|
{ |
|
"epoch": 7.04, |
|
"eval_loss": 2.8734593391418457, |
|
"eval_runtime": 226.9159, |
|
"eval_samples_per_second": 881.384, |
|
"eval_steps_per_second": 55.086, |
|
"step": 1672000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"learning_rate": 3.0017510214291673e-06, |
|
"loss": 3.0419, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.07, |
|
"eval_loss": 2.8788318634033203, |
|
"eval_runtime": 226.3899, |
|
"eval_samples_per_second": 883.432, |
|
"eval_steps_per_second": 55.214, |
|
"step": 1680000 |
|
}, |
|
{ |
|
"epoch": 7.11, |
|
"eval_loss": 2.8766632080078125, |
|
"eval_runtime": 225.7385, |
|
"eval_samples_per_second": 885.981, |
|
"eval_steps_per_second": 55.374, |
|
"step": 1688000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"learning_rate": 2.9350454431751855e-06, |
|
"loss": 3.0403, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.14, |
|
"eval_loss": 2.8811895847320557, |
|
"eval_runtime": 225.9242, |
|
"eval_samples_per_second": 885.253, |
|
"eval_steps_per_second": 55.328, |
|
"step": 1696000 |
|
}, |
|
{ |
|
"epoch": 7.17, |
|
"eval_loss": 2.879542827606201, |
|
"eval_runtime": 225.7881, |
|
"eval_samples_per_second": 885.786, |
|
"eval_steps_per_second": 55.362, |
|
"step": 1704000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"learning_rate": 2.868339864921204e-06, |
|
"loss": 3.0482, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.21, |
|
"eval_loss": 2.88046932220459, |
|
"eval_runtime": 225.2755, |
|
"eval_samples_per_second": 887.802, |
|
"eval_steps_per_second": 55.488, |
|
"step": 1712000 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"eval_loss": 2.8794021606445312, |
|
"eval_runtime": 226.8559, |
|
"eval_samples_per_second": 881.617, |
|
"eval_steps_per_second": 55.101, |
|
"step": 1720000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"learning_rate": 2.801634286667223e-06, |
|
"loss": 3.0533, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.28, |
|
"eval_loss": 2.8787782192230225, |
|
"eval_runtime": 225.0025, |
|
"eval_samples_per_second": 888.879, |
|
"eval_steps_per_second": 55.555, |
|
"step": 1728000 |
|
}, |
|
{ |
|
"epoch": 7.31, |
|
"eval_loss": 2.884382724761963, |
|
"eval_runtime": 225.9472, |
|
"eval_samples_per_second": 885.163, |
|
"eval_steps_per_second": 55.323, |
|
"step": 1736000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"learning_rate": 2.7349287084132413e-06, |
|
"loss": 3.0453, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.34, |
|
"eval_loss": 2.870943069458008, |
|
"eval_runtime": 225.043, |
|
"eval_samples_per_second": 888.719, |
|
"eval_steps_per_second": 55.545, |
|
"step": 1744000 |
|
}, |
|
{ |
|
"epoch": 7.38, |
|
"eval_loss": 2.8835349082946777, |
|
"eval_runtime": 225.0959, |
|
"eval_samples_per_second": 888.51, |
|
"eval_steps_per_second": 55.532, |
|
"step": 1752000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"learning_rate": 2.66822313015926e-06, |
|
"loss": 3.0562, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.41, |
|
"eval_loss": 2.8891103267669678, |
|
"eval_runtime": 224.5495, |
|
"eval_samples_per_second": 890.672, |
|
"eval_steps_per_second": 55.667, |
|
"step": 1760000 |
|
}, |
|
{ |
|
"epoch": 7.44, |
|
"eval_loss": 2.8902649879455566, |
|
"eval_runtime": 225.1215, |
|
"eval_samples_per_second": 888.409, |
|
"eval_steps_per_second": 55.526, |
|
"step": 1768000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"learning_rate": 2.601517551905278e-06, |
|
"loss": 3.0617, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.48, |
|
"eval_loss": 2.884901762008667, |
|
"eval_runtime": 225.0729, |
|
"eval_samples_per_second": 888.601, |
|
"eval_steps_per_second": 55.538, |
|
"step": 1776000 |
|
}, |
|
{ |
|
"epoch": 7.51, |
|
"eval_loss": 2.8766396045684814, |
|
"eval_runtime": 224.6011, |
|
"eval_samples_per_second": 890.468, |
|
"eval_steps_per_second": 55.654, |
|
"step": 1784000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 2.5348119736512967e-06, |
|
"loss": 3.0539, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"eval_loss": 2.8871917724609375, |
|
"eval_runtime": 224.4825, |
|
"eval_samples_per_second": 890.938, |
|
"eval_steps_per_second": 55.684, |
|
"step": 1792000 |
|
}, |
|
{ |
|
"epoch": 7.58, |
|
"eval_loss": 2.898136615753174, |
|
"eval_runtime": 225.2799, |
|
"eval_samples_per_second": 887.784, |
|
"eval_steps_per_second": 55.487, |
|
"step": 1800000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"learning_rate": 2.4681063953973154e-06, |
|
"loss": 3.0561, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.61, |
|
"eval_loss": 2.886209487915039, |
|
"eval_runtime": 226.1006, |
|
"eval_samples_per_second": 884.562, |
|
"eval_steps_per_second": 55.285, |
|
"step": 1808000 |
|
}, |
|
{ |
|
"epoch": 7.65, |
|
"eval_loss": 2.8940441608428955, |
|
"eval_runtime": 227.2765, |
|
"eval_samples_per_second": 879.986, |
|
"eval_steps_per_second": 54.999, |
|
"step": 1816000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"learning_rate": 2.4014008171433335e-06, |
|
"loss": 3.0529, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.68, |
|
"eval_loss": 2.887427568435669, |
|
"eval_runtime": 225.5383, |
|
"eval_samples_per_second": 886.767, |
|
"eval_steps_per_second": 55.423, |
|
"step": 1824000 |
|
}, |
|
{ |
|
"epoch": 7.71, |
|
"eval_loss": 2.883918046951294, |
|
"eval_runtime": 226.0624, |
|
"eval_samples_per_second": 884.711, |
|
"eval_steps_per_second": 55.294, |
|
"step": 1832000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"learning_rate": 2.334695238889352e-06, |
|
"loss": 3.0484, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.75, |
|
"eval_loss": 2.883819103240967, |
|
"eval_runtime": 225.1615, |
|
"eval_samples_per_second": 888.251, |
|
"eval_steps_per_second": 55.516, |
|
"step": 1840000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"eval_loss": 2.8856074810028076, |
|
"eval_runtime": 226.1802, |
|
"eval_samples_per_second": 884.251, |
|
"eval_steps_per_second": 55.266, |
|
"step": 1848000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"learning_rate": 2.2679896606353707e-06, |
|
"loss": 3.0562, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.81, |
|
"eval_loss": 2.8983583450317383, |
|
"eval_runtime": 227.809, |
|
"eval_samples_per_second": 877.929, |
|
"eval_steps_per_second": 54.871, |
|
"step": 1856000 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"eval_loss": 2.884408473968506, |
|
"eval_runtime": 228.3309, |
|
"eval_samples_per_second": 875.922, |
|
"eval_steps_per_second": 54.745, |
|
"step": 1864000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"learning_rate": 2.2012840823813894e-06, |
|
"loss": 3.0578, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.88, |
|
"eval_loss": 2.8873543739318848, |
|
"eval_runtime": 226.4275, |
|
"eval_samples_per_second": 883.285, |
|
"eval_steps_per_second": 55.205, |
|
"step": 1872000 |
|
}, |
|
{ |
|
"epoch": 7.92, |
|
"eval_loss": 2.8886616230010986, |
|
"eval_runtime": 226.5836, |
|
"eval_samples_per_second": 882.676, |
|
"eval_steps_per_second": 55.167, |
|
"step": 1880000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"learning_rate": 2.134578504127408e-06, |
|
"loss": 3.0553, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.95, |
|
"eval_loss": 2.879803419113159, |
|
"eval_runtime": 228.4861, |
|
"eval_samples_per_second": 875.327, |
|
"eval_steps_per_second": 54.708, |
|
"step": 1888000 |
|
}, |
|
{ |
|
"epoch": 7.98, |
|
"eval_loss": 2.8788740634918213, |
|
"eval_runtime": 228.0679, |
|
"eval_samples_per_second": 876.932, |
|
"eval_steps_per_second": 54.808, |
|
"step": 1896000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"learning_rate": 2.067872925873426e-06, |
|
"loss": 3.0623, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.02, |
|
"eval_loss": 2.8968303203582764, |
|
"eval_runtime": 229.4287, |
|
"eval_samples_per_second": 871.731, |
|
"eval_steps_per_second": 54.483, |
|
"step": 1904000 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"eval_loss": 2.8834211826324463, |
|
"eval_runtime": 227.0353, |
|
"eval_samples_per_second": 880.92, |
|
"eval_steps_per_second": 55.058, |
|
"step": 1912000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"learning_rate": 2.0011673476194448e-06, |
|
"loss": 3.0652, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.08, |
|
"eval_loss": 2.8902077674865723, |
|
"eval_runtime": 227.3091, |
|
"eval_samples_per_second": 879.859, |
|
"eval_steps_per_second": 54.991, |
|
"step": 1920000 |
|
}, |
|
{ |
|
"epoch": 8.12, |
|
"eval_loss": 2.8821847438812256, |
|
"eval_runtime": 226.7104, |
|
"eval_samples_per_second": 882.183, |
|
"eval_steps_per_second": 55.136, |
|
"step": 1928000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"learning_rate": 1.9344617693654634e-06, |
|
"loss": 3.0487, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.15, |
|
"eval_loss": 2.8844268321990967, |
|
"eval_runtime": 227.086, |
|
"eval_samples_per_second": 880.724, |
|
"eval_steps_per_second": 55.045, |
|
"step": 1936000 |
|
}, |
|
{ |
|
"epoch": 8.19, |
|
"eval_loss": 2.890925407409668, |
|
"eval_runtime": 227.5282, |
|
"eval_samples_per_second": 879.012, |
|
"eval_steps_per_second": 54.938, |
|
"step": 1944000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"learning_rate": 1.867756191111482e-06, |
|
"loss": 3.0546, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.22, |
|
"eval_loss": 2.8915293216705322, |
|
"eval_runtime": 226.2178, |
|
"eval_samples_per_second": 884.104, |
|
"eval_steps_per_second": 55.256, |
|
"step": 1952000 |
|
}, |
|
{ |
|
"epoch": 8.25, |
|
"eval_loss": 2.8869712352752686, |
|
"eval_runtime": 234.1736, |
|
"eval_samples_per_second": 854.067, |
|
"eval_steps_per_second": 53.379, |
|
"step": 1960000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"learning_rate": 1.8010506128575004e-06, |
|
"loss": 3.0524, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.29, |
|
"eval_loss": 2.882768154144287, |
|
"eval_runtime": 232.3633, |
|
"eval_samples_per_second": 860.721, |
|
"eval_steps_per_second": 53.795, |
|
"step": 1968000 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"eval_loss": 2.878105401992798, |
|
"eval_runtime": 232.7641, |
|
"eval_samples_per_second": 859.239, |
|
"eval_steps_per_second": 53.702, |
|
"step": 1976000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"learning_rate": 1.734345034603519e-06, |
|
"loss": 3.0491, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.35, |
|
"eval_loss": 2.894814968109131, |
|
"eval_runtime": 235.0584, |
|
"eval_samples_per_second": 850.852, |
|
"eval_steps_per_second": 53.178, |
|
"step": 1984000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"eval_loss": 2.8903963565826416, |
|
"eval_runtime": 227.9139, |
|
"eval_samples_per_second": 877.524, |
|
"eval_steps_per_second": 54.845, |
|
"step": 1992000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"learning_rate": 1.6676394563495374e-06, |
|
"loss": 3.0534, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.42, |
|
"eval_loss": 2.8839056491851807, |
|
"eval_runtime": 229.0151, |
|
"eval_samples_per_second": 873.305, |
|
"eval_steps_per_second": 54.582, |
|
"step": 2000000 |
|
}, |
|
{ |
|
"epoch": 8.45, |
|
"eval_loss": 2.891777276992798, |
|
"eval_runtime": 227.2746, |
|
"eval_samples_per_second": 879.993, |
|
"eval_steps_per_second": 55.0, |
|
"step": 2008000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"learning_rate": 1.6009338780955558e-06, |
|
"loss": 3.0547, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.49, |
|
"eval_loss": 2.8738794326782227, |
|
"eval_runtime": 227.7519, |
|
"eval_samples_per_second": 878.149, |
|
"eval_steps_per_second": 54.884, |
|
"step": 2016000 |
|
}, |
|
{ |
|
"epoch": 8.52, |
|
"eval_loss": 2.868389129638672, |
|
"eval_runtime": 228.3511, |
|
"eval_samples_per_second": 875.844, |
|
"eval_steps_per_second": 54.74, |
|
"step": 2024000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"learning_rate": 1.5342282998415744e-06, |
|
"loss": 3.0544, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.56, |
|
"eval_loss": 2.8739755153656006, |
|
"eval_runtime": 229.9365, |
|
"eval_samples_per_second": 869.806, |
|
"eval_steps_per_second": 54.363, |
|
"step": 2032000 |
|
}, |
|
{ |
|
"epoch": 8.59, |
|
"eval_loss": 2.8784215450286865, |
|
"eval_runtime": 228.6391, |
|
"eval_samples_per_second": 874.741, |
|
"eval_steps_per_second": 54.671, |
|
"step": 2040000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 1.4675227215875928e-06, |
|
"loss": 3.0448, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"eval_loss": 2.8758127689361572, |
|
"eval_runtime": 229.161, |
|
"eval_samples_per_second": 872.749, |
|
"eval_steps_per_second": 54.547, |
|
"step": 2048000 |
|
}, |
|
{ |
|
"epoch": 8.66, |
|
"eval_loss": 2.880105972290039, |
|
"eval_runtime": 230.876, |
|
"eval_samples_per_second": 866.266, |
|
"eval_steps_per_second": 54.142, |
|
"step": 2056000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 1.4008171433336116e-06, |
|
"loss": 3.0499, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"eval_loss": 2.8793435096740723, |
|
"eval_runtime": 229.0938, |
|
"eval_samples_per_second": 873.005, |
|
"eval_steps_per_second": 54.563, |
|
"step": 2064000 |
|
}, |
|
{ |
|
"epoch": 8.72, |
|
"eval_loss": 2.8707237243652344, |
|
"eval_runtime": 228.2778, |
|
"eval_samples_per_second": 876.126, |
|
"eval_steps_per_second": 54.758, |
|
"step": 2072000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"learning_rate": 1.33411156507963e-06, |
|
"loss": 3.0368, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.76, |
|
"eval_loss": 2.872204065322876, |
|
"eval_runtime": 229.9264, |
|
"eval_samples_per_second": 869.844, |
|
"eval_steps_per_second": 54.365, |
|
"step": 2080000 |
|
}, |
|
{ |
|
"epoch": 8.79, |
|
"eval_loss": 2.875173807144165, |
|
"eval_runtime": 229.2278, |
|
"eval_samples_per_second": 872.495, |
|
"eval_steps_per_second": 54.531, |
|
"step": 2088000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"learning_rate": 1.2674059868256484e-06, |
|
"loss": 3.0548, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.83, |
|
"eval_loss": 2.8879776000976562, |
|
"eval_runtime": 228.5322, |
|
"eval_samples_per_second": 875.15, |
|
"eval_steps_per_second": 54.697, |
|
"step": 2096000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"eval_loss": 2.87813663482666, |
|
"eval_runtime": 228.0244, |
|
"eval_samples_per_second": 877.099, |
|
"eval_steps_per_second": 54.819, |
|
"step": 2104000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"learning_rate": 1.2007004085716668e-06, |
|
"loss": 3.0457, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.89, |
|
"eval_loss": 2.882504463195801, |
|
"eval_runtime": 228.7296, |
|
"eval_samples_per_second": 874.395, |
|
"eval_steps_per_second": 54.65, |
|
"step": 2112000 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"eval_loss": 2.8827481269836426, |
|
"eval_runtime": 231.2951, |
|
"eval_samples_per_second": 864.696, |
|
"eval_steps_per_second": 54.044, |
|
"step": 2120000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"learning_rate": 1.1339948303176854e-06, |
|
"loss": 3.0377, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.96, |
|
"eval_loss": 2.880984306335449, |
|
"eval_runtime": 231.2589, |
|
"eval_samples_per_second": 864.832, |
|
"eval_steps_per_second": 54.052, |
|
"step": 2128000 |
|
}, |
|
{ |
|
"epoch": 8.99, |
|
"eval_loss": 2.872668981552124, |
|
"eval_runtime": 231.1403, |
|
"eval_samples_per_second": 865.275, |
|
"eval_steps_per_second": 54.08, |
|
"step": 2136000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"learning_rate": 1.067289252063704e-06, |
|
"loss": 3.0341, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.03, |
|
"eval_loss": 2.8749947547912598, |
|
"eval_runtime": 232.6534, |
|
"eval_samples_per_second": 859.648, |
|
"eval_steps_per_second": 53.728, |
|
"step": 2144000 |
|
}, |
|
{ |
|
"epoch": 9.06, |
|
"eval_loss": 2.8637659549713135, |
|
"eval_runtime": 232.1804, |
|
"eval_samples_per_second": 861.399, |
|
"eval_steps_per_second": 53.837, |
|
"step": 2152000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 1.0005836738097224e-06, |
|
"loss": 3.0275, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"eval_loss": 2.8689756393432617, |
|
"eval_runtime": 233.8191, |
|
"eval_samples_per_second": 855.362, |
|
"eval_steps_per_second": 53.46, |
|
"step": 2160000 |
|
}, |
|
{ |
|
"epoch": 9.13, |
|
"eval_loss": 2.866030693054199, |
|
"eval_runtime": 231.4154, |
|
"eval_samples_per_second": 864.247, |
|
"eval_steps_per_second": 54.015, |
|
"step": 2168000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 9.33878095555741e-07, |
|
"loss": 3.0413, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"eval_loss": 2.8578262329101562, |
|
"eval_runtime": 233.839, |
|
"eval_samples_per_second": 855.289, |
|
"eval_steps_per_second": 53.456, |
|
"step": 2176000 |
|
}, |
|
{ |
|
"epoch": 9.2, |
|
"eval_loss": 2.8692455291748047, |
|
"eval_runtime": 236.8158, |
|
"eval_samples_per_second": 844.538, |
|
"eval_steps_per_second": 52.784, |
|
"step": 2184000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"learning_rate": 8.671725173017595e-07, |
|
"loss": 3.0272, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.23, |
|
"eval_loss": 2.8701837062835693, |
|
"eval_runtime": 235.4116, |
|
"eval_samples_per_second": 849.576, |
|
"eval_steps_per_second": 53.098, |
|
"step": 2192000 |
|
}, |
|
{ |
|
"epoch": 9.26, |
|
"eval_loss": 2.870734453201294, |
|
"eval_runtime": 236.6161, |
|
"eval_samples_per_second": 845.251, |
|
"eval_steps_per_second": 52.828, |
|
"step": 2200000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"learning_rate": 8.004669390477779e-07, |
|
"loss": 3.034, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.3, |
|
"eval_loss": 2.866581916809082, |
|
"eval_runtime": 233.8393, |
|
"eval_samples_per_second": 855.288, |
|
"eval_steps_per_second": 53.456, |
|
"step": 2208000 |
|
}, |
|
{ |
|
"epoch": 9.33, |
|
"eval_loss": 2.873441219329834, |
|
"eval_runtime": 229.9559, |
|
"eval_samples_per_second": 869.732, |
|
"eval_steps_per_second": 54.358, |
|
"step": 2216000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"learning_rate": 7.337613607937964e-07, |
|
"loss": 3.0346, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.36, |
|
"eval_loss": 2.8685038089752197, |
|
"eval_runtime": 229.1295, |
|
"eval_samples_per_second": 872.869, |
|
"eval_steps_per_second": 54.554, |
|
"step": 2224000 |
|
}, |
|
{ |
|
"epoch": 9.4, |
|
"eval_loss": 2.867513656616211, |
|
"eval_runtime": 228.6384, |
|
"eval_samples_per_second": 874.744, |
|
"eval_steps_per_second": 54.671, |
|
"step": 2232000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"learning_rate": 6.67055782539815e-07, |
|
"loss": 3.0234, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.43, |
|
"eval_loss": 2.866205930709839, |
|
"eval_runtime": 228.8031, |
|
"eval_samples_per_second": 874.114, |
|
"eval_steps_per_second": 54.632, |
|
"step": 2240000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"eval_loss": 2.8670058250427246, |
|
"eval_runtime": 230.0362, |
|
"eval_samples_per_second": 869.428, |
|
"eval_steps_per_second": 54.339, |
|
"step": 2248000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"learning_rate": 6.003502042858334e-07, |
|
"loss": 3.0256, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.5, |
|
"eval_loss": 2.8764402866363525, |
|
"eval_runtime": 228.8894, |
|
"eval_samples_per_second": 873.784, |
|
"eval_steps_per_second": 54.612, |
|
"step": 2256000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"eval_loss": 2.8664441108703613, |
|
"eval_runtime": 228.7947, |
|
"eval_samples_per_second": 874.146, |
|
"eval_steps_per_second": 54.634, |
|
"step": 2264000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"learning_rate": 5.33644626031852e-07, |
|
"loss": 3.0232, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.57, |
|
"eval_loss": 2.8624887466430664, |
|
"eval_runtime": 229.0315, |
|
"eval_samples_per_second": 873.242, |
|
"eval_steps_per_second": 54.578, |
|
"step": 2272000 |
|
}, |
|
{ |
|
"epoch": 9.6, |
|
"eval_loss": 2.8646833896636963, |
|
"eval_runtime": 229.8068, |
|
"eval_samples_per_second": 870.296, |
|
"eval_steps_per_second": 54.394, |
|
"step": 2280000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 4.669390477778705e-07, |
|
"loss": 3.0309, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"eval_loss": 2.8561413288116455, |
|
"eval_runtime": 229.8225, |
|
"eval_samples_per_second": 870.237, |
|
"eval_steps_per_second": 54.39, |
|
"step": 2288000 |
|
}, |
|
{ |
|
"epoch": 9.67, |
|
"eval_loss": 2.8657453060150146, |
|
"eval_runtime": 230.8107, |
|
"eval_samples_per_second": 866.511, |
|
"eval_steps_per_second": 54.157, |
|
"step": 2296000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 4.0023346952388894e-07, |
|
"loss": 3.0254, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"eval_loss": 2.8666698932647705, |
|
"eval_runtime": 230.9054, |
|
"eval_samples_per_second": 866.156, |
|
"eval_steps_per_second": 54.135, |
|
"step": 2304000 |
|
}, |
|
{ |
|
"epoch": 9.73, |
|
"eval_loss": 2.861841917037964, |
|
"eval_runtime": 233.616, |
|
"eval_samples_per_second": 856.106, |
|
"eval_steps_per_second": 53.507, |
|
"step": 2312000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"learning_rate": 3.335278912699075e-07, |
|
"loss": 3.0198, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.77, |
|
"eval_loss": 2.8649652004241943, |
|
"eval_runtime": 232.9095, |
|
"eval_samples_per_second": 858.702, |
|
"eval_steps_per_second": 53.669, |
|
"step": 2320000 |
|
}, |
|
{ |
|
"epoch": 9.8, |
|
"eval_loss": 2.8629865646362305, |
|
"eval_runtime": 233.276, |
|
"eval_samples_per_second": 857.353, |
|
"eval_steps_per_second": 53.585, |
|
"step": 2328000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"learning_rate": 2.66822313015926e-07, |
|
"loss": 3.0109, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.84, |
|
"eval_loss": 2.8533174991607666, |
|
"eval_runtime": 232.7296, |
|
"eval_samples_per_second": 859.366, |
|
"eval_steps_per_second": 53.71, |
|
"step": 2336000 |
|
}, |
|
{ |
|
"epoch": 9.87, |
|
"eval_loss": 2.8656232357025146, |
|
"eval_runtime": 230.1435, |
|
"eval_samples_per_second": 869.023, |
|
"eval_steps_per_second": 54.314, |
|
"step": 2344000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"learning_rate": 2.0011673476194447e-07, |
|
"loss": 3.0316, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.9, |
|
"eval_loss": 2.8606715202331543, |
|
"eval_runtime": 229.4357, |
|
"eval_samples_per_second": 871.704, |
|
"eval_steps_per_second": 54.482, |
|
"step": 2352000 |
|
}, |
|
{ |
|
"epoch": 9.94, |
|
"eval_loss": 2.8572158813476562, |
|
"eval_runtime": 229.9275, |
|
"eval_samples_per_second": 869.839, |
|
"eval_steps_per_second": 54.365, |
|
"step": 2360000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"learning_rate": 1.33411156507963e-07, |
|
"loss": 3.0225, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 9.97, |
|
"eval_loss": 2.8617327213287354, |
|
"eval_runtime": 229.6061, |
|
"eval_samples_per_second": 871.057, |
|
"eval_steps_per_second": 54.441, |
|
"step": 2368000 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 2.8604278564453125, |
|
"eval_runtime": 229.8413, |
|
"eval_samples_per_second": 870.166, |
|
"eval_steps_per_second": 54.385, |
|
"step": 2376000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"learning_rate": 6.67055782539815e-08, |
|
"loss": 3.0132, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.04, |
|
"eval_loss": 2.857710838317871, |
|
"eval_runtime": 229.9337, |
|
"eval_samples_per_second": 869.816, |
|
"eval_steps_per_second": 54.364, |
|
"step": 2384000 |
|
}, |
|
{ |
|
"epoch": 10.07, |
|
"eval_loss": 2.8534834384918213, |
|
"eval_runtime": 230.8863, |
|
"eval_samples_per_second": 866.227, |
|
"eval_steps_per_second": 54.139, |
|
"step": 2392000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"learning_rate": 0.0, |
|
"loss": 3.0202, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"eval_loss": 2.8565549850463867, |
|
"eval_runtime": 230.1736, |
|
"eval_samples_per_second": 868.909, |
|
"eval_steps_per_second": 54.307, |
|
"step": 2400000 |
|
}, |
|
{ |
|
"epoch": 10.11, |
|
"step": 2400000, |
|
"total_flos": 7.688849395607474e+17, |
|
"train_loss": 2.9681437548828127, |
|
"train_runtime": 221059.9809, |
|
"train_samples_per_second": 173.709, |
|
"train_steps_per_second": 10.857 |
|
} |
|
], |
|
"logging_steps": 16000, |
|
"max_steps": 2400000, |
|
"num_train_epochs": 11, |
|
"save_steps": 32000, |
|
"total_flos": 7.688849395607474e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|