|
{ |
|
"best_metric": 2.513946056365967, |
|
"best_model_checkpoint": "checkpoints-finetuning/checkpoint-1080", |
|
"epoch": 193.14128943758573, |
|
"eval_steps": 40, |
|
"global_step": 2200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.0000000000000002e-07, |
|
"loss": 3.595, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 3.5299072265625, |
|
"eval_runtime": 5.0148, |
|
"eval_samples_per_second": 62.416, |
|
"eval_steps_per_second": 15.753, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"loss": 3.4769, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 7.02, |
|
"eval_loss": 3.3721721172332764, |
|
"eval_runtime": 4.4435, |
|
"eval_samples_per_second": 70.441, |
|
"eval_steps_per_second": 17.779, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"learning_rate": 6.000000000000001e-07, |
|
"loss": 3.3037, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_loss": 3.1870808601379395, |
|
"eval_runtime": 4.6407, |
|
"eval_samples_per_second": 67.446, |
|
"eval_steps_per_second": 17.023, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"learning_rate": 8.000000000000001e-07, |
|
"loss": 3.1255, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 14.05, |
|
"eval_loss": 3.0087945461273193, |
|
"eval_runtime": 4.7026, |
|
"eval_samples_per_second": 66.559, |
|
"eval_steps_per_second": 16.799, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"loss": 2.9615, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 17.56, |
|
"eval_loss": 2.8684051036834717, |
|
"eval_runtime": 4.6401, |
|
"eval_samples_per_second": 67.455, |
|
"eval_steps_per_second": 17.026, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"loss": 2.8468, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 21.07, |
|
"eval_loss": 2.780834436416626, |
|
"eval_runtime": 4.4221, |
|
"eval_samples_per_second": 70.78, |
|
"eval_steps_per_second": 17.865, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 1.4000000000000001e-06, |
|
"loss": 2.7699, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"eval_loss": 2.720453977584839, |
|
"eval_runtime": 4.5663, |
|
"eval_samples_per_second": 68.546, |
|
"eval_steps_per_second": 17.301, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"loss": 2.7139, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 28.09, |
|
"eval_loss": 2.679349422454834, |
|
"eval_runtime": 4.7784, |
|
"eval_samples_per_second": 65.504, |
|
"eval_steps_per_second": 16.533, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"learning_rate": 1.8000000000000001e-06, |
|
"loss": 2.6712, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 31.6, |
|
"eval_loss": 2.650853395462036, |
|
"eval_runtime": 4.6713, |
|
"eval_samples_per_second": 67.005, |
|
"eval_steps_per_second": 16.912, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.6356, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 35.12, |
|
"eval_loss": 2.6293749809265137, |
|
"eval_runtime": 4.6364, |
|
"eval_samples_per_second": 67.51, |
|
"eval_steps_per_second": 17.039, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 38.63, |
|
"learning_rate": 2.2e-06, |
|
"loss": 2.6048, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 38.63, |
|
"eval_loss": 2.611950635910034, |
|
"eval_runtime": 4.5116, |
|
"eval_samples_per_second": 69.377, |
|
"eval_steps_per_second": 17.511, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 42.14, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"loss": 2.5823, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 42.14, |
|
"eval_loss": 2.597449541091919, |
|
"eval_runtime": 4.5213, |
|
"eval_samples_per_second": 69.228, |
|
"eval_steps_per_second": 17.473, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"learning_rate": 2.6e-06, |
|
"loss": 2.5536, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 45.65, |
|
"eval_loss": 2.5848779678344727, |
|
"eval_runtime": 4.4398, |
|
"eval_samples_per_second": 70.498, |
|
"eval_steps_per_second": 17.793, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"loss": 2.5293, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 49.16, |
|
"eval_loss": 2.574049472808838, |
|
"eval_runtime": 4.6572, |
|
"eval_samples_per_second": 67.208, |
|
"eval_steps_per_second": 16.963, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 52.67, |
|
"learning_rate": 3e-06, |
|
"loss": 2.5058, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 52.67, |
|
"eval_loss": 2.5643808841705322, |
|
"eval_runtime": 4.6849, |
|
"eval_samples_per_second": 66.81, |
|
"eval_steps_per_second": 16.863, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 56.19, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"loss": 2.482, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 56.19, |
|
"eval_loss": 2.555607557296753, |
|
"eval_runtime": 4.491, |
|
"eval_samples_per_second": 69.695, |
|
"eval_steps_per_second": 17.591, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"learning_rate": 3.4000000000000005e-06, |
|
"loss": 2.4575, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 59.7, |
|
"eval_loss": 2.547734260559082, |
|
"eval_runtime": 4.6182, |
|
"eval_samples_per_second": 67.776, |
|
"eval_steps_per_second": 17.106, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"loss": 2.4339, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 63.21, |
|
"eval_loss": 2.5405359268188477, |
|
"eval_runtime": 4.5137, |
|
"eval_samples_per_second": 69.345, |
|
"eval_steps_per_second": 17.502, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"learning_rate": 3.8000000000000005e-06, |
|
"loss": 2.4073, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 66.72, |
|
"eval_loss": 2.5350451469421387, |
|
"eval_runtime": 4.6034, |
|
"eval_samples_per_second": 67.993, |
|
"eval_steps_per_second": 17.161, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.3845, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 70.23, |
|
"eval_loss": 2.530299186706543, |
|
"eval_runtime": 4.6325, |
|
"eval_samples_per_second": 67.566, |
|
"eval_steps_per_second": 17.053, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 73.74, |
|
"learning_rate": 4.2000000000000004e-06, |
|
"loss": 2.3606, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 73.74, |
|
"eval_loss": 2.525312662124634, |
|
"eval_runtime": 4.4668, |
|
"eval_samples_per_second": 70.072, |
|
"eval_steps_per_second": 17.686, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 77.26, |
|
"learning_rate": 4.4e-06, |
|
"loss": 2.329, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 77.26, |
|
"eval_loss": 2.5215225219726562, |
|
"eval_runtime": 4.4699, |
|
"eval_samples_per_second": 70.023, |
|
"eval_steps_per_second": 17.674, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 80.77, |
|
"learning_rate": 4.600000000000001e-06, |
|
"loss": 2.3071, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 80.77, |
|
"eval_loss": 2.5184576511383057, |
|
"eval_runtime": 4.3807, |
|
"eval_samples_per_second": 71.45, |
|
"eval_steps_per_second": 18.034, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 84.28, |
|
"learning_rate": 4.800000000000001e-06, |
|
"loss": 2.2768, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 84.28, |
|
"eval_loss": 2.515460729598999, |
|
"eval_runtime": 4.6634, |
|
"eval_samples_per_second": 67.119, |
|
"eval_steps_per_second": 16.941, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 87.79, |
|
"learning_rate": 5e-06, |
|
"loss": 2.2479, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 87.79, |
|
"eval_loss": 2.514392852783203, |
|
"eval_runtime": 4.5583, |
|
"eval_samples_per_second": 68.665, |
|
"eval_steps_per_second": 17.331, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"learning_rate": 4.986304738420684e-06, |
|
"loss": 2.2181, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 91.3, |
|
"eval_loss": 2.515076160430908, |
|
"eval_runtime": 4.6324, |
|
"eval_samples_per_second": 67.568, |
|
"eval_steps_per_second": 17.054, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 94.81, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"loss": 2.1901, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 94.81, |
|
"eval_loss": 2.513946056365967, |
|
"eval_runtime": 4.635, |
|
"eval_samples_per_second": 67.53, |
|
"eval_steps_per_second": 17.044, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 98.33, |
|
"learning_rate": 4.8776412907378845e-06, |
|
"loss": 2.1571, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 98.33, |
|
"eval_loss": 2.514775037765503, |
|
"eval_runtime": 4.7132, |
|
"eval_samples_per_second": 66.41, |
|
"eval_steps_per_second": 16.762, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 101.84, |
|
"learning_rate": 4.783863644106502e-06, |
|
"loss": 2.1308, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 101.84, |
|
"eval_loss": 2.5165762901306152, |
|
"eval_runtime": 4.6347, |
|
"eval_samples_per_second": 67.535, |
|
"eval_steps_per_second": 17.046, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 105.35, |
|
"learning_rate": 4.665063509461098e-06, |
|
"loss": 2.1032, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 105.35, |
|
"eval_loss": 2.5192971229553223, |
|
"eval_runtime": 4.6292, |
|
"eval_samples_per_second": 67.614, |
|
"eval_steps_per_second": 17.066, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 108.86, |
|
"learning_rate": 4.522542485937369e-06, |
|
"loss": 2.0761, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 108.86, |
|
"eval_loss": 2.5203866958618164, |
|
"eval_runtime": 4.6638, |
|
"eval_samples_per_second": 67.113, |
|
"eval_steps_per_second": 16.939, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 112.37, |
|
"learning_rate": 4.357862063693486e-06, |
|
"loss": 2.0495, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 112.37, |
|
"eval_loss": 2.5268709659576416, |
|
"eval_runtime": 4.6504, |
|
"eval_samples_per_second": 67.306, |
|
"eval_steps_per_second": 16.988, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 115.88, |
|
"learning_rate": 4.172826515897146e-06, |
|
"loss": 2.0231, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 115.88, |
|
"eval_loss": 2.5284526348114014, |
|
"eval_runtime": 4.6029, |
|
"eval_samples_per_second": 68.0, |
|
"eval_steps_per_second": 17.163, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 119.4, |
|
"learning_rate": 3.969463130731183e-06, |
|
"loss": 2.0021, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 119.4, |
|
"eval_loss": 2.5327632427215576, |
|
"eval_runtime": 4.7118, |
|
"eval_samples_per_second": 66.429, |
|
"eval_steps_per_second": 16.767, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 122.91, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"loss": 1.9793, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 122.91, |
|
"eval_loss": 2.5382816791534424, |
|
"eval_runtime": 4.6299, |
|
"eval_samples_per_second": 67.603, |
|
"eval_steps_per_second": 17.063, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 126.42, |
|
"learning_rate": 3.516841607689501e-06, |
|
"loss": 1.9575, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 126.42, |
|
"eval_loss": 2.5441536903381348, |
|
"eval_runtime": 4.6442, |
|
"eval_samples_per_second": 67.396, |
|
"eval_steps_per_second": 17.01, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 129.93, |
|
"learning_rate": 3.272542485937369e-06, |
|
"loss": 1.9368, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 129.93, |
|
"eval_loss": 2.5487852096557617, |
|
"eval_runtime": 4.6396, |
|
"eval_samples_per_second": 67.462, |
|
"eval_steps_per_second": 17.027, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 133.44, |
|
"learning_rate": 3.019779227044398e-06, |
|
"loss": 1.9216, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 133.44, |
|
"eval_loss": 2.5533745288848877, |
|
"eval_runtime": 4.6038, |
|
"eval_samples_per_second": 67.987, |
|
"eval_steps_per_second": 17.16, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 136.95, |
|
"learning_rate": 2.761321158169134e-06, |
|
"loss": 1.902, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 136.95, |
|
"eval_loss": 2.558429479598999, |
|
"eval_runtime": 4.605, |
|
"eval_samples_per_second": 67.969, |
|
"eval_steps_per_second": 17.155, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 140.47, |
|
"learning_rate": 2.5e-06, |
|
"loss": 1.8885, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 140.47, |
|
"eval_loss": 2.560931444168091, |
|
"eval_runtime": 4.6137, |
|
"eval_samples_per_second": 67.842, |
|
"eval_steps_per_second": 17.123, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 143.98, |
|
"learning_rate": 2.238678841830867e-06, |
|
"loss": 1.8728, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 143.98, |
|
"eval_loss": 2.565746307373047, |
|
"eval_runtime": 4.6085, |
|
"eval_samples_per_second": 67.918, |
|
"eval_steps_per_second": 17.142, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 147.49, |
|
"learning_rate": 1.9802207729556023e-06, |
|
"loss": 1.8605, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 147.49, |
|
"eval_loss": 2.569748640060425, |
|
"eval_runtime": 4.6652, |
|
"eval_samples_per_second": 67.092, |
|
"eval_steps_per_second": 16.934, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"learning_rate": 1.7274575140626318e-06, |
|
"loss": 1.8476, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 151.0, |
|
"eval_loss": 2.5741446018218994, |
|
"eval_runtime": 4.7429, |
|
"eval_samples_per_second": 65.994, |
|
"eval_steps_per_second": 16.657, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 154.51, |
|
"learning_rate": 1.4831583923105e-06, |
|
"loss": 1.8402, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 154.51, |
|
"eval_loss": 2.5770394802093506, |
|
"eval_runtime": 4.6184, |
|
"eval_samples_per_second": 67.772, |
|
"eval_steps_per_second": 17.105, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 158.02, |
|
"learning_rate": 1.2500000000000007e-06, |
|
"loss": 1.8274, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 158.02, |
|
"eval_loss": 2.580260992050171, |
|
"eval_runtime": 4.5687, |
|
"eval_samples_per_second": 68.509, |
|
"eval_steps_per_second": 17.291, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 161.54, |
|
"learning_rate": 1.0305368692688175e-06, |
|
"loss": 1.8218, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 161.54, |
|
"eval_loss": 2.582859992980957, |
|
"eval_runtime": 4.6266, |
|
"eval_samples_per_second": 67.653, |
|
"eval_steps_per_second": 17.075, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 165.05, |
|
"learning_rate": 8.271734841028553e-07, |
|
"loss": 1.8144, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 165.05, |
|
"eval_loss": 2.5846669673919678, |
|
"eval_runtime": 4.601, |
|
"eval_samples_per_second": 68.029, |
|
"eval_steps_per_second": 17.17, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 168.56, |
|
"learning_rate": 6.421379363065142e-07, |
|
"loss": 1.8097, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 168.56, |
|
"eval_loss": 2.5867464542388916, |
|
"eval_runtime": 4.593, |
|
"eval_samples_per_second": 68.148, |
|
"eval_steps_per_second": 17.2, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 172.07, |
|
"learning_rate": 4.774575140626317e-07, |
|
"loss": 1.8076, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 172.07, |
|
"eval_loss": 2.5882575511932373, |
|
"eval_runtime": 4.601, |
|
"eval_samples_per_second": 68.028, |
|
"eval_steps_per_second": 17.17, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 175.58, |
|
"learning_rate": 3.3493649053890325e-07, |
|
"loss": 1.8014, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 175.58, |
|
"eval_loss": 2.589245080947876, |
|
"eval_runtime": 4.5976, |
|
"eval_samples_per_second": 68.079, |
|
"eval_steps_per_second": 17.183, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 179.09, |
|
"learning_rate": 2.1613635589349756e-07, |
|
"loss": 1.8001, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 179.09, |
|
"eval_loss": 2.589866876602173, |
|
"eval_runtime": 4.5824, |
|
"eval_samples_per_second": 68.305, |
|
"eval_steps_per_second": 17.24, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"learning_rate": 1.223587092621162e-07, |
|
"loss": 1.7987, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 182.61, |
|
"eval_loss": 2.5903093814849854, |
|
"eval_runtime": 4.6146, |
|
"eval_samples_per_second": 67.829, |
|
"eval_steps_per_second": 17.12, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 186.12, |
|
"learning_rate": 5.463099816548578e-08, |
|
"loss": 1.7971, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 186.12, |
|
"eval_loss": 2.590583562850952, |
|
"eval_runtime": 4.609, |
|
"eval_samples_per_second": 67.911, |
|
"eval_steps_per_second": 17.141, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 189.63, |
|
"learning_rate": 1.3695261579316776e-08, |
|
"loss": 1.7979, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 189.63, |
|
"eval_loss": 2.5907208919525146, |
|
"eval_runtime": 4.6125, |
|
"eval_samples_per_second": 67.859, |
|
"eval_steps_per_second": 17.127, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 193.14, |
|
"learning_rate": 0.0, |
|
"loss": 1.7975, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 193.14, |
|
"eval_loss": 2.590698719024658, |
|
"eval_runtime": 4.6213, |
|
"eval_samples_per_second": 67.729, |
|
"eval_steps_per_second": 17.095, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 193.14, |
|
"step": 2200, |
|
"total_flos": 1.0517861659312128e+18, |
|
"train_loss": 2.2616969472711737, |
|
"train_runtime": 20093.6832, |
|
"train_samples_per_second": 29.024, |
|
"train_steps_per_second": 0.109 |
|
} |
|
], |
|
"logging_steps": 40, |
|
"max_steps": 2200, |
|
"num_train_epochs": 200, |
|
"save_steps": 40, |
|
"total_flos": 1.0517861659312128e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|