{ "best_metric": 2.5792043209075928, "best_model_checkpoint": "./clip-roberta-finetuned/checkpoint-48000", "epoch": 10.0, "global_step": 68710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 4.963760733517683e-05, "loss": 2.9841, "step": 500 }, { "epoch": 0.07, "eval_loss": 3.411221504211426, "eval_runtime": 218.2214, "eval_samples_per_second": 447.83, "eval_steps_per_second": 1.751, "step": 500 }, { "epoch": 0.15, "learning_rate": 4.927521467035366e-05, "loss": 2.72, "step": 1000 }, { "epoch": 0.15, "eval_loss": 3.3430113792419434, "eval_runtime": 214.9656, "eval_samples_per_second": 454.612, "eval_steps_per_second": 1.777, "step": 1000 }, { "epoch": 0.22, "learning_rate": 4.891209430941639e-05, "loss": 2.6319, "step": 1500 }, { "epoch": 0.22, "eval_loss": 3.2295451164245605, "eval_runtime": 250.7246, "eval_samples_per_second": 389.774, "eval_steps_per_second": 1.524, "step": 1500 }, { "epoch": 0.29, "learning_rate": 4.854824625236501e-05, "loss": 2.5781, "step": 2000 }, { "epoch": 0.29, "eval_loss": 3.1644504070281982, "eval_runtime": 249.2113, "eval_samples_per_second": 392.141, "eval_steps_per_second": 1.533, "step": 2000 }, { "epoch": 0.36, "learning_rate": 4.818439819531364e-05, "loss": 2.5339, "step": 2500 }, { "epoch": 0.36, "eval_loss": 3.1226284503936768, "eval_runtime": 249.7319, "eval_samples_per_second": 391.324, "eval_steps_per_second": 1.53, "step": 2500 }, { "epoch": 0.44, "learning_rate": 4.782055013826226e-05, "loss": 2.503, "step": 3000 }, { "epoch": 0.44, "eval_loss": 3.0856029987335205, "eval_runtime": 260.6356, "eval_samples_per_second": 374.953, "eval_steps_per_second": 1.466, "step": 3000 }, { "epoch": 0.51, "learning_rate": 4.745670208121089e-05, "loss": 2.4581, "step": 3500 }, { "epoch": 0.51, "eval_loss": 3.063863515853882, "eval_runtime": 246.5877, "eval_samples_per_second": 396.313, "eval_steps_per_second": 1.549, "step": 3500 }, { "epoch": 0.58, "learning_rate": 4.709285402415951e-05, "loss": 2.4494, "step": 4000 }, { "epoch": 0.58, "eval_loss": 3.0415244102478027, "eval_runtime": 244.6941, "eval_samples_per_second": 399.38, "eval_steps_per_second": 1.561, "step": 4000 }, { "epoch": 0.65, "learning_rate": 4.6729005967108134e-05, "loss": 2.4275, "step": 4500 }, { "epoch": 0.65, "eval_loss": 3.0244903564453125, "eval_runtime": 210.3742, "eval_samples_per_second": 464.534, "eval_steps_per_second": 1.816, "step": 4500 }, { "epoch": 0.73, "learning_rate": 4.636515791005676e-05, "loss": 2.3909, "step": 5000 }, { "epoch": 0.73, "eval_loss": 2.999117851257324, "eval_runtime": 210.4024, "eval_samples_per_second": 464.472, "eval_steps_per_second": 1.816, "step": 5000 }, { "epoch": 0.8, "learning_rate": 4.6001309853005384e-05, "loss": 2.3902, "step": 5500 }, { "epoch": 0.8, "eval_loss": 2.9931323528289795, "eval_runtime": 208.7009, "eval_samples_per_second": 468.259, "eval_steps_per_second": 1.83, "step": 5500 }, { "epoch": 0.87, "learning_rate": 4.563746179595401e-05, "loss": 2.3741, "step": 6000 }, { "epoch": 0.87, "eval_loss": 2.9612369537353516, "eval_runtime": 212.7001, "eval_samples_per_second": 459.454, "eval_steps_per_second": 1.796, "step": 6000 }, { "epoch": 0.95, "learning_rate": 4.5273613738902634e-05, "loss": 2.3536, "step": 6500 }, { "epoch": 0.95, "eval_loss": 2.9508631229400635, "eval_runtime": 210.803, "eval_samples_per_second": 463.589, "eval_steps_per_second": 1.812, "step": 6500 }, { "epoch": 1.02, "learning_rate": 4.490976568185126e-05, "loss": 2.3392, "step": 7000 }, { "epoch": 1.02, "eval_loss": 2.9288971424102783, "eval_runtime": 210.6758, "eval_samples_per_second": 463.869, "eval_steps_per_second": 1.813, "step": 7000 }, { "epoch": 1.09, "learning_rate": 4.454591762479989e-05, "loss": 2.3083, "step": 7500 }, { "epoch": 1.09, "eval_loss": 2.9214062690734863, "eval_runtime": 211.6271, "eval_samples_per_second": 461.784, "eval_steps_per_second": 1.805, "step": 7500 }, { "epoch": 1.16, "learning_rate": 4.418206956774851e-05, "loss": 2.3094, "step": 8000 }, { "epoch": 1.16, "eval_loss": 2.915283441543579, "eval_runtime": 210.1146, "eval_samples_per_second": 465.108, "eval_steps_per_second": 1.818, "step": 8000 }, { "epoch": 1.24, "learning_rate": 4.3818221510697134e-05, "loss": 2.2864, "step": 8500 }, { "epoch": 1.24, "eval_loss": 2.903420925140381, "eval_runtime": 214.0395, "eval_samples_per_second": 456.579, "eval_steps_per_second": 1.785, "step": 8500 }, { "epoch": 1.31, "learning_rate": 4.3454373453645755e-05, "loss": 2.2893, "step": 9000 }, { "epoch": 1.31, "eval_loss": 2.8963093757629395, "eval_runtime": 218.4194, "eval_samples_per_second": 447.424, "eval_steps_per_second": 1.749, "step": 9000 }, { "epoch": 1.38, "learning_rate": 4.3090525396594384e-05, "loss": 2.2697, "step": 9500 }, { "epoch": 1.38, "eval_loss": 2.884676456451416, "eval_runtime": 207.7625, "eval_samples_per_second": 470.374, "eval_steps_per_second": 1.839, "step": 9500 }, { "epoch": 1.46, "learning_rate": 4.2726677339543005e-05, "loss": 2.2762, "step": 10000 }, { "epoch": 1.46, "eval_loss": 2.866511106491089, "eval_runtime": 207.4714, "eval_samples_per_second": 471.034, "eval_steps_per_second": 1.841, "step": 10000 }, { "epoch": 1.53, "learning_rate": 4.2363556978605734e-05, "loss": 2.2667, "step": 10500 }, { "epoch": 1.53, "eval_loss": 2.853637456893921, "eval_runtime": 209.4841, "eval_samples_per_second": 466.508, "eval_steps_per_second": 1.824, "step": 10500 }, { "epoch": 1.6, "learning_rate": 4.1999708921554356e-05, "loss": 2.2548, "step": 11000 }, { "epoch": 1.6, "eval_loss": 2.8472321033477783, "eval_runtime": 207.7935, "eval_samples_per_second": 470.303, "eval_steps_per_second": 1.838, "step": 11000 }, { "epoch": 1.67, "learning_rate": 4.1635860864502984e-05, "loss": 2.238, "step": 11500 }, { "epoch": 1.67, "eval_loss": 2.849086284637451, "eval_runtime": 207.5183, "eval_samples_per_second": 470.927, "eval_steps_per_second": 1.841, "step": 11500 }, { "epoch": 1.75, "learning_rate": 4.127201280745161e-05, "loss": 2.2423, "step": 12000 }, { "epoch": 1.75, "eval_loss": 2.825746774673462, "eval_runtime": 218.1498, "eval_samples_per_second": 447.977, "eval_steps_per_second": 1.751, "step": 12000 }, { "epoch": 1.82, "learning_rate": 4.0908164750400234e-05, "loss": 2.2406, "step": 12500 }, { "epoch": 1.82, "eval_loss": 2.82869029045105, "eval_runtime": 208.9781, "eval_samples_per_second": 467.637, "eval_steps_per_second": 1.828, "step": 12500 }, { "epoch": 1.89, "learning_rate": 4.054431669334886e-05, "loss": 2.2248, "step": 13000 }, { "epoch": 1.89, "eval_loss": 2.81931734085083, "eval_runtime": 210.3496, "eval_samples_per_second": 464.588, "eval_steps_per_second": 1.816, "step": 13000 }, { "epoch": 1.96, "learning_rate": 4.0181196332411585e-05, "loss": 2.223, "step": 13500 }, { "epoch": 1.96, "eval_loss": 2.810143232345581, "eval_runtime": 211.5447, "eval_samples_per_second": 461.964, "eval_steps_per_second": 1.806, "step": 13500 }, { "epoch": 2.04, "learning_rate": 3.981734827536021e-05, "loss": 2.1995, "step": 14000 }, { "epoch": 2.04, "eval_loss": 2.802741527557373, "eval_runtime": 210.9696, "eval_samples_per_second": 463.223, "eval_steps_per_second": 1.811, "step": 14000 }, { "epoch": 2.11, "learning_rate": 3.9453500218308835e-05, "loss": 2.1834, "step": 14500 }, { "epoch": 2.11, "eval_loss": 2.787959098815918, "eval_runtime": 207.5007, "eval_samples_per_second": 470.967, "eval_steps_per_second": 1.841, "step": 14500 }, { "epoch": 2.18, "learning_rate": 3.908965216125746e-05, "loss": 2.1723, "step": 15000 }, { "epoch": 2.18, "eval_loss": 2.778273582458496, "eval_runtime": 217.4843, "eval_samples_per_second": 449.347, "eval_steps_per_second": 1.756, "step": 15000 }, { "epoch": 2.26, "learning_rate": 3.8725804104206085e-05, "loss": 2.1651, "step": 15500 }, { "epoch": 2.26, "eval_loss": 2.773916721343994, "eval_runtime": 211.3325, "eval_samples_per_second": 462.428, "eval_steps_per_second": 1.808, "step": 15500 }, { "epoch": 2.33, "learning_rate": 3.836195604715471e-05, "loss": 2.1575, "step": 16000 }, { "epoch": 2.33, "eval_loss": 2.782458543777466, "eval_runtime": 214.023, "eval_samples_per_second": 456.615, "eval_steps_per_second": 1.785, "step": 16000 }, { "epoch": 2.4, "learning_rate": 3.7998107990103335e-05, "loss": 2.1598, "step": 16500 }, { "epoch": 2.4, "eval_loss": 2.7659904956817627, "eval_runtime": 211.2594, "eval_samples_per_second": 462.588, "eval_steps_per_second": 1.808, "step": 16500 }, { "epoch": 2.47, "learning_rate": 3.7634259933051956e-05, "loss": 2.1667, "step": 17000 }, { "epoch": 2.47, "eval_loss": 2.75777530670166, "eval_runtime": 209.9442, "eval_samples_per_second": 465.486, "eval_steps_per_second": 1.82, "step": 17000 }, { "epoch": 2.55, "learning_rate": 3.7271139572114685e-05, "loss": 2.1565, "step": 17500 }, { "epoch": 2.55, "eval_loss": 2.757976770401001, "eval_runtime": 211.0178, "eval_samples_per_second": 463.117, "eval_steps_per_second": 1.81, "step": 17500 }, { "epoch": 2.62, "learning_rate": 3.6907291515063314e-05, "loss": 2.1558, "step": 18000 }, { "epoch": 2.62, "eval_loss": 2.7561423778533936, "eval_runtime": 210.4961, "eval_samples_per_second": 464.265, "eval_steps_per_second": 1.815, "step": 18000 }, { "epoch": 2.69, "learning_rate": 3.6543443458011935e-05, "loss": 2.1642, "step": 18500 }, { "epoch": 2.69, "eval_loss": 2.751215934753418, "eval_runtime": 210.4158, "eval_samples_per_second": 464.442, "eval_steps_per_second": 1.815, "step": 18500 }, { "epoch": 2.77, "learning_rate": 3.617959540096056e-05, "loss": 2.1374, "step": 19000 }, { "epoch": 2.77, "eval_loss": 2.736060619354248, "eval_runtime": 214.104, "eval_samples_per_second": 456.442, "eval_steps_per_second": 1.784, "step": 19000 }, { "epoch": 2.84, "learning_rate": 3.5815747343909185e-05, "loss": 2.1402, "step": 19500 }, { "epoch": 2.84, "eval_loss": 2.7384564876556396, "eval_runtime": 210.1981, "eval_samples_per_second": 464.923, "eval_steps_per_second": 1.817, "step": 19500 }, { "epoch": 2.91, "learning_rate": 3.545189928685781e-05, "loss": 2.1326, "step": 20000 }, { "epoch": 2.91, "eval_loss": 2.723484516143799, "eval_runtime": 210.8719, "eval_samples_per_second": 463.438, "eval_steps_per_second": 1.812, "step": 20000 }, { "epoch": 2.98, "learning_rate": 3.5088051229806435e-05, "loss": 2.1272, "step": 20500 }, { "epoch": 2.98, "eval_loss": 2.7183401584625244, "eval_runtime": 239.7397, "eval_samples_per_second": 407.634, "eval_steps_per_second": 1.593, "step": 20500 }, { "epoch": 3.06, "learning_rate": 3.4724203172755057e-05, "loss": 2.0954, "step": 21000 }, { "epoch": 3.06, "eval_loss": 2.7156314849853516, "eval_runtime": 211.1012, "eval_samples_per_second": 462.934, "eval_steps_per_second": 1.81, "step": 21000 }, { "epoch": 3.13, "learning_rate": 3.4360355115703685e-05, "loss": 2.0842, "step": 21500 }, { "epoch": 3.13, "eval_loss": 2.7065327167510986, "eval_runtime": 210.8515, "eval_samples_per_second": 463.483, "eval_steps_per_second": 1.812, "step": 21500 }, { "epoch": 3.2, "learning_rate": 3.399650705865231e-05, "loss": 2.0859, "step": 22000 }, { "epoch": 3.2, "eval_loss": 2.7088747024536133, "eval_runtime": 215.2076, "eval_samples_per_second": 454.101, "eval_steps_per_second": 1.775, "step": 22000 }, { "epoch": 3.27, "learning_rate": 3.3632659001600935e-05, "loss": 2.0856, "step": 22500 }, { "epoch": 3.27, "eval_loss": 2.6962101459503174, "eval_runtime": 210.6214, "eval_samples_per_second": 463.989, "eval_steps_per_second": 1.814, "step": 22500 }, { "epoch": 3.35, "learning_rate": 3.3268810944549556e-05, "loss": 2.0775, "step": 23000 }, { "epoch": 3.35, "eval_loss": 2.693091630935669, "eval_runtime": 210.6882, "eval_samples_per_second": 463.842, "eval_steps_per_second": 1.813, "step": 23000 }, { "epoch": 3.42, "learning_rate": 3.2905690583612286e-05, "loss": 2.0821, "step": 23500 }, { "epoch": 3.42, "eval_loss": 2.693345069885254, "eval_runtime": 217.6654, "eval_samples_per_second": 448.973, "eval_steps_per_second": 1.755, "step": 23500 }, { "epoch": 3.49, "learning_rate": 3.2541842526560914e-05, "loss": 2.0706, "step": 24000 }, { "epoch": 3.49, "eval_loss": 2.70108699798584, "eval_runtime": 210.1763, "eval_samples_per_second": 464.971, "eval_steps_per_second": 1.818, "step": 24000 }, { "epoch": 3.57, "learning_rate": 3.2177994469509535e-05, "loss": 2.0689, "step": 24500 }, { "epoch": 3.57, "eval_loss": 2.7009191513061523, "eval_runtime": 207.9861, "eval_samples_per_second": 469.868, "eval_steps_per_second": 1.837, "step": 24500 }, { "epoch": 3.64, "learning_rate": 3.181414641245816e-05, "loss": 2.0807, "step": 25000 }, { "epoch": 3.64, "eval_loss": 2.682542324066162, "eval_runtime": 214.5749, "eval_samples_per_second": 455.44, "eval_steps_per_second": 1.78, "step": 25000 }, { "epoch": 3.71, "learning_rate": 3.1450298355406785e-05, "loss": 2.0639, "step": 25500 }, { "epoch": 3.71, "eval_loss": 2.674436330795288, "eval_runtime": 245.508, "eval_samples_per_second": 398.056, "eval_steps_per_second": 1.556, "step": 25500 }, { "epoch": 3.78, "learning_rate": 3.108645029835541e-05, "loss": 2.0742, "step": 26000 }, { "epoch": 3.78, "eval_loss": 2.677746295928955, "eval_runtime": 245.3374, "eval_samples_per_second": 398.333, "eval_steps_per_second": 1.557, "step": 26000 }, { "epoch": 3.86, "learning_rate": 3.0722602241304035e-05, "loss": 2.0789, "step": 26500 }, { "epoch": 3.86, "eval_loss": 2.6688921451568604, "eval_runtime": 246.4423, "eval_samples_per_second": 396.547, "eval_steps_per_second": 1.55, "step": 26500 }, { "epoch": 3.93, "learning_rate": 3.0359481880366758e-05, "loss": 2.0594, "step": 27000 }, { "epoch": 3.93, "eval_loss": 2.6566038131713867, "eval_runtime": 252.2995, "eval_samples_per_second": 387.341, "eval_steps_per_second": 1.514, "step": 27000 }, { "epoch": 4.0, "learning_rate": 2.9995633823315383e-05, "loss": 2.056, "step": 27500 }, { "epoch": 4.0, "eval_loss": 2.667599678039551, "eval_runtime": 245.2202, "eval_samples_per_second": 398.523, "eval_steps_per_second": 1.558, "step": 27500 }, { "epoch": 4.08, "learning_rate": 2.9631785766264007e-05, "loss": 2.0223, "step": 28000 }, { "epoch": 4.08, "eval_loss": 2.6711361408233643, "eval_runtime": 245.2433, "eval_samples_per_second": 398.486, "eval_steps_per_second": 1.558, "step": 28000 }, { "epoch": 4.15, "learning_rate": 2.9267937709212632e-05, "loss": 2.0185, "step": 28500 }, { "epoch": 4.15, "eval_loss": 2.65678071975708, "eval_runtime": 208.848, "eval_samples_per_second": 467.929, "eval_steps_per_second": 1.829, "step": 28500 }, { "epoch": 4.22, "learning_rate": 2.890408965216126e-05, "loss": 2.018, "step": 29000 }, { "epoch": 4.22, "eval_loss": 2.656717538833618, "eval_runtime": 209.2801, "eval_samples_per_second": 466.963, "eval_steps_per_second": 1.825, "step": 29000 }, { "epoch": 4.29, "learning_rate": 2.8540241595109886e-05, "loss": 2.0036, "step": 29500 }, { "epoch": 4.29, "eval_loss": 2.6545379161834717, "eval_runtime": 210.0273, "eval_samples_per_second": 465.301, "eval_steps_per_second": 1.819, "step": 29500 }, { "epoch": 4.37, "learning_rate": 2.817639353805851e-05, "loss": 2.0238, "step": 30000 }, { "epoch": 4.37, "eval_loss": 2.6558964252471924, "eval_runtime": 211.3486, "eval_samples_per_second": 462.392, "eval_steps_per_second": 1.807, "step": 30000 }, { "epoch": 4.44, "learning_rate": 2.781472856934944e-05, "loss": 2.0091, "step": 30500 }, { "epoch": 4.44, "eval_loss": 2.6450281143188477, "eval_runtime": 208.486, "eval_samples_per_second": 468.741, "eval_steps_per_second": 1.832, "step": 30500 }, { "epoch": 4.51, "learning_rate": 2.7450880512298066e-05, "loss": 2.0096, "step": 31000 }, { "epoch": 4.51, "eval_loss": 2.6388843059539795, "eval_runtime": 210.8413, "eval_samples_per_second": 463.505, "eval_steps_per_second": 1.812, "step": 31000 }, { "epoch": 4.58, "learning_rate": 2.708703245524669e-05, "loss": 2.0083, "step": 31500 }, { "epoch": 4.58, "eval_loss": 2.6401255130767822, "eval_runtime": 213.1597, "eval_samples_per_second": 458.464, "eval_steps_per_second": 1.792, "step": 31500 }, { "epoch": 4.66, "learning_rate": 2.6723184398195316e-05, "loss": 2.0012, "step": 32000 }, { "epoch": 4.66, "eval_loss": 2.639948844909668, "eval_runtime": 234.1271, "eval_samples_per_second": 417.406, "eval_steps_per_second": 1.632, "step": 32000 }, { "epoch": 4.73, "learning_rate": 2.635933634114394e-05, "loss": 2.0166, "step": 32500 }, { "epoch": 4.73, "eval_loss": 2.628899097442627, "eval_runtime": 242.1039, "eval_samples_per_second": 403.653, "eval_steps_per_second": 1.578, "step": 32500 }, { "epoch": 4.8, "learning_rate": 2.5995488284092563e-05, "loss": 1.9963, "step": 33000 }, { "epoch": 4.8, "eval_loss": 2.634817361831665, "eval_runtime": 275.7387, "eval_samples_per_second": 354.415, "eval_steps_per_second": 1.385, "step": 33000 }, { "epoch": 4.88, "learning_rate": 2.5631640227041188e-05, "loss": 1.9943, "step": 33500 }, { "epoch": 4.88, "eval_loss": 2.6239511966705322, "eval_runtime": 223.4038, "eval_samples_per_second": 437.441, "eval_steps_per_second": 1.71, "step": 33500 }, { "epoch": 4.95, "learning_rate": 2.5267792169989813e-05, "loss": 2.0099, "step": 34000 }, { "epoch": 4.95, "eval_loss": 2.618997812271118, "eval_runtime": 215.7939, "eval_samples_per_second": 452.867, "eval_steps_per_second": 1.77, "step": 34000 }, { "epoch": 5.02, "learning_rate": 2.4903944112938438e-05, "loss": 1.9895, "step": 34500 }, { "epoch": 5.02, "eval_loss": 2.630808115005493, "eval_runtime": 235.2775, "eval_samples_per_second": 415.365, "eval_steps_per_second": 1.624, "step": 34500 }, { "epoch": 5.09, "learning_rate": 2.4540096055887063e-05, "loss": 1.9581, "step": 35000 }, { "epoch": 5.09, "eval_loss": 2.638457775115967, "eval_runtime": 232.5729, "eval_samples_per_second": 420.195, "eval_steps_per_second": 1.642, "step": 35000 }, { "epoch": 5.17, "learning_rate": 2.4176247998835687e-05, "loss": 1.9502, "step": 35500 }, { "epoch": 5.17, "eval_loss": 2.6236515045166016, "eval_runtime": 233.0168, "eval_samples_per_second": 419.395, "eval_steps_per_second": 1.639, "step": 35500 }, { "epoch": 5.24, "learning_rate": 2.3812399941784312e-05, "loss": 1.9485, "step": 36000 }, { "epoch": 5.24, "eval_loss": 2.624785900115967, "eval_runtime": 246.6057, "eval_samples_per_second": 396.284, "eval_steps_per_second": 1.549, "step": 36000 }, { "epoch": 5.31, "learning_rate": 2.3448551884732937e-05, "loss": 1.9643, "step": 36500 }, { "epoch": 5.31, "eval_loss": 2.627931833267212, "eval_runtime": 212.8412, "eval_samples_per_second": 459.15, "eval_steps_per_second": 1.795, "step": 36500 }, { "epoch": 5.38, "learning_rate": 2.3084703827681562e-05, "loss": 1.9535, "step": 37000 }, { "epoch": 5.38, "eval_loss": 2.6185333728790283, "eval_runtime": 210.7296, "eval_samples_per_second": 463.751, "eval_steps_per_second": 1.813, "step": 37000 }, { "epoch": 5.46, "learning_rate": 2.2720855770630187e-05, "loss": 1.9575, "step": 37500 }, { "epoch": 5.46, "eval_loss": 2.614642381668091, "eval_runtime": 211.3001, "eval_samples_per_second": 462.499, "eval_steps_per_second": 1.808, "step": 37500 }, { "epoch": 5.53, "learning_rate": 2.235700771357881e-05, "loss": 1.9475, "step": 38000 }, { "epoch": 5.53, "eval_loss": 2.6092729568481445, "eval_runtime": 212.2513, "eval_samples_per_second": 460.426, "eval_steps_per_second": 1.8, "step": 38000 }, { "epoch": 5.6, "learning_rate": 2.1993159656527434e-05, "loss": 1.9434, "step": 38500 }, { "epoch": 5.6, "eval_loss": 2.60904598236084, "eval_runtime": 209.1736, "eval_samples_per_second": 467.2, "eval_steps_per_second": 1.826, "step": 38500 }, { "epoch": 5.68, "learning_rate": 2.1630039295590163e-05, "loss": 1.954, "step": 39000 }, { "epoch": 5.68, "eval_loss": 2.60274338722229, "eval_runtime": 223.8437, "eval_samples_per_second": 436.581, "eval_steps_per_second": 1.707, "step": 39000 }, { "epoch": 5.75, "learning_rate": 2.1266191238538788e-05, "loss": 1.9509, "step": 39500 }, { "epoch": 5.75, "eval_loss": 2.6107161045074463, "eval_runtime": 213.2878, "eval_samples_per_second": 458.188, "eval_steps_per_second": 1.791, "step": 39500 }, { "epoch": 5.82, "learning_rate": 2.0902343181487413e-05, "loss": 1.9454, "step": 40000 }, { "epoch": 5.82, "eval_loss": 2.59796142578125, "eval_runtime": 214.4371, "eval_samples_per_second": 455.733, "eval_steps_per_second": 1.781, "step": 40000 }, { "epoch": 5.89, "learning_rate": 2.053922282055014e-05, "loss": 1.9479, "step": 40500 }, { "epoch": 5.89, "eval_loss": 2.6016438007354736, "eval_runtime": 218.6403, "eval_samples_per_second": 446.972, "eval_steps_per_second": 1.747, "step": 40500 }, { "epoch": 5.97, "learning_rate": 2.0175374763498764e-05, "loss": 1.9539, "step": 41000 }, { "epoch": 5.97, "eval_loss": 2.5970652103424072, "eval_runtime": 214.0818, "eval_samples_per_second": 456.489, "eval_steps_per_second": 1.784, "step": 41000 }, { "epoch": 6.04, "learning_rate": 1.981152670644739e-05, "loss": 1.9119, "step": 41500 }, { "epoch": 6.04, "eval_loss": 2.622750759124756, "eval_runtime": 211.471, "eval_samples_per_second": 462.125, "eval_steps_per_second": 1.806, "step": 41500 }, { "epoch": 6.11, "learning_rate": 1.9447678649396013e-05, "loss": 1.8974, "step": 42000 }, { "epoch": 6.11, "eval_loss": 2.6169052124023438, "eval_runtime": 221.6976, "eval_samples_per_second": 440.808, "eval_steps_per_second": 1.723, "step": 42000 }, { "epoch": 6.19, "learning_rate": 1.908383059234464e-05, "loss": 1.9038, "step": 42500 }, { "epoch": 6.19, "eval_loss": 2.6027112007141113, "eval_runtime": 212.7185, "eval_samples_per_second": 459.415, "eval_steps_per_second": 1.796, "step": 42500 }, { "epoch": 6.26, "learning_rate": 1.8719982535293263e-05, "loss": 1.9008, "step": 43000 }, { "epoch": 6.26, "eval_loss": 2.602651357650757, "eval_runtime": 212.6929, "eval_samples_per_second": 459.47, "eval_steps_per_second": 1.796, "step": 43000 }, { "epoch": 6.33, "learning_rate": 1.8356134478241888e-05, "loss": 1.9142, "step": 43500 }, { "epoch": 6.33, "eval_loss": 2.6011383533477783, "eval_runtime": 217.9682, "eval_samples_per_second": 448.35, "eval_steps_per_second": 1.753, "step": 43500 }, { "epoch": 6.4, "learning_rate": 1.7992286421190513e-05, "loss": 1.8783, "step": 44000 }, { "epoch": 6.4, "eval_loss": 2.595999002456665, "eval_runtime": 216.5321, "eval_samples_per_second": 451.323, "eval_steps_per_second": 1.764, "step": 44000 }, { "epoch": 6.48, "learning_rate": 1.7628438364139135e-05, "loss": 1.8896, "step": 44500 }, { "epoch": 6.48, "eval_loss": 2.6111366748809814, "eval_runtime": 209.5809, "eval_samples_per_second": 466.293, "eval_steps_per_second": 1.823, "step": 44500 }, { "epoch": 6.55, "learning_rate": 1.7265318003201864e-05, "loss": 1.8975, "step": 45000 }, { "epoch": 6.55, "eval_loss": 2.588871955871582, "eval_runtime": 215.4187, "eval_samples_per_second": 453.656, "eval_steps_per_second": 1.773, "step": 45000 }, { "epoch": 6.62, "learning_rate": 1.690146994615049e-05, "loss": 1.9048, "step": 45500 }, { "epoch": 6.62, "eval_loss": 2.600691556930542, "eval_runtime": 225.7312, "eval_samples_per_second": 432.931, "eval_steps_per_second": 1.692, "step": 45500 }, { "epoch": 6.69, "learning_rate": 1.6537621889099114e-05, "loss": 1.9049, "step": 46000 }, { "epoch": 6.69, "eval_loss": 2.5971837043762207, "eval_runtime": 213.1258, "eval_samples_per_second": 458.537, "eval_steps_per_second": 1.792, "step": 46000 }, { "epoch": 6.77, "learning_rate": 1.6173773832047735e-05, "loss": 1.8969, "step": 46500 }, { "epoch": 6.77, "eval_loss": 2.605257987976074, "eval_runtime": 212.2604, "eval_samples_per_second": 460.406, "eval_steps_per_second": 1.8, "step": 46500 }, { "epoch": 6.84, "learning_rate": 1.580992577499636e-05, "loss": 1.9105, "step": 47000 }, { "epoch": 6.84, "eval_loss": 2.589334726333618, "eval_runtime": 211.6138, "eval_samples_per_second": 461.813, "eval_steps_per_second": 1.805, "step": 47000 }, { "epoch": 6.91, "learning_rate": 1.544680541405909e-05, "loss": 1.8921, "step": 47500 }, { "epoch": 6.91, "eval_loss": 2.5882816314697266, "eval_runtime": 211.2215, "eval_samples_per_second": 462.671, "eval_steps_per_second": 1.809, "step": 47500 }, { "epoch": 6.99, "learning_rate": 1.5083685053121819e-05, "loss": 1.8918, "step": 48000 }, { "epoch": 6.99, "eval_loss": 2.5792043209075928, "eval_runtime": 211.5529, "eval_samples_per_second": 461.946, "eval_steps_per_second": 1.806, "step": 48000 }, { "epoch": 7.06, "learning_rate": 1.471983699607044e-05, "loss": 1.8671, "step": 48500 }, { "epoch": 7.06, "eval_loss": 2.604069232940674, "eval_runtime": 212.2008, "eval_samples_per_second": 460.536, "eval_steps_per_second": 1.8, "step": 48500 }, { "epoch": 7.13, "learning_rate": 1.4355988939019067e-05, "loss": 1.8551, "step": 49000 }, { "epoch": 7.13, "eval_loss": 2.6070237159729004, "eval_runtime": 220.205, "eval_samples_per_second": 443.795, "eval_steps_per_second": 1.735, "step": 49000 }, { "epoch": 7.2, "learning_rate": 1.3992140881967692e-05, "loss": 1.8555, "step": 49500 }, { "epoch": 7.2, "eval_loss": 2.614821434020996, "eval_runtime": 263.5514, "eval_samples_per_second": 370.804, "eval_steps_per_second": 1.449, "step": 49500 }, { "epoch": 7.28, "learning_rate": 1.3628292824916317e-05, "loss": 1.8543, "step": 50000 }, { "epoch": 7.28, "eval_loss": 2.607656955718994, "eval_runtime": 264.5295, "eval_samples_per_second": 369.433, "eval_steps_per_second": 1.444, "step": 50000 }, { "epoch": 7.35, "learning_rate": 1.326444476786494e-05, "loss": 1.8485, "step": 50500 }, { "epoch": 7.35, "eval_loss": 2.613083839416504, "eval_runtime": 263.0661, "eval_samples_per_second": 371.488, "eval_steps_per_second": 1.452, "step": 50500 }, { "epoch": 7.42, "learning_rate": 1.2900596710813565e-05, "loss": 1.8474, "step": 51000 }, { "epoch": 7.42, "eval_loss": 2.603851079940796, "eval_runtime": 285.6161, "eval_samples_per_second": 342.159, "eval_steps_per_second": 1.337, "step": 51000 }, { "epoch": 7.5, "learning_rate": 1.253674865376219e-05, "loss": 1.8474, "step": 51500 }, { "epoch": 7.5, "eval_loss": 2.5973451137542725, "eval_runtime": 273.9949, "eval_samples_per_second": 356.671, "eval_steps_per_second": 1.394, "step": 51500 }, { "epoch": 7.57, "learning_rate": 1.2172900596710813e-05, "loss": 1.8442, "step": 52000 }, { "epoch": 7.57, "eval_loss": 2.5946028232574463, "eval_runtime": 244.7175, "eval_samples_per_second": 399.342, "eval_steps_per_second": 1.561, "step": 52000 }, { "epoch": 7.64, "learning_rate": 1.1809780235773542e-05, "loss": 1.8329, "step": 52500 }, { "epoch": 7.64, "eval_loss": 2.606858253479004, "eval_runtime": 275.6967, "eval_samples_per_second": 354.469, "eval_steps_per_second": 1.386, "step": 52500 }, { "epoch": 7.71, "learning_rate": 1.1445932178722165e-05, "loss": 1.8551, "step": 53000 }, { "epoch": 7.71, "eval_loss": 2.592348337173462, "eval_runtime": 253.7347, "eval_samples_per_second": 385.15, "eval_steps_per_second": 1.506, "step": 53000 }, { "epoch": 7.79, "learning_rate": 1.108208412167079e-05, "loss": 1.8433, "step": 53500 }, { "epoch": 7.79, "eval_loss": 2.592151641845703, "eval_runtime": 250.8033, "eval_samples_per_second": 389.652, "eval_steps_per_second": 1.523, "step": 53500 }, { "epoch": 7.86, "learning_rate": 1.0718236064619415e-05, "loss": 1.851, "step": 54000 }, { "epoch": 7.86, "eval_loss": 2.5993497371673584, "eval_runtime": 244.9443, "eval_samples_per_second": 398.972, "eval_steps_per_second": 1.56, "step": 54000 }, { "epoch": 7.93, "learning_rate": 1.035438800756804e-05, "loss": 1.8313, "step": 54500 }, { "epoch": 7.93, "eval_loss": 2.59601092338562, "eval_runtime": 247.9824, "eval_samples_per_second": 394.084, "eval_steps_per_second": 1.54, "step": 54500 }, { "epoch": 8.0, "learning_rate": 9.991267646630768e-06, "loss": 1.8298, "step": 55000 }, { "epoch": 8.0, "eval_loss": 2.6058406829833984, "eval_runtime": 248.1822, "eval_samples_per_second": 393.767, "eval_steps_per_second": 1.539, "step": 55000 }, { "epoch": 8.08, "learning_rate": 9.628147285693495e-06, "loss": 1.8159, "step": 55500 }, { "epoch": 8.08, "eval_loss": 2.6286239624023438, "eval_runtime": 249.0138, "eval_samples_per_second": 392.452, "eval_steps_per_second": 1.534, "step": 55500 }, { "epoch": 8.15, "learning_rate": 9.264299228642118e-06, "loss": 1.817, "step": 56000 }, { "epoch": 8.15, "eval_loss": 2.634847402572632, "eval_runtime": 257.4805, "eval_samples_per_second": 379.547, "eval_steps_per_second": 1.484, "step": 56000 }, { "epoch": 8.22, "learning_rate": 8.900451171590745e-06, "loss": 1.8066, "step": 56500 }, { "epoch": 8.22, "eval_loss": 2.6410584449768066, "eval_runtime": 307.9556, "eval_samples_per_second": 317.338, "eval_steps_per_second": 1.24, "step": 56500 }, { "epoch": 8.3, "learning_rate": 8.536603114539368e-06, "loss": 1.7935, "step": 57000 }, { "epoch": 8.3, "eval_loss": 2.633836269378662, "eval_runtime": 253.2423, "eval_samples_per_second": 385.899, "eval_steps_per_second": 1.508, "step": 57000 }, { "epoch": 8.37, "learning_rate": 8.172755057487993e-06, "loss": 1.809, "step": 57500 }, { "epoch": 8.37, "eval_loss": 2.629018783569336, "eval_runtime": 250.9739, "eval_samples_per_second": 389.387, "eval_steps_per_second": 1.522, "step": 57500 }, { "epoch": 8.44, "learning_rate": 7.808907000436618e-06, "loss": 1.812, "step": 58000 }, { "epoch": 8.44, "eval_loss": 2.6257762908935547, "eval_runtime": 253.0757, "eval_samples_per_second": 386.153, "eval_steps_per_second": 1.509, "step": 58000 }, { "epoch": 8.51, "learning_rate": 7.445058943385242e-06, "loss": 1.79, "step": 58500 }, { "epoch": 8.51, "eval_loss": 2.6320676803588867, "eval_runtime": 250.9004, "eval_samples_per_second": 389.501, "eval_steps_per_second": 1.523, "step": 58500 }, { "epoch": 8.59, "learning_rate": 7.0812108863338665e-06, "loss": 1.8046, "step": 59000 }, { "epoch": 8.59, "eval_loss": 2.6290555000305176, "eval_runtime": 248.8573, "eval_samples_per_second": 392.699, "eval_steps_per_second": 1.535, "step": 59000 }, { "epoch": 8.66, "learning_rate": 6.718090525396595e-06, "loss": 1.7975, "step": 59500 }, { "epoch": 8.66, "eval_loss": 2.6282989978790283, "eval_runtime": 245.3595, "eval_samples_per_second": 398.297, "eval_steps_per_second": 1.557, "step": 59500 }, { "epoch": 8.73, "learning_rate": 6.354970164459321e-06, "loss": 1.7968, "step": 60000 }, { "epoch": 8.73, "eval_loss": 2.628397226333618, "eval_runtime": 253.0259, "eval_samples_per_second": 386.229, "eval_steps_per_second": 1.51, "step": 60000 }, { "epoch": 8.81, "learning_rate": 5.991122107407947e-06, "loss": 1.7779, "step": 60500 }, { "epoch": 8.81, "eval_loss": 2.625650405883789, "eval_runtime": 247.6056, "eval_samples_per_second": 394.684, "eval_steps_per_second": 1.543, "step": 60500 }, { "epoch": 8.88, "learning_rate": 5.627274050356571e-06, "loss": 1.7664, "step": 61000 }, { "epoch": 8.88, "eval_loss": 2.623215675354004, "eval_runtime": 268.5188, "eval_samples_per_second": 363.945, "eval_steps_per_second": 1.423, "step": 61000 }, { "epoch": 8.95, "learning_rate": 5.263425993305196e-06, "loss": 1.792, "step": 61500 }, { "epoch": 8.95, "eval_loss": 2.63053297996521, "eval_runtime": 248.0445, "eval_samples_per_second": 393.986, "eval_steps_per_second": 1.54, "step": 61500 }, { "epoch": 9.02, "learning_rate": 4.89957793625382e-06, "loss": 1.7725, "step": 62000 }, { "epoch": 9.02, "eval_loss": 2.6525118350982666, "eval_runtime": 247.425, "eval_samples_per_second": 394.972, "eval_steps_per_second": 1.544, "step": 62000 }, { "epoch": 9.1, "learning_rate": 4.535729879202445e-06, "loss": 1.7563, "step": 62500 }, { "epoch": 9.1, "eval_loss": 2.679419755935669, "eval_runtime": 249.9688, "eval_samples_per_second": 390.953, "eval_steps_per_second": 1.528, "step": 62500 }, { "epoch": 9.17, "learning_rate": 4.17188182215107e-06, "loss": 1.7606, "step": 63000 }, { "epoch": 9.17, "eval_loss": 2.6783671379089355, "eval_runtime": 248.5877, "eval_samples_per_second": 393.125, "eval_steps_per_second": 1.537, "step": 63000 }, { "epoch": 9.24, "learning_rate": 3.8080337650996943e-06, "loss": 1.7666, "step": 63500 }, { "epoch": 9.24, "eval_loss": 2.679766893386841, "eval_runtime": 253.2315, "eval_samples_per_second": 385.916, "eval_steps_per_second": 1.509, "step": 63500 }, { "epoch": 9.31, "learning_rate": 3.4449134041624217e-06, "loss": 1.7551, "step": 64000 }, { "epoch": 9.31, "eval_loss": 2.6813337802886963, "eval_runtime": 248.5037, "eval_samples_per_second": 393.258, "eval_steps_per_second": 1.537, "step": 64000 }, { "epoch": 9.39, "learning_rate": 3.0810653471110467e-06, "loss": 1.7578, "step": 64500 }, { "epoch": 9.39, "eval_loss": 2.683032751083374, "eval_runtime": 245.3373, "eval_samples_per_second": 398.333, "eval_steps_per_second": 1.557, "step": 64500 }, { "epoch": 9.46, "learning_rate": 2.717217290059671e-06, "loss": 1.7483, "step": 65000 }, { "epoch": 9.46, "eval_loss": 2.6832828521728516, "eval_runtime": 266.4744, "eval_samples_per_second": 366.737, "eval_steps_per_second": 1.434, "step": 65000 }, { "epoch": 9.53, "learning_rate": 2.3533692330082957e-06, "loss": 1.7431, "step": 65500 }, { "epoch": 9.53, "eval_loss": 2.6883933544158936, "eval_runtime": 256.629, "eval_samples_per_second": 380.807, "eval_steps_per_second": 1.489, "step": 65500 }, { "epoch": 9.61, "learning_rate": 1.9895211759569207e-06, "loss": 1.743, "step": 66000 }, { "epoch": 9.61, "eval_loss": 2.6931965351104736, "eval_runtime": 260.7406, "eval_samples_per_second": 374.802, "eval_steps_per_second": 1.465, "step": 66000 }, { "epoch": 9.68, "learning_rate": 1.6264008150196477e-06, "loss": 1.7395, "step": 66500 }, { "epoch": 9.68, "eval_loss": 2.6927101612091064, "eval_runtime": 254.3781, "eval_samples_per_second": 384.176, "eval_steps_per_second": 1.502, "step": 66500 }, { "epoch": 9.75, "learning_rate": 1.2625527579682726e-06, "loss": 1.7473, "step": 67000 }, { "epoch": 9.75, "eval_loss": 2.6903834342956543, "eval_runtime": 221.4662, "eval_samples_per_second": 441.268, "eval_steps_per_second": 1.725, "step": 67000 }, { "epoch": 9.82, "learning_rate": 8.987047009168971e-07, "loss": 1.7413, "step": 67500 }, { "epoch": 9.82, "eval_loss": 2.6892080307006836, "eval_runtime": 233.1215, "eval_samples_per_second": 419.206, "eval_steps_per_second": 1.639, "step": 67500 }, { "epoch": 9.9, "learning_rate": 5.348566438655218e-07, "loss": 1.7437, "step": 68000 }, { "epoch": 9.9, "eval_loss": 2.6897966861724854, "eval_runtime": 217.2083, "eval_samples_per_second": 449.918, "eval_steps_per_second": 1.759, "step": 68000 }, { "epoch": 9.97, "learning_rate": 1.7173628292824918e-07, "loss": 1.7546, "step": 68500 }, { "epoch": 9.97, "eval_loss": 2.689425468444824, "eval_runtime": 214.7197, "eval_samples_per_second": 455.133, "eval_steps_per_second": 1.779, "step": 68500 }, { "epoch": 10.0, "step": 68710, "total_flos": 1.1742684555264e+18, "train_loss": 2.026226943438967, "train_runtime": 84552.1773, "train_samples_per_second": 104.022, "train_steps_per_second": 0.813 } ], "max_steps": 68710, "num_train_epochs": 10, "total_flos": 1.1742684555264e+18, "trial_name": null, "trial_params": null }