|
{ |
|
"best_metric": 0.18403036166096146, |
|
"best_model_checkpoint": "videomae-base-finetuned-ucfcrime-full2/checkpoint-700", |
|
"epoch": 3.25, |
|
"eval_steps": 500, |
|
"global_step": 700, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 14.482718467712402, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 2.6878, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 21.39861297607422, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 2.6074, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 17.860671997070312, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 2.7214, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 16.79393768310547, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.5776, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 14.525908470153809, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 2.689, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 12.049698829650879, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 2.6499, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 13.192450523376465, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.5365, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.041342735290527, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.4945, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 10.616167068481445, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 2.7132, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 11.666309356689453, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 2.5329, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 8.298579216003418, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 2.4331, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 9.002656936645508, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.3991, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 10.443252563476562, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 2.7184, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 8.290853500366211, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4219, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 8.40210247039795, |
|
"learning_rate": 4.960317460317461e-05, |
|
"loss": 2.3317, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 9.528456687927246, |
|
"learning_rate": 4.9206349206349204e-05, |
|
"loss": 2.7167, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 10.601034164428711, |
|
"learning_rate": 4.880952380952381e-05, |
|
"loss": 2.5201, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 10.306778907775879, |
|
"learning_rate": 4.841269841269841e-05, |
|
"loss": 2.4928, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.532262802124023, |
|
"learning_rate": 4.801587301587302e-05, |
|
"loss": 2.4065, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 10.122940063476562, |
|
"learning_rate": 4.761904761904762e-05, |
|
"loss": 2.7106, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 9.78131103515625, |
|
"learning_rate": 4.722222222222222e-05, |
|
"loss": 2.5572, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.649138450622559, |
|
"learning_rate": 4.682539682539683e-05, |
|
"loss": 2.5781, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 7.493427753448486, |
|
"learning_rate": 4.642857142857143e-05, |
|
"loss": 2.6114, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 7.953385829925537, |
|
"learning_rate": 4.603174603174603e-05, |
|
"loss": 2.6687, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 8.582961082458496, |
|
"learning_rate": 4.563492063492064e-05, |
|
"loss": 2.6787, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 8.697818756103516, |
|
"learning_rate": 4.523809523809524e-05, |
|
"loss": 2.5829, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 9.811901092529297, |
|
"learning_rate": 4.4841269841269846e-05, |
|
"loss": 2.6221, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 9.103715896606445, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 2.6806, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 8.211301803588867, |
|
"learning_rate": 4.404761904761905e-05, |
|
"loss": 2.6136, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 10.060530662536621, |
|
"learning_rate": 4.3650793650793655e-05, |
|
"loss": 2.6461, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 10.009857177734375, |
|
"learning_rate": 4.3253968253968256e-05, |
|
"loss": 2.571, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 11.69568157196045, |
|
"learning_rate": 4.2857142857142856e-05, |
|
"loss": 2.758, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.031876564025879, |
|
"learning_rate": 4.2460317460317464e-05, |
|
"loss": 2.4975, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 8.202585220336914, |
|
"learning_rate": 4.2063492063492065e-05, |
|
"loss": 2.719, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.405922889709473, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 2.6564, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"eval_accuracy": 0.17190058044351839, |
|
"eval_loss": 2.5464329719543457, |
|
"eval_runtime": 2790.7409, |
|
"eval_samples_per_second": 4.815, |
|
"eval_steps_per_second": 1.204, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 8.090776443481445, |
|
"learning_rate": 4.126984126984127e-05, |
|
"loss": 2.5153, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 7.668066024780273, |
|
"learning_rate": 4.0873015873015874e-05, |
|
"loss": 2.6691, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"grad_norm": 7.451665878295898, |
|
"learning_rate": 4.047619047619048e-05, |
|
"loss": 2.455, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 6.811094284057617, |
|
"learning_rate": 4.007936507936508e-05, |
|
"loss": 2.7581, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 8.437348365783691, |
|
"learning_rate": 3.968253968253968e-05, |
|
"loss": 2.6151, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 9.83616828918457, |
|
"learning_rate": 3.928571428571429e-05, |
|
"loss": 2.7035, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 7.769816875457764, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 2.6254, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 11.614434242248535, |
|
"learning_rate": 3.84920634920635e-05, |
|
"loss": 2.5088, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 7.872964859008789, |
|
"learning_rate": 3.809523809523809e-05, |
|
"loss": 2.4313, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 9.000481605529785, |
|
"learning_rate": 3.76984126984127e-05, |
|
"loss": 2.6319, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"grad_norm": 7.4448394775390625, |
|
"learning_rate": 3.730158730158731e-05, |
|
"loss": 2.6064, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 9.439050674438477, |
|
"learning_rate": 3.690476190476191e-05, |
|
"loss": 2.4276, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 7.45771598815918, |
|
"learning_rate": 3.650793650793651e-05, |
|
"loss": 2.6828, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 9.655268669128418, |
|
"learning_rate": 3.611111111111111e-05, |
|
"loss": 2.4295, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 7.304623603820801, |
|
"learning_rate": 3.571428571428572e-05, |
|
"loss": 2.3941, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 8.936468124389648, |
|
"learning_rate": 3.5317460317460324e-05, |
|
"loss": 2.5164, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 6.037725448608398, |
|
"learning_rate": 3.492063492063492e-05, |
|
"loss": 2.3629, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 9.806131362915039, |
|
"learning_rate": 3.4523809523809526e-05, |
|
"loss": 2.1973, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 7.448783874511719, |
|
"learning_rate": 3.412698412698413e-05, |
|
"loss": 2.6785, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 7.902461528778076, |
|
"learning_rate": 3.3730158730158734e-05, |
|
"loss": 2.5689, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 8.133126258850098, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 2.705, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 7.387323379516602, |
|
"learning_rate": 3.2936507936507936e-05, |
|
"loss": 2.4184, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 7.738976001739502, |
|
"learning_rate": 3.253968253968254e-05, |
|
"loss": 2.5947, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 7.019045352935791, |
|
"learning_rate": 3.2142857142857144e-05, |
|
"loss": 2.3066, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 7.5370588302612305, |
|
"learning_rate": 3.1746031746031745e-05, |
|
"loss": 2.4445, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 7.962660312652588, |
|
"learning_rate": 3.134920634920635e-05, |
|
"loss": 2.4807, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 8.314027786254883, |
|
"learning_rate": 3.095238095238095e-05, |
|
"loss": 2.5981, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 10.364777565002441, |
|
"learning_rate": 3.055555555555556e-05, |
|
"loss": 2.5207, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 8.472153663635254, |
|
"learning_rate": 3.0158730158730158e-05, |
|
"loss": 2.5291, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 8.788971900939941, |
|
"learning_rate": 2.9761904761904762e-05, |
|
"loss": 2.5238, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 8.0416898727417, |
|
"learning_rate": 2.9365079365079366e-05, |
|
"loss": 2.5041, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"grad_norm": 8.097698211669922, |
|
"learning_rate": 2.8968253968253974e-05, |
|
"loss": 2.3977, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 8.505749702453613, |
|
"learning_rate": 2.857142857142857e-05, |
|
"loss": 2.2704, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 7.548125267028809, |
|
"learning_rate": 2.8174603174603175e-05, |
|
"loss": 1.993, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 8.76427936553955, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 2.3285, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"eval_accuracy": 0.1255395148087513, |
|
"eval_loss": 2.866504192352295, |
|
"eval_runtime": 2724.8913, |
|
"eval_samples_per_second": 4.932, |
|
"eval_steps_per_second": 1.233, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 9.777434349060059, |
|
"learning_rate": 2.7380952380952383e-05, |
|
"loss": 2.3064, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"grad_norm": 9.634062767028809, |
|
"learning_rate": 2.6984126984126984e-05, |
|
"loss": 2.6674, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 7.418952941894531, |
|
"learning_rate": 2.6587301587301588e-05, |
|
"loss": 2.3623, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 9.471118927001953, |
|
"learning_rate": 2.6190476190476192e-05, |
|
"loss": 2.5705, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 8.835360527038574, |
|
"learning_rate": 2.5793650793650796e-05, |
|
"loss": 2.6313, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"grad_norm": 9.770784378051758, |
|
"learning_rate": 2.5396825396825397e-05, |
|
"loss": 2.3423, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 11.036508560180664, |
|
"learning_rate": 2.5e-05, |
|
"loss": 2.4206, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 10.88526725769043, |
|
"learning_rate": 2.4603174603174602e-05, |
|
"loss": 2.3682, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 9.554337501525879, |
|
"learning_rate": 2.4206349206349206e-05, |
|
"loss": 2.3234, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"grad_norm": 10.911561012268066, |
|
"learning_rate": 2.380952380952381e-05, |
|
"loss": 2.7633, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 8.518780708312988, |
|
"learning_rate": 2.3412698412698414e-05, |
|
"loss": 2.209, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 10.131900787353516, |
|
"learning_rate": 2.3015873015873015e-05, |
|
"loss": 2.5754, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 7.803353309631348, |
|
"learning_rate": 2.261904761904762e-05, |
|
"loss": 2.2815, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 8.873805046081543, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 2.4105, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 10.056378364562988, |
|
"learning_rate": 2.1825396825396827e-05, |
|
"loss": 2.4577, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 9.494528770446777, |
|
"learning_rate": 2.1428571428571428e-05, |
|
"loss": 2.5227, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"grad_norm": 7.30636739730835, |
|
"learning_rate": 2.1031746031746032e-05, |
|
"loss": 2.1842, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 11.144043922424316, |
|
"learning_rate": 2.0634920634920636e-05, |
|
"loss": 2.3519, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 7.896585941314697, |
|
"learning_rate": 2.023809523809524e-05, |
|
"loss": 2.6106, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 9.881820678710938, |
|
"learning_rate": 1.984126984126984e-05, |
|
"loss": 2.5722, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 7.789708137512207, |
|
"learning_rate": 1.9444444444444445e-05, |
|
"loss": 2.385, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 15.662388801574707, |
|
"learning_rate": 1.9047619047619046e-05, |
|
"loss": 2.4097, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 11.497434616088867, |
|
"learning_rate": 1.8650793650793654e-05, |
|
"loss": 2.3551, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 8.680765151977539, |
|
"learning_rate": 1.8253968253968254e-05, |
|
"loss": 2.4937, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 9.611832618713379, |
|
"learning_rate": 1.785714285714286e-05, |
|
"loss": 2.2117, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 8.637816429138184, |
|
"learning_rate": 1.746031746031746e-05, |
|
"loss": 2.2063, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 8.593392372131348, |
|
"learning_rate": 1.7063492063492063e-05, |
|
"loss": 2.5038, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 9.811333656311035, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 2.1935, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 9.25893497467041, |
|
"learning_rate": 1.626984126984127e-05, |
|
"loss": 2.7302, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 8.803714752197266, |
|
"learning_rate": 1.5873015873015872e-05, |
|
"loss": 2.3372, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"grad_norm": 10.265325546264648, |
|
"learning_rate": 1.5476190476190476e-05, |
|
"loss": 2.3112, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 8.800261497497559, |
|
"learning_rate": 1.5079365079365079e-05, |
|
"loss": 2.1571, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 9.066210746765137, |
|
"learning_rate": 1.4682539682539683e-05, |
|
"loss": 2.4612, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"grad_norm": 11.743760108947754, |
|
"learning_rate": 1.4285714285714285e-05, |
|
"loss": 2.3783, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 7.336353302001953, |
|
"learning_rate": 1.388888888888889e-05, |
|
"loss": 2.3545, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_accuracy": 0.13528798928411967, |
|
"eval_loss": 2.775129556655884, |
|
"eval_runtime": 2705.5287, |
|
"eval_samples_per_second": 4.967, |
|
"eval_steps_per_second": 1.242, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 11.444567680358887, |
|
"learning_rate": 1.3492063492063492e-05, |
|
"loss": 2.7387, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 9.247318267822266, |
|
"learning_rate": 1.3095238095238096e-05, |
|
"loss": 2.7047, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 3.02, |
|
"grad_norm": 9.126996040344238, |
|
"learning_rate": 1.2698412698412699e-05, |
|
"loss": 2.3381, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 3.03, |
|
"grad_norm": 8.4282865524292, |
|
"learning_rate": 1.2301587301587301e-05, |
|
"loss": 2.292, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 9.275003433227539, |
|
"learning_rate": 1.1904761904761905e-05, |
|
"loss": 2.3291, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 9.664990425109863, |
|
"learning_rate": 1.1507936507936508e-05, |
|
"loss": 2.3276, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 3.05, |
|
"grad_norm": 9.327167510986328, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 2.1339, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 8.076509475708008, |
|
"learning_rate": 1.0714285714285714e-05, |
|
"loss": 2.1437, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 10.702942848205566, |
|
"learning_rate": 1.0317460317460318e-05, |
|
"loss": 2.4547, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 3.07, |
|
"grad_norm": 13.38920783996582, |
|
"learning_rate": 9.92063492063492e-06, |
|
"loss": 2.5248, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"grad_norm": 7.934788227081299, |
|
"learning_rate": 9.523809523809523e-06, |
|
"loss": 2.4829, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 8.314118385314941, |
|
"learning_rate": 9.126984126984127e-06, |
|
"loss": 2.1392, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 11.382800102233887, |
|
"learning_rate": 8.73015873015873e-06, |
|
"loss": 2.2964, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"grad_norm": 7.882699012756348, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 2.1905, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 9.530142784118652, |
|
"learning_rate": 7.936507936507936e-06, |
|
"loss": 2.3815, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 3.11, |
|
"grad_norm": 8.318964958190918, |
|
"learning_rate": 7.5396825396825394e-06, |
|
"loss": 2.4007, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 10.5969820022583, |
|
"learning_rate": 7.142857142857143e-06, |
|
"loss": 2.3558, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 3.13, |
|
"grad_norm": 9.391063690185547, |
|
"learning_rate": 6.746031746031746e-06, |
|
"loss": 2.3825, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 9.316353797912598, |
|
"learning_rate": 6.349206349206349e-06, |
|
"loss": 2.4972, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 9.888083457946777, |
|
"learning_rate": 5.9523809523809525e-06, |
|
"loss": 2.2321, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 3.15, |
|
"grad_norm": 8.392168045043945, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 2.196, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 9.588519096374512, |
|
"learning_rate": 5.158730158730159e-06, |
|
"loss": 2.2595, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"grad_norm": 8.25489616394043, |
|
"learning_rate": 4.7619047619047615e-06, |
|
"loss": 2.0537, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 10.085835456848145, |
|
"learning_rate": 4.365079365079365e-06, |
|
"loss": 2.4893, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"grad_norm": 10.919437408447266, |
|
"learning_rate": 3.968253968253968e-06, |
|
"loss": 2.4032, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 8.23149299621582, |
|
"learning_rate": 3.5714285714285714e-06, |
|
"loss": 2.1834, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 11.596048355102539, |
|
"learning_rate": 3.1746031746031746e-06, |
|
"loss": 2.3302, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 11.665302276611328, |
|
"learning_rate": 2.777777777777778e-06, |
|
"loss": 2.3287, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 9.233076095581055, |
|
"learning_rate": 2.3809523809523808e-06, |
|
"loss": 2.1008, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 3.21, |
|
"grad_norm": 11.429000854492188, |
|
"learning_rate": 1.984126984126984e-06, |
|
"loss": 2.2577, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 8.783037185668945, |
|
"learning_rate": 1.5873015873015873e-06, |
|
"loss": 2.0016, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"grad_norm": 8.462553977966309, |
|
"learning_rate": 1.1904761904761904e-06, |
|
"loss": 2.3779, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 9.461167335510254, |
|
"learning_rate": 7.936507936507937e-07, |
|
"loss": 1.9491, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.24, |
|
"grad_norm": 12.051897048950195, |
|
"learning_rate": 3.9682539682539683e-07, |
|
"loss": 2.0514, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 11.628158569335938, |
|
"learning_rate": 0.0, |
|
"loss": 1.9726, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.18403036166096146, |
|
"eval_loss": 2.5616397857666016, |
|
"eval_runtime": 2737.1764, |
|
"eval_samples_per_second": 4.909, |
|
"eval_steps_per_second": 1.228, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"step": 700, |
|
"total_flos": 3.4893542331777024e+18, |
|
"train_loss": 2.449405036653791, |
|
"train_runtime": 12421.7881, |
|
"train_samples_per_second": 0.225, |
|
"train_steps_per_second": 0.056 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.11733870967741936, |
|
"eval_loss": 2.6881906986236572, |
|
"eval_runtime": 975.7268, |
|
"eval_samples_per_second": 5.083, |
|
"eval_steps_per_second": 1.271, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"eval_accuracy": 0.11733870967741936, |
|
"eval_loss": 2.6881909370422363, |
|
"eval_runtime": 956.0688, |
|
"eval_samples_per_second": 5.188, |
|
"eval_steps_per_second": 1.297, |
|
"step": 700 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"total_flos": 3.4893542331777024e+18, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|