|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 29.23728813559322, |
|
"eval_steps": 500, |
|
"global_step": 69000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 2.978813559322034e-05, |
|
"loss": 0.2425, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 2.9576271186440677e-05, |
|
"loss": 0.2113, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 2.9364406779661017e-05, |
|
"loss": 0.2168, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 2.9152542372881356e-05, |
|
"loss": 0.2157, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14853818714618683, |
|
"eval_runtime": 22.6834, |
|
"eval_samples_per_second": 26.054, |
|
"eval_steps_per_second": 1.102, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 2.8940677966101696e-05, |
|
"loss": 0.2233, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.8728813559322036e-05, |
|
"loss": 0.2157, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 2.8516949152542372e-05, |
|
"loss": 0.2232, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 2.8305084745762712e-05, |
|
"loss": 0.2166, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 2.809322033898305e-05, |
|
"loss": 0.216, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13325555622577667, |
|
"eval_runtime": 22.9508, |
|
"eval_samples_per_second": 25.751, |
|
"eval_steps_per_second": 1.089, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 2.788135593220339e-05, |
|
"loss": 0.2205, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 2.766949152542373e-05, |
|
"loss": 0.2207, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 2.7457627118644068e-05, |
|
"loss": 0.221, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 2.7245762711864407e-05, |
|
"loss": 0.217, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 2.7033898305084747e-05, |
|
"loss": 0.2173, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14965683221817017, |
|
"eval_runtime": 22.3153, |
|
"eval_samples_per_second": 26.484, |
|
"eval_steps_per_second": 1.12, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 2.6822033898305083e-05, |
|
"loss": 0.2149, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 2.6610169491525427e-05, |
|
"loss": 0.2158, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"learning_rate": 2.6398305084745763e-05, |
|
"loss": 0.2149, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.81, |
|
"learning_rate": 2.6186440677966103e-05, |
|
"loss": 0.2208, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14246971905231476, |
|
"eval_runtime": 22.0006, |
|
"eval_samples_per_second": 26.863, |
|
"eval_steps_per_second": 1.136, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 4.03, |
|
"learning_rate": 2.5974576271186442e-05, |
|
"loss": 0.2182, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 2.576271186440678e-05, |
|
"loss": 0.2152, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.45, |
|
"learning_rate": 2.555084745762712e-05, |
|
"loss": 0.2199, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.66, |
|
"learning_rate": 2.5338983050847458e-05, |
|
"loss": 0.2145, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.87, |
|
"learning_rate": 2.5127118644067798e-05, |
|
"loss": 0.2165, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.1418760120868683, |
|
"eval_runtime": 22.3285, |
|
"eval_samples_per_second": 26.468, |
|
"eval_steps_per_second": 1.12, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 2.4915254237288138e-05, |
|
"loss": 0.2225, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 5.3, |
|
"learning_rate": 2.4703389830508474e-05, |
|
"loss": 0.2141, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.51, |
|
"learning_rate": 2.4491525423728814e-05, |
|
"loss": 0.2199, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 5.72, |
|
"learning_rate": 2.427966101694915e-05, |
|
"loss": 0.2169, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 2.4067796610169493e-05, |
|
"loss": 0.2152, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14399726688861847, |
|
"eval_runtime": 22.3281, |
|
"eval_samples_per_second": 26.469, |
|
"eval_steps_per_second": 1.12, |
|
"step": 14160 |
|
}, |
|
{ |
|
"epoch": 6.14, |
|
"learning_rate": 2.3855932203389833e-05, |
|
"loss": 0.2198, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 6.36, |
|
"learning_rate": 2.364406779661017e-05, |
|
"loss": 0.2207, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 6.57, |
|
"learning_rate": 2.343220338983051e-05, |
|
"loss": 0.2171, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 2.3220338983050846e-05, |
|
"loss": 0.2123, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 6.99, |
|
"learning_rate": 2.3008474576271185e-05, |
|
"loss": 0.2166, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14218531548976898, |
|
"eval_runtime": 22.4949, |
|
"eval_samples_per_second": 26.273, |
|
"eval_steps_per_second": 1.111, |
|
"step": 16520 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"learning_rate": 2.279661016949153e-05, |
|
"loss": 0.2142, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 7.42, |
|
"learning_rate": 2.2584745762711865e-05, |
|
"loss": 0.2167, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 7.63, |
|
"learning_rate": 2.2372881355932205e-05, |
|
"loss": 0.2224, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 7.84, |
|
"learning_rate": 2.2161016949152544e-05, |
|
"loss": 0.2129, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13575270771980286, |
|
"eval_runtime": 22.1798, |
|
"eval_samples_per_second": 26.646, |
|
"eval_steps_per_second": 1.127, |
|
"step": 18880 |
|
}, |
|
{ |
|
"epoch": 8.05, |
|
"learning_rate": 2.194915254237288e-05, |
|
"loss": 0.2231, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 8.26, |
|
"learning_rate": 2.173728813559322e-05, |
|
"loss": 0.2178, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 2.152542372881356e-05, |
|
"loss": 0.2218, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 8.69, |
|
"learning_rate": 2.13135593220339e-05, |
|
"loss": 0.2124, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 8.9, |
|
"learning_rate": 2.110169491525424e-05, |
|
"loss": 0.2158, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14425306022167206, |
|
"eval_runtime": 22.3031, |
|
"eval_samples_per_second": 26.499, |
|
"eval_steps_per_second": 1.121, |
|
"step": 21240 |
|
}, |
|
{ |
|
"epoch": 9.11, |
|
"learning_rate": 2.0889830508474576e-05, |
|
"loss": 0.2114, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 2.0677966101694916e-05, |
|
"loss": 0.218, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 9.53, |
|
"learning_rate": 2.0466101694915252e-05, |
|
"loss": 0.2198, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 9.75, |
|
"learning_rate": 2.0254237288135595e-05, |
|
"loss": 0.2207, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 9.96, |
|
"learning_rate": 2.0042372881355935e-05, |
|
"loss": 0.2112, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14262421429157257, |
|
"eval_runtime": 22.232, |
|
"eval_samples_per_second": 26.583, |
|
"eval_steps_per_second": 1.125, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 10.17, |
|
"learning_rate": 1.983050847457627e-05, |
|
"loss": 0.2188, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 10.38, |
|
"learning_rate": 1.961864406779661e-05, |
|
"loss": 0.2103, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 10.59, |
|
"learning_rate": 1.9406779661016948e-05, |
|
"loss": 0.2221, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 10.81, |
|
"learning_rate": 1.9194915254237287e-05, |
|
"loss": 0.2128, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13920965790748596, |
|
"eval_runtime": 21.869, |
|
"eval_samples_per_second": 27.025, |
|
"eval_steps_per_second": 1.143, |
|
"step": 25960 |
|
}, |
|
{ |
|
"epoch": 11.02, |
|
"learning_rate": 1.898305084745763e-05, |
|
"loss": 0.2196, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 11.23, |
|
"learning_rate": 1.8771186440677967e-05, |
|
"loss": 0.2134, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 11.44, |
|
"learning_rate": 1.8559322033898307e-05, |
|
"loss": 0.2157, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 11.65, |
|
"learning_rate": 1.8347457627118643e-05, |
|
"loss": 0.2112, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 1.8135593220338983e-05, |
|
"loss": 0.2242, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13710011541843414, |
|
"eval_runtime": 22.1879, |
|
"eval_samples_per_second": 26.636, |
|
"eval_steps_per_second": 1.127, |
|
"step": 28320 |
|
}, |
|
{ |
|
"epoch": 12.08, |
|
"learning_rate": 1.7923728813559322e-05, |
|
"loss": 0.2147, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 12.29, |
|
"learning_rate": 1.7711864406779662e-05, |
|
"loss": 0.2104, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.7500000000000002e-05, |
|
"loss": 0.2192, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 1.728813559322034e-05, |
|
"loss": 0.2196, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 12.92, |
|
"learning_rate": 1.7076271186440678e-05, |
|
"loss": 0.219, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13678474724292755, |
|
"eval_runtime": 22.3039, |
|
"eval_samples_per_second": 26.498, |
|
"eval_steps_per_second": 1.121, |
|
"step": 30680 |
|
}, |
|
{ |
|
"epoch": 13.14, |
|
"learning_rate": 1.6864406779661018e-05, |
|
"loss": 0.2198, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 13.35, |
|
"learning_rate": 1.6652542372881354e-05, |
|
"loss": 0.2193, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 13.56, |
|
"learning_rate": 1.6440677966101694e-05, |
|
"loss": 0.2149, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 13.77, |
|
"learning_rate": 1.6228813559322037e-05, |
|
"loss": 0.2136, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 13.98, |
|
"learning_rate": 1.6016949152542373e-05, |
|
"loss": 0.215, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.1423649787902832, |
|
"eval_runtime": 22.5006, |
|
"eval_samples_per_second": 26.266, |
|
"eval_steps_per_second": 1.111, |
|
"step": 33040 |
|
}, |
|
{ |
|
"epoch": 14.19, |
|
"learning_rate": 1.5805084745762713e-05, |
|
"loss": 0.2148, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 14.41, |
|
"learning_rate": 1.559322033898305e-05, |
|
"loss": 0.2197, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 14.62, |
|
"learning_rate": 1.538135593220339e-05, |
|
"loss": 0.2165, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 14.83, |
|
"learning_rate": 1.5169491525423727e-05, |
|
"loss": 0.2183, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14253665506839752, |
|
"eval_runtime": 22.0912, |
|
"eval_samples_per_second": 26.753, |
|
"eval_steps_per_second": 1.132, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 15.04, |
|
"learning_rate": 1.4957627118644067e-05, |
|
"loss": 0.21, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 15.25, |
|
"learning_rate": 1.4745762711864408e-05, |
|
"loss": 0.2191, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 15.47, |
|
"learning_rate": 1.4533898305084746e-05, |
|
"loss": 0.2156, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 15.68, |
|
"learning_rate": 1.4322033898305085e-05, |
|
"loss": 0.2171, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 15.89, |
|
"learning_rate": 1.4110169491525424e-05, |
|
"loss": 0.2196, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14593291282653809, |
|
"eval_runtime": 22.3278, |
|
"eval_samples_per_second": 26.469, |
|
"eval_steps_per_second": 1.12, |
|
"step": 37760 |
|
}, |
|
{ |
|
"epoch": 16.1, |
|
"learning_rate": 1.3898305084745764e-05, |
|
"loss": 0.2096, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 16.31, |
|
"learning_rate": 1.3686440677966102e-05, |
|
"loss": 0.2163, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 16.53, |
|
"learning_rate": 1.3474576271186442e-05, |
|
"loss": 0.2202, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 16.74, |
|
"learning_rate": 1.326271186440678e-05, |
|
"loss": 0.2143, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 16.95, |
|
"learning_rate": 1.3050847457627118e-05, |
|
"loss": 0.2166, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 17.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14286492764949799, |
|
"eval_runtime": 22.0561, |
|
"eval_samples_per_second": 26.795, |
|
"eval_steps_per_second": 1.133, |
|
"step": 40120 |
|
}, |
|
{ |
|
"epoch": 17.16, |
|
"learning_rate": 1.283898305084746e-05, |
|
"loss": 0.2153, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 17.37, |
|
"learning_rate": 1.2627118644067797e-05, |
|
"loss": 0.218, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 17.58, |
|
"learning_rate": 1.2415254237288135e-05, |
|
"loss": 0.2148, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 17.8, |
|
"learning_rate": 1.2203389830508475e-05, |
|
"loss": 0.2127, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 18.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14040569961071014, |
|
"eval_runtime": 21.8515, |
|
"eval_samples_per_second": 27.046, |
|
"eval_steps_per_second": 1.144, |
|
"step": 42480 |
|
}, |
|
{ |
|
"epoch": 18.01, |
|
"learning_rate": 1.1991525423728813e-05, |
|
"loss": 0.2214, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 18.22, |
|
"learning_rate": 1.1779661016949153e-05, |
|
"loss": 0.2125, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 18.43, |
|
"learning_rate": 1.1567796610169493e-05, |
|
"loss": 0.2161, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 18.64, |
|
"learning_rate": 1.135593220338983e-05, |
|
"loss": 0.2197, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 18.86, |
|
"learning_rate": 1.1144067796610169e-05, |
|
"loss": 0.2174, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 19.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13983392715454102, |
|
"eval_runtime": 22.1759, |
|
"eval_samples_per_second": 26.651, |
|
"eval_steps_per_second": 1.127, |
|
"step": 44840 |
|
}, |
|
{ |
|
"epoch": 19.07, |
|
"learning_rate": 1.0932203389830509e-05, |
|
"loss": 0.2163, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 19.28, |
|
"learning_rate": 1.0720338983050848e-05, |
|
"loss": 0.2206, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 19.49, |
|
"learning_rate": 1.0508474576271186e-05, |
|
"loss": 0.2159, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 19.7, |
|
"learning_rate": 1.0296610169491524e-05, |
|
"loss": 0.2129, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 19.92, |
|
"learning_rate": 1.0084745762711864e-05, |
|
"loss": 0.2174, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14546626806259155, |
|
"eval_runtime": 22.2976, |
|
"eval_samples_per_second": 26.505, |
|
"eval_steps_per_second": 1.121, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 20.13, |
|
"learning_rate": 9.872881355932204e-06, |
|
"loss": 0.2146, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 20.34, |
|
"learning_rate": 9.661016949152542e-06, |
|
"loss": 0.2163, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 20.55, |
|
"learning_rate": 9.449152542372882e-06, |
|
"loss": 0.2113, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 20.76, |
|
"learning_rate": 9.23728813559322e-06, |
|
"loss": 0.2208, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 20.97, |
|
"learning_rate": 9.02542372881356e-06, |
|
"loss": 0.2114, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 21.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14164069294929504, |
|
"eval_runtime": 22.6001, |
|
"eval_samples_per_second": 26.15, |
|
"eval_steps_per_second": 1.106, |
|
"step": 49560 |
|
}, |
|
{ |
|
"epoch": 21.19, |
|
"learning_rate": 8.8135593220339e-06, |
|
"loss": 0.2155, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 21.4, |
|
"learning_rate": 8.601694915254237e-06, |
|
"loss": 0.2197, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 21.61, |
|
"learning_rate": 8.389830508474575e-06, |
|
"loss": 0.2151, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 21.82, |
|
"learning_rate": 8.177966101694915e-06, |
|
"loss": 0.2123, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 22.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13964423537254333, |
|
"eval_runtime": 22.1829, |
|
"eval_samples_per_second": 26.642, |
|
"eval_steps_per_second": 1.127, |
|
"step": 51920 |
|
}, |
|
{ |
|
"epoch": 22.03, |
|
"learning_rate": 7.966101694915255e-06, |
|
"loss": 0.2176, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 22.25, |
|
"learning_rate": 7.754237288135593e-06, |
|
"loss": 0.2136, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 22.46, |
|
"learning_rate": 7.542372881355933e-06, |
|
"loss": 0.2261, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 22.67, |
|
"learning_rate": 7.330508474576272e-06, |
|
"loss": 0.2111, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 22.88, |
|
"learning_rate": 7.1186440677966106e-06, |
|
"loss": 0.2129, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 23.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14013490080833435, |
|
"eval_runtime": 22.24, |
|
"eval_samples_per_second": 26.574, |
|
"eval_steps_per_second": 1.124, |
|
"step": 54280 |
|
}, |
|
{ |
|
"epoch": 23.09, |
|
"learning_rate": 6.906779661016949e-06, |
|
"loss": 0.2161, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 23.31, |
|
"learning_rate": 6.694915254237288e-06, |
|
"loss": 0.2181, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 23.52, |
|
"learning_rate": 6.483050847457627e-06, |
|
"loss": 0.2151, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 23.73, |
|
"learning_rate": 6.271186440677966e-06, |
|
"loss": 0.215, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 23.94, |
|
"learning_rate": 6.059322033898305e-06, |
|
"loss": 0.2156, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14107845723628998, |
|
"eval_runtime": 22.1976, |
|
"eval_samples_per_second": 26.625, |
|
"eval_steps_per_second": 1.126, |
|
"step": 56640 |
|
}, |
|
{ |
|
"epoch": 24.15, |
|
"learning_rate": 5.847457627118645e-06, |
|
"loss": 0.2168, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 24.36, |
|
"learning_rate": 5.635593220338983e-06, |
|
"loss": 0.2138, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 24.58, |
|
"learning_rate": 5.423728813559322e-06, |
|
"loss": 0.2113, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 24.79, |
|
"learning_rate": 5.2118644067796615e-06, |
|
"loss": 0.2153, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 4.9999999999999996e-06, |
|
"loss": 0.2193, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13969872891902924, |
|
"eval_runtime": 22.1245, |
|
"eval_samples_per_second": 26.712, |
|
"eval_steps_per_second": 1.13, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 25.21, |
|
"learning_rate": 4.788135593220339e-06, |
|
"loss": 0.2183, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 25.42, |
|
"learning_rate": 4.576271186440678e-06, |
|
"loss": 0.2184, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 25.64, |
|
"learning_rate": 4.364406779661017e-06, |
|
"loss": 0.2161, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 25.85, |
|
"learning_rate": 4.152542372881356e-06, |
|
"loss": 0.2072, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 26.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14177070558071136, |
|
"eval_runtime": 22.3038, |
|
"eval_samples_per_second": 26.498, |
|
"eval_steps_per_second": 1.121, |
|
"step": 61360 |
|
}, |
|
{ |
|
"epoch": 26.06, |
|
"learning_rate": 3.940677966101696e-06, |
|
"loss": 0.2173, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 26.27, |
|
"learning_rate": 3.7288135593220342e-06, |
|
"loss": 0.2123, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 26.48, |
|
"learning_rate": 3.516949152542373e-06, |
|
"loss": 0.2133, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 26.69, |
|
"learning_rate": 3.305084745762712e-06, |
|
"loss": 0.2166, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 26.91, |
|
"learning_rate": 3.093220338983051e-06, |
|
"loss": 0.2188, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 27.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.13952849805355072, |
|
"eval_runtime": 22.4612, |
|
"eval_samples_per_second": 26.312, |
|
"eval_steps_per_second": 1.113, |
|
"step": 63720 |
|
}, |
|
{ |
|
"epoch": 27.12, |
|
"learning_rate": 2.88135593220339e-06, |
|
"loss": 0.2155, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 27.33, |
|
"learning_rate": 2.6694915254237287e-06, |
|
"loss": 0.2186, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 27.54, |
|
"learning_rate": 2.4576271186440676e-06, |
|
"loss": 0.2195, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 27.75, |
|
"learning_rate": 2.245762711864407e-06, |
|
"loss": 0.222, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 27.97, |
|
"learning_rate": 2.033898305084746e-06, |
|
"loss": 0.2082, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14052434265613556, |
|
"eval_runtime": 22.7926, |
|
"eval_samples_per_second": 25.929, |
|
"eval_steps_per_second": 1.097, |
|
"step": 66080 |
|
}, |
|
{ |
|
"epoch": 28.18, |
|
"learning_rate": 1.8220338983050848e-06, |
|
"loss": 0.211, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 28.39, |
|
"learning_rate": 1.6101694915254237e-06, |
|
"loss": 0.2196, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 28.6, |
|
"learning_rate": 1.3983050847457628e-06, |
|
"loss": 0.2172, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 28.81, |
|
"learning_rate": 1.186440677966102e-06, |
|
"loss": 0.214, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 29.0, |
|
"eval_accuracy": 0.7225042301184433, |
|
"eval_loss": 0.14071397483348846, |
|
"eval_runtime": 22.1609, |
|
"eval_samples_per_second": 26.669, |
|
"eval_steps_per_second": 1.128, |
|
"step": 68440 |
|
}, |
|
{ |
|
"epoch": 29.03, |
|
"learning_rate": 9.745762711864406e-07, |
|
"loss": 0.2116, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 29.24, |
|
"learning_rate": 7.627118644067797e-07, |
|
"loss": 0.216, |
|
"step": 69000 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 70800, |
|
"num_train_epochs": 30, |
|
"save_steps": 1500, |
|
"total_flos": 1.8723845102688e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|