{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9920199230667882,
  "global_step": 24000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 9.999793741991486e-06,
      "loss": 1.742,
      "step": 500
    },
    {
      "epoch": 0.04,
      "learning_rate": 9.999587070640269e-06,
      "loss": 1.7307,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.999380399289051e-06,
      "loss": 1.7414,
      "step": 1500
    },
    {
      "epoch": 0.08,
      "learning_rate": 9.999173727937835e-06,
      "loss": 1.7396,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.6747098550914694,
      "eval_loss": 1.6244006156921387,
      "eval_runtime": 13.0499,
      "eval_samples_per_second": 306.516,
      "eval_steps_per_second": 0.766,
      "step": 2000
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.998967056586617e-06,
      "loss": 1.738,
      "step": 2500
    },
    {
      "epoch": 0.12,
      "learning_rate": 9.9987603852354e-06,
      "loss": 1.7357,
      "step": 3000
    },
    {
      "epoch": 0.14,
      "learning_rate": 9.998553713884183e-06,
      "loss": 1.7349,
      "step": 3500
    },
    {
      "epoch": 0.17,
      "learning_rate": 9.998347042532966e-06,
      "loss": 1.7309,
      "step": 4000
    },
    {
      "epoch": 0.17,
      "eval_accuracy": 0.6821878279118573,
      "eval_loss": 1.6006274223327637,
      "eval_runtime": 13.0815,
      "eval_samples_per_second": 305.775,
      "eval_steps_per_second": 0.764,
      "step": 4000
    },
    {
      "epoch": 0.19,
      "learning_rate": 9.998140371181748e-06,
      "loss": 1.7322,
      "step": 4500
    },
    {
      "epoch": 0.21,
      "learning_rate": 9.99793369983053e-06,
      "loss": 1.7302,
      "step": 5000
    },
    {
      "epoch": 0.23,
      "learning_rate": 9.997727028479312e-06,
      "loss": 1.7281,
      "step": 5500
    },
    {
      "epoch": 0.25,
      "learning_rate": 9.997520357128095e-06,
      "loss": 1.7273,
      "step": 6000
    },
    {
      "epoch": 0.25,
      "eval_accuracy": 0.6896347482724581,
      "eval_loss": 1.5701262950897217,
      "eval_runtime": 13.0657,
      "eval_samples_per_second": 306.145,
      "eval_steps_per_second": 0.765,
      "step": 6000
    },
    {
      "epoch": 0.27,
      "learning_rate": 9.997313685776879e-06,
      "loss": 1.7252,
      "step": 6500
    },
    {
      "epoch": 0.29,
      "learning_rate": 9.997107427768363e-06,
      "loss": 1.7231,
      "step": 7000
    },
    {
      "epoch": 0.31,
      "learning_rate": 9.996901169759848e-06,
      "loss": 1.7223,
      "step": 7500
    },
    {
      "epoch": 0.33,
      "learning_rate": 9.99669449840863e-06,
      "loss": 1.7215,
      "step": 8000
    },
    {
      "epoch": 0.33,
      "eval_accuracy": 0.6799366211130917,
      "eval_loss": 1.6207975149154663,
      "eval_runtime": 11.6877,
      "eval_samples_per_second": 342.241,
      "eval_steps_per_second": 0.856,
      "step": 8000
    },
    {
      "epoch": 0.35,
      "learning_rate": 9.996487827057414e-06,
      "loss": 1.7182,
      "step": 8500
    },
    {
      "epoch": 0.37,
      "learning_rate": 9.996281155706197e-06,
      "loss": 1.7189,
      "step": 9000
    },
    {
      "epoch": 0.39,
      "learning_rate": 9.996074484354979e-06,
      "loss": 1.7169,
      "step": 9500
    },
    {
      "epoch": 0.41,
      "learning_rate": 9.995868226346464e-06,
      "loss": 1.7173,
      "step": 10000
    },
    {
      "epoch": 0.41,
      "eval_accuracy": 0.6820005349023803,
      "eval_loss": 1.6004101037979126,
      "eval_runtime": 13.0596,
      "eval_samples_per_second": 306.287,
      "eval_steps_per_second": 0.766,
      "step": 10000
    },
    {
      "epoch": 0.43,
      "learning_rate": 9.995661554995247e-06,
      "loss": 1.7152,
      "step": 10500
    },
    {
      "epoch": 0.45,
      "learning_rate": 9.99545488364403e-06,
      "loss": 1.7126,
      "step": 11000
    },
    {
      "epoch": 0.48,
      "learning_rate": 9.995248212292813e-06,
      "loss": 1.7114,
      "step": 11500
    },
    {
      "epoch": 0.5,
      "learning_rate": 9.995041954284299e-06,
      "loss": 1.7098,
      "step": 12000
    },
    {
      "epoch": 0.5,
      "eval_accuracy": 0.6854211592498203,
      "eval_loss": 1.6027569770812988,
      "eval_runtime": 13.0635,
      "eval_samples_per_second": 306.196,
      "eval_steps_per_second": 0.765,
      "step": 12000
    },
    {
      "epoch": 0.52,
      "learning_rate": 9.99483528293308e-06,
      "loss": 1.7092,
      "step": 12500
    },
    {
      "epoch": 0.54,
      "learning_rate": 9.994628611581863e-06,
      "loss": 1.7081,
      "step": 13000
    },
    {
      "epoch": 0.56,
      "learning_rate": 9.994421940230647e-06,
      "loss": 1.7071,
      "step": 13500
    },
    {
      "epoch": 0.58,
      "learning_rate": 9.99421526887943e-06,
      "loss": 1.7066,
      "step": 14000
    },
    {
      "epoch": 0.58,
      "eval_accuracy": 0.6892430278884463,
      "eval_loss": 1.568811058998108,
      "eval_runtime": 11.6681,
      "eval_samples_per_second": 342.815,
      "eval_steps_per_second": 0.857,
      "step": 14000
    },
    {
      "epoch": 0.6,
      "learning_rate": 9.994008597528212e-06,
      "loss": 1.7062,
      "step": 14500
    },
    {
      "epoch": 0.62,
      "learning_rate": 9.993801926176994e-06,
      "loss": 1.7041,
      "step": 15000
    },
    {
      "epoch": 0.64,
      "learning_rate": 9.993595254825776e-06,
      "loss": 1.7028,
      "step": 15500
    },
    {
      "epoch": 0.66,
      "learning_rate": 9.99338858347456e-06,
      "loss": 1.702,
      "step": 16000
    },
    {
      "epoch": 0.66,
      "eval_accuracy": 0.689404839764552,
      "eval_loss": 1.5777287483215332,
      "eval_runtime": 13.0408,
      "eval_samples_per_second": 306.729,
      "eval_steps_per_second": 0.767,
      "step": 16000
    },
    {
      "epoch": 0.68,
      "learning_rate": 9.993181912123342e-06,
      "loss": 1.6984,
      "step": 16500
    },
    {
      "epoch": 0.7,
      "learning_rate": 9.992975240772125e-06,
      "loss": 1.6999,
      "step": 17000
    },
    {
      "epoch": 0.72,
      "learning_rate": 9.99276898276361e-06,
      "loss": 1.6983,
      "step": 17500
    },
    {
      "epoch": 0.74,
      "learning_rate": 9.992562724755096e-06,
      "loss": 1.6972,
      "step": 18000
    },
    {
      "epoch": 0.74,
      "eval_accuracy": 0.6915187376725839,
      "eval_loss": 1.5638251304626465,
      "eval_runtime": 11.6615,
      "eval_samples_per_second": 343.008,
      "eval_steps_per_second": 0.858,
      "step": 18000
    },
    {
      "epoch": 0.76,
      "learning_rate": 9.992356053403878e-06,
      "loss": 1.694,
      "step": 18500
    },
    {
      "epoch": 0.79,
      "learning_rate": 9.992149795395364e-06,
      "loss": 1.6954,
      "step": 19000
    },
    {
      "epoch": 0.81,
      "learning_rate": 9.991943124044146e-06,
      "loss": 1.6946,
      "step": 19500
    },
    {
      "epoch": 0.83,
      "learning_rate": 9.991736452692928e-06,
      "loss": 1.6939,
      "step": 20000
    },
    {
      "epoch": 0.83,
      "eval_accuracy": 0.6928982725527831,
      "eval_loss": 1.5452697277069092,
      "eval_runtime": 13.0226,
      "eval_samples_per_second": 307.159,
      "eval_steps_per_second": 0.768,
      "step": 20000
    },
    {
      "epoch": 0.85,
      "learning_rate": 9.99152978134171e-06,
      "loss": 1.6927,
      "step": 20500
    },
    {
      "epoch": 0.87,
      "learning_rate": 9.991323109990494e-06,
      "loss": 1.6901,
      "step": 21000
    },
    {
      "epoch": 0.89,
      "learning_rate": 9.991116438639277e-06,
      "loss": 1.6892,
      "step": 21500
    },
    {
      "epoch": 0.91,
      "learning_rate": 9.990909767288059e-06,
      "loss": 1.6898,
      "step": 22000
    },
    {
      "epoch": 0.91,
      "eval_accuracy": 0.683527801316733,
      "eval_loss": 1.5818458795547485,
      "eval_runtime": 13.0663,
      "eval_samples_per_second": 306.132,
      "eval_steps_per_second": 0.765,
      "step": 22000
    },
    {
      "epoch": 0.93,
      "learning_rate": 9.990703095936843e-06,
      "loss": 1.6889,
      "step": 22500
    },
    {
      "epoch": 0.95,
      "learning_rate": 9.990496424585625e-06,
      "loss": 1.6855,
      "step": 23000
    },
    {
      "epoch": 0.97,
      "learning_rate": 9.990289753234407e-06,
      "loss": 1.6853,
      "step": 23500
    },
    {
      "epoch": 0.99,
      "learning_rate": 9.990083908568595e-06,
      "loss": 1.6856,
      "step": 24000
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.6877091672681934,
      "eval_loss": 1.5628535747528076,
      "eval_runtime": 13.0163,
      "eval_samples_per_second": 307.306,
      "eval_steps_per_second": 0.768,
      "step": 24000
    }
  ],
  "max_steps": 24193000,
  "num_train_epochs": 1000,
  "total_flos": 1.78960468082688e+19,
  "trial_name": null,
  "trial_params": null
}