|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.994840041279669, |
|
"global_step": 2420, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 3.4135334491729736, |
|
"eval_runtime": 150.5586, |
|
"eval_samples_per_second": 25.744, |
|
"eval_steps_per_second": 3.221, |
|
"eval_wer": 1.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"eval_loss": 2.9520680904388428, |
|
"eval_runtime": 111.0015, |
|
"eval_samples_per_second": 34.918, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 1.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 2.333911418914795, |
|
"eval_runtime": 110.7137, |
|
"eval_samples_per_second": 35.009, |
|
"eval_steps_per_second": 4.381, |
|
"eval_wer": 0.9364516945447853, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_loss": 1.2433439493179321, |
|
"eval_runtime": 111.6882, |
|
"eval_samples_per_second": 34.704, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.8259439063111153, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00029759999999999997, |
|
"loss": 3.1912, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"eval_loss": 0.8614437580108643, |
|
"eval_runtime": 110.721, |
|
"eval_samples_per_second": 35.007, |
|
"eval_steps_per_second": 4.38, |
|
"eval_wer": 0.6385201205623173, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 0.7556686401367188, |
|
"eval_runtime": 111.0146, |
|
"eval_samples_per_second": 34.914, |
|
"eval_steps_per_second": 4.369, |
|
"eval_wer": 0.5611899776821664, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_loss": 0.6781343221664429, |
|
"eval_runtime": 110.9557, |
|
"eval_samples_per_second": 34.933, |
|
"eval_steps_per_second": 4.371, |
|
"eval_wer": 0.5194533281181695, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"eval_loss": 0.6363114714622498, |
|
"eval_runtime": 111.5716, |
|
"eval_samples_per_second": 34.74, |
|
"eval_steps_per_second": 4.347, |
|
"eval_wer": 0.4878862480730736, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"eval_loss": 0.5959396362304688, |
|
"eval_runtime": 111.2645, |
|
"eval_samples_per_second": 34.836, |
|
"eval_steps_per_second": 4.359, |
|
"eval_wer": 0.4559050226629547, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00022281249999999997, |
|
"loss": 0.8237, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"eval_loss": 0.5430020093917847, |
|
"eval_runtime": 111.36, |
|
"eval_samples_per_second": 34.806, |
|
"eval_steps_per_second": 4.355, |
|
"eval_wer": 0.42597151600211675, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"eval_loss": 0.5292537212371826, |
|
"eval_runtime": 111.5324, |
|
"eval_samples_per_second": 34.752, |
|
"eval_steps_per_second": 4.349, |
|
"eval_wer": 0.4097738306145457, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 0.5140984654426575, |
|
"eval_runtime": 111.6965, |
|
"eval_samples_per_second": 34.701, |
|
"eval_steps_per_second": 4.342, |
|
"eval_wer": 0.40558636081264526, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"eval_loss": 0.4878837764263153, |
|
"eval_runtime": 110.9007, |
|
"eval_samples_per_second": 34.95, |
|
"eval_steps_per_second": 4.373, |
|
"eval_wer": 0.3946805328670363, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"eval_loss": 0.46965792775154114, |
|
"eval_runtime": 112.2618, |
|
"eval_samples_per_second": 34.526, |
|
"eval_steps_per_second": 4.32, |
|
"eval_wer": 0.37882796861698453, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"learning_rate": 0.000145, |
|
"loss": 0.5625, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.1, |
|
"eval_loss": 0.4748215675354004, |
|
"eval_runtime": 111.3137, |
|
"eval_samples_per_second": 34.821, |
|
"eval_steps_per_second": 4.357, |
|
"eval_wer": 0.37799967788693833, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"eval_loss": 0.48358285427093506, |
|
"eval_runtime": 111.983, |
|
"eval_samples_per_second": 34.612, |
|
"eval_steps_per_second": 4.331, |
|
"eval_wer": 0.368382302188068, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"eval_loss": 0.47962862253189087, |
|
"eval_runtime": 112.0713, |
|
"eval_samples_per_second": 34.585, |
|
"eval_steps_per_second": 4.328, |
|
"eval_wer": 0.36251524285024045, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 3.72, |
|
"eval_loss": 0.4582051932811737, |
|
"eval_runtime": 112.8803, |
|
"eval_samples_per_second": 34.337, |
|
"eval_steps_per_second": 4.297, |
|
"eval_wer": 0.35147136644962385, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"eval_loss": 0.43948230147361755, |
|
"eval_runtime": 112.0141, |
|
"eval_samples_per_second": 34.603, |
|
"eval_steps_per_second": 4.33, |
|
"eval_wer": 0.3437406529691922, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"learning_rate": 6.71875e-05, |
|
"loss": 0.4267, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.13, |
|
"eval_loss": 0.44096100330352783, |
|
"eval_runtime": 111.9224, |
|
"eval_samples_per_second": 34.631, |
|
"eval_steps_per_second": 4.333, |
|
"eval_wer": 0.3420150472815958, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 4.33, |
|
"eval_loss": 0.4466901123523712, |
|
"eval_runtime": 112.4959, |
|
"eval_samples_per_second": 34.455, |
|
"eval_steps_per_second": 4.311, |
|
"eval_wer": 0.3382187147688839, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"eval_loss": 0.4398203492164612, |
|
"eval_runtime": 113.1035, |
|
"eval_samples_per_second": 34.269, |
|
"eval_steps_per_second": 4.288, |
|
"eval_wer": 0.33292685732692173, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 4.75, |
|
"eval_loss": 0.4382670521736145, |
|
"eval_runtime": 112.3465, |
|
"eval_samples_per_second": 34.5, |
|
"eval_steps_per_second": 4.317, |
|
"eval_wer": 0.3286703632975174, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 4.95, |
|
"eval_loss": 0.43576258420944214, |
|
"eval_runtime": 111.7707, |
|
"eval_samples_per_second": 34.678, |
|
"eval_steps_per_second": 4.339, |
|
"eval_wer": 0.3264155718657249, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 4.99, |
|
"step": 2420, |
|
"total_flos": 8.891283011181065e+18, |
|
"train_loss": 1.0930316641311015, |
|
"train_runtime": 6038.6358, |
|
"train_samples_per_second": 12.837, |
|
"train_steps_per_second": 0.401 |
|
} |
|
], |
|
"max_steps": 2420, |
|
"num_train_epochs": 5, |
|
"total_flos": 8.891283011181065e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|