|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.5807200929152149,
  "eval_steps": 100,
  "global_step": 3000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.019357336430507164,
      "eval_loss": 3.5493686199188232,
      "eval_runtime": 162.4972,
      "eval_samples_per_second": 34.807,
      "eval_steps_per_second": 4.351,
      "eval_wer": 1.0,
      "step": 100
    },
    {
      "epoch": 0.03871467286101433,
      "eval_loss": 3.0426220893859863,
      "eval_runtime": 160.0022,
      "eval_samples_per_second": 35.35,
      "eval_steps_per_second": 4.419,
      "eval_wer": 1.0,
      "step": 200
    },
    {
      "epoch": 0.05807200929152149,
      "eval_loss": 2.8965415954589844,
      "eval_runtime": 160.2187,
      "eval_samples_per_second": 35.302,
      "eval_steps_per_second": 4.413,
      "eval_wer": 1.0,
      "step": 300
    },
    {
      "epoch": 0.07742934572202866,
      "eval_loss": 1.826250433921814,
      "eval_runtime": 161.1204,
      "eval_samples_per_second": 35.104,
      "eval_steps_per_second": 4.388,
      "eval_wer": 0.9828762176822712,
      "step": 400
    },
    {
      "epoch": 0.09678668215253582,
      "grad_norm": 4.890473365783691,
      "learning_rate": 0.0002982,
      "loss": 3.9715,
      "step": 500
    },
    {
      "epoch": 0.09678668215253582,
      "eval_loss": 1.3860080242156982,
      "eval_runtime": 161.4467,
      "eval_samples_per_second": 35.033,
      "eval_steps_per_second": 4.379,
      "eval_wer": 0.8748856542183563,
      "step": 500
    },
    {
      "epoch": 0.11614401858304298,
      "eval_loss": 1.308407187461853,
      "eval_runtime": 161.0068,
      "eval_samples_per_second": 35.129,
      "eval_steps_per_second": 4.391,
      "eval_wer": 0.8153456051098522,
      "step": 600
    },
    {
      "epoch": 0.13550135501355012,
      "eval_loss": 1.0549893379211426,
      "eval_runtime": 161.8736,
      "eval_samples_per_second": 34.941,
      "eval_steps_per_second": 4.368,
      "eval_wer": 0.7336906806181894,
      "step": 700
    },
    {
      "epoch": 0.1548586914440573,
      "eval_loss": 1.0011754035949707,
      "eval_runtime": 161.7881,
      "eval_samples_per_second": 34.959,
      "eval_steps_per_second": 4.37,
      "eval_wer": 0.7190062749755259,
      "step": 800
    },
    {
      "epoch": 0.17421602787456447,
      "eval_loss": 0.913667619228363,
      "eval_runtime": 162.0369,
      "eval_samples_per_second": 34.906,
      "eval_steps_per_second": 4.363,
      "eval_wer": 0.6752258830704049,
      "step": 900
    },
    {
      "epoch": 0.19357336430507163,
      "grad_norm": 3.201047658920288,
      "learning_rate": 0.0002406,
      "loss": 1.0155,
      "step": 1000
    },
    {
      "epoch": 0.19357336430507163,
      "eval_loss": 0.8486206531524658,
      "eval_runtime": 161.6295,
      "eval_samples_per_second": 34.994,
      "eval_steps_per_second": 4.374,
      "eval_wer": 0.6469483718765547,
      "step": 1000
    },
    {
      "epoch": 0.2129307007355788,
      "eval_loss": 0.8534524440765381,
      "eval_runtime": 161.4191,
      "eval_samples_per_second": 35.039,
      "eval_steps_per_second": 4.38,
      "eval_wer": 0.6111761968191812,
      "step": 1100
    },
    {
      "epoch": 0.23228803716608595,
      "eval_loss": 0.8349705934524536,
      "eval_runtime": 162.6145,
      "eval_samples_per_second": 34.782,
      "eval_steps_per_second": 4.348,
      "eval_wer": 0.6192807048514708,
      "step": 1200
    },
    {
      "epoch": 0.2516453735965931,
      "eval_loss": 0.7680675983428955,
      "eval_runtime": 161.7995,
      "eval_samples_per_second": 34.957,
      "eval_steps_per_second": 4.37,
      "eval_wer": 0.5669785431143779,
      "step": 1300
    },
    {
      "epoch": 0.27100271002710025,
      "eval_loss": 0.7377049326896667,
      "eval_runtime": 162.5136,
      "eval_samples_per_second": 34.803,
      "eval_steps_per_second": 4.35,
      "eval_wer": 0.5559211054227985,
      "step": 1400
    },
    {
      "epoch": 0.29036004645760743,
      "grad_norm": 5.448112487792969,
      "learning_rate": 0.00018059999999999997,
      "loss": 0.7987,
      "step": 1500
    },
    {
      "epoch": 0.29036004645760743,
      "eval_loss": 0.7129804491996765,
      "eval_runtime": 162.7944,
      "eval_samples_per_second": 34.743,
      "eval_steps_per_second": 4.343,
      "eval_wer": 0.5437242220474715,
      "step": 1500
    },
    {
      "epoch": 0.3097173828881146,
      "eval_loss": 0.7039781808853149,
      "eval_runtime": 162.4512,
      "eval_samples_per_second": 34.817,
      "eval_steps_per_second": 4.352,
      "eval_wer": 0.5451846383463594,
      "step": 1600
    },
    {
      "epoch": 0.32907471931862176,
      "eval_loss": 0.6728500127792358,
      "eval_runtime": 166.2937,
      "eval_samples_per_second": 34.012,
      "eval_steps_per_second": 4.252,
      "eval_wer": 0.5050954085153504,
      "step": 1700
    },
    {
      "epoch": 0.34843205574912894,
      "eval_loss": 0.6646420359611511,
      "eval_runtime": 164.3307,
      "eval_samples_per_second": 34.418,
      "eval_steps_per_second": 4.302,
      "eval_wer": 0.511338286979827,
      "step": 1800
    },
    {
      "epoch": 0.3677893921796361,
      "eval_loss": 0.6530969142913818,
      "eval_runtime": 163.5666,
      "eval_samples_per_second": 34.579,
      "eval_steps_per_second": 4.322,
      "eval_wer": 0.49691065782927574,
      "step": 1900
    },
    {
      "epoch": 0.38714672861014326,
      "grad_norm": 2.6550886631011963,
      "learning_rate": 0.00012059999999999999,
      "loss": 0.6851,
      "step": 2000
    },
    {
      "epoch": 0.38714672861014326,
      "eval_loss": 0.6413969397544861,
      "eval_runtime": 162.7473,
      "eval_samples_per_second": 34.753,
      "eval_steps_per_second": 4.344,
      "eval_wer": 0.5037954775240326,
      "step": 2000
    },
    {
      "epoch": 0.4065040650406504,
      "eval_loss": 0.6108531355857849,
      "eval_runtime": 163.3073,
      "eval_samples_per_second": 34.634,
      "eval_steps_per_second": 4.329,
      "eval_wer": 0.467654186259248,
      "step": 2100
    },
    {
      "epoch": 0.4258614014711576,
      "eval_loss": 0.6034538745880127,
      "eval_runtime": 164.2548,
      "eval_samples_per_second": 34.434,
      "eval_steps_per_second": 4.304,
      "eval_wer": 0.46924299080419185,
      "step": 2200
    },
    {
      "epoch": 0.4452187379016647,
      "eval_loss": 0.5801683664321899,
      "eval_runtime": 163.2622,
      "eval_samples_per_second": 34.644,
      "eval_steps_per_second": 4.33,
      "eval_wer": 0.458955882588949,
      "step": 2300
    },
    {
      "epoch": 0.4645760743321719,
      "eval_loss": 0.571967363357544,
      "eval_runtime": 164.105,
      "eval_samples_per_second": 34.466,
      "eval_steps_per_second": 4.308,
      "eval_wer": 0.44545906822230424,
      "step": 2400
    },
    {
      "epoch": 0.48393341076267904,
      "grad_norm": 6.736985683441162,
      "learning_rate": 6.0599999999999996e-05,
      "loss": 0.5979,
      "step": 2500
    },
    {
      "epoch": 0.48393341076267904,
      "eval_loss": 0.569513201713562,
      "eval_runtime": 163.6068,
      "eval_samples_per_second": 34.571,
      "eval_steps_per_second": 4.321,
      "eval_wer": 0.4425542841552856,
      "step": 2500
    },
    {
      "epoch": 0.5032907471931862,
      "eval_loss": 0.5556703209877014,
      "eval_runtime": 163.3416,
      "eval_samples_per_second": 34.627,
      "eval_steps_per_second": 4.328,
      "eval_wer": 0.43513986294554735,
      "step": 2600
    },
    {
      "epoch": 0.5226480836236934,
      "eval_loss": 0.5499459505081177,
      "eval_runtime": 163.0587,
      "eval_samples_per_second": 34.687,
      "eval_steps_per_second": 4.336,
      "eval_wer": 0.4269711607902297,
      "step": 2700
    },
    {
      "epoch": 0.5420054200542005,
      "eval_loss": 0.5451160073280334,
      "eval_runtime": 163.5278,
      "eval_samples_per_second": 34.587,
      "eval_steps_per_second": 4.323,
      "eval_wer": 0.425815666575725,
      "step": 2800
    },
    {
      "epoch": 0.5613627564847077,
      "eval_loss": 0.5382982492446899,
      "eval_runtime": 163.1776,
      "eval_samples_per_second": 34.662,
      "eval_steps_per_second": 4.333,
      "eval_wer": 0.42172329123268765,
      "step": 2900
    },
    {
      "epoch": 0.5807200929152149,
      "grad_norm": 4.4167070388793945,
      "learning_rate": 6e-07,
      "loss": 0.5753,
      "step": 3000
    },
    {
      "epoch": 0.5807200929152149,
      "eval_loss": 0.5355480313301086,
      "eval_runtime": 164.5272,
      "eval_samples_per_second": 34.377,
      "eval_steps_per_second": 4.297,
      "eval_wer": 0.41856173067355684,
      "step": 3000
    },
    {
      "epoch": 0.5807200929152149,
      "step": 3000,
      "total_flos": 3.3874766991231493e+18,
      "train_loss": 1.2740035095214843,
      "train_runtime": 6100.931,
      "train_samples_per_second": 3.934,
      "train_steps_per_second": 0.492
    }
  ],
  "logging_steps": 500,
  "max_steps": 3000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 400,
  "total_flos": 3.3874766991231493e+18,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|