|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.7084282460136673, |
|
"eval_steps": 200, |
|
"global_step": 3000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.11389521640091116, |
|
"eval_loss": 0.7527692914009094, |
|
"eval_runtime": 198.1592, |
|
"eval_samples_per_second": 35.436, |
|
"eval_steps_per_second": 0.555, |
|
"eval_wer": 0.5048698494532216, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22779043280182232, |
|
"eval_loss": 0.6966450810432434, |
|
"eval_runtime": 197.214, |
|
"eval_samples_per_second": 35.606, |
|
"eval_steps_per_second": 0.558, |
|
"eval_wer": 0.5050160113740549, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.2847380410022779, |
|
"grad_norm": 4.219395637512207, |
|
"learning_rate": 0.00027833333333333334, |
|
"loss": 2.117, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.3416856492027335, |
|
"eval_loss": 0.6128434538841248, |
|
"eval_runtime": 200.109, |
|
"eval_samples_per_second": 35.091, |
|
"eval_steps_per_second": 0.55, |
|
"eval_wer": 0.47608923849639245, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.45558086560364464, |
|
"eval_loss": 0.6331803202629089, |
|
"eval_runtime": 200.832, |
|
"eval_samples_per_second": 34.965, |
|
"eval_steps_per_second": 0.548, |
|
"eval_wer": 0.5017472993263264, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"grad_norm": 3.2030014991760254, |
|
"learning_rate": 0.00022288888888888887, |
|
"loss": 0.7606, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5694760820045558, |
|
"eval_loss": 0.5895215272903442, |
|
"eval_runtime": 203.9306, |
|
"eval_samples_per_second": 34.433, |
|
"eval_steps_per_second": 0.539, |
|
"eval_wer": 0.457659549024037, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.683371298405467, |
|
"eval_loss": 0.5552608370780945, |
|
"eval_runtime": 205.5709, |
|
"eval_samples_per_second": 34.159, |
|
"eval_steps_per_second": 0.535, |
|
"eval_wer": 0.4211057813683413, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.7972665148063781, |
|
"eval_loss": 0.530360996723175, |
|
"eval_runtime": 215.1898, |
|
"eval_samples_per_second": 32.632, |
|
"eval_steps_per_second": 0.511, |
|
"eval_wer": 0.419604299817962, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.8542141230068337, |
|
"grad_norm": 2.892026424407959, |
|
"learning_rate": 0.00016766666666666666, |
|
"loss": 0.7049, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.9111617312072893, |
|
"eval_loss": 0.5060806274414062, |
|
"eval_runtime": 208.7711, |
|
"eval_samples_per_second": 33.635, |
|
"eval_steps_per_second": 0.527, |
|
"eval_wer": 0.38730251531378307, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.0250569476082005, |
|
"eval_loss": 0.5090161561965942, |
|
"eval_runtime": 207.1232, |
|
"eval_samples_per_second": 33.903, |
|
"eval_steps_per_second": 0.531, |
|
"eval_wer": 0.3959127811956045, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"grad_norm": 2.3204939365386963, |
|
"learning_rate": 0.00011233333333333333, |
|
"loss": 0.6136, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.1389521640091116, |
|
"eval_loss": 0.4839297831058502, |
|
"eval_runtime": 206.3623, |
|
"eval_samples_per_second": 34.028, |
|
"eval_steps_per_second": 0.533, |
|
"eval_wer": 0.3758088733573393, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.2528473804100229, |
|
"eval_loss": 0.46924272179603577, |
|
"eval_runtime": 205.266, |
|
"eval_samples_per_second": 34.209, |
|
"eval_steps_per_second": 0.536, |
|
"eval_wer": 0.3658565752933204, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.366742596810934, |
|
"eval_loss": 0.4569305181503296, |
|
"eval_runtime": 207.0588, |
|
"eval_samples_per_second": 33.913, |
|
"eval_steps_per_second": 0.531, |
|
"eval_wer": 0.35436293333687663, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.4236902050113895, |
|
"grad_norm": 3.092404365539551, |
|
"learning_rate": 5.688888888888888e-05, |
|
"loss": 0.5388, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.4806378132118452, |
|
"eval_loss": 0.4487648606300354, |
|
"eval_runtime": 205.0091, |
|
"eval_samples_per_second": 34.252, |
|
"eval_steps_per_second": 0.537, |
|
"eval_wer": 0.3484765941614956, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.5945330296127562, |
|
"eval_loss": 0.441054105758667, |
|
"eval_runtime": 211.0185, |
|
"eval_samples_per_second": 33.277, |
|
"eval_steps_per_second": 0.521, |
|
"eval_wer": 0.3423112185917963, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"grad_norm": 0.7985823154449463, |
|
"learning_rate": 1.4444444444444445e-06, |
|
"loss": 0.5275, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"eval_loss": 0.4376124143600464, |
|
"eval_runtime": 210.3633, |
|
"eval_samples_per_second": 33.38, |
|
"eval_steps_per_second": 0.523, |
|
"eval_wer": 0.339454417411871, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.7084282460136673, |
|
"step": 3000, |
|
"total_flos": 5.591115044514249e+18, |
|
"train_loss": 0.8770465799967448, |
|
"train_runtime": 4793.5663, |
|
"train_samples_per_second": 10.013, |
|
"train_steps_per_second": 0.626 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 3000, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 5.591115044514249e+18, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|