{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.8165192041489382,
  "eval_steps": 300,
  "global_step": 6000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04,
      "grad_norm": 21.039730072021484,
      "learning_rate": 7.4e-06,
      "loss": 2.2676,
      "step": 300
    },
    {
      "epoch": 0.04,
      "eval_cer": 0.2868164937892176,
      "eval_loss": 0.6633031964302063,
      "eval_runtime": 320.6214,
      "eval_samples_per_second": 8.05,
      "eval_steps_per_second": 8.05,
      "step": 300
    },
    {
      "epoch": 0.08,
      "grad_norm": 9.525612831115723,
      "learning_rate": 1.49e-05,
      "loss": 1.8817,
      "step": 600
    },
    {
      "epoch": 0.08,
      "eval_cer": 0.2927711614803432,
      "eval_loss": 0.6335302591323853,
      "eval_runtime": 119.6817,
      "eval_samples_per_second": 21.566,
      "eval_steps_per_second": 21.566,
      "step": 600
    },
    {
      "epoch": 0.12,
      "grad_norm": 7.7516608238220215,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 1.7845,
      "step": 900
    },
    {
      "epoch": 0.12,
      "eval_cer": 0.2928031758227686,
      "eval_loss": 0.5966914296150208,
      "eval_runtime": 123.0968,
      "eval_samples_per_second": 20.967,
      "eval_steps_per_second": 20.967,
      "step": 900
    },
    {
      "epoch": 0.16,
      "grad_norm": 4.020584583282471,
      "learning_rate": 2.9875000000000004e-05,
      "loss": 1.7503,
      "step": 1200
    },
    {
      "epoch": 0.16,
      "eval_cer": 0.29213087463183507,
      "eval_loss": 0.5989494919776917,
      "eval_runtime": 125.3567,
      "eval_samples_per_second": 20.589,
      "eval_steps_per_second": 20.589,
      "step": 1200
    },
    {
      "epoch": 0.2,
      "grad_norm": 3.7881321907043457,
      "learning_rate": 3.737500000000001e-05,
      "loss": 1.7549,
      "step": 1500
    },
    {
      "epoch": 0.2,
      "eval_cer": 0.29901395825329746,
      "eval_loss": 0.6132485866546631,
      "eval_runtime": 132.3811,
      "eval_samples_per_second": 19.497,
      "eval_steps_per_second": 19.497,
      "step": 1500
    },
    {
      "epoch": 0.24,
      "grad_norm": 3.710826873779297,
      "learning_rate": 4.4875e-05,
      "loss": 1.7214,
      "step": 1800
    },
    {
      "epoch": 0.24,
      "eval_cer": 0.29782942758355746,
      "eval_loss": 0.6118220686912537,
      "eval_runtime": 130.5327,
      "eval_samples_per_second": 19.773,
      "eval_steps_per_second": 19.773,
      "step": 1800
    },
    {
      "epoch": 0.29,
      "grad_norm": 4.127965927124023,
      "learning_rate": 4.911181750186986e-05,
      "loss": 1.6742,
      "step": 2100
    },
    {
      "epoch": 0.29,
      "eval_cer": 0.2963567678319887,
      "eval_loss": 0.6121346354484558,
      "eval_runtime": 131.7081,
      "eval_samples_per_second": 19.596,
      "eval_steps_per_second": 19.596,
      "step": 2100
    },
    {
      "epoch": 0.33,
      "grad_norm": 2.801788091659546,
      "learning_rate": 4.630703066566941e-05,
      "loss": 1.6562,
      "step": 2400
    },
    {
      "epoch": 0.33,
      "eval_cer": 0.3039761813292355,
      "eval_loss": 0.6422853469848633,
      "eval_runtime": 132.5985,
      "eval_samples_per_second": 19.465,
      "eval_steps_per_second": 19.465,
      "step": 2400
    },
    {
      "epoch": 0.37,
      "grad_norm": 5.572460174560547,
      "learning_rate": 4.350224382946896e-05,
      "loss": 1.6373,
      "step": 2700
    },
    {
      "epoch": 0.37,
      "eval_cer": 0.2989499295684467,
      "eval_loss": 0.639430046081543,
      "eval_runtime": 133.5523,
      "eval_samples_per_second": 19.326,
      "eval_steps_per_second": 19.326,
      "step": 2700
    },
    {
      "epoch": 0.41,
      "grad_norm": 4.840504169464111,
      "learning_rate": 4.069745699326851e-05,
      "loss": 1.5944,
      "step": 3000
    },
    {
      "epoch": 0.41,
      "eval_cer": 0.2967089255986682,
      "eval_loss": 0.6272587776184082,
      "eval_runtime": 134.0186,
      "eval_samples_per_second": 19.259,
      "eval_steps_per_second": 19.259,
      "step": 3000
    },
    {
      "epoch": 0.45,
      "grad_norm": 2.972968816757202,
      "learning_rate": 3.7892670157068066e-05,
      "loss": 1.5654,
      "step": 3300
    },
    {
      "epoch": 0.45,
      "eval_cer": 0.2903700857984377,
      "eval_loss": 0.5716381669044495,
      "eval_runtime": 134.8224,
      "eval_samples_per_second": 19.144,
      "eval_steps_per_second": 19.144,
      "step": 3300
    },
    {
      "epoch": 0.49,
      "grad_norm": 2.683291435241699,
      "learning_rate": 3.5087883320867614e-05,
      "loss": 1.5333,
      "step": 3600
    },
    {
      "epoch": 0.49,
      "eval_cer": 0.29011397105903447,
      "eval_loss": 0.573725163936615,
      "eval_runtime": 134.5796,
      "eval_samples_per_second": 19.178,
      "eval_steps_per_second": 19.178,
      "step": 3600
    },
    {
      "epoch": 0.53,
      "grad_norm": 8.39251708984375,
      "learning_rate": 3.228309648466717e-05,
      "loss": 1.5252,
      "step": 3900
    },
    {
      "epoch": 0.53,
      "eval_cer": 0.29501216545012166,
      "eval_loss": 0.5687663555145264,
      "eval_runtime": 135.3645,
      "eval_samples_per_second": 19.067,
      "eval_steps_per_second": 19.067,
      "step": 3900
    },
    {
      "epoch": 0.57,
      "grad_norm": 2.761993169784546,
      "learning_rate": 2.9478309648466717e-05,
      "loss": 1.5017,
      "step": 4200
    },
    {
      "epoch": 0.57,
      "eval_cer": 0.2957164809834806,
      "eval_loss": 0.5565311312675476,
      "eval_runtime": 135.2285,
      "eval_samples_per_second": 19.086,
      "eval_steps_per_second": 19.086,
      "step": 4200
    },
    {
      "epoch": 0.61,
      "grad_norm": 23.93025016784668,
      "learning_rate": 2.667352281226627e-05,
      "loss": 1.4707,
      "step": 4500
    },
    {
      "epoch": 0.61,
      "eval_cer": 0.28828915354078627,
      "eval_loss": 0.5579658150672913,
      "eval_runtime": 135.1123,
      "eval_samples_per_second": 19.103,
      "eval_steps_per_second": 19.103,
      "step": 4500
    },
    {
      "epoch": 0.65,
      "grad_norm": 1.7994115352630615,
      "learning_rate": 2.386873597606582e-05,
      "loss": 1.4578,
      "step": 4800
    },
    {
      "epoch": 0.65,
      "eval_cer": 0.2855359200922013,
      "eval_loss": 0.5352594256401062,
      "eval_runtime": 135.7058,
      "eval_samples_per_second": 19.019,
      "eval_steps_per_second": 19.019,
      "step": 4800
    },
    {
      "epoch": 0.69,
      "grad_norm": 1.9403347969055176,
      "learning_rate": 2.1073298429319373e-05,
      "loss": 1.4236,
      "step": 5100
    },
    {
      "epoch": 0.69,
      "eval_cer": 0.28758483800742735,
      "eval_loss": 0.5495265126228333,
      "eval_runtime": 136.3492,
      "eval_samples_per_second": 18.929,
      "eval_steps_per_second": 18.929,
      "step": 5100
    },
    {
      "epoch": 0.73,
      "grad_norm": 3.021031379699707,
      "learning_rate": 1.8268511593118924e-05,
      "loss": 1.3954,
      "step": 5400
    },
    {
      "epoch": 0.73,
      "eval_cer": 0.2876808810347036,
      "eval_loss": 0.5390140414237976,
      "eval_runtime": 138.4653,
      "eval_samples_per_second": 18.64,
      "eval_steps_per_second": 18.64,
      "step": 5400
    },
    {
      "epoch": 0.78,
      "grad_norm": 9.61117172241211,
      "learning_rate": 1.5463724756918475e-05,
      "loss": 1.3947,
      "step": 5700
    },
    {
      "epoch": 0.78,
      "eval_cer": 0.28633627865283645,
      "eval_loss": 0.5187196731567383,
      "eval_runtime": 136.0715,
      "eval_samples_per_second": 18.968,
      "eval_steps_per_second": 18.968,
      "step": 5700
    },
    {
      "epoch": 0.82,
      "grad_norm": 2.387241840362549,
      "learning_rate": 1.2658937920718025e-05,
      "loss": 1.3678,
      "step": 6000
    },
    {
      "epoch": 0.82,
      "eval_cer": 0.27993341016775514,
      "eval_loss": 0.5071456432342529,
      "eval_runtime": 136.2594,
      "eval_samples_per_second": 18.942,
      "eval_steps_per_second": 18.942,
      "step": 6000
    }
  ],
  "logging_steps": 300,
  "max_steps": 7348,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 600,
  "total_flos": 1.8710735171848015e+20,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}