{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8165192041489382, "eval_steps": 300, "global_step": 6000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "grad_norm": 21.039730072021484, "learning_rate": 7.4e-06, "loss": 2.2676, "step": 300 }, { "epoch": 0.04, "eval_cer": 0.2868164937892176, "eval_loss": 0.6633031964302063, "eval_runtime": 320.6214, "eval_samples_per_second": 8.05, "eval_steps_per_second": 8.05, "step": 300 }, { "epoch": 0.08, "grad_norm": 9.525612831115723, "learning_rate": 1.49e-05, "loss": 1.8817, "step": 600 }, { "epoch": 0.08, "eval_cer": 0.2927711614803432, "eval_loss": 0.6335302591323853, "eval_runtime": 119.6817, "eval_samples_per_second": 21.566, "eval_steps_per_second": 21.566, "step": 600 }, { "epoch": 0.12, "grad_norm": 7.7516608238220215, "learning_rate": 2.2400000000000002e-05, "loss": 1.7845, "step": 900 }, { "epoch": 0.12, "eval_cer": 0.2928031758227686, "eval_loss": 0.5966914296150208, "eval_runtime": 123.0968, "eval_samples_per_second": 20.967, "eval_steps_per_second": 20.967, "step": 900 }, { "epoch": 0.16, "grad_norm": 4.020584583282471, "learning_rate": 2.9875000000000004e-05, "loss": 1.7503, "step": 1200 }, { "epoch": 0.16, "eval_cer": 0.29213087463183507, "eval_loss": 0.5989494919776917, "eval_runtime": 125.3567, "eval_samples_per_second": 20.589, "eval_steps_per_second": 20.589, "step": 1200 }, { "epoch": 0.2, "grad_norm": 3.7881321907043457, "learning_rate": 3.737500000000001e-05, "loss": 1.7549, "step": 1500 }, { "epoch": 0.2, "eval_cer": 0.29901395825329746, "eval_loss": 0.6132485866546631, "eval_runtime": 132.3811, "eval_samples_per_second": 19.497, "eval_steps_per_second": 19.497, "step": 1500 }, { "epoch": 0.24, "grad_norm": 3.710826873779297, "learning_rate": 4.4875e-05, "loss": 1.7214, "step": 1800 }, { "epoch": 0.24, "eval_cer": 0.29782942758355746, "eval_loss": 0.6118220686912537, "eval_runtime": 130.5327, "eval_samples_per_second": 19.773, "eval_steps_per_second": 19.773, "step": 1800 }, { "epoch": 0.29, "grad_norm": 4.127965927124023, "learning_rate": 4.911181750186986e-05, "loss": 1.6742, "step": 2100 }, { "epoch": 0.29, "eval_cer": 0.2963567678319887, "eval_loss": 0.6121346354484558, "eval_runtime": 131.7081, "eval_samples_per_second": 19.596, "eval_steps_per_second": 19.596, "step": 2100 }, { "epoch": 0.33, "grad_norm": 2.801788091659546, "learning_rate": 4.630703066566941e-05, "loss": 1.6562, "step": 2400 }, { "epoch": 0.33, "eval_cer": 0.3039761813292355, "eval_loss": 0.6422853469848633, "eval_runtime": 132.5985, "eval_samples_per_second": 19.465, "eval_steps_per_second": 19.465, "step": 2400 }, { "epoch": 0.37, "grad_norm": 5.572460174560547, "learning_rate": 4.350224382946896e-05, "loss": 1.6373, "step": 2700 }, { "epoch": 0.37, "eval_cer": 0.2989499295684467, "eval_loss": 0.639430046081543, "eval_runtime": 133.5523, "eval_samples_per_second": 19.326, "eval_steps_per_second": 19.326, "step": 2700 }, { "epoch": 0.41, "grad_norm": 4.840504169464111, "learning_rate": 4.069745699326851e-05, "loss": 1.5944, "step": 3000 }, { "epoch": 0.41, "eval_cer": 0.2967089255986682, "eval_loss": 0.6272587776184082, "eval_runtime": 134.0186, "eval_samples_per_second": 19.259, "eval_steps_per_second": 19.259, "step": 3000 }, { "epoch": 0.45, "grad_norm": 2.972968816757202, "learning_rate": 3.7892670157068066e-05, "loss": 1.5654, "step": 3300 }, { "epoch": 0.45, "eval_cer": 0.2903700857984377, "eval_loss": 0.5716381669044495, "eval_runtime": 134.8224, "eval_samples_per_second": 19.144, "eval_steps_per_second": 19.144, "step": 3300 }, { "epoch": 0.49, "grad_norm": 2.683291435241699, "learning_rate": 3.5087883320867614e-05, "loss": 1.5333, "step": 3600 }, { "epoch": 0.49, "eval_cer": 0.29011397105903447, "eval_loss": 0.573725163936615, "eval_runtime": 134.5796, "eval_samples_per_second": 19.178, "eval_steps_per_second": 19.178, "step": 3600 }, { "epoch": 0.53, "grad_norm": 8.39251708984375, "learning_rate": 3.228309648466717e-05, "loss": 1.5252, "step": 3900 }, { "epoch": 0.53, "eval_cer": 0.29501216545012166, "eval_loss": 0.5687663555145264, "eval_runtime": 135.3645, "eval_samples_per_second": 19.067, "eval_steps_per_second": 19.067, "step": 3900 }, { "epoch": 0.57, "grad_norm": 2.761993169784546, "learning_rate": 2.9478309648466717e-05, "loss": 1.5017, "step": 4200 }, { "epoch": 0.57, "eval_cer": 0.2957164809834806, "eval_loss": 0.5565311312675476, "eval_runtime": 135.2285, "eval_samples_per_second": 19.086, "eval_steps_per_second": 19.086, "step": 4200 }, { "epoch": 0.61, "grad_norm": 23.93025016784668, "learning_rate": 2.667352281226627e-05, "loss": 1.4707, "step": 4500 }, { "epoch": 0.61, "eval_cer": 0.28828915354078627, "eval_loss": 0.5579658150672913, "eval_runtime": 135.1123, "eval_samples_per_second": 19.103, "eval_steps_per_second": 19.103, "step": 4500 }, { "epoch": 0.65, "grad_norm": 1.7994115352630615, "learning_rate": 2.386873597606582e-05, "loss": 1.4578, "step": 4800 }, { "epoch": 0.65, "eval_cer": 0.2855359200922013, "eval_loss": 0.5352594256401062, "eval_runtime": 135.7058, "eval_samples_per_second": 19.019, "eval_steps_per_second": 19.019, "step": 4800 }, { "epoch": 0.69, "grad_norm": 1.9403347969055176, "learning_rate": 2.1073298429319373e-05, "loss": 1.4236, "step": 5100 }, { "epoch": 0.69, "eval_cer": 0.28758483800742735, "eval_loss": 0.5495265126228333, "eval_runtime": 136.3492, "eval_samples_per_second": 18.929, "eval_steps_per_second": 18.929, "step": 5100 }, { "epoch": 0.73, "grad_norm": 3.021031379699707, "learning_rate": 1.8268511593118924e-05, "loss": 1.3954, "step": 5400 }, { "epoch": 0.73, "eval_cer": 0.2876808810347036, "eval_loss": 0.5390140414237976, "eval_runtime": 138.4653, "eval_samples_per_second": 18.64, "eval_steps_per_second": 18.64, "step": 5400 }, { "epoch": 0.78, "grad_norm": 9.61117172241211, "learning_rate": 1.5463724756918475e-05, "loss": 1.3947, "step": 5700 }, { "epoch": 0.78, "eval_cer": 0.28633627865283645, "eval_loss": 0.5187196731567383, "eval_runtime": 136.0715, "eval_samples_per_second": 18.968, "eval_steps_per_second": 18.968, "step": 5700 }, { "epoch": 0.82, "grad_norm": 2.387241840362549, "learning_rate": 1.2658937920718025e-05, "loss": 1.3678, "step": 6000 }, { "epoch": 0.82, "eval_cer": 0.27993341016775514, "eval_loss": 0.5071456432342529, "eval_runtime": 136.2594, "eval_samples_per_second": 18.942, "eval_steps_per_second": 18.942, "step": 6000 } ], "logging_steps": 300, "max_steps": 7348, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 600, "total_flos": 1.8710735171848015e+20, "train_batch_size": 1, "trial_name": null, "trial_params": null }