{ "best_metric": null, "best_model_checkpoint": null, "epoch": 10.998535871156662, "eval_steps": 500, "global_step": 3756, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29282576866764276, "grad_norm": 0.26115021109580994, "learning_rate": 3e-05, "loss": 2.0719, "step": 100 }, { "epoch": 0.5856515373352855, "grad_norm": 0.2679530680179596, "learning_rate": 3e-05, "loss": 1.969, "step": 200 }, { "epoch": 0.8784773060029283, "grad_norm": 0.2916224002838135, "learning_rate": 3e-05, "loss": 1.9686, "step": 300 }, { "epoch": 0.9985358711566618, "eval_accuracy": 0.43743589743589745, "eval_loss": 3.6918885707855225, "eval_runtime": 5.2974, "eval_samples_per_second": 94.385, "eval_steps_per_second": 11.893, "step": 341 }, { "epoch": 0.9985358711566618, "eval_bleu": 0.1527908371131025, "eval_exact_match": 0.012, "eval_prefix_exact_match": 0.026, "step": 341 }, { "epoch": 1.171303074670571, "grad_norm": 0.3534347712993622, "learning_rate": 3e-05, "loss": 1.9603, "step": 400 }, { "epoch": 1.4641288433382138, "grad_norm": 0.3338424563407898, "learning_rate": 3e-05, "loss": 1.9235, "step": 500 }, { "epoch": 1.7569546120058566, "grad_norm": 0.4033390283584595, "learning_rate": 3e-05, "loss": 1.9337, "step": 600 }, { "epoch": 2.0, "eval_accuracy": 0.44764102564102565, "eval_loss": 3.7484843730926514, "eval_runtime": 5.0649, "eval_samples_per_second": 98.718, "eval_steps_per_second": 12.438, "step": 683 }, { "epoch": 2.0, "eval_bleu": 0.21311912714932216, "eval_exact_match": 0.02, "eval_prefix_exact_match": 0.032, "step": 683 }, { "epoch": 2.049780380673499, "grad_norm": 0.38882532715797424, "learning_rate": 3e-05, "loss": 1.9267, "step": 700 }, { "epoch": 2.342606149341142, "grad_norm": 0.44310709834098816, "learning_rate": 3e-05, "loss": 1.8823, "step": 800 }, { "epoch": 2.6354319180087846, "grad_norm": 0.5196680426597595, "learning_rate": 3e-05, "loss": 1.901, "step": 900 }, { "epoch": 2.9282576866764276, "grad_norm": 0.5293437242507935, "learning_rate": 3e-05, "loss": 1.9033, "step": 1000 }, { "epoch": 2.998535871156662, "eval_accuracy": 0.4495897435897436, "eval_loss": 3.882551670074463, "eval_runtime": 5.1314, "eval_samples_per_second": 97.439, "eval_steps_per_second": 12.277, "step": 1024 }, { "epoch": 2.998535871156662, "eval_bleu": 0.24307488023362148, "eval_exact_match": 0.016, "eval_prefix_exact_match": 0.032, "step": 1024 }, { "epoch": 3.22108345534407, "grad_norm": 0.552943229675293, "learning_rate": 3e-05, "loss": 1.8529, "step": 1100 }, { "epoch": 3.513909224011713, "grad_norm": 0.6421984434127808, "learning_rate": 3e-05, "loss": 1.8342, "step": 1200 }, { "epoch": 3.8067349926793557, "grad_norm": 0.7155198454856873, "learning_rate": 3e-05, "loss": 1.857, "step": 1300 }, { "epoch": 4.0, "eval_accuracy": 0.4481025641025641, "eval_loss": 3.970107316970825, "eval_runtime": 5.1563, "eval_samples_per_second": 96.969, "eval_steps_per_second": 12.218, "step": 1366 }, { "epoch": 4.0, "eval_bleu": 0.25309406645138827, "eval_exact_match": 0.016, "eval_prefix_exact_match": 0.044, "step": 1366 }, { "epoch": 4.099560761346998, "grad_norm": 0.7837833166122437, "learning_rate": 3e-05, "loss": 1.8284, "step": 1400 }, { "epoch": 4.392386530014641, "grad_norm": 0.784609854221344, "learning_rate": 3e-05, "loss": 1.7884, "step": 1500 }, { "epoch": 4.685212298682284, "grad_norm": 0.834195613861084, "learning_rate": 3e-05, "loss": 1.7841, "step": 1600 }, { "epoch": 4.978038067349927, "grad_norm": 0.8902062177658081, "learning_rate": 3e-05, "loss": 1.8042, "step": 1700 }, { "epoch": 4.998535871156662, "eval_accuracy": 0.4472820512820513, "eval_loss": 4.117065906524658, "eval_runtime": 5.2376, "eval_samples_per_second": 95.464, "eval_steps_per_second": 12.028, "step": 1707 }, { "epoch": 4.998535871156662, "eval_bleu": 0.2531049955279983, "eval_exact_match": 0.014, "eval_prefix_exact_match": 0.042, "step": 1707 }, { "epoch": 5.270863836017569, "grad_norm": 0.9495383501052856, "learning_rate": 3e-05, "loss": 1.7493, "step": 1800 }, { "epoch": 5.563689604685212, "grad_norm": 0.9647482633590698, "learning_rate": 3e-05, "loss": 1.7334, "step": 1900 }, { "epoch": 5.856515373352855, "grad_norm": 0.9825767874717712, "learning_rate": 3e-05, "loss": 1.7443, "step": 2000 }, { "epoch": 6.0, "eval_accuracy": 0.446974358974359, "eval_loss": 4.183700084686279, "eval_runtime": 5.2674, "eval_samples_per_second": 94.923, "eval_steps_per_second": 11.96, "step": 2049 }, { "epoch": 6.0, "eval_bleu": 0.23587359955369594, "eval_exact_match": 0.012, "eval_prefix_exact_match": 0.04, "step": 2049 }, { "epoch": 6.149341142020498, "grad_norm": 1.0430296659469604, "learning_rate": 3e-05, "loss": 1.6959, "step": 2100 }, { "epoch": 6.44216691068814, "grad_norm": 1.0777732133865356, "learning_rate": 3e-05, "loss": 1.6795, "step": 2200 }, { "epoch": 6.734992679355783, "grad_norm": 1.0959677696228027, "learning_rate": 3e-05, "loss": 1.7019, "step": 2300 }, { "epoch": 6.998535871156662, "eval_accuracy": 0.4461538461538462, "eval_loss": 4.260408401489258, "eval_runtime": 5.178, "eval_samples_per_second": 96.563, "eval_steps_per_second": 12.167, "step": 2390 }, { "epoch": 6.998535871156662, "eval_bleu": 0.2008158466316808, "eval_exact_match": 0.006, "eval_prefix_exact_match": 0.042, "step": 2390 }, { "epoch": 7.027818448023426, "grad_norm": 1.0649549961090088, "learning_rate": 3e-05, "loss": 1.6812, "step": 2400 }, { "epoch": 7.320644216691068, "grad_norm": 1.294122576713562, "learning_rate": 3e-05, "loss": 1.6226, "step": 2500 }, { "epoch": 7.613469985358711, "grad_norm": 1.2657020092010498, "learning_rate": 3e-05, "loss": 1.6404, "step": 2600 }, { "epoch": 7.906295754026354, "grad_norm": 1.1963133811950684, "learning_rate": 3e-05, "loss": 1.6305, "step": 2700 }, { "epoch": 8.0, "eval_accuracy": 0.44148717948717947, "eval_loss": 4.406544208526611, "eval_runtime": 5.3355, "eval_samples_per_second": 93.711, "eval_steps_per_second": 11.808, "step": 2732 }, { "epoch": 8.0, "eval_bleu": 0.24435500637461924, "eval_exact_match": 0.008, "eval_prefix_exact_match": 0.038, "step": 2732 }, { "epoch": 8.199121522693996, "grad_norm": 1.3801360130310059, "learning_rate": 3e-05, "loss": 1.5734, "step": 2800 }, { "epoch": 8.49194729136164, "grad_norm": 1.3306772708892822, "learning_rate": 3e-05, "loss": 1.5651, "step": 2900 }, { "epoch": 8.784773060029282, "grad_norm": 1.5165746212005615, "learning_rate": 3e-05, "loss": 1.6056, "step": 3000 }, { "epoch": 8.998535871156662, "eval_accuracy": 0.4397948717948718, "eval_loss": 4.448728561401367, "eval_runtime": 5.187, "eval_samples_per_second": 96.395, "eval_steps_per_second": 12.146, "step": 3073 }, { "epoch": 8.998535871156662, "eval_bleu": 0.20120283162176952, "eval_exact_match": 0.008, "eval_prefix_exact_match": 0.036, "step": 3073 }, { "epoch": 9.077598828696924, "grad_norm": 1.4077060222625732, "learning_rate": 3e-05, "loss": 1.5612, "step": 3100 }, { "epoch": 9.370424597364568, "grad_norm": 1.475984811782837, "learning_rate": 3e-05, "loss": 1.5158, "step": 3200 }, { "epoch": 9.66325036603221, "grad_norm": 1.586300015449524, "learning_rate": 3e-05, "loss": 1.5145, "step": 3300 }, { "epoch": 9.956076134699854, "grad_norm": 1.510048270225525, "learning_rate": 3e-05, "loss": 1.5521, "step": 3400 }, { "epoch": 10.0, "eval_accuracy": 0.4388717948717949, "eval_loss": 4.5474138259887695, "eval_runtime": 5.169, "eval_samples_per_second": 96.731, "eval_steps_per_second": 12.188, "step": 3415 }, { "epoch": 10.0, "eval_bleu": 0.2258317707850195, "eval_exact_match": 0.004, "eval_prefix_exact_match": 0.032, "step": 3415 }, { "epoch": 10.248901903367496, "grad_norm": 1.6384536027908325, "learning_rate": 3e-05, "loss": 1.4631, "step": 3500 }, { "epoch": 10.541727672035138, "grad_norm": 1.7626007795333862, "learning_rate": 3e-05, "loss": 1.4623, "step": 3600 }, { "epoch": 10.834553440702782, "grad_norm": 1.6483694314956665, "learning_rate": 3e-05, "loss": 1.4934, "step": 3700 }, { "epoch": 10.998535871156662, "eval_accuracy": 0.4367179487179487, "eval_loss": 4.589761734008789, "eval_runtime": 5.853, "eval_samples_per_second": 85.426, "eval_steps_per_second": 10.764, "step": 3756 }, { "epoch": 10.998535871156662, "eval_bleu": 0.20792247275869374, "eval_exact_match": 0.004, "eval_prefix_exact_match": 0.026, "step": 3756 } ], "logging_steps": 100, "max_steps": 17050, "num_input_tokens_seen": 0, "num_train_epochs": 50, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.225752205797622e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }