{ "best_metric": null, "best_model_checkpoint": null, "epoch": 13.0, "eval_steps": 500, "global_step": 6877, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.19, "learning_rate": 0.0005, "loss": 1.3631, "step": 100 }, { "epoch": 0.38, "learning_rate": 0.0005, "loss": 1.294, "step": 200 }, { "epoch": 0.57, "learning_rate": 0.0005, "loss": 1.302, "step": 300 }, { "epoch": 0.76, "learning_rate": 0.0005, "loss": 1.2626, "step": 400 }, { "epoch": 0.95, "learning_rate": 0.0005, "loss": 1.2156, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.6911294117647059, "eval_loss": 1.012805700302124, "eval_runtime": 7.0918, "eval_samples_per_second": 70.504, "eval_steps_per_second": 8.883, "step": 529 }, { "epoch": 1.0, "eval_exact_match": 27.6, "eval_f1": 35.68145743145743, "eval_qa_bleu": 19.064111689202868, "eval_qa_exact_match": 0.216, "eval_recite_bleu": 17.898525975872754, "eval_recite_exact_match": 0.006, "step": 529 }, { "epoch": 1.13, "learning_rate": 0.0005, "loss": 1.0305, "step": 600 }, { "epoch": 1.32, "learning_rate": 0.0005, "loss": 0.9199, "step": 700 }, { "epoch": 1.51, "learning_rate": 0.0005, "loss": 0.9086, "step": 800 }, { "epoch": 1.7, "learning_rate": 0.0005, "loss": 2.9245, "step": 900 }, { "epoch": 1.89, "learning_rate": 0.0005, "loss": 4.9199, "step": 1000 }, { "epoch": 2.0, "eval_accuracy": 0.22922352941176471, "eval_loss": 7.1224870681762695, "eval_runtime": 7.7159, "eval_samples_per_second": 64.801, "eval_steps_per_second": 8.165, "step": 1058 }, { "epoch": 2.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 1058 }, { "epoch": 2.08, "learning_rate": 0.0005, "loss": 7.4349, "step": 1100 }, { "epoch": 2.27, "learning_rate": 0.0005, "loss": 8.0921, "step": 1200 }, { "epoch": 2.46, "learning_rate": 0.0005, "loss": 7.889, "step": 1300 }, { "epoch": 2.65, "learning_rate": 0.0005, "loss": 7.2559, "step": 1400 }, { "epoch": 2.84, "learning_rate": 0.0005, "loss": 7.3563, "step": 1500 }, { "epoch": 3.0, "eval_accuracy": 0.24235294117647058, "eval_loss": 7.308440685272217, "eval_runtime": 7.7087, "eval_samples_per_second": 64.862, "eval_steps_per_second": 8.173, "step": 1587 }, { "epoch": 3.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 1587 }, { "epoch": 3.02, "learning_rate": 0.0005, "loss": 7.2721, "step": 1600 }, { "epoch": 3.21, "learning_rate": 0.0005, "loss": 7.2743, "step": 1700 }, { "epoch": 3.4, "learning_rate": 0.0005, "loss": 7.3151, "step": 1800 }, { "epoch": 3.59, "learning_rate": 0.0005, "loss": 7.3366, "step": 1900 }, { "epoch": 3.78, "learning_rate": 0.0005, "loss": 7.3294, "step": 2000 }, { "epoch": 3.97, "learning_rate": 0.0005, "loss": 7.3158, "step": 2100 }, { "epoch": 4.0, "eval_accuracy": 0.23965490196078432, "eval_loss": 7.242833614349365, "eval_runtime": 6.6551, "eval_samples_per_second": 75.13, "eval_steps_per_second": 9.466, "step": 2116 }, { "epoch": 4.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 2116 }, { "epoch": 4.16, "learning_rate": 0.0005, "loss": 7.3307, "step": 2200 }, { "epoch": 4.35, "learning_rate": 0.0005, "loss": 7.3307, "step": 2300 }, { "epoch": 4.54, "learning_rate": 0.0005, "loss": 7.3197, "step": 2400 }, { "epoch": 4.73, "learning_rate": 0.0005, "loss": 7.3238, "step": 2500 }, { "epoch": 4.91, "learning_rate": 0.0005, "loss": 7.3017, "step": 2600 }, { "epoch": 5.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.0755, "eval_samples_per_second": 70.667, "eval_steps_per_second": 8.904, "step": 2645 }, { "epoch": 5.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 2645 }, { "epoch": 5.1, "learning_rate": 0.0005, "loss": 7.2926, "step": 2700 }, { "epoch": 5.29, "learning_rate": 0.0005, "loss": 7.3006, "step": 2800 }, { "epoch": 5.48, "learning_rate": 0.0005, "loss": 7.3051, "step": 2900 }, { "epoch": 5.67, "learning_rate": 0.0005, "loss": 7.2973, "step": 3000 }, { "epoch": 5.86, "learning_rate": 0.0005, "loss": 7.281, "step": 3100 }, { "epoch": 6.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.1043, "eval_samples_per_second": 70.38, "eval_steps_per_second": 8.868, "step": 3174 }, { "epoch": 6.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 3174 }, { "epoch": 6.05, "learning_rate": 0.0005, "loss": 7.3017, "step": 3200 }, { "epoch": 6.24, "learning_rate": 0.0005, "loss": 7.297, "step": 3300 }, { "epoch": 6.43, "learning_rate": 0.0005, "loss": 7.2877, "step": 3400 }, { "epoch": 6.62, "learning_rate": 0.0005, "loss": 7.296, "step": 3500 }, { "epoch": 6.81, "learning_rate": 0.0005, "loss": 7.2993, "step": 3600 }, { "epoch": 6.99, "learning_rate": 0.0005, "loss": 7.3056, "step": 3700 }, { "epoch": 7.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.6931, "eval_samples_per_second": 64.993, "eval_steps_per_second": 8.189, "step": 3703 }, { "epoch": 7.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 3703 }, { "epoch": 7.18, "learning_rate": 0.0005, "loss": 7.299, "step": 3800 }, { "epoch": 7.37, "learning_rate": 0.0005, "loss": 7.2954, "step": 3900 }, { "epoch": 7.56, "learning_rate": 0.0005, "loss": 7.3002, "step": 4000 }, { "epoch": 7.75, "learning_rate": 0.0005, "loss": 7.3011, "step": 4100 }, { "epoch": 7.94, "learning_rate": 0.0005, "loss": 7.2943, "step": 4200 }, { "epoch": 8.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 6.6465, "eval_samples_per_second": 75.228, "eval_steps_per_second": 9.479, "step": 4232 }, { "epoch": 8.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 4232 }, { "epoch": 8.13, "learning_rate": 0.0005, "loss": 7.3001, "step": 4300 }, { "epoch": 8.32, "learning_rate": 0.0005, "loss": 7.2922, "step": 4400 }, { "epoch": 8.51, "learning_rate": 0.0005, "loss": 7.2969, "step": 4500 }, { "epoch": 8.7, "learning_rate": 0.0005, "loss": 7.2879, "step": 4600 }, { "epoch": 8.88, "learning_rate": 0.0005, "loss": 7.3011, "step": 4700 }, { "epoch": 9.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.77, "eval_samples_per_second": 64.35, "eval_steps_per_second": 8.108, "step": 4761 }, { "epoch": 9.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 4761 }, { "epoch": 9.07, "learning_rate": 0.0005, "loss": 7.3066, "step": 4800 }, { "epoch": 9.26, "learning_rate": 0.0005, "loss": 7.2949, "step": 4900 }, { "epoch": 9.45, "learning_rate": 0.0005, "loss": 7.2955, "step": 5000 }, { "epoch": 9.64, "learning_rate": 0.0005, "loss": 7.3061, "step": 5100 }, { "epoch": 9.83, "learning_rate": 0.0005, "loss": 7.2948, "step": 5200 }, { "epoch": 10.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.0663, "eval_samples_per_second": 70.758, "eval_steps_per_second": 8.916, "step": 5290 }, { "epoch": 10.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 5290 }, { "epoch": 10.02, "learning_rate": 0.0005, "loss": 7.2857, "step": 5300 }, { "epoch": 10.21, "learning_rate": 0.0005, "loss": 7.2941, "step": 5400 }, { "epoch": 10.4, "learning_rate": 0.0005, "loss": 7.2953, "step": 5500 }, { "epoch": 10.59, "learning_rate": 0.0005, "loss": 7.2923, "step": 5600 }, { "epoch": 10.78, "learning_rate": 0.0005, "loss": 7.3021, "step": 5700 }, { "epoch": 10.96, "learning_rate": 0.0005, "loss": 7.3053, "step": 5800 }, { "epoch": 11.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.7184, "eval_samples_per_second": 64.78, "eval_steps_per_second": 8.162, "step": 5819 }, { "epoch": 11.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 5819 }, { "epoch": 11.15, "learning_rate": 0.0005, "loss": 7.2882, "step": 5900 }, { "epoch": 11.34, "learning_rate": 0.0005, "loss": 7.3, "step": 6000 }, { "epoch": 11.53, "learning_rate": 0.0005, "loss": 7.2984, "step": 6100 }, { "epoch": 11.72, "learning_rate": 0.0005, "loss": 7.2966, "step": 6200 }, { "epoch": 11.91, "learning_rate": 0.0005, "loss": 7.3066, "step": 6300 }, { "epoch": 12.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.7125, "eval_samples_per_second": 64.83, "eval_steps_per_second": 8.169, "step": 6348 }, { "epoch": 12.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 6348 }, { "epoch": 12.1, "learning_rate": 0.0005, "loss": 7.2909, "step": 6400 }, { "epoch": 12.29, "learning_rate": 0.0005, "loss": 7.3012, "step": 6500 }, { "epoch": 12.48, "learning_rate": 0.0005, "loss": 7.302, "step": 6600 }, { "epoch": 12.67, "learning_rate": 0.0005, "loss": 7.2891, "step": 6700 }, { "epoch": 12.85, "learning_rate": 0.0005, "loss": 7.301, "step": 6800 }, { "epoch": 13.0, "eval_accuracy": 0.24474509803921568, "eval_loss": 7.229503631591797, "eval_runtime": 7.454, "eval_samples_per_second": 67.078, "eval_steps_per_second": 8.452, "step": 6877 }, { "epoch": 13.0, "eval_exact_match": 0.0, "eval_f1": 0.0, "eval_qa_bleu": 0.0, "eval_qa_exact_match": 0.0, "eval_recite_bleu": 0.0, "eval_recite_exact_match": 0.0, "step": 6877 } ], "logging_steps": 100, "max_steps": 26450, "num_train_epochs": 50, "save_steps": 500, "total_flos": 1.457832445498884e+18, "trial_name": null, "trial_params": null }