{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "eval_steps": 500, "global_step": 5000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.4, "learning_rate": 0.0003, "loss": 1.9296, "step": 100 }, { "epoch": 0.8, "learning_rate": 0.0003, "loss": 1.7554, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.6093164556962025, "eval_loss": 1.7939746379852295, "eval_runtime": 4.7481, "eval_samples_per_second": 105.304, "eval_steps_per_second": 13.268, "step": 250 }, { "epoch": 1.2, "learning_rate": 0.0003, "loss": 1.6394, "step": 300 }, { "epoch": 1.6, "learning_rate": 0.0003, "loss": 1.5315, "step": 400 }, { "epoch": 2.0, "learning_rate": 0.0003, "loss": 1.5248, "step": 500 }, { "epoch": 2.0, "eval_accuracy": 0.6085063291139241, "eval_loss": 1.8273799419403076, "eval_runtime": 4.904, "eval_samples_per_second": 101.957, "eval_steps_per_second": 12.847, "step": 500 }, { "epoch": 2.4, "learning_rate": 0.0003, "loss": 1.1692, "step": 600 }, { "epoch": 2.8, "learning_rate": 0.0003, "loss": 1.2054, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.6027088607594937, "eval_loss": 1.9717934131622314, "eval_runtime": 4.6675, "eval_samples_per_second": 107.124, "eval_steps_per_second": 13.498, "step": 750 }, { "epoch": 3.2, "learning_rate": 0.0003, "loss": 1.0252, "step": 800 }, { "epoch": 3.6, "learning_rate": 0.0003, "loss": 0.8608, "step": 900 }, { "epoch": 4.0, "learning_rate": 0.0003, "loss": 0.8989, "step": 1000 }, { "epoch": 4.0, "eval_accuracy": 0.5986835443037974, "eval_loss": 2.151914596557617, "eval_runtime": 5.4432, "eval_samples_per_second": 91.857, "eval_steps_per_second": 11.574, "step": 1000 }, { "epoch": 4.4, "learning_rate": 0.0003, "loss": 0.5842, "step": 1100 }, { "epoch": 4.8, "learning_rate": 0.0003, "loss": 0.6306, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.5960506329113924, "eval_loss": 2.329284191131592, "eval_runtime": 4.4146, "eval_samples_per_second": 113.261, "eval_steps_per_second": 14.271, "step": 1250 }, { "epoch": 5.2, "learning_rate": 0.0003, "loss": 0.5368, "step": 1300 }, { "epoch": 5.6, "learning_rate": 0.0003, "loss": 0.4495, "step": 1400 }, { "epoch": 6.0, "learning_rate": 0.0003, "loss": 0.4712, "step": 1500 }, { "epoch": 6.0, "eval_accuracy": 0.5935696202531645, "eval_loss": 2.5598793029785156, "eval_runtime": 4.5669, "eval_samples_per_second": 109.484, "eval_steps_per_second": 13.795, "step": 1500 }, { "epoch": 6.4, "learning_rate": 0.0003, "loss": 0.3625, "step": 1600 }, { "epoch": 6.8, "learning_rate": 0.0003, "loss": 0.3797, "step": 1700 }, { "epoch": 7.0, "eval_accuracy": 0.5935696202531645, "eval_loss": 2.732860803604126, "eval_runtime": 4.5141, "eval_samples_per_second": 110.763, "eval_steps_per_second": 13.956, "step": 1750 }, { "epoch": 7.2, "learning_rate": 0.0003, "loss": 0.3582, "step": 1800 }, { "epoch": 7.6, "learning_rate": 0.0003, "loss": 0.3399, "step": 1900 }, { "epoch": 8.0, "learning_rate": 0.0003, "loss": 0.3527, "step": 2000 }, { "epoch": 8.0, "eval_accuracy": 0.5912911392405064, "eval_loss": 2.8185083866119385, "eval_runtime": 4.7158, "eval_samples_per_second": 106.026, "eval_steps_per_second": 13.359, "step": 2000 }, { "epoch": 8.4, "learning_rate": 0.0003, "loss": 0.3202, "step": 2100 }, { "epoch": 8.8, "learning_rate": 0.0003, "loss": 0.3314, "step": 2200 }, { "epoch": 9.0, "eval_accuracy": 0.592, "eval_loss": 2.824962854385376, "eval_runtime": 4.8288, "eval_samples_per_second": 103.546, "eval_steps_per_second": 13.047, "step": 2250 }, { "epoch": 9.2, "learning_rate": 0.0003, "loss": 0.3174, "step": 2300 }, { "epoch": 9.6, "learning_rate": 0.0003, "loss": 0.3157, "step": 2400 }, { "epoch": 10.0, "learning_rate": 0.0003, "loss": 0.3265, "step": 2500 }, { "epoch": 10.0, "eval_accuracy": 0.5911392405063292, "eval_loss": 2.9242382049560547, "eval_runtime": 4.7138, "eval_samples_per_second": 106.072, "eval_steps_per_second": 13.365, "step": 2500 }, { "epoch": 10.4, "learning_rate": 0.0003, "loss": 0.2989, "step": 2600 }, { "epoch": 10.8, "learning_rate": 0.0003, "loss": 0.3148, "step": 2700 }, { "epoch": 11.0, "eval_accuracy": 0.591240506329114, "eval_loss": 3.0012593269348145, "eval_runtime": 5.1318, "eval_samples_per_second": 97.431, "eval_steps_per_second": 12.276, "step": 2750 }, { "epoch": 11.2, "learning_rate": 0.0003, "loss": 0.3028, "step": 2800 }, { "epoch": 11.6, "learning_rate": 0.0003, "loss": 0.3047, "step": 2900 }, { "epoch": 12.0, "learning_rate": 0.0003, "loss": 0.3184, "step": 3000 }, { "epoch": 12.0, "eval_accuracy": 0.590632911392405, "eval_loss": 2.931525707244873, "eval_runtime": 4.7893, "eval_samples_per_second": 104.399, "eval_steps_per_second": 13.154, "step": 3000 }, { "epoch": 12.4, "learning_rate": 0.0003, "loss": 0.3, "step": 3100 }, { "epoch": 12.8, "learning_rate": 0.0003, "loss": 0.3101, "step": 3200 }, { "epoch": 13.0, "eval_accuracy": 0.5896962025316456, "eval_loss": 2.9116382598876953, "eval_runtime": 4.5748, "eval_samples_per_second": 109.295, "eval_steps_per_second": 13.771, "step": 3250 }, { "epoch": 13.2, "learning_rate": 0.0003, "loss": 0.3063, "step": 3300 }, { "epoch": 13.6, "learning_rate": 0.0003, "loss": 0.3041, "step": 3400 }, { "epoch": 14.0, "learning_rate": 0.0003, "loss": 0.3164, "step": 3500 }, { "epoch": 14.0, "eval_accuracy": 0.5902025316455696, "eval_loss": 2.920793056488037, "eval_runtime": 4.718, "eval_samples_per_second": 105.977, "eval_steps_per_second": 13.353, "step": 3500 }, { "epoch": 14.4, "learning_rate": 0.0003, "loss": 0.2957, "step": 3600 }, { "epoch": 14.8, "learning_rate": 0.0003, "loss": 0.3074, "step": 3700 }, { "epoch": 15.0, "eval_accuracy": 0.5908607594936709, "eval_loss": 2.9385440349578857, "eval_runtime": 5.108, "eval_samples_per_second": 97.887, "eval_steps_per_second": 12.334, "step": 3750 }, { "epoch": 15.2, "learning_rate": 0.0003, "loss": 0.3013, "step": 3800 }, { "epoch": 15.6, "learning_rate": 0.0003, "loss": 0.3002, "step": 3900 }, { "epoch": 16.0, "learning_rate": 0.0003, "loss": 0.3107, "step": 4000 }, { "epoch": 16.0, "eval_accuracy": 0.5891645569620253, "eval_loss": 2.9519243240356445, "eval_runtime": 4.8208, "eval_samples_per_second": 103.717, "eval_steps_per_second": 13.068, "step": 4000 }, { "epoch": 16.4, "learning_rate": 0.0003, "loss": 0.2892, "step": 4100 }, { "epoch": 16.8, "learning_rate": 0.0003, "loss": 0.3054, "step": 4200 }, { "epoch": 17.0, "eval_accuracy": 0.5898227848101266, "eval_loss": 3.010847568511963, "eval_runtime": 5.1511, "eval_samples_per_second": 97.066, "eval_steps_per_second": 12.23, "step": 4250 }, { "epoch": 17.2, "learning_rate": 0.0003, "loss": 0.2959, "step": 4300 }, { "epoch": 17.6, "learning_rate": 0.0003, "loss": 0.297, "step": 4400 }, { "epoch": 18.0, "learning_rate": 0.0003, "loss": 0.309, "step": 4500 }, { "epoch": 18.0, "eval_accuracy": 0.5903544303797469, "eval_loss": 3.003683567047119, "eval_runtime": 4.8904, "eval_samples_per_second": 102.241, "eval_steps_per_second": 12.882, "step": 4500 }, { "epoch": 18.4, "learning_rate": 0.0003, "loss": 0.2883, "step": 4600 }, { "epoch": 18.8, "learning_rate": 0.0003, "loss": 0.3005, "step": 4700 }, { "epoch": 19.0, "eval_accuracy": 0.5898481012658228, "eval_loss": 3.0279438495635986, "eval_runtime": 5.1333, "eval_samples_per_second": 97.403, "eval_steps_per_second": 12.273, "step": 4750 }, { "epoch": 19.2, "learning_rate": 0.0003, "loss": 0.2959, "step": 4800 }, { "epoch": 19.6, "learning_rate": 0.0003, "loss": 0.2911, "step": 4900 }, { "epoch": 20.0, "learning_rate": 0.0003, "loss": 0.3127, "step": 5000 }, { "epoch": 20.0, "eval_accuracy": 0.5883291139240506, "eval_loss": 2.9650285243988037, "eval_runtime": 4.8904, "eval_samples_per_second": 102.242, "eval_steps_per_second": 12.882, "step": 5000 }, { "epoch": 20.0, "step": 5000, "total_flos": 3.1967425075347456e+17, "train_loss": 0.5438678237915039, "train_runtime": 3497.065, "train_samples_per_second": 45.753, "train_steps_per_second": 1.43 } ], "logging_steps": 100, "max_steps": 5000, "num_train_epochs": 20, "save_steps": 500, "total_flos": 3.1967425075347456e+17, "trial_name": null, "trial_params": null }