{ "best_metric": 1.0691347122192383, "best_model_checkpoint": "./outputs/checkpoint-2100", "epoch": 2.9333333333333336, "eval_steps": 100, "global_step": 2200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13, "learning_rate": 0.0002, "loss": 1.2907, "step": 100 }, { "epoch": 0.13, "eval_loss": 1.179780125617981, "eval_runtime": 187.6093, "eval_samples_per_second": 10.287, "eval_steps_per_second": 1.29, "step": 100 }, { "epoch": 0.27, "learning_rate": 0.0002, "loss": 1.0103, "step": 200 }, { "epoch": 0.27, "eval_loss": 1.1445424556732178, "eval_runtime": 243.5241, "eval_samples_per_second": 7.925, "eval_steps_per_second": 0.994, "step": 200 }, { "epoch": 0.4, "learning_rate": 0.0002, "loss": 0.9907, "step": 300 }, { "epoch": 0.4, "eval_loss": 1.1306055784225464, "eval_runtime": 187.9568, "eval_samples_per_second": 10.268, "eval_steps_per_second": 1.288, "step": 300 }, { "epoch": 0.53, "learning_rate": 0.0002, "loss": 0.9792, "step": 400 }, { "epoch": 0.53, "eval_loss": 1.1213247776031494, "eval_runtime": 186.5858, "eval_samples_per_second": 10.344, "eval_steps_per_second": 1.297, "step": 400 }, { "epoch": 0.67, "learning_rate": 0.0002, "loss": 0.9652, "step": 500 }, { "epoch": 0.67, "eval_loss": 1.1149897575378418, "eval_runtime": 186.6184, "eval_samples_per_second": 10.342, "eval_steps_per_second": 1.297, "step": 500 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 0.967, "step": 600 }, { "epoch": 0.8, "eval_loss": 1.1105140447616577, "eval_runtime": 186.7963, "eval_samples_per_second": 10.332, "eval_steps_per_second": 1.296, "step": 600 }, { "epoch": 0.93, "learning_rate": 0.0002, "loss": 0.9632, "step": 700 }, { "epoch": 0.93, "eval_loss": 1.1065897941589355, "eval_runtime": 186.8426, "eval_samples_per_second": 10.33, "eval_steps_per_second": 1.295, "step": 700 }, { "epoch": 1.07, "learning_rate": 0.0002, "loss": 0.9527, "step": 800 }, { "epoch": 1.07, "eval_loss": 1.1026440858840942, "eval_runtime": 186.6422, "eval_samples_per_second": 10.341, "eval_steps_per_second": 1.297, "step": 800 }, { "epoch": 1.2, "learning_rate": 0.0002, "loss": 0.9512, "step": 900 }, { "epoch": 1.2, "eval_loss": 1.0985503196716309, "eval_runtime": 187.0748, "eval_samples_per_second": 10.317, "eval_steps_per_second": 1.294, "step": 900 }, { "epoch": 1.33, "learning_rate": 0.0002, "loss": 0.9484, "step": 1000 }, { "epoch": 1.33, "eval_loss": 1.097111463546753, "eval_runtime": 186.7842, "eval_samples_per_second": 10.333, "eval_steps_per_second": 1.296, "step": 1000 }, { "epoch": 1.47, "learning_rate": 0.0002, "loss": 0.9441, "step": 1100 }, { "epoch": 1.47, "eval_loss": 1.0937378406524658, "eval_runtime": 187.1868, "eval_samples_per_second": 10.311, "eval_steps_per_second": 1.293, "step": 1100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 0.937, "step": 1200 }, { "epoch": 1.6, "eval_loss": 1.0900678634643555, "eval_runtime": 187.1061, "eval_samples_per_second": 10.315, "eval_steps_per_second": 1.293, "step": 1200 }, { "epoch": 1.73, "learning_rate": 0.0002, "loss": 0.9412, "step": 1300 }, { "epoch": 1.73, "eval_loss": 1.0862796306610107, "eval_runtime": 186.0905, "eval_samples_per_second": 10.371, "eval_steps_per_second": 1.3, "step": 1300 }, { "epoch": 1.87, "learning_rate": 0.0002, "loss": 0.9422, "step": 1400 }, { "epoch": 1.87, "eval_loss": 1.0846831798553467, "eval_runtime": 186.1843, "eval_samples_per_second": 10.366, "eval_steps_per_second": 1.3, "step": 1400 }, { "epoch": 2.0, "learning_rate": 0.0002, "loss": 0.9352, "step": 1500 }, { "epoch": 2.0, "eval_loss": 1.084228515625, "eval_runtime": 187.2013, "eval_samples_per_second": 10.31, "eval_steps_per_second": 1.293, "step": 1500 }, { "epoch": 2.13, "learning_rate": 0.0002, "loss": 0.9328, "step": 1600 }, { "epoch": 2.13, "eval_loss": 1.082168459892273, "eval_runtime": 186.7075, "eval_samples_per_second": 10.337, "eval_steps_per_second": 1.296, "step": 1600 }, { "epoch": 2.27, "learning_rate": 0.0002, "loss": 0.9306, "step": 1700 }, { "epoch": 2.27, "eval_loss": 1.0760979652404785, "eval_runtime": 186.6832, "eval_samples_per_second": 10.338, "eval_steps_per_second": 1.296, "step": 1700 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 0.926, "step": 1800 }, { "epoch": 2.4, "eval_loss": 1.0768243074417114, "eval_runtime": 186.9887, "eval_samples_per_second": 10.321, "eval_steps_per_second": 1.294, "step": 1800 }, { "epoch": 2.53, "learning_rate": 0.0002, "loss": 0.9209, "step": 1900 }, { "epoch": 2.53, "eval_loss": 1.07276451587677, "eval_runtime": 186.5526, "eval_samples_per_second": 10.346, "eval_steps_per_second": 1.297, "step": 1900 }, { "epoch": 2.67, "learning_rate": 0.0002, "loss": 0.9309, "step": 2000 }, { "epoch": 2.67, "eval_loss": 1.0723668336868286, "eval_runtime": 187.3011, "eval_samples_per_second": 10.304, "eval_steps_per_second": 1.292, "step": 2000 }, { "epoch": 2.8, "learning_rate": 0.0002, "loss": 0.9257, "step": 2100 }, { "epoch": 2.8, "eval_loss": 1.0691347122192383, "eval_runtime": 253.1645, "eval_samples_per_second": 7.624, "eval_steps_per_second": 0.956, "step": 2100 }, { "epoch": 2.93, "learning_rate": 0.0002, "loss": 0.9177, "step": 2200 }, { "epoch": 2.93, "eval_loss": 1.0692988634109497, "eval_runtime": 258.072, "eval_samples_per_second": 7.479, "eval_steps_per_second": 0.938, "step": 2200 } ], "logging_steps": 100, "max_steps": 2250, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 3.110307163440169e+17, "trial_name": null, "trial_params": null }