{ "best_metric": 1.1990991830825806, "best_model_checkpoint": "./outputs/checkpoint-4100", "epoch": 2.987249544626594, "eval_steps": 100, "global_step": 4100, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.182, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0125839710235596, "eval_runtime": 144.1418, "eval_samples_per_second": 43.527, "eval_steps_per_second": 5.446, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.9626, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9195271730422974, "eval_runtime": 144.2253, "eval_samples_per_second": 43.501, "eval_steps_per_second": 5.443, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.8881, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.858821988105774, "eval_runtime": 144.0313, "eval_samples_per_second": 43.56, "eval_steps_per_second": 5.45, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8367, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.8125503063201904, "eval_runtime": 143.9861, "eval_samples_per_second": 43.574, "eval_steps_per_second": 5.452, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.785, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.7720445394515991, "eval_runtime": 144.0061, "eval_samples_per_second": 43.568, "eval_steps_per_second": 5.451, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.7562, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7424511909484863, "eval_runtime": 144.0245, "eval_samples_per_second": 43.562, "eval_steps_per_second": 5.45, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7176, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7112771272659302, "eval_runtime": 144.2204, "eval_samples_per_second": 43.503, "eval_steps_per_second": 5.443, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.6969, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.6823335886001587, "eval_runtime": 144.0134, "eval_samples_per_second": 43.565, "eval_steps_per_second": 5.451, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.6619, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6567240953445435, "eval_runtime": 144.0655, "eval_samples_per_second": 43.55, "eval_steps_per_second": 5.449, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6469, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6352009773254395, "eval_runtime": 144.0664, "eval_samples_per_second": 43.549, "eval_steps_per_second": 5.449, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6461, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.6240431070327759, "eval_runtime": 293.5989, "eval_samples_per_second": 21.369, "eval_steps_per_second": 2.674, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.5992, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.5974311828613281, "eval_runtime": 291.7, "eval_samples_per_second": 21.508, "eval_steps_per_second": 2.691, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.6021, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.5751127004623413, "eval_runtime": 289.9524, "eval_samples_per_second": 21.638, "eval_steps_per_second": 2.707, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5538, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.5539450645446777, "eval_runtime": 287.8748, "eval_samples_per_second": 21.794, "eval_steps_per_second": 2.727, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.5249, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5348094701766968, "eval_runtime": 287.891, "eval_samples_per_second": 21.793, "eval_steps_per_second": 2.727, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.506, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.515953540802002, "eval_runtime": 289.836, "eval_samples_per_second": 21.647, "eval_steps_per_second": 2.708, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5042, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.4988901615142822, "eval_runtime": 291.5471, "eval_samples_per_second": 21.52, "eval_steps_per_second": 2.693, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.4762, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.4844294786453247, "eval_runtime": 293.6668, "eval_samples_per_second": 21.364, "eval_steps_per_second": 2.673, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4652, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.4694663286209106, "eval_runtime": 295.5867, "eval_samples_per_second": 21.226, "eval_steps_per_second": 2.656, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4481, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.4534634351730347, "eval_runtime": 296.7451, "eval_samples_per_second": 21.143, "eval_steps_per_second": 2.645, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4335, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.4383305311203003, "eval_runtime": 294.659, "eval_samples_per_second": 21.292, "eval_steps_per_second": 2.664, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4075, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4232139587402344, "eval_runtime": 292.737, "eval_samples_per_second": 21.432, "eval_steps_per_second": 2.682, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4059, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.411597490310669, "eval_runtime": 290.6759, "eval_samples_per_second": 21.584, "eval_steps_per_second": 2.701, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4055, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.3974188566207886, "eval_runtime": 288.6807, "eval_samples_per_second": 21.733, "eval_steps_per_second": 2.719, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3772, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.384261965751648, "eval_runtime": 287.1594, "eval_samples_per_second": 21.848, "eval_steps_per_second": 2.734, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3669, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3715009689331055, "eval_runtime": 288.6305, "eval_samples_per_second": 21.737, "eval_steps_per_second": 2.72, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3548, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3575737476348877, "eval_runtime": 290.5389, "eval_samples_per_second": 21.594, "eval_steps_per_second": 2.702, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.3266, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3451271057128906, "eval_runtime": 292.4987, "eval_samples_per_second": 21.45, "eval_steps_per_second": 2.684, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3004, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.333436369895935, "eval_runtime": 294.3881, "eval_samples_per_second": 21.312, "eval_steps_per_second": 2.667, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.3009, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3215913772583008, "eval_runtime": 296.2165, "eval_samples_per_second": 21.18, "eval_steps_per_second": 2.65, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2822, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.311138391494751, "eval_runtime": 295.6634, "eval_samples_per_second": 21.22, "eval_steps_per_second": 2.655, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2846, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3013139963150024, "eval_runtime": 293.7639, "eval_samples_per_second": 21.357, "eval_steps_per_second": 2.672, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2674, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.2875950336456299, "eval_runtime": 291.8744, "eval_samples_per_second": 21.496, "eval_steps_per_second": 2.69, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2485, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.2778161764144897, "eval_runtime": 289.8138, "eval_samples_per_second": 21.648, "eval_steps_per_second": 2.709, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2499, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.2662204504013062, "eval_runtime": 288.0522, "eval_samples_per_second": 21.781, "eval_steps_per_second": 2.725, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2363, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.2541649341583252, "eval_runtime": 287.6646, "eval_samples_per_second": 21.81, "eval_steps_per_second": 2.729, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.22, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.2425366640090942, "eval_runtime": 289.5395, "eval_samples_per_second": 21.669, "eval_steps_per_second": 2.711, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.1977, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.2353510856628418, "eval_runtime": 291.5958, "eval_samples_per_second": 21.516, "eval_steps_per_second": 2.692, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.2087, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.2214804887771606, "eval_runtime": 293.3693, "eval_samples_per_second": 21.386, "eval_steps_per_second": 2.676, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.1912, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.2116272449493408, "eval_runtime": 295.3584, "eval_samples_per_second": 21.242, "eval_steps_per_second": 2.658, "step": 4000 }, { "epoch": 2.99, "learning_rate": 0.0002, "loss": 1.1882, "step": 4100 }, { "epoch": 2.99, "eval_loss": 1.1990991830825806, "eval_runtime": 133.6346, "eval_samples_per_second": 46.949, "eval_steps_per_second": 5.874, "step": 4100 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.444085005899776e+17, "trial_name": null, "trial_params": null }