{ "best_metric": null, "best_model_checkpoint": null, "epoch": 14.013452914798206, "global_step": 50000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14, "learning_rate": 9.900000000000001e-05, "loss": 9.7624, "step": 500 }, { "epoch": 0.28, "learning_rate": 9.8e-05, "loss": 8.2156, "step": 1000 }, { "epoch": 0.42, "learning_rate": 9.7e-05, "loss": 7.5205, "step": 1500 }, { "epoch": 0.56, "learning_rate": 9.6e-05, "loss": 7.3362, "step": 2000 }, { "epoch": 0.7, "learning_rate": 9.5e-05, "loss": 7.1343, "step": 2500 }, { "epoch": 0.84, "learning_rate": 9.4e-05, "loss": 6.9313, "step": 3000 }, { "epoch": 0.98, "learning_rate": 9.300000000000001e-05, "loss": 6.7408, "step": 3500 }, { "epoch": 1.12, "learning_rate": 9.200000000000001e-05, "loss": 6.5858, "step": 4000 }, { "epoch": 1.26, "learning_rate": 9.1e-05, "loss": 6.4674, "step": 4500 }, { "epoch": 1.4, "learning_rate": 9e-05, "loss": 6.3797, "step": 5000 }, { "epoch": 1.54, "learning_rate": 8.900000000000001e-05, "loss": 6.3089, "step": 5500 }, { "epoch": 1.68, "learning_rate": 8.800000000000001e-05, "loss": 6.2535, "step": 6000 }, { "epoch": 1.82, "learning_rate": 8.7e-05, "loss": 6.2054, "step": 6500 }, { "epoch": 1.96, "learning_rate": 8.6e-05, "loss": 6.1582, "step": 7000 }, { "epoch": 2.1, "learning_rate": 8.5e-05, "loss": 6.1192, "step": 7500 }, { "epoch": 2.24, "learning_rate": 8.4e-05, "loss": 6.0872, "step": 8000 }, { "epoch": 2.38, "learning_rate": 8.3e-05, "loss": 6.0494, "step": 8500 }, { "epoch": 2.52, "learning_rate": 8.2e-05, "loss": 6.0233, "step": 9000 }, { "epoch": 2.66, "learning_rate": 8.1e-05, "loss": 5.993, "step": 9500 }, { "epoch": 2.8, "learning_rate": 8e-05, "loss": 5.9731, "step": 10000 }, { "epoch": 2.94, "learning_rate": 7.900000000000001e-05, "loss": 5.9495, "step": 10500 }, { "epoch": 3.08, "learning_rate": 7.800000000000001e-05, "loss": 5.9287, "step": 11000 }, { "epoch": 3.22, "learning_rate": 7.7e-05, "loss": 5.9077, "step": 11500 }, { "epoch": 3.36, "learning_rate": 7.6e-05, "loss": 5.8884, "step": 12000 }, { "epoch": 3.5, "learning_rate": 7.500000000000001e-05, "loss": 5.8712, "step": 12500 }, { "epoch": 3.64, "learning_rate": 7.4e-05, "loss": 5.8562, "step": 13000 }, { "epoch": 3.78, "learning_rate": 7.3e-05, "loss": 5.836, "step": 13500 }, { "epoch": 3.92, "learning_rate": 7.2e-05, "loss": 5.8282, "step": 14000 }, { "epoch": 4.06, "learning_rate": 7.1e-05, "loss": 5.8108, "step": 14500 }, { "epoch": 4.2, "learning_rate": 7e-05, "loss": 5.7997, "step": 15000 }, { "epoch": 4.34, "learning_rate": 6.9e-05, "loss": 5.7869, "step": 15500 }, { "epoch": 4.48, "learning_rate": 6.800000000000001e-05, "loss": 5.7755, "step": 16000 }, { "epoch": 4.62, "learning_rate": 6.7e-05, "loss": 5.7686, "step": 16500 }, { "epoch": 4.76, "learning_rate": 6.6e-05, "loss": 5.7561, "step": 17000 }, { "epoch": 4.9, "learning_rate": 6.500000000000001e-05, "loss": 5.7481, "step": 17500 }, { "epoch": 5.04, "learning_rate": 6.400000000000001e-05, "loss": 5.7397, "step": 18000 }, { "epoch": 5.18, "learning_rate": 6.3e-05, "loss": 5.7285, "step": 18500 }, { "epoch": 5.33, "learning_rate": 6.2e-05, "loss": 5.7194, "step": 19000 }, { "epoch": 5.47, "learning_rate": 6.1e-05, "loss": 5.7117, "step": 19500 }, { "epoch": 5.61, "learning_rate": 6e-05, "loss": 5.7066, "step": 20000 }, { "epoch": 5.75, "learning_rate": 5.9e-05, "loss": 5.6968, "step": 20500 }, { "epoch": 5.89, "learning_rate": 5.8e-05, "loss": 5.6889, "step": 21000 }, { "epoch": 6.03, "learning_rate": 5.6999999999999996e-05, "loss": 5.681, "step": 21500 }, { "epoch": 6.17, "learning_rate": 5.6000000000000006e-05, "loss": 5.6788, "step": 22000 }, { "epoch": 6.31, "learning_rate": 5.500000000000001e-05, "loss": 5.6668, "step": 22500 }, { "epoch": 6.45, "learning_rate": 5.4000000000000005e-05, "loss": 5.6649, "step": 23000 }, { "epoch": 6.59, "learning_rate": 5.300000000000001e-05, "loss": 5.6574, "step": 23500 }, { "epoch": 6.73, "learning_rate": 5.2000000000000004e-05, "loss": 5.6499, "step": 24000 }, { "epoch": 6.87, "learning_rate": 5.1000000000000006e-05, "loss": 5.6465, "step": 24500 }, { "epoch": 7.01, "learning_rate": 5e-05, "loss": 5.6419, "step": 25000 }, { "epoch": 7.15, "learning_rate": 4.9e-05, "loss": 5.6358, "step": 25500 }, { "epoch": 7.29, "learning_rate": 4.8e-05, "loss": 5.6312, "step": 26000 }, { "epoch": 7.43, "learning_rate": 4.7e-05, "loss": 5.6293, "step": 26500 }, { "epoch": 7.57, "learning_rate": 4.600000000000001e-05, "loss": 5.6218, "step": 27000 }, { "epoch": 7.71, "learning_rate": 4.5e-05, "loss": 5.618, "step": 27500 }, { "epoch": 7.85, "learning_rate": 4.4000000000000006e-05, "loss": 5.613, "step": 28000 }, { "epoch": 7.99, "learning_rate": 4.3e-05, "loss": 5.6101, "step": 28500 }, { "epoch": 8.13, "learning_rate": 4.2e-05, "loss": 5.6048, "step": 29000 }, { "epoch": 8.27, "learning_rate": 4.1e-05, "loss": 5.6, "step": 29500 }, { "epoch": 8.41, "learning_rate": 4e-05, "loss": 5.5976, "step": 30000 }, { "epoch": 8.55, "learning_rate": 3.9000000000000006e-05, "loss": 5.5936, "step": 30500 }, { "epoch": 8.69, "learning_rate": 3.8e-05, "loss": 5.5931, "step": 31000 }, { "epoch": 8.83, "learning_rate": 3.7e-05, "loss": 5.5898, "step": 31500 }, { "epoch": 8.97, "learning_rate": 3.6e-05, "loss": 5.5888, "step": 32000 }, { "epoch": 9.11, "learning_rate": 3.5e-05, "loss": 5.582, "step": 32500 }, { "epoch": 9.25, "learning_rate": 3.4000000000000007e-05, "loss": 5.5817, "step": 33000 }, { "epoch": 9.39, "learning_rate": 3.3e-05, "loss": 5.5726, "step": 33500 }, { "epoch": 9.53, "learning_rate": 3.2000000000000005e-05, "loss": 5.5778, "step": 34000 }, { "epoch": 9.67, "learning_rate": 3.1e-05, "loss": 5.5718, "step": 34500 }, { "epoch": 9.81, "learning_rate": 3e-05, "loss": 5.569, "step": 35000 }, { "epoch": 9.95, "learning_rate": 2.9e-05, "loss": 5.5703, "step": 35500 }, { "epoch": 10.09, "learning_rate": 2.8000000000000003e-05, "loss": 5.5668, "step": 36000 }, { "epoch": 10.23, "learning_rate": 2.7000000000000002e-05, "loss": 5.561, "step": 36500 }, { "epoch": 10.37, "learning_rate": 2.6000000000000002e-05, "loss": 5.5554, "step": 37000 }, { "epoch": 10.51, "learning_rate": 2.5e-05, "loss": 5.5621, "step": 37500 }, { "epoch": 10.65, "learning_rate": 2.4e-05, "loss": 5.558, "step": 38000 }, { "epoch": 10.79, "learning_rate": 2.3000000000000003e-05, "loss": 5.5557, "step": 38500 }, { "epoch": 10.93, "learning_rate": 2.2000000000000003e-05, "loss": 5.5572, "step": 39000 }, { "epoch": 11.07, "learning_rate": 2.1e-05, "loss": 5.5519, "step": 39500 }, { "epoch": 11.21, "learning_rate": 2e-05, "loss": 5.5502, "step": 40000 }, { "epoch": 11.35, "learning_rate": 1.9e-05, "loss": 5.5436, "step": 40500 }, { "epoch": 11.49, "learning_rate": 1.8e-05, "loss": 5.5482, "step": 41000 }, { "epoch": 11.63, "learning_rate": 1.7000000000000003e-05, "loss": 5.5524, "step": 41500 }, { "epoch": 11.77, "learning_rate": 1.6000000000000003e-05, "loss": 5.5449, "step": 42000 }, { "epoch": 11.91, "learning_rate": 1.5e-05, "loss": 5.5443, "step": 42500 }, { "epoch": 12.05, "learning_rate": 1.4000000000000001e-05, "loss": 5.5466, "step": 43000 }, { "epoch": 12.19, "learning_rate": 1.3000000000000001e-05, "loss": 5.5388, "step": 43500 }, { "epoch": 12.33, "learning_rate": 1.2e-05, "loss": 5.5397, "step": 44000 }, { "epoch": 12.47, "learning_rate": 1.1000000000000001e-05, "loss": 5.5385, "step": 44500 }, { "epoch": 12.61, "learning_rate": 1e-05, "loss": 5.5437, "step": 45000 }, { "epoch": 12.75, "learning_rate": 9e-06, "loss": 5.541, "step": 45500 }, { "epoch": 12.89, "learning_rate": 8.000000000000001e-06, "loss": 5.5395, "step": 46000 }, { "epoch": 13.03, "learning_rate": 7.000000000000001e-06, "loss": 5.5357, "step": 46500 }, { "epoch": 13.17, "learning_rate": 6e-06, "loss": 5.5351, "step": 47000 }, { "epoch": 13.31, "learning_rate": 5e-06, "loss": 5.5353, "step": 47500 }, { "epoch": 13.45, "learning_rate": 4.000000000000001e-06, "loss": 5.5371, "step": 48000 }, { "epoch": 13.59, "learning_rate": 3e-06, "loss": 5.5343, "step": 48500 }, { "epoch": 13.73, "learning_rate": 2.0000000000000003e-06, "loss": 5.5371, "step": 49000 }, { "epoch": 13.87, "learning_rate": 1.0000000000000002e-06, "loss": 5.538, "step": 49500 }, { "epoch": 14.01, "learning_rate": 0.0, "loss": 5.5386, "step": 50000 }, { "epoch": 14.01, "step": 50000, "total_flos": 336513375928320.0, "train_loss": 5.854185776367188, "train_runtime": 10672.3435, "train_samples_per_second": 149.92, "train_steps_per_second": 4.685 } ], "max_steps": 50000, "num_train_epochs": 15, "total_flos": 336513375928320.0, "trial_name": null, "trial_params": null }