{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 10, "global_step": 389, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 2.4501470088205292e-05, "loss": 1.0383, "step": 10 }, { "epoch": 0.03, "eval_loss": 0.9290542602539062, "eval_runtime": 77.6515, "eval_samples_per_second": 0.245, "eval_steps_per_second": 0.039, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.4001440086405184e-05, "loss": 0.9448, "step": 20 }, { "epoch": 0.05, "eval_loss": 0.9216130971908569, "eval_runtime": 75.2278, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 20 }, { "epoch": 0.08, "learning_rate": 2.350141008460508e-05, "loss": 0.942, "step": 30 }, { "epoch": 0.08, "eval_loss": 0.9170966148376465, "eval_runtime": 75.2167, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 30 }, { "epoch": 0.1, "learning_rate": 2.300138008280497e-05, "loss": 0.9305, "step": 40 }, { "epoch": 0.1, "eval_loss": 0.9177520871162415, "eval_runtime": 75.2861, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 40 }, { "epoch": 0.13, "learning_rate": 2.250135008100486e-05, "loss": 0.9301, "step": 50 }, { "epoch": 0.13, "eval_loss": 0.9236036539077759, "eval_runtime": 75.3569, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 50 }, { "epoch": 0.15, "learning_rate": 2.200132007920475e-05, "loss": 0.9412, "step": 60 }, { "epoch": 0.15, "eval_loss": 0.9281936287879944, "eval_runtime": 75.2776, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 60 }, { "epoch": 0.18, "learning_rate": 2.1501290077404645e-05, "loss": 0.9534, "step": 70 }, { "epoch": 0.18, "eval_loss": 0.931694507598877, "eval_runtime": 75.4284, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 70 }, { "epoch": 0.21, "learning_rate": 2.1001260075604537e-05, "loss": 1.0193, "step": 80 }, { "epoch": 0.21, "eval_loss": 0.9383592009544373, "eval_runtime": 75.2446, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 80 }, { "epoch": 0.23, "learning_rate": 2.050123007380443e-05, "loss": 0.979, "step": 90 }, { "epoch": 0.23, "eval_loss": 0.9449018239974976, "eval_runtime": 75.2111, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 90 }, { "epoch": 0.26, "learning_rate": 2.000120007200432e-05, "loss": 1.0321, "step": 100 }, { "epoch": 0.26, "eval_loss": 0.9472957253456116, "eval_runtime": 75.1836, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 100 }, { "epoch": 0.28, "learning_rate": 1.9501170070204212e-05, "loss": 0.997, "step": 110 }, { "epoch": 0.28, "eval_loss": 0.9475318789482117, "eval_runtime": 75.3079, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 110 }, { "epoch": 0.31, "learning_rate": 1.9001140068404107e-05, "loss": 0.9737, "step": 120 }, { "epoch": 0.31, "eval_loss": 0.9536099433898926, "eval_runtime": 75.3453, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 120 }, { "epoch": 0.33, "learning_rate": 1.8501110066603995e-05, "loss": 1.0492, "step": 130 }, { "epoch": 0.33, "eval_loss": 0.9576209187507629, "eval_runtime": 75.2609, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 130 }, { "epoch": 0.36, "learning_rate": 1.8001080064803887e-05, "loss": 0.9903, "step": 140 }, { "epoch": 0.36, "eval_loss": 0.9594760537147522, "eval_runtime": 75.3588, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 140 }, { "epoch": 0.39, "learning_rate": 1.750105006300378e-05, "loss": 1.0525, "step": 150 }, { "epoch": 0.39, "eval_loss": 0.9673194885253906, "eval_runtime": 75.2799, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 150 }, { "epoch": 0.41, "learning_rate": 1.700102006120367e-05, "loss": 0.9806, "step": 160 }, { "epoch": 0.41, "eval_loss": 0.9813961386680603, "eval_runtime": 75.2782, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 160 }, { "epoch": 0.44, "learning_rate": 1.6500990059403565e-05, "loss": 1.0374, "step": 170 }, { "epoch": 0.44, "eval_loss": 0.9768068194389343, "eval_runtime": 75.2881, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 170 }, { "epoch": 0.46, "learning_rate": 1.6000960057603457e-05, "loss": 0.9967, "step": 180 }, { "epoch": 0.46, "eval_loss": 0.991325855255127, "eval_runtime": 75.311, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 180 }, { "epoch": 0.49, "learning_rate": 1.550093005580335e-05, "loss": 1.047, "step": 190 }, { "epoch": 0.49, "eval_loss": 1.0070654153823853, "eval_runtime": 75.2469, "eval_samples_per_second": 0.253, "eval_steps_per_second": 0.04, "step": 190 }, { "epoch": 0.51, "learning_rate": 1.5000900054003238e-05, "loss": 1.0366, "step": 200 }, { "epoch": 0.51, "eval_loss": 1.0217609405517578, "eval_runtime": 75.3156, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 200 }, { "epoch": 0.54, "learning_rate": 1.4500870052203133e-05, "loss": 0.9782, "step": 210 }, { "epoch": 0.54, "eval_loss": 1.0349949598312378, "eval_runtime": 75.3478, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 210 }, { "epoch": 0.57, "learning_rate": 1.4000840050403025e-05, "loss": 1.1381, "step": 220 }, { "epoch": 0.57, "eval_loss": 1.05660080909729, "eval_runtime": 75.403, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 220 }, { "epoch": 0.59, "learning_rate": 1.3500810048602917e-05, "loss": 1.1266, "step": 230 }, { "epoch": 0.59, "eval_loss": 1.0543776750564575, "eval_runtime": 75.3395, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 230 }, { "epoch": 0.62, "learning_rate": 1.3000780046802807e-05, "loss": 1.1131, "step": 240 }, { "epoch": 0.62, "eval_loss": 1.0707024335861206, "eval_runtime": 75.2946, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 240 }, { "epoch": 0.64, "learning_rate": 1.2500750045002698e-05, "loss": 1.0625, "step": 250 }, { "epoch": 0.64, "eval_loss": 1.0837984085083008, "eval_runtime": 75.3303, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 250 }, { "epoch": 0.67, "learning_rate": 1.2000720043202592e-05, "loss": 1.0762, "step": 260 }, { "epoch": 0.67, "eval_loss": 1.0885578393936157, "eval_runtime": 75.2812, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 260 }, { "epoch": 0.69, "learning_rate": 1.1500690041402485e-05, "loss": 1.1995, "step": 270 }, { "epoch": 0.69, "eval_loss": 1.1089729070663452, "eval_runtime": 75.3209, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 270 }, { "epoch": 0.72, "learning_rate": 1.1000660039602375e-05, "loss": 1.115, "step": 280 }, { "epoch": 0.72, "eval_loss": 1.1146286725997925, "eval_runtime": 75.3791, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 280 }, { "epoch": 0.75, "learning_rate": 1.0500630037802268e-05, "loss": 0.9881, "step": 290 }, { "epoch": 0.75, "eval_loss": 1.1138123273849487, "eval_runtime": 75.3832, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 290 }, { "epoch": 0.77, "learning_rate": 1.000060003600216e-05, "loss": 1.2121, "step": 300 }, { "epoch": 0.77, "eval_loss": 1.1398226022720337, "eval_runtime": 75.307, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 300 }, { "epoch": 0.8, "learning_rate": 9.500570034202053e-06, "loss": 1.1354, "step": 310 }, { "epoch": 0.8, "eval_loss": 1.1480103731155396, "eval_runtime": 75.3962, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 310 }, { "epoch": 0.82, "learning_rate": 9.000540032401943e-06, "loss": 1.1815, "step": 320 }, { "epoch": 0.82, "eval_loss": 1.1434487104415894, "eval_runtime": 75.3053, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 320 }, { "epoch": 0.85, "learning_rate": 8.500510030601835e-06, "loss": 1.3204, "step": 330 }, { "epoch": 0.85, "eval_loss": 1.142481803894043, "eval_runtime": 75.2776, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 330 }, { "epoch": 0.87, "learning_rate": 8.000480028801728e-06, "loss": 1.2111, "step": 340 }, { "epoch": 0.87, "eval_loss": 1.1574183702468872, "eval_runtime": 75.3414, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 340 }, { "epoch": 0.9, "learning_rate": 7.500450027001619e-06, "loss": 1.1378, "step": 350 }, { "epoch": 0.9, "eval_loss": 1.1606627702713013, "eval_runtime": 75.3801, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 350 }, { "epoch": 0.93, "learning_rate": 7.0004200252015126e-06, "loss": 1.2455, "step": 360 }, { "epoch": 0.93, "eval_loss": 1.1685103178024292, "eval_runtime": 75.2747, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 360 }, { "epoch": 0.95, "learning_rate": 6.500390023401403e-06, "loss": 1.2436, "step": 370 }, { "epoch": 0.95, "eval_loss": 1.179547667503357, "eval_runtime": 75.3915, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 370 }, { "epoch": 0.98, "learning_rate": 6.000360021601296e-06, "loss": 1.2619, "step": 380 }, { "epoch": 0.98, "eval_loss": 1.1883246898651123, "eval_runtime": 75.3375, "eval_samples_per_second": 0.252, "eval_steps_per_second": 0.04, "step": 380 } ], "logging_steps": 10, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.9179178565632e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }