{ "best_metric": 1.6601147651672363, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloominstinwild-belle1.5m/checkpoint-1000", "epoch": 0.6428801028608164, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.9999999999999995e-05, "loss": 2.3902, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00011999999999999999, "loss": 2.1808, "step": 40 }, { "epoch": 0.04, "learning_rate": 0.00017999999999999998, "loss": 1.9937, "step": 60 }, { "epoch": 0.05, "learning_rate": 0.00023999999999999998, "loss": 1.8967, "step": 80 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 1.8529, "step": 100 }, { "epoch": 0.08, "learning_rate": 0.0002958762886597938, "loss": 1.8004, "step": 120 }, { "epoch": 0.09, "learning_rate": 0.0002917525773195876, "loss": 1.785, "step": 140 }, { "epoch": 0.1, "learning_rate": 0.0002876288659793814, "loss": 1.7755, "step": 160 }, { "epoch": 0.12, "learning_rate": 0.00028350515463917525, "loss": 1.7554, "step": 180 }, { "epoch": 0.13, "learning_rate": 0.0002793814432989691, "loss": 1.7534, "step": 200 }, { "epoch": 0.13, "eval_loss": 1.7528996467590332, "eval_runtime": 17.6175, "eval_samples_per_second": 113.524, "eval_steps_per_second": 1.816, "step": 200 }, { "epoch": 0.14, "learning_rate": 0.00027525773195876286, "loss": 1.7375, "step": 220 }, { "epoch": 0.15, "learning_rate": 0.0002711340206185567, "loss": 1.7331, "step": 240 }, { "epoch": 0.17, "learning_rate": 0.00026701030927835047, "loss": 1.7248, "step": 260 }, { "epoch": 0.18, "learning_rate": 0.0002628865979381443, "loss": 1.7282, "step": 280 }, { "epoch": 0.19, "learning_rate": 0.00025876288659793813, "loss": 1.7167, "step": 300 }, { "epoch": 0.21, "learning_rate": 0.0002546391752577319, "loss": 1.7215, "step": 320 }, { "epoch": 0.22, "learning_rate": 0.00025051546391752574, "loss": 1.7157, "step": 340 }, { "epoch": 0.23, "learning_rate": 0.0002463917525773196, "loss": 1.7146, "step": 360 }, { "epoch": 0.24, "learning_rate": 0.00024226804123711338, "loss": 1.6953, "step": 380 }, { "epoch": 0.26, "learning_rate": 0.00023814432989690718, "loss": 1.6951, "step": 400 }, { "epoch": 0.26, "eval_loss": 1.7059786319732666, "eval_runtime": 17.3801, "eval_samples_per_second": 115.074, "eval_steps_per_second": 1.841, "step": 400 }, { "epoch": 0.27, "learning_rate": 0.00023402061855670102, "loss": 1.6916, "step": 420 }, { "epoch": 0.28, "learning_rate": 0.00022989690721649485, "loss": 1.6893, "step": 440 }, { "epoch": 0.3, "learning_rate": 0.00022577319587628863, "loss": 1.6932, "step": 460 }, { "epoch": 0.31, "learning_rate": 0.00022164948453608246, "loss": 1.6913, "step": 480 }, { "epoch": 0.32, "learning_rate": 0.00021752577319587626, "loss": 1.6823, "step": 500 }, { "epoch": 0.33, "learning_rate": 0.0002134020618556701, "loss": 1.6921, "step": 520 }, { "epoch": 0.35, "learning_rate": 0.0002092783505154639, "loss": 1.6759, "step": 540 }, { "epoch": 0.36, "learning_rate": 0.0002051546391752577, "loss": 1.6758, "step": 560 }, { "epoch": 0.37, "learning_rate": 0.0002010309278350515, "loss": 1.6732, "step": 580 }, { "epoch": 0.39, "learning_rate": 0.00019690721649484534, "loss": 1.673, "step": 600 }, { "epoch": 0.39, "eval_loss": 1.6831790208816528, "eval_runtime": 17.4032, "eval_samples_per_second": 114.921, "eval_steps_per_second": 1.839, "step": 600 }, { "epoch": 0.4, "learning_rate": 0.00019278350515463918, "loss": 1.6625, "step": 620 }, { "epoch": 0.41, "learning_rate": 0.00018865979381443298, "loss": 1.666, "step": 640 }, { "epoch": 0.42, "learning_rate": 0.00018453608247422679, "loss": 1.6662, "step": 660 }, { "epoch": 0.44, "learning_rate": 0.0001804123711340206, "loss": 1.6692, "step": 680 }, { "epoch": 0.45, "learning_rate": 0.00017628865979381442, "loss": 1.6635, "step": 700 }, { "epoch": 0.46, "learning_rate": 0.00017216494845360823, "loss": 1.6685, "step": 720 }, { "epoch": 0.48, "learning_rate": 0.00016804123711340206, "loss": 1.6534, "step": 740 }, { "epoch": 0.49, "learning_rate": 0.00016391752577319584, "loss": 1.6627, "step": 760 }, { "epoch": 0.5, "learning_rate": 0.00015979381443298967, "loss": 1.6596, "step": 780 }, { "epoch": 0.51, "learning_rate": 0.0001556701030927835, "loss": 1.6659, "step": 800 }, { "epoch": 0.51, "eval_loss": 1.6697583198547363, "eval_runtime": 17.3944, "eval_samples_per_second": 114.979, "eval_steps_per_second": 1.84, "step": 800 }, { "epoch": 0.53, "learning_rate": 0.0001515463917525773, "loss": 1.6599, "step": 820 }, { "epoch": 0.54, "learning_rate": 0.0001474226804123711, "loss": 1.6587, "step": 840 }, { "epoch": 0.55, "learning_rate": 0.00014329896907216494, "loss": 1.6586, "step": 860 }, { "epoch": 0.57, "learning_rate": 0.00013917525773195875, "loss": 1.6602, "step": 880 }, { "epoch": 0.58, "learning_rate": 0.00013505154639175258, "loss": 1.6513, "step": 900 }, { "epoch": 0.59, "learning_rate": 0.00013092783505154639, "loss": 1.6477, "step": 920 }, { "epoch": 0.6, "learning_rate": 0.0001268041237113402, "loss": 1.6455, "step": 940 }, { "epoch": 0.62, "learning_rate": 0.000122680412371134, "loss": 1.6543, "step": 960 }, { "epoch": 0.63, "learning_rate": 0.00011855670103092781, "loss": 1.6499, "step": 980 }, { "epoch": 0.64, "learning_rate": 0.00011443298969072163, "loss": 1.647, "step": 1000 }, { "epoch": 0.64, "eval_loss": 1.6601147651672363, "eval_runtime": 17.3892, "eval_samples_per_second": 115.014, "eval_steps_per_second": 1.84, "step": 1000 } ], "max_steps": 1555, "num_train_epochs": 1, "total_flos": 8.598717456522936e+18, "trial_name": null, "trial_params": null }