{ "best_metric": 1.6501820087432861, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloominstinwild-belle1.5m/checkpoint-1400", "epoch": 0.9000321440051431, "global_step": 1400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.9999999999999995e-05, "loss": 2.3902, "step": 20 }, { "epoch": 0.03, "learning_rate": 0.00011999999999999999, "loss": 2.1808, "step": 40 }, { "epoch": 0.04, "learning_rate": 0.00017999999999999998, "loss": 1.9937, "step": 60 }, { "epoch": 0.05, "learning_rate": 0.00023999999999999998, "loss": 1.8967, "step": 80 }, { "epoch": 0.06, "learning_rate": 0.0003, "loss": 1.8529, "step": 100 }, { "epoch": 0.08, "learning_rate": 0.0002958762886597938, "loss": 1.8004, "step": 120 }, { "epoch": 0.09, "learning_rate": 0.0002917525773195876, "loss": 1.785, "step": 140 }, { "epoch": 0.1, "learning_rate": 0.0002876288659793814, "loss": 1.7755, "step": 160 }, { "epoch": 0.12, "learning_rate": 0.00028350515463917525, "loss": 1.7554, "step": 180 }, { "epoch": 0.13, "learning_rate": 0.0002793814432989691, "loss": 1.7534, "step": 200 }, { "epoch": 0.13, "eval_loss": 1.7528996467590332, "eval_runtime": 17.6175, "eval_samples_per_second": 113.524, "eval_steps_per_second": 1.816, "step": 200 }, { "epoch": 0.14, "learning_rate": 0.00027525773195876286, "loss": 1.7375, "step": 220 }, { "epoch": 0.15, "learning_rate": 0.0002711340206185567, "loss": 1.7331, "step": 240 }, { "epoch": 0.17, "learning_rate": 0.00026701030927835047, "loss": 1.7248, "step": 260 }, { "epoch": 0.18, "learning_rate": 0.0002628865979381443, "loss": 1.7282, "step": 280 }, { "epoch": 0.19, "learning_rate": 0.00025876288659793813, "loss": 1.7167, "step": 300 }, { "epoch": 0.21, "learning_rate": 0.0002546391752577319, "loss": 1.7215, "step": 320 }, { "epoch": 0.22, "learning_rate": 0.00025051546391752574, "loss": 1.7157, "step": 340 }, { "epoch": 0.23, "learning_rate": 0.0002463917525773196, "loss": 1.7146, "step": 360 }, { "epoch": 0.24, "learning_rate": 0.00024226804123711338, "loss": 1.6953, "step": 380 }, { "epoch": 0.26, "learning_rate": 0.00023814432989690718, "loss": 1.6951, "step": 400 }, { "epoch": 0.26, "eval_loss": 1.7059786319732666, "eval_runtime": 17.3801, "eval_samples_per_second": 115.074, "eval_steps_per_second": 1.841, "step": 400 }, { "epoch": 0.27, "learning_rate": 0.00023402061855670102, "loss": 1.6916, "step": 420 }, { "epoch": 0.28, "learning_rate": 0.00022989690721649485, "loss": 1.6893, "step": 440 }, { "epoch": 0.3, "learning_rate": 0.00022577319587628863, "loss": 1.6932, "step": 460 }, { "epoch": 0.31, "learning_rate": 0.00022164948453608246, "loss": 1.6913, "step": 480 }, { "epoch": 0.32, "learning_rate": 0.00021752577319587626, "loss": 1.6823, "step": 500 }, { "epoch": 0.33, "learning_rate": 0.0002134020618556701, "loss": 1.6921, "step": 520 }, { "epoch": 0.35, "learning_rate": 0.0002092783505154639, "loss": 1.6759, "step": 540 }, { "epoch": 0.36, "learning_rate": 0.0002051546391752577, "loss": 1.6758, "step": 560 }, { "epoch": 0.37, "learning_rate": 0.0002010309278350515, "loss": 1.6732, "step": 580 }, { "epoch": 0.39, "learning_rate": 0.00019690721649484534, "loss": 1.673, "step": 600 }, { "epoch": 0.39, "eval_loss": 1.6831790208816528, "eval_runtime": 17.4032, "eval_samples_per_second": 114.921, "eval_steps_per_second": 1.839, "step": 600 }, { "epoch": 0.4, "learning_rate": 0.00019278350515463918, "loss": 1.6625, "step": 620 }, { "epoch": 0.41, "learning_rate": 0.00018865979381443298, "loss": 1.666, "step": 640 }, { "epoch": 0.42, "learning_rate": 0.00018453608247422679, "loss": 1.6662, "step": 660 }, { "epoch": 0.44, "learning_rate": 0.0001804123711340206, "loss": 1.6692, "step": 680 }, { "epoch": 0.45, "learning_rate": 0.00017628865979381442, "loss": 1.6635, "step": 700 }, { "epoch": 0.46, "learning_rate": 0.00017216494845360823, "loss": 1.6685, "step": 720 }, { "epoch": 0.48, "learning_rate": 0.00016804123711340206, "loss": 1.6534, "step": 740 }, { "epoch": 0.49, "learning_rate": 0.00016391752577319584, "loss": 1.6627, "step": 760 }, { "epoch": 0.5, "learning_rate": 0.00015979381443298967, "loss": 1.6596, "step": 780 }, { "epoch": 0.51, "learning_rate": 0.0001556701030927835, "loss": 1.6659, "step": 800 }, { "epoch": 0.51, "eval_loss": 1.6697583198547363, "eval_runtime": 17.3944, "eval_samples_per_second": 114.979, "eval_steps_per_second": 1.84, "step": 800 }, { "epoch": 0.53, "learning_rate": 0.0001515463917525773, "loss": 1.6599, "step": 820 }, { "epoch": 0.54, "learning_rate": 0.0001474226804123711, "loss": 1.6587, "step": 840 }, { "epoch": 0.55, "learning_rate": 0.00014329896907216494, "loss": 1.6586, "step": 860 }, { "epoch": 0.57, "learning_rate": 0.00013917525773195875, "loss": 1.6602, "step": 880 }, { "epoch": 0.58, "learning_rate": 0.00013505154639175258, "loss": 1.6513, "step": 900 }, { "epoch": 0.59, "learning_rate": 0.00013092783505154639, "loss": 1.6477, "step": 920 }, { "epoch": 0.6, "learning_rate": 0.0001268041237113402, "loss": 1.6455, "step": 940 }, { "epoch": 0.62, "learning_rate": 0.000122680412371134, "loss": 1.6543, "step": 960 }, { "epoch": 0.63, "learning_rate": 0.00011855670103092781, "loss": 1.6499, "step": 980 }, { "epoch": 0.64, "learning_rate": 0.00011443298969072163, "loss": 1.647, "step": 1000 }, { "epoch": 0.64, "eval_loss": 1.6601147651672363, "eval_runtime": 17.3892, "eval_samples_per_second": 115.014, "eval_steps_per_second": 1.84, "step": 1000 }, { "epoch": 0.66, "learning_rate": 0.00011030927835051547, "loss": 1.649, "step": 1020 }, { "epoch": 0.67, "learning_rate": 0.00010618556701030927, "loss": 1.6562, "step": 1040 }, { "epoch": 0.68, "learning_rate": 0.00010206185567010309, "loss": 1.6488, "step": 1060 }, { "epoch": 0.69, "learning_rate": 9.79381443298969e-05, "loss": 1.6453, "step": 1080 }, { "epoch": 0.71, "learning_rate": 9.381443298969071e-05, "loss": 1.6348, "step": 1100 }, { "epoch": 0.72, "learning_rate": 8.969072164948453e-05, "loss": 1.6453, "step": 1120 }, { "epoch": 0.73, "learning_rate": 8.556701030927834e-05, "loss": 1.6362, "step": 1140 }, { "epoch": 0.75, "learning_rate": 8.144329896907215e-05, "loss": 1.6417, "step": 1160 }, { "epoch": 0.76, "learning_rate": 7.731958762886596e-05, "loss": 1.6382, "step": 1180 }, { "epoch": 0.77, "learning_rate": 7.319587628865979e-05, "loss": 1.6433, "step": 1200 }, { "epoch": 0.77, "eval_loss": 1.6533111333847046, "eval_runtime": 17.3863, "eval_samples_per_second": 115.033, "eval_steps_per_second": 1.841, "step": 1200 }, { "epoch": 0.78, "learning_rate": 6.90721649484536e-05, "loss": 1.6415, "step": 1220 }, { "epoch": 0.8, "learning_rate": 6.494845360824742e-05, "loss": 1.6459, "step": 1240 }, { "epoch": 0.81, "learning_rate": 6.0824742268041234e-05, "loss": 1.644, "step": 1260 }, { "epoch": 0.82, "learning_rate": 5.6701030927835046e-05, "loss": 1.6472, "step": 1280 }, { "epoch": 0.84, "learning_rate": 5.257731958762886e-05, "loss": 1.6368, "step": 1300 }, { "epoch": 0.85, "learning_rate": 4.8453608247422676e-05, "loss": 1.6325, "step": 1320 }, { "epoch": 0.86, "learning_rate": 4.4329896907216494e-05, "loss": 1.6354, "step": 1340 }, { "epoch": 0.87, "learning_rate": 4.0206185567010306e-05, "loss": 1.6381, "step": 1360 }, { "epoch": 0.89, "learning_rate": 3.608247422680412e-05, "loss": 1.6519, "step": 1380 }, { "epoch": 0.9, "learning_rate": 3.1958762886597937e-05, "loss": 1.6492, "step": 1400 }, { "epoch": 0.9, "eval_loss": 1.6501820087432861, "eval_runtime": 17.3757, "eval_samples_per_second": 115.103, "eval_steps_per_second": 1.842, "step": 1400 } ], "max_steps": 1555, "num_train_epochs": 1, "total_flos": 1.203777052717613e+19, "trial_name": null, "trial_params": null }