{ "best_metric": NaN, "best_model_checkpoint": "/mnt/bn/qingyi-bn-lq/llama/saved_bloomz-7b1-mt_TQA/checkpoint-200", "epoch": 2.0972146368104863, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 5.9999999999999995e-05, "loss": 56.1213, "step": 20 }, { "epoch": 0.14, "learning_rate": 0.00011999999999999999, "loss": 46.3785, "step": 40 }, { "epoch": 0.21, "learning_rate": 0.00017999999999999998, "loss": 27.2008, "step": 60 }, { "epoch": 0.28, "learning_rate": 0.00023999999999999998, "loss": 58.3372, "step": 80 }, { "epoch": 0.35, "learning_rate": 0.0003, "loss": 21.5097, "step": 100 }, { "epoch": 0.42, "learning_rate": 0.00029208443271767806, "loss": 149.5568, "step": 120 }, { "epoch": 0.49, "learning_rate": 0.0002841688654353562, "loss": 12.1125, "step": 140 }, { "epoch": 0.56, "learning_rate": 0.0002762532981530343, "loss": 64.6813, "step": 160 }, { "epoch": 0.63, "learning_rate": 0.00026833773087071237, "loss": 12.8244, "step": 180 }, { "epoch": 0.7, "learning_rate": 0.0002604221635883905, "loss": 13.6392, "step": 200 }, { "epoch": 0.7, "eval_loss": NaN, "eval_runtime": 39.2226, "eval_samples_per_second": 50.991, "eval_steps_per_second": 1.606, "step": 200 }, { "epoch": 0.77, "learning_rate": 0.0002525065963060686, "loss": 29.1801, "step": 220 }, { "epoch": 0.84, "learning_rate": 0.0002445910290237467, "loss": 7.1808, "step": 240 }, { "epoch": 0.91, "learning_rate": 0.0002366754617414248, "loss": 17.5066, "step": 260 }, { "epoch": 0.98, "learning_rate": 0.00022875989445910288, "loss": 13.7954, "step": 280 }, { "epoch": 1.05, "learning_rate": 0.000220844327176781, "loss": 15.2006, "step": 300 }, { "epoch": 1.12, "learning_rate": 0.00021292875989445908, "loss": 18.5282, "step": 320 }, { "epoch": 1.19, "learning_rate": 0.0002050131926121372, "loss": 10.157, "step": 340 }, { "epoch": 1.26, "learning_rate": 0.0001970976253298153, "loss": 9.2541, "step": 360 }, { "epoch": 1.33, "learning_rate": 0.00018918205804749339, "loss": 9.0145, "step": 380 }, { "epoch": 1.4, "learning_rate": 0.00018126649076517147, "loss": 8.6491, "step": 400 }, { "epoch": 1.4, "eval_loss": NaN, "eval_runtime": 39.8237, "eval_samples_per_second": 50.221, "eval_steps_per_second": 1.582, "step": 400 }, { "epoch": 1.47, "learning_rate": 0.0001733509234828496, "loss": 6.757, "step": 420 }, { "epoch": 1.54, "learning_rate": 0.0001654353562005277, "loss": 4.1081, "step": 440 }, { "epoch": 1.61, "learning_rate": 0.00015751978891820578, "loss": 19.5577, "step": 460 }, { "epoch": 1.68, "learning_rate": 0.0001496042216358839, "loss": 14.4416, "step": 480 }, { "epoch": 1.75, "learning_rate": 0.00014168865435356198, "loss": 8.4368, "step": 500 }, { "epoch": 1.82, "learning_rate": 0.0001337730870712401, "loss": 5.4915, "step": 520 }, { "epoch": 1.89, "learning_rate": 0.00012585751978891818, "loss": 28.9042, "step": 540 }, { "epoch": 1.96, "learning_rate": 0.00011794195250659629, "loss": 25.3276, "step": 560 }, { "epoch": 2.03, "learning_rate": 0.0001100263852242744, "loss": 8.954, "step": 580 }, { "epoch": 2.1, "learning_rate": 0.0001021108179419525, "loss": 8.7963, "step": 600 }, { "epoch": 2.1, "eval_loss": NaN, "eval_runtime": 39.412, "eval_samples_per_second": 50.746, "eval_steps_per_second": 1.598, "step": 600 } ], "max_steps": 858, "num_train_epochs": 3, "total_flos": 5.660860955911782e+18, "trial_name": null, "trial_params": null }