{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.5197355996222852, "global_step": 125, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 0.0004, "loss": 2.6298, "step": 2 }, { "epoch": 0.05, "learning_rate": 0.0008, "loss": 2.6348, "step": 4 }, { "epoch": 0.07, "learning_rate": 0.0009999024041442454, "loss": 2.6855, "step": 6 }, { "epoch": 0.1, "learning_rate": 0.0009991218658821608, "loss": 2.6779, "step": 8 }, { "epoch": 0.12, "learning_rate": 0.0009975620080758321, "loss": 2.7265, "step": 10 }, { "epoch": 0.15, "learning_rate": 0.000995225266258058, "loss": 2.7332, "step": 12 }, { "epoch": 0.17, "learning_rate": 0.0009921152889737985, "loss": 2.7308, "step": 14 }, { "epoch": 0.19, "learning_rate": 0.0009882369320834067, "loss": 2.7706, "step": 16 }, { "epoch": 0.22, "learning_rate": 0.0009835962511807786, "loss": 2.7455, "step": 18 }, { "epoch": 0.24, "learning_rate": 0.0009782004921382612, "loss": 2.7819, "step": 20 }, { "epoch": 0.27, "learning_rate": 0.0009720580797930845, "loss": 2.7122, "step": 22 }, { "epoch": 0.29, "learning_rate": 0.0009651786047929772, "loss": 2.7141, "step": 24 }, { "epoch": 0.31, "learning_rate": 0.0009575728086215092, "loss": 2.694, "step": 26 }, { "epoch": 0.34, "learning_rate": 0.00094925256682654, "loss": 2.6981, "step": 28 }, { "epoch": 0.36, "learning_rate": 0.0009402308704779598, "loss": 2.6848, "step": 30 }, { "epoch": 0.39, "learning_rate": 0.0009305218058836777, "loss": 2.6498, "step": 32 }, { "epoch": 0.41, "learning_rate": 0.0009201405325955221, "loss": 2.7059, "step": 34 }, { "epoch": 0.44, "learning_rate": 0.0009091032597394012, "loss": 2.7016, "step": 36 }, { "epoch": 0.46, "learning_rate": 0.0008974272207066767, "loss": 2.6599, "step": 38 }, { "epoch": 0.48, "learning_rate": 0.0008851306462462688, "loss": 2.6922, "step": 40 }, { "epoch": 0.51, "learning_rate": 0.0008722327359995064, "loss": 2.6473, "step": 42 }, { "epoch": 0.53, "learning_rate": 0.0008587536285221655, "loss": 2.7236, "step": 44 }, { "epoch": 0.56, "learning_rate": 0.000844714369840506, "loss": 2.6924, "step": 46 }, { "epoch": 0.58, "learning_rate": 0.0008301368805903988, "loss": 2.6428, "step": 48 }, { "epoch": 0.6, "learning_rate": 0.0008150439217908557, "loss": 2.6161, "step": 50 }, { "epoch": 0.63, "learning_rate": 0.0007994590593054001, "loss": 2.6721, "step": 52 }, { "epoch": 0.65, "learning_rate": 0.0007834066270467691, "loss": 2.6506, "step": 54 }, { "epoch": 0.68, "learning_rate": 0.0007669116889823954, "loss": 2.6787, "step": 56 }, { "epoch": 0.7, "learning_rate": 0.00075, "loss": 2.6172, "step": 58 }, { "epoch": 0.73, "learning_rate": 0.0007326979656943906, "loss": 2.6071, "step": 60 }, { "epoch": 0.75, "learning_rate": 0.0007150326011382603, "loss": 2.6513, "step": 62 }, { "epoch": 0.77, "learning_rate": 0.0006970314887013584, "loss": 2.679, "step": 64 }, { "epoch": 0.8, "learning_rate": 0.0006787227349838946, "loss": 2.6879, "step": 66 }, { "epoch": 0.82, "learning_rate": 0.0006601349269314187, "loss": 2.6459, "step": 68 }, { "epoch": 0.85, "learning_rate": 0.0006412970871996995, "loss": 2.6268, "step": 70 }, { "epoch": 0.87, "learning_rate": 0.0006222386288392914, "loss": 2.615, "step": 72 }, { "epoch": 0.89, "learning_rate": 0.0006029893093705492, "loss": 2.6541, "step": 74 }, { "epoch": 0.92, "learning_rate": 0.0005835791843207916, "loss": 2.6697, "step": 76 }, { "epoch": 0.94, "learning_rate": 0.0005640385602961634, "loss": 2.6017, "step": 78 }, { "epoch": 0.97, "learning_rate": 0.0005443979476614675, "loss": 2.658, "step": 80 }, { "epoch": 0.99, "learning_rate": 0.0005246880129018515, "loss": 2.5922, "step": 82 }, { "epoch": 1.02, "learning_rate": 0.0005049395307407329, "loss": 3.5292, "step": 84 }, { "epoch": 1.05, "learning_rate": 0.0004851833360887201, "loss": 2.5377, "step": 86 }, { "epoch": 1.07, "learning_rate": 0.0004654502758985611, "loss": 2.6096, "step": 88 }, { "epoch": 1.1, "learning_rate": 0.00044577116100128736, "loss": 2.5112, "step": 90 }, { "epoch": 1.12, "learning_rate": 0.00042617671799875947, "loss": 2.5469, "step": 92 }, { "epoch": 1.15, "learning_rate": 0.0004066975412877255, "loss": 2.5364, "step": 94 }, { "epoch": 1.17, "learning_rate": 0.00038736404529030255, "loss": 2.5132, "step": 96 }, { "epoch": 1.19, "learning_rate": 0.0003682064169654663, "loss": 2.5339, "step": 98 }, { "epoch": 1.22, "learning_rate": 0.0003492545686756986, "loss": 2.5136, "step": 100 }, { "epoch": 1.24, "learning_rate": 0.00033053809148238423, "loss": 2.5043, "step": 102 }, { "epoch": 1.27, "learning_rate": 0.00031208620894288106, "loss": 2.5174, "step": 104 }, { "epoch": 1.29, "learning_rate": 0.0002939277314814041, "loss": 2.479, "step": 106 }, { "epoch": 1.31, "learning_rate": 0.0002760910114049686, "loss": 2.5554, "step": 108 }, { "epoch": 1.34, "learning_rate": 0.00025860389863462764, "loss": 2.5007, "step": 110 }, { "epoch": 1.36, "learning_rate": 0.00024149369722112717, "loss": 2.493, "step": 112 }, { "epoch": 1.39, "learning_rate": 0.00022478712271287087, "loss": 2.4715, "step": 114 }, { "epoch": 1.41, "learning_rate": 0.00020851026044276406, "loss": 2.5359, "step": 116 }, { "epoch": 1.44, "learning_rate": 0.00019268852479906146, "loss": 2.5447, "step": 118 }, { "epoch": 1.46, "learning_rate": 0.00017734661954381753, "loss": 2.5141, "step": 120 }, { "epoch": 1.48, "learning_rate": 0.00016250849924089484, "loss": 2.558, "step": 122 }, { "epoch": 1.51, "learning_rate": 0.00014819733185375534, "loss": 2.5372, "step": 124 } ], "max_steps": 164, "num_train_epochs": 2, "total_flos": 1.1869674526642012e+18, "trial_name": null, "trial_params": null }