{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.64, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 0.3705686628818512, "learning_rate": 0.00019987329060020616, "loss": 2.7276, "num_input_tokens_seen": 78104, "step": 5 }, { "epoch": 0.032, "grad_norm": 0.28178921341896057, "learning_rate": 0.00019949348350626456, "loss": 2.6452, "num_input_tokens_seen": 157384, "step": 10 }, { "epoch": 0.048, "grad_norm": 0.1864372342824936, "learning_rate": 0.00019886154122075343, "loss": 2.5708, "num_input_tokens_seen": 240256, "step": 15 }, { "epoch": 0.064, "grad_norm": 0.1788010597229004, "learning_rate": 0.00019797906520422677, "loss": 2.5831, "num_input_tokens_seen": 319032, "step": 20 }, { "epoch": 0.08, "grad_norm": 0.23142138123512268, "learning_rate": 0.00019684829181681234, "loss": 2.6138, "num_input_tokens_seen": 397992, "step": 25 }, { "epoch": 0.096, "grad_norm": 0.21496976912021637, "learning_rate": 0.00019547208665085457, "loss": 2.5461, "num_input_tokens_seen": 479904, "step": 30 }, { "epoch": 0.112, "grad_norm": 0.2536880671977997, "learning_rate": 0.0001938539372689649, "loss": 2.5797, "num_input_tokens_seen": 555448, "step": 35 }, { "epoch": 0.128, "grad_norm": 0.39764249324798584, "learning_rate": 0.00019199794436588243, "loss": 2.5482, "num_input_tokens_seen": 630888, "step": 40 }, { "epoch": 0.144, "grad_norm": 0.29237431287765503, "learning_rate": 0.00018990881137654258, "loss": 2.4726, "num_input_tokens_seen": 709456, "step": 45 }, { "epoch": 0.16, "grad_norm": 0.24707558751106262, "learning_rate": 0.0001875918325566888, "loss": 2.5557, "num_input_tokens_seen": 788504, "step": 50 }, { "epoch": 0.176, "grad_norm": 0.24359019100666046, "learning_rate": 0.00018505287956623297, "loss": 2.5357, "num_input_tokens_seen": 864984, "step": 55 }, { "epoch": 0.192, "grad_norm": 0.23572562634944916, "learning_rate": 0.00018229838658936564, "loss": 2.4798, "num_input_tokens_seen": 939136, "step": 60 }, { "epoch": 0.208, "grad_norm": 0.41712337732315063, "learning_rate": 0.00017933533402912354, "loss": 2.5619, "num_input_tokens_seen": 1018024, "step": 65 }, { "epoch": 0.224, "grad_norm": 0.2777227759361267, "learning_rate": 0.00017617123081773591, "loss": 2.5334, "num_input_tokens_seen": 1095072, "step": 70 }, { "epoch": 0.24, "grad_norm": 0.25098717212677, "learning_rate": 0.00017281409538757883, "loss": 2.5199, "num_input_tokens_seen": 1172824, "step": 75 }, { "epoch": 0.256, "grad_norm": 0.24321608245372772, "learning_rate": 0.00016927243535095997, "loss": 2.4897, "num_input_tokens_seen": 1244688, "step": 80 }, { "epoch": 0.272, "grad_norm": 0.25580549240112305, "learning_rate": 0.0001655552259402295, "loss": 2.513, "num_input_tokens_seen": 1321104, "step": 85 }, { "epoch": 0.288, "grad_norm": 0.24990542232990265, "learning_rate": 0.00016167188726285434, "loss": 2.5533, "num_input_tokens_seen": 1402456, "step": 90 }, { "epoch": 0.304, "grad_norm": 0.26301833987236023, "learning_rate": 0.00015763226042909455, "loss": 2.5297, "num_input_tokens_seen": 1483648, "step": 95 }, { "epoch": 0.32, "grad_norm": 0.23519474267959595, "learning_rate": 0.0001534465826127801, "loss": 2.518, "num_input_tokens_seen": 1560608, "step": 100 }, { "epoch": 0.336, "grad_norm": 0.26162955164909363, "learning_rate": 0.00014912546110838775, "loss": 2.5537, "num_input_tokens_seen": 1640728, "step": 105 }, { "epoch": 0.352, "grad_norm": 0.2565276026725769, "learning_rate": 0.00014467984645016258, "loss": 2.489, "num_input_tokens_seen": 1722440, "step": 110 }, { "epoch": 0.368, "grad_norm": 0.3414108455181122, "learning_rate": 0.00014012100466140578, "loss": 2.5448, "num_input_tokens_seen": 1805984, "step": 115 }, { "epoch": 0.384, "grad_norm": 0.2590246796607971, "learning_rate": 0.00013546048870425356, "loss": 2.4148, "num_input_tokens_seen": 1890592, "step": 120 }, { "epoch": 0.4, "grad_norm": 0.2765924036502838, "learning_rate": 0.00013071010920229909, "loss": 2.5512, "num_input_tokens_seen": 1971272, "step": 125 }, { "epoch": 0.416, "grad_norm": 0.2582058906555176, "learning_rate": 0.00012588190451025207, "loss": 2.5264, "num_input_tokens_seen": 2053352, "step": 130 }, { "epoch": 0.432, "grad_norm": 0.2416098713874817, "learning_rate": 0.00012098811020648475, "loss": 2.5525, "num_input_tokens_seen": 2132624, "step": 135 }, { "epoch": 0.448, "grad_norm": 0.3602483868598938, "learning_rate": 0.00011604112808577603, "loss": 2.5029, "num_input_tokens_seen": 2210376, "step": 140 }, { "epoch": 0.464, "grad_norm": 0.2507006824016571, "learning_rate": 0.000111053494730832, "loss": 2.5546, "num_input_tokens_seen": 2292896, "step": 145 }, { "epoch": 0.48, "grad_norm": 0.32158130407333374, "learning_rate": 0.00010603784974222861, "loss": 2.5431, "num_input_tokens_seen": 2376432, "step": 150 }, { "epoch": 0.496, "grad_norm": 0.3036045730113983, "learning_rate": 0.00010100690370728755, "loss": 2.4882, "num_input_tokens_seen": 2451848, "step": 155 }, { "epoch": 0.512, "grad_norm": 0.31646254658699036, "learning_rate": 9.597340598905852e-05, "loss": 2.4319, "num_input_tokens_seen": 2532816, "step": 160 }, { "epoch": 0.528, "grad_norm": 0.2881500720977783, "learning_rate": 9.095011241703623e-05, "loss": 2.4688, "num_input_tokens_seen": 2616160, "step": 165 }, { "epoch": 0.544, "grad_norm": 0.27659812569618225, "learning_rate": 8.594975296149076e-05, "loss": 2.5149, "num_input_tokens_seen": 2694864, "step": 170 }, { "epoch": 0.56, "grad_norm": 0.4290514290332794, "learning_rate": 8.098499947332934e-05, "loss": 2.4754, "num_input_tokens_seen": 2770464, "step": 175 }, { "epoch": 0.576, "grad_norm": 0.28775912523269653, "learning_rate": 7.606843357124426e-05, "loss": 2.4875, "num_input_tokens_seen": 2852888, "step": 180 }, { "epoch": 0.592, "grad_norm": 0.2709747850894928, "learning_rate": 7.121251475752539e-05, "loss": 2.563, "num_input_tokens_seen": 2933520, "step": 185 }, { "epoch": 0.608, "grad_norm": 0.3473074734210968, "learning_rate": 6.642954884333955e-05, "loss": 2.4903, "num_input_tokens_seen": 3011656, "step": 190 }, { "epoch": 0.624, "grad_norm": 0.28855767846107483, "learning_rate": 6.173165676349103e-05, "loss": 2.5325, "num_input_tokens_seen": 3088960, "step": 195 }, { "epoch": 0.64, "grad_norm": 0.27710187435150146, "learning_rate": 5.713074385969457e-05, "loss": 2.4344, "num_input_tokens_seen": 3165160, "step": 200 } ], "logging_steps": 5, "max_steps": 312, "num_input_tokens_seen": 3165160, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.30881325463765e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }