{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.15077271013946475, "eval_steps": 500, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.003769317753486619, "grad_norm": 2.665037155151367, "learning_rate": 4.999956146783009e-05, "loss": 1.564, "num_input_tokens_seen": 23856, "step": 5 }, { "epoch": 0.007538635506973238, "grad_norm": 1.4514005184173584, "learning_rate": 4.9998245886705174e-05, "loss": 0.968, "num_input_tokens_seen": 47584, "step": 10 }, { "epoch": 0.011307953260459858, "grad_norm": 2.0009350776672363, "learning_rate": 4.999605330277923e-05, "loss": 0.9879, "num_input_tokens_seen": 70864, "step": 15 }, { "epoch": 0.015077271013946476, "grad_norm": 1.2164318561553955, "learning_rate": 4.999298379297376e-05, "loss": 1.073, "num_input_tokens_seen": 94192, "step": 20 }, { "epoch": 0.018846588767433094, "grad_norm": 2.2099952697753906, "learning_rate": 4.998903746497505e-05, "loss": 0.8549, "num_input_tokens_seen": 117472, "step": 25 }, { "epoch": 0.022615906520919715, "grad_norm": 1.6259618997573853, "learning_rate": 4.998421445723046e-05, "loss": 0.8704, "num_input_tokens_seen": 140704, "step": 30 }, { "epoch": 0.026385224274406333, "grad_norm": 1.7849559783935547, "learning_rate": 4.997851493894349e-05, "loss": 0.9786, "num_input_tokens_seen": 163680, "step": 35 }, { "epoch": 0.03015454202789295, "grad_norm": 1.8650152683258057, "learning_rate": 4.997193911006793e-05, "loss": 0.8356, "num_input_tokens_seen": 187072, "step": 40 }, { "epoch": 0.03392385978137957, "grad_norm": 1.681622862815857, "learning_rate": 4.996448720130077e-05, "loss": 0.778, "num_input_tokens_seen": 211040, "step": 45 }, { "epoch": 0.03769317753486619, "grad_norm": 1.6398649215698242, "learning_rate": 4.995615947407415e-05, "loss": 0.939, "num_input_tokens_seen": 234560, "step": 50 }, { "epoch": 0.04146249528835281, "grad_norm": 2.327622890472412, "learning_rate": 4.994695622054618e-05, "loss": 1.0274, "num_input_tokens_seen": 257632, "step": 55 }, { "epoch": 0.04523181304183943, "grad_norm": 2.6391327381134033, "learning_rate": 4.9936877763590664e-05, "loss": 0.8822, "num_input_tokens_seen": 281024, "step": 60 }, { "epoch": 0.049001130795326045, "grad_norm": 1.8776222467422485, "learning_rate": 4.992592445678582e-05, "loss": 0.8146, "num_input_tokens_seen": 304688, "step": 65 }, { "epoch": 0.052770448548812667, "grad_norm": 1.6592698097229004, "learning_rate": 4.991409668440185e-05, "loss": 0.9989, "num_input_tokens_seen": 327424, "step": 70 }, { "epoch": 0.05653976630229928, "grad_norm": 1.709376335144043, "learning_rate": 4.990139486138743e-05, "loss": 0.9344, "num_input_tokens_seen": 350528, "step": 75 }, { "epoch": 0.0603090840557859, "grad_norm": 1.3547413349151611, "learning_rate": 4.988781943335521e-05, "loss": 0.7932, "num_input_tokens_seen": 373280, "step": 80 }, { "epoch": 0.06407840180927252, "grad_norm": 1.3162572383880615, "learning_rate": 4.987337087656614e-05, "loss": 0.9445, "num_input_tokens_seen": 395856, "step": 85 }, { "epoch": 0.06784771956275915, "grad_norm": 1.8211767673492432, "learning_rate": 4.985804969791279e-05, "loss": 0.7369, "num_input_tokens_seen": 418704, "step": 90 }, { "epoch": 0.07161703731624576, "grad_norm": 1.552996039390564, "learning_rate": 4.984185643490151e-05, "loss": 1.0226, "num_input_tokens_seen": 442432, "step": 95 }, { "epoch": 0.07538635506973237, "grad_norm": 1.816767930984497, "learning_rate": 4.9824791655633676e-05, "loss": 0.7753, "num_input_tokens_seen": 466128, "step": 100 }, { "epoch": 0.079155672823219, "grad_norm": 1.6724802255630493, "learning_rate": 4.9806855958785625e-05, "loss": 0.8278, "num_input_tokens_seen": 489536, "step": 105 }, { "epoch": 0.08292499057670562, "grad_norm": 2.187622308731079, "learning_rate": 4.978804997358779e-05, "loss": 0.8432, "num_input_tokens_seen": 513200, "step": 110 }, { "epoch": 0.08669430833019223, "grad_norm": 1.500815987586975, "learning_rate": 4.9768374359802525e-05, "loss": 0.9649, "num_input_tokens_seen": 536432, "step": 115 }, { "epoch": 0.09046362608367886, "grad_norm": 1.6637320518493652, "learning_rate": 4.9747829807701e-05, "loss": 0.8249, "num_input_tokens_seen": 559776, "step": 120 }, { "epoch": 0.09423294383716548, "grad_norm": 1.5978686809539795, "learning_rate": 4.972641703803896e-05, "loss": 0.9157, "num_input_tokens_seen": 583248, "step": 125 }, { "epoch": 0.09800226159065209, "grad_norm": 1.4269095659255981, "learning_rate": 4.9704136802031485e-05, "loss": 0.885, "num_input_tokens_seen": 606768, "step": 130 }, { "epoch": 0.1017715793441387, "grad_norm": 1.6915644407272339, "learning_rate": 4.96809898813266e-05, "loss": 0.8736, "num_input_tokens_seen": 630896, "step": 135 }, { "epoch": 0.10554089709762533, "grad_norm": 1.6915837526321411, "learning_rate": 4.965697708797784e-05, "loss": 0.7312, "num_input_tokens_seen": 654320, "step": 140 }, { "epoch": 0.10931021485111195, "grad_norm": 1.921988606452942, "learning_rate": 4.963209926441581e-05, "loss": 0.9478, "num_input_tokens_seen": 677248, "step": 145 }, { "epoch": 0.11307953260459856, "grad_norm": 2.241665840148926, "learning_rate": 4.9606357283418575e-05, "loss": 0.9174, "num_input_tokens_seen": 700672, "step": 150 }, { "epoch": 0.11684885035808519, "grad_norm": 2.076327323913574, "learning_rate": 4.957975204808108e-05, "loss": 0.8453, "num_input_tokens_seen": 724480, "step": 155 }, { "epoch": 0.1206181681115718, "grad_norm": 1.5655834674835205, "learning_rate": 4.955228449178345e-05, "loss": 0.701, "num_input_tokens_seen": 748144, "step": 160 }, { "epoch": 0.12438748586505842, "grad_norm": 1.8062623739242554, "learning_rate": 4.952395557815826e-05, "loss": 0.7981, "num_input_tokens_seen": 771584, "step": 165 }, { "epoch": 0.12815680361854503, "grad_norm": 2.2940807342529297, "learning_rate": 4.949476630105669e-05, "loss": 0.8824, "num_input_tokens_seen": 795248, "step": 170 }, { "epoch": 0.13192612137203166, "grad_norm": 2.108461856842041, "learning_rate": 4.9464717684513726e-05, "loss": 0.8368, "num_input_tokens_seen": 818272, "step": 175 }, { "epoch": 0.1356954391255183, "grad_norm": 1.5639266967773438, "learning_rate": 4.943381078271214e-05, "loss": 0.951, "num_input_tokens_seen": 841440, "step": 180 }, { "epoch": 0.1394647568790049, "grad_norm": 1.4479436874389648, "learning_rate": 4.9402046679945613e-05, "loss": 0.8697, "num_input_tokens_seen": 864640, "step": 185 }, { "epoch": 0.14323407463249152, "grad_norm": 1.8331745862960815, "learning_rate": 4.936942649058061e-05, "loss": 0.7765, "num_input_tokens_seen": 888032, "step": 190 }, { "epoch": 0.14700339238597815, "grad_norm": 1.6916519403457642, "learning_rate": 4.933595135901732e-05, "loss": 0.778, "num_input_tokens_seen": 911008, "step": 195 }, { "epoch": 0.15077271013946475, "grad_norm": 2.0202736854553223, "learning_rate": 4.930162245964952e-05, "loss": 0.8926, "num_input_tokens_seen": 934432, "step": 200 } ], "logging_steps": 5, "max_steps": 2652, "num_input_tokens_seen": 934432, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.1128212106346496e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }