{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.960960960960961, "eval_steps": 500, "global_step": 30, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03203203203203203, "grad_norm": 0.5500465035438538, "learning_rate": 0.0002, "loss": 1.9326, "step": 1 }, { "epoch": 0.06406406406406406, "grad_norm": 0.3067496418952942, "learning_rate": 0.0004, "loss": 2.0123, "step": 2 }, { "epoch": 0.0960960960960961, "grad_norm": 0.24163766205310822, "learning_rate": 0.0003988275914308719, "loss": 1.6857, "step": 3 }, { "epoch": 0.12812812812812813, "grad_norm": 0.4033714830875397, "learning_rate": 0.00039532411114201737, "loss": 1.568, "step": 4 }, { "epoch": 0.16016016016016016, "grad_norm": 0.40937018394470215, "learning_rate": 0.00038953063423656053, "loss": 1.2066, "step": 5 }, { "epoch": 0.1921921921921922, "grad_norm": 0.303056001663208, "learning_rate": 0.00038151508393419143, "loss": 1.2382, "step": 6 }, { "epoch": 0.22422422422422422, "grad_norm": 0.286432147026062, "learning_rate": 0.00037137143523351785, "loss": 0.9406, "step": 7 }, { "epoch": 0.25625625625625625, "grad_norm": 0.354612797498703, "learning_rate": 0.00035921861314112876, "loss": 1.032, "step": 8 }, { "epoch": 0.2882882882882883, "grad_norm": 0.3172045052051544, "learning_rate": 0.0003451990983846262, "loss": 1.1604, "step": 9 }, { "epoch": 0.3203203203203203, "grad_norm": 0.35591357946395874, "learning_rate": 0.00032947725695636553, "loss": 1.2686, "step": 10 }, { "epoch": 0.35235235235235235, "grad_norm": 0.45947980880737305, "learning_rate": 0.0003122374130724765, "loss": 1.4562, "step": 11 }, { "epoch": 0.3843843843843844, "grad_norm": 0.381909042596817, "learning_rate": 0.00029368168813995806, "loss": 1.3231, "step": 12 }, { "epoch": 0.4164164164164164, "grad_norm": 0.47724688053131104, "learning_rate": 0.0002740276310679829, "loss": 1.276, "step": 13 }, { "epoch": 0.44844844844844844, "grad_norm": 0.3836033344268799, "learning_rate": 0.00025350566770584423, "loss": 1.5523, "step": 14 }, { "epoch": 0.4804804804804805, "grad_norm": 0.3899593651294708, "learning_rate": 0.000232356399310553, "loss": 1.2293, "step": 15 }, { "epoch": 0.5125125125125125, "grad_norm": 0.3470306992530823, "learning_rate": 0.00021082778171708353, "loss": 1.2625, "step": 16 }, { "epoch": 0.5445445445445446, "grad_norm": 0.36498191952705383, "learning_rate": 0.00018917221828291652, "loss": 1.2286, "step": 17 }, { "epoch": 0.5765765765765766, "grad_norm": 0.3170139193534851, "learning_rate": 0.00016764360068944706, "loss": 1.2458, "step": 18 }, { "epoch": 0.6086086086086087, "grad_norm": 0.3011997938156128, "learning_rate": 0.00014649433229415587, "loss": 1.1672, "step": 19 }, { "epoch": 0.6406406406406406, "grad_norm": 0.29357728362083435, "learning_rate": 0.00012597236893201712, "loss": 1.1728, "step": 20 }, { "epoch": 0.6726726726726727, "grad_norm": 0.30459967255592346, "learning_rate": 0.000106318311860042, "loss": 1.1528, "step": 21 }, { "epoch": 0.7047047047047047, "grad_norm": 0.23712120950222015, "learning_rate": 8.776258692752355e-05, "loss": 0.7205, "step": 22 }, { "epoch": 0.7367367367367368, "grad_norm": 0.2815784215927124, "learning_rate": 7.052274304363449e-05, "loss": 0.7541, "step": 23 }, { "epoch": 0.7687687687687688, "grad_norm": 0.3061799705028534, "learning_rate": 5.4800901615373876e-05, "loss": 1.4001, "step": 24 }, { "epoch": 0.8008008008008008, "grad_norm": 0.32817238569259644, "learning_rate": 4.078138685887125e-05, "loss": 1.1528, "step": 25 }, { "epoch": 0.8328328328328328, "grad_norm": 0.37422603368759155, "learning_rate": 2.862856476648219e-05, "loss": 1.3155, "step": 26 }, { "epoch": 0.8648648648648649, "grad_norm": 0.3299436867237091, "learning_rate": 1.8484916065808623e-05, "loss": 1.1114, "step": 27 }, { "epoch": 0.8968968968968969, "grad_norm": 0.32912251353263855, "learning_rate": 1.0469365763439531e-05, "loss": 1.0327, "step": 28 }, { "epoch": 0.928928928928929, "grad_norm": 0.30801478028297424, "learning_rate": 4.6758888579826685e-06, "loss": 0.85, "step": 29 }, { "epoch": 0.960960960960961, "grad_norm": 0.3802436292171478, "learning_rate": 1.1724085691280805e-06, "loss": 1.2445, "step": 30 } ], "logging_steps": 1, "max_steps": 31, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 30, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.01879310700544e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }