{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 220, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004545454545454545, "grad_norm": 3.78125, "learning_rate": 9.090909090909091e-06, "loss": 1.8611, "step": 1 }, { "epoch": 0.022727272727272728, "grad_norm": 3.21875, "learning_rate": 4.545454545454546e-05, "loss": 1.8713, "step": 5 }, { "epoch": 0.045454545454545456, "grad_norm": 1.96875, "learning_rate": 9.090909090909092e-05, "loss": 1.8057, "step": 10 }, { "epoch": 0.06818181818181818, "grad_norm": 1.2890625, "learning_rate": 0.00013636363636363637, "loss": 1.7424, "step": 15 }, { "epoch": 0.09090909090909091, "grad_norm": 1.796875, "learning_rate": 0.00018181818181818183, "loss": 1.6841, "step": 20 }, { "epoch": 0.11363636363636363, "grad_norm": 1.9375, "learning_rate": 0.0001998867339183008, "loss": 1.611, "step": 25 }, { "epoch": 0.13636363636363635, "grad_norm": 1.84375, "learning_rate": 0.00019919548128307954, "loss": 1.5838, "step": 30 }, { "epoch": 0.1590909090909091, "grad_norm": 1.46875, "learning_rate": 0.00019788024462147788, "loss": 1.5374, "step": 35 }, { "epoch": 0.18181818181818182, "grad_norm": 0.76953125, "learning_rate": 0.00019594929736144976, "loss": 1.5049, "step": 40 }, { "epoch": 0.20454545454545456, "grad_norm": 0.9609375, "learning_rate": 0.00019341478602651069, "loss": 1.4867, "step": 45 }, { "epoch": 0.22727272727272727, "grad_norm": 0.83203125, "learning_rate": 0.00019029265382866214, "loss": 1.4711, "step": 50 }, { "epoch": 0.25, "grad_norm": 0.9453125, "learning_rate": 0.00018660254037844388, "loss": 1.4442, "step": 55 }, { "epoch": 0.2727272727272727, "grad_norm": 0.95703125, "learning_rate": 0.0001823676581429833, "loss": 1.4328, "step": 60 }, { "epoch": 0.29545454545454547, "grad_norm": 0.875, "learning_rate": 0.0001776146464291757, "loss": 1.4086, "step": 65 }, { "epoch": 0.3181818181818182, "grad_norm": 1.046875, "learning_rate": 0.00017237340381050703, "loss": 1.3785, "step": 70 }, { "epoch": 0.3409090909090909, "grad_norm": 0.90625, "learning_rate": 0.00016667690005162916, "loss": 1.3806, "step": 75 }, { "epoch": 0.36363636363636365, "grad_norm": 0.7734375, "learning_rate": 0.00016056096871376667, "loss": 1.3681, "step": 80 }, { "epoch": 0.38636363636363635, "grad_norm": 0.66015625, "learning_rate": 0.00015406408174555976, "loss": 1.3576, "step": 85 }, { "epoch": 0.4090909090909091, "grad_norm": 0.6328125, "learning_rate": 0.0001472271074772683, "loss": 1.3267, "step": 90 }, { "epoch": 0.4318181818181818, "grad_norm": 1.015625, "learning_rate": 0.00014009305354066137, "loss": 1.3442, "step": 95 }, { "epoch": 0.45454545454545453, "grad_norm": 0.8984375, "learning_rate": 0.00013270679633174218, "loss": 1.3321, "step": 100 }, { "epoch": 0.4772727272727273, "grad_norm": 0.9765625, "learning_rate": 0.0001251147987181079, "loss": 1.3126, "step": 105 }, { "epoch": 0.5, "grad_norm": 0.84765625, "learning_rate": 0.00011736481776669306, "loss": 1.3108, "step": 110 }, { "epoch": 0.5227272727272727, "grad_norm": 0.6640625, "learning_rate": 0.00010950560433041826, "loss": 1.2999, "step": 115 }, { "epoch": 0.5454545454545454, "grad_norm": 0.52734375, "learning_rate": 0.00010158659638348081, "loss": 1.3105, "step": 120 }, { "epoch": 0.5681818181818182, "grad_norm": 0.69140625, "learning_rate": 9.365760803434355e-05, "loss": 1.3079, "step": 125 }, { "epoch": 0.5909090909090909, "grad_norm": 0.58984375, "learning_rate": 8.57685161726715e-05, "loss": 1.2927, "step": 130 }, { "epoch": 0.6136363636363636, "grad_norm": 0.51953125, "learning_rate": 7.796894672134594e-05, "loss": 1.2819, "step": 135 }, { "epoch": 0.6363636363636364, "grad_norm": 0.76171875, "learning_rate": 7.030796246717255e-05, "loss": 1.2973, "step": 140 }, { "epoch": 0.6590909090909091, "grad_norm": 0.64453125, "learning_rate": 6.283375443396726e-05, "loss": 1.2926, "step": 145 }, { "epoch": 0.6818181818181818, "grad_norm": 0.462890625, "learning_rate": 5.559333873942259e-05, "loss": 1.2886, "step": 150 }, { "epoch": 0.7045454545454546, "grad_norm": 0.671875, "learning_rate": 4.8632260842659393e-05, "loss": 1.2927, "step": 155 }, { "epoch": 0.7272727272727273, "grad_norm": 0.51953125, "learning_rate": 4.19943090428802e-05, "loss": 1.2802, "step": 160 }, { "epoch": 0.75, "grad_norm": 0.447265625, "learning_rate": 3.5721239031346066e-05, "loss": 1.2582, "step": 165 }, { "epoch": 0.7727272727272727, "grad_norm": 0.5625, "learning_rate": 2.9852511229367865e-05, "loss": 1.2829, "step": 170 }, { "epoch": 0.7954545454545454, "grad_norm": 0.41796875, "learning_rate": 2.4425042564574184e-05, "loss": 1.2812, "step": 175 }, { "epoch": 0.8181818181818182, "grad_norm": 0.4375, "learning_rate": 1.947297424689414e-05, "loss": 1.2832, "step": 180 }, { "epoch": 0.8409090909090909, "grad_norm": 0.4609375, "learning_rate": 1.5027457005048573e-05, "loss": 1.277, "step": 185 }, { "epoch": 0.8636363636363636, "grad_norm": 0.5234375, "learning_rate": 1.1116455134507664e-05, "loss": 1.2788, "step": 190 }, { "epoch": 0.8863636363636364, "grad_norm": 0.53125, "learning_rate": 7.764570589541875e-06, "loss": 1.2764, "step": 195 }, { "epoch": 0.9090909090909091, "grad_norm": 0.5078125, "learning_rate": 4.992888225905468e-06, "loss": 1.2836, "step": 200 }, { "epoch": 0.9318181818181818, "grad_norm": 0.65234375, "learning_rate": 2.818843167645835e-06, "loss": 1.2792, "step": 205 }, { "epoch": 0.9545454545454546, "grad_norm": 0.5625, "learning_rate": 1.2561111323605712e-06, "loss": 1.2774, "step": 210 }, { "epoch": 0.9772727272727273, "grad_norm": 0.466796875, "learning_rate": 3.145224048057727e-07, "loss": 1.2792, "step": 215 }, { "epoch": 1.0, "grad_norm": 0.51171875, "learning_rate": 0.0, "loss": 1.2733, "step": 220 }, { "epoch": 1.0, "step": 220, "total_flos": 7357983621120000.0, "train_loss": 1.3856282998215068, "train_runtime": 92.8881, "train_samples_per_second": 151.214, "train_steps_per_second": 2.368 } ], "logging_steps": 5, "max_steps": 220, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 7357983621120000.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }