{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 500, "global_step": 312, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00025, "loss": 0.7923, "step": 10 }, { "epoch": 0.06, "learning_rate": 0.0005, "loss": 0.1889, "step": 20 }, { "epoch": 0.1, "learning_rate": 0.000498554476723488, "loss": 0.1673, "step": 30 }, { "epoch": 0.13, "learning_rate": 0.0004942346231942955, "loss": 0.1507, "step": 40 }, { "epoch": 0.16, "learning_rate": 0.0004870903950030429, "loss": 0.1385, "step": 50 }, { "epoch": 0.19, "learning_rate": 0.00047720440933487574, "loss": 0.1593, "step": 60 }, { "epoch": 0.22, "learning_rate": 0.0004646909895689508, "loss": 0.1549, "step": 70 }, { "epoch": 0.26, "learning_rate": 0.0004496948432215913, "loss": 0.159, "step": 80 }, { "epoch": 0.29, "learning_rate": 0.0004323893885216219, "loss": 0.1562, "step": 90 }, { "epoch": 0.32, "learning_rate": 0.00041297474896967814, "loss": 0.1352, "step": 100 }, { "epoch": 0.35, "learning_rate": 0.0003916754390727794, "loss": 0.1588, "step": 110 }, { "epoch": 0.38, "learning_rate": 0.0003687377680167626, "loss": 0.1523, "step": 120 }, { "epoch": 0.42, "learning_rate": 0.0003444269913009912, "loss": 0.1418, "step": 130 }, { "epoch": 0.45, "learning_rate": 0.0003190242432743673, "loss": 0.1428, "step": 140 }, { "epoch": 0.48, "learning_rate": 0.0002928232860453694, "loss": 0.1471, "step": 150 }, { "epoch": 0.51, "learning_rate": 0.00026612711236232914, "loss": 0.1427, "step": 160 }, { "epoch": 0.54, "learning_rate": 0.00023924444174886733, "loss": 0.1366, "step": 170 }, { "epoch": 0.58, "learning_rate": 0.00021248615041383683, "loss": 0.1463, "step": 180 }, { "epoch": 0.61, "learning_rate": 0.00018616167622095327, "loss": 0.1328, "step": 190 }, { "epoch": 0.64, "learning_rate": 0.0001605754402917186, "loss": 0.134, "step": 200 }, { "epoch": 0.67, "learning_rate": 0.00013602332662288535, "loss": 0.1417, "step": 210 }, { "epoch": 0.7, "learning_rate": 0.00011278926042882027, "loss": 0.1194, "step": 220 }, { "epoch": 0.74, "learning_rate": 9.114192477745568e-05, "loss": 0.1359, "step": 230 }, { "epoch": 0.77, "learning_rate": 7.133165348925977e-05, "loss": 0.1244, "step": 240 }, { "epoch": 0.8, "learning_rate": 5.3587536230321364e-05, "loss": 0.1367, "step": 250 }, { "epoch": 0.83, "learning_rate": 3.811476927679228e-05, "loss": 0.1256, "step": 260 }, { "epoch": 0.86, "learning_rate": 2.5092282586939186e-05, "loss": 0.1312, "step": 270 }, { "epoch": 0.9, "learning_rate": 1.467067062178823e-05, "loss": 0.1272, "step": 280 }, { "epoch": 0.93, "learning_rate": 6.970450842734649e-06, "loss": 0.1241, "step": 290 }, { "epoch": 0.96, "learning_rate": 2.0806700251775058e-06, "loss": 0.1234, "step": 300 }, { "epoch": 0.99, "learning_rate": 5.7874505027283307e-08, "loss": 0.1305, "step": 310 }, { "epoch": 1.0, "step": 312, "total_flos": 8.46080087312302e+16, "train_loss": 0.16286772943268985, "train_runtime": 1280.7232, "train_samples_per_second": 3.904, "train_steps_per_second": 0.244 } ], "logging_steps": 10, "max_steps": 312, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 8.46080087312302e+16, "train_batch_size": 4, "trial_name": null, "trial_params": null }