{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0010534743584341157, "eval_steps": 10, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 2.1069487168682315e-05, "eval_loss": 3.2631447315216064, "eval_runtime": 573.1084, "eval_samples_per_second": 34.871, "eval_steps_per_second": 17.436, "step": 1 }, { "epoch": 0.00010534743584341157, "grad_norm": 0.18382155895233154, "learning_rate": 5e-05, "loss": 3.2617, "step": 5 }, { "epoch": 0.00021069487168682313, "grad_norm": 0.23125022649765015, "learning_rate": 0.0001, "loss": 3.2338, "step": 10 }, { "epoch": 0.00021069487168682313, "eval_loss": 3.2458643913269043, "eval_runtime": 569.477, "eval_samples_per_second": 35.094, "eval_steps_per_second": 17.548, "step": 10 }, { "epoch": 0.0003160423075302347, "grad_norm": 0.49949705600738525, "learning_rate": 9.619397662556435e-05, "loss": 3.1384, "step": 15 }, { "epoch": 0.00042138974337364626, "grad_norm": 0.5684022903442383, "learning_rate": 8.535533905932738e-05, "loss": 3.0211, "step": 20 }, { "epoch": 0.00042138974337364626, "eval_loss": 3.1317827701568604, "eval_runtime": 567.3693, "eval_samples_per_second": 35.224, "eval_steps_per_second": 17.613, "step": 20 }, { "epoch": 0.0005267371792170579, "grad_norm": 0.6344699263572693, "learning_rate": 6.91341716182545e-05, "loss": 3.2487, "step": 25 }, { "epoch": 0.0006320846150604694, "grad_norm": 0.5269583463668823, "learning_rate": 5e-05, "loss": 3.103, "step": 30 }, { "epoch": 0.0006320846150604694, "eval_loss": 3.084909677505493, "eval_runtime": 569.2849, "eval_samples_per_second": 35.105, "eval_steps_per_second": 17.554, "step": 30 }, { "epoch": 0.000737432050903881, "grad_norm": 1.1395690441131592, "learning_rate": 3.086582838174551e-05, "loss": 3.0862, "step": 35 }, { "epoch": 0.0008427794867472925, "grad_norm": 0.6051152944564819, "learning_rate": 1.4644660940672627e-05, "loss": 2.9749, "step": 40 }, { "epoch": 0.0008427794867472925, "eval_loss": 3.0690979957580566, "eval_runtime": 569.8301, "eval_samples_per_second": 35.072, "eval_steps_per_second": 17.537, "step": 40 }, { "epoch": 0.0009481269225907041, "grad_norm": 0.6785821318626404, "learning_rate": 3.8060233744356633e-06, "loss": 2.9934, "step": 45 }, { "epoch": 0.0010534743584341157, "grad_norm": 0.7350694537162781, "learning_rate": 0.0, "loss": 3.1248, "step": 50 }, { "epoch": 0.0010534743584341157, "eval_loss": 3.0668978691101074, "eval_runtime": 568.6446, "eval_samples_per_second": 35.145, "eval_steps_per_second": 17.573, "step": 50 } ], "logging_steps": 5, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 13, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1801107479199744.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }