{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.22130013831258644, "eval_steps": 50, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0011065006915629322, "eval_loss": 0.636605441570282, "eval_runtime": 17.8412, "eval_samples_per_second": 21.355, "eval_steps_per_second": 10.706, "step": 1 }, { "epoch": 0.011065006915629323, "grad_norm": 0.7727070450782776, "learning_rate": 0.0002, "loss": 0.4613, "step": 10 }, { "epoch": 0.022130013831258646, "grad_norm": 0.5235627293586731, "learning_rate": 0.0002, "loss": 0.4515, "step": 20 }, { "epoch": 0.03319502074688797, "grad_norm": 0.43008068203926086, "learning_rate": 0.0002, "loss": 0.3421, "step": 30 }, { "epoch": 0.04426002766251729, "grad_norm": 0.6640490889549255, "learning_rate": 0.0002, "loss": 0.341, "step": 40 }, { "epoch": 0.05532503457814661, "grad_norm": 0.5494036078453064, "learning_rate": 0.0002, "loss": 0.3486, "step": 50 }, { "epoch": 0.05532503457814661, "eval_loss": 0.3021501302719116, "eval_runtime": 15.8917, "eval_samples_per_second": 23.975, "eval_steps_per_second": 12.019, "step": 50 }, { "epoch": 0.06639004149377593, "grad_norm": 0.4756554961204529, "learning_rate": 0.0002, "loss": 0.304, "step": 60 }, { "epoch": 0.07745504840940526, "grad_norm": 0.4401240646839142, "learning_rate": 0.0002, "loss": 0.2755, "step": 70 }, { "epoch": 0.08852005532503458, "grad_norm": 0.8257670402526855, "learning_rate": 0.0002, "loss": 0.2915, "step": 80 }, { "epoch": 0.0995850622406639, "grad_norm": 0.532486617565155, "learning_rate": 0.0002, "loss": 0.2963, "step": 90 }, { "epoch": 0.11065006915629322, "grad_norm": 0.7195320129394531, "learning_rate": 0.0002, "loss": 0.3226, "step": 100 }, { "epoch": 0.11065006915629322, "eval_loss": 0.29097670316696167, "eval_runtime": 15.8559, "eval_samples_per_second": 24.029, "eval_steps_per_second": 12.046, "step": 100 }, { "epoch": 0.12171507607192254, "grad_norm": 0.49127256870269775, "learning_rate": 0.0002, "loss": 0.2935, "step": 110 }, { "epoch": 0.13278008298755187, "grad_norm": 0.6152183413505554, "learning_rate": 0.0002, "loss": 0.306, "step": 120 }, { "epoch": 0.14384508990318118, "grad_norm": 0.4972716271877289, "learning_rate": 0.0002, "loss": 0.3016, "step": 130 }, { "epoch": 0.15491009681881052, "grad_norm": 0.40710729360580444, "learning_rate": 0.0002, "loss": 0.3087, "step": 140 }, { "epoch": 0.16597510373443983, "grad_norm": 0.31315892934799194, "learning_rate": 0.0002, "loss": 0.2556, "step": 150 }, { "epoch": 0.16597510373443983, "eval_loss": 0.2839745581150055, "eval_runtime": 15.865, "eval_samples_per_second": 24.015, "eval_steps_per_second": 12.039, "step": 150 }, { "epoch": 0.17704011065006917, "grad_norm": 0.5134880542755127, "learning_rate": 0.0002, "loss": 0.2988, "step": 160 }, { "epoch": 0.18810511756569848, "grad_norm": 0.5094125866889954, "learning_rate": 0.0002, "loss": 0.248, "step": 170 }, { "epoch": 0.1991701244813278, "grad_norm": 0.6164352297782898, "learning_rate": 0.0002, "loss": 0.2982, "step": 180 }, { "epoch": 0.21023513139695713, "grad_norm": 0.4869607090950012, "learning_rate": 0.0002, "loss": 0.2838, "step": 190 }, { "epoch": 0.22130013831258644, "grad_norm": 0.47990214824676514, "learning_rate": 0.0002, "loss": 0.3091, "step": 200 }, { "epoch": 0.22130013831258644, "eval_loss": 0.2770348787307739, "eval_runtime": 15.9233, "eval_samples_per_second": 23.927, "eval_steps_per_second": 11.995, "step": 200 } ], "logging_steps": 10, "max_steps": 200, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.206523358530765e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }