| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.0, |
| "eval_steps": 500, |
| "global_step": 80, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.31746031746031744, |
| "grad_norm": 3.1160410477431384, |
| "learning_rate": 2e-05, |
| "loss": 0.743, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.18026351928710938, |
| "step": 5, |
| "valid_targets_mean": 4387.2, |
| "valid_targets_min": 740 |
| }, |
| { |
| "epoch": 0.6349206349206349, |
| "grad_norm": 0.9623025342413126, |
| "learning_rate": 3.998096443163716e-05, |
| "loss": 0.6426, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.15755076706409454, |
| "step": 10, |
| "valid_targets_mean": 3791.7, |
| "valid_targets_min": 695 |
| }, |
| { |
| "epoch": 0.9523809523809523, |
| "grad_norm": 0.5400417708688194, |
| "learning_rate": 3.931851652578137e-05, |
| "loss": 0.5922, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1631372570991516, |
| "step": 15, |
| "valid_targets_mean": 4048.3, |
| "valid_targets_min": 1151 |
| }, |
| { |
| "epoch": 1.253968253968254, |
| "grad_norm": 0.4346375139896992, |
| "learning_rate": 3.774021666356444e-05, |
| "loss": 0.5513, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.13641172647476196, |
| "step": 20, |
| "valid_targets_mean": 3890.5, |
| "valid_targets_min": 1004 |
| }, |
| { |
| "epoch": 1.5714285714285714, |
| "grad_norm": 0.3583194659422831, |
| "learning_rate": 3.532088886237956e-05, |
| "loss": 0.5268, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.157705619931221, |
| "step": 25, |
| "valid_targets_mean": 4477.9, |
| "valid_targets_min": 656 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 0.2998700734251974, |
| "learning_rate": 3.217522858017442e-05, |
| "loss": 0.5194, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.12757813930511475, |
| "step": 30, |
| "valid_targets_mean": 4071.6, |
| "valid_targets_min": 773 |
| }, |
| { |
| "epoch": 2.1904761904761907, |
| "grad_norm": 0.354322163099566, |
| "learning_rate": 2.8452365234813992e-05, |
| "loss": 0.4999, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1116921454668045, |
| "step": 35, |
| "valid_targets_mean": 3261.8, |
| "valid_targets_min": 695 |
| }, |
| { |
| "epoch": 2.507936507936508, |
| "grad_norm": 0.2736861113965854, |
| "learning_rate": 2.4328792278762058e-05, |
| "loss": 0.476, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11490871757268906, |
| "step": 40, |
| "valid_targets_mean": 3865.1, |
| "valid_targets_min": 746 |
| }, |
| { |
| "epoch": 2.825396825396825, |
| "grad_norm": 0.26810971560154995, |
| "learning_rate": 2e-05, |
| "loss": 0.4743, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09590233117341995, |
| "step": 45, |
| "valid_targets_mean": 3056.7, |
| "valid_targets_min": 726 |
| }, |
| { |
| "epoch": 3.126984126984127, |
| "grad_norm": 0.26045065338871903, |
| "learning_rate": 1.5671207721237945e-05, |
| "loss": 0.4803, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1086278185248375, |
| "step": 50, |
| "valid_targets_mean": 3327.7, |
| "valid_targets_min": 746 |
| }, |
| { |
| "epoch": 3.4444444444444446, |
| "grad_norm": 0.2646056254599043, |
| "learning_rate": 1.1547634765186016e-05, |
| "loss": 0.4741, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10994982719421387, |
| "step": 55, |
| "valid_targets_mean": 3503.4, |
| "valid_targets_min": 569 |
| }, |
| { |
| "epoch": 3.761904761904762, |
| "grad_norm": 0.23864309413170337, |
| "learning_rate": 7.824771419825588e-06, |
| "loss": 0.4612, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.09653226286172867, |
| "step": 60, |
| "valid_targets_mean": 3644.4, |
| "valid_targets_min": 1042 |
| }, |
| { |
| "epoch": 4.063492063492063, |
| "grad_norm": 0.2652889879375569, |
| "learning_rate": 4.679111137620442e-06, |
| "loss": 0.4606, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.1475735604763031, |
| "step": 65, |
| "valid_targets_mean": 4244.4, |
| "valid_targets_min": 2068 |
| }, |
| { |
| "epoch": 4.380952380952381, |
| "grad_norm": 0.28736207202393793, |
| "learning_rate": 2.259783336435566e-06, |
| "loss": 0.467, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10715197026729584, |
| "step": 70, |
| "valid_targets_mean": 3558.8, |
| "valid_targets_min": 733 |
| }, |
| { |
| "epoch": 4.698412698412699, |
| "grad_norm": 0.26387185108781563, |
| "learning_rate": 6.814834742186361e-07, |
| "loss": 0.459, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.10606744885444641, |
| "step": 75, |
| "valid_targets_mean": 3355.8, |
| "valid_targets_min": 761 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.2624385775505968, |
| "learning_rate": 1.9035568362844037e-08, |
| "loss": 0.4442, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11343716084957123, |
| "step": 80, |
| "valid_targets_mean": 3113.4, |
| "valid_targets_min": 587 |
| }, |
| { |
| "epoch": 5.0, |
| "loss_nan_ranks": 0, |
| "loss_rank_avg": 0.11343716084957123, |
| "step": 80, |
| "total_flos": 1.3290499486829773e+17, |
| "train_loss": 0.5170090794563293, |
| "train_runtime": 1718.7633, |
| "train_samples_per_second": 2.909, |
| "train_steps_per_second": 0.047, |
| "valid_targets_mean": 3113.4, |
| "valid_targets_min": 587 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 80, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": false, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.3290499486829773e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|