File size: 1,697 Bytes

db7f9c2
 
 
 
 
dacbbef
db7f9c2
 
 
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
 
dacbbef
db7f9c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dacbbef
db7f9c2

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.3847486674785614,
      "learning_rate": 0.0002,
      "loss": 0.774,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.48815852403640747,
      "learning_rate": 0.0002,
      "loss": 0.3868,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.574570894241333,
      "learning_rate": 0.0002,
      "loss": 0.2431,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.3991849720478058,
      "learning_rate": 0.0002,
      "loss": 0.1852,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.34638234972953796,
      "learning_rate": 0.0002,
      "loss": 0.1582,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.32800230383872986,
      "learning_rate": 0.0002,
      "loss": 0.1419,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.028449026965504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}