File size: 2,822 Bytes

3ae8bb2
 
 
8c178d2
3ae8bb2
8c178d2
3ae8bb2
 
 
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
3ae8bb2
 
 
8c178d2
7947b64
8c178d2
7947b64
8c178d2
 
 
 
7947b64
8c178d2
7947b64
8c178d2
 
 
 
 
7947b64
 
 
 
 
3ae8bb2
 
 
8c178d2
3ae8bb2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7947b64
3ae8bb2

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.84,
  "eval_steps": 500,
  "global_step": 24,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "grad_norm": 1.0203443765640259,
      "learning_rate": 0.00018333333333333334,
      "loss": 9.4828,
      "step": 2
    },
    {
      "epoch": 0.64,
      "grad_norm": 1.6231805086135864,
      "learning_rate": 0.0001666666666666667,
      "loss": 9.1586,
      "step": 4
    },
    {
      "epoch": 0.96,
      "grad_norm": 2.2771148681640625,
      "learning_rate": 0.00015000000000000001,
      "loss": 8.6907,
      "step": 6
    },
    {
      "epoch": 1.28,
      "grad_norm": 2.1158745288848877,
      "learning_rate": 0.00013333333333333334,
      "loss": 8.2535,
      "step": 8
    },
    {
      "epoch": 1.6,
      "grad_norm": 1.86466646194458,
      "learning_rate": 0.00011666666666666668,
      "loss": 7.8775,
      "step": 10
    },
    {
      "epoch": 1.92,
      "grad_norm": 1.410565972328186,
      "learning_rate": 0.0001,
      "loss": 7.5684,
      "step": 12
    },
    {
      "epoch": 2.24,
      "grad_norm": 0.9350765347480774,
      "learning_rate": 8.333333333333334e-05,
      "loss": 7.4331,
      "step": 14
    },
    {
      "epoch": 2.56,
      "grad_norm": 0.9051127433776855,
      "learning_rate": 6.666666666666667e-05,
      "loss": 7.3164,
      "step": 16
    },
    {
      "epoch": 2.88,
      "grad_norm": 0.8144361972808838,
      "learning_rate": 5e-05,
      "loss": 7.2426,
      "step": 18
    },
    {
      "epoch": 3.2,
      "grad_norm": 1.0328956842422485,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 7.2851,
      "step": 20
    },
    {
      "epoch": 3.52,
      "grad_norm": 0.9490511417388916,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 7.213,
      "step": 22
    },
    {
      "epoch": 3.84,
      "grad_norm": 0.826574981212616,
      "learning_rate": 0.0,
      "loss": 7.1788,
      "step": 24
    },
    {
      "epoch": 3.84,
      "step": 24,
      "total_flos": 107168334014400.0,
      "train_loss": 7.891708850860596,
      "train_runtime": 637.8922,
      "train_samples_per_second": 0.627,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 2,
  "max_steps": 24,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 107168334014400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}