{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 90.0,
  "eval_steps": 500,
  "global_step": 360,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.5,
      "grad_norm": 0.5093985795974731,
      "learning_rate": 0.0009980973490458728,
      "loss": 2.5467,
      "step": 10
    },
    {
      "epoch": 5.0,
      "grad_norm": 0.27049773931503296,
      "learning_rate": 0.000992403876506104,
      "loss": 1.618,
      "step": 20
    },
    {
      "epoch": 7.5,
      "grad_norm": 0.22319507598876953,
      "learning_rate": 0.0009829629131445341,
      "loss": 1.3933,
      "step": 30
    },
    {
      "epoch": 10.0,
      "grad_norm": 0.2564426362514496,
      "learning_rate": 0.0009698463103929542,
      "loss": 1.2537,
      "step": 40
    },
    {
      "epoch": 12.5,
      "grad_norm": 0.29759547114372253,
      "learning_rate": 0.0009531538935183251,
      "loss": 1.1368,
      "step": 50
    },
    {
      "epoch": 15.0,
      "grad_norm": 0.39310407638549805,
      "learning_rate": 0.0009330127018922195,
      "loss": 1.0137,
      "step": 60
    },
    {
      "epoch": 17.5,
      "grad_norm": 0.4282013475894928,
      "learning_rate": 0.0009095760221444959,
      "loss": 0.9095,
      "step": 70
    },
    {
      "epoch": 20.0,
      "grad_norm": 0.5433480143547058,
      "learning_rate": 0.000883022221559489,
      "loss": 0.8259,
      "step": 80
    },
    {
      "epoch": 22.5,
      "grad_norm": 0.5130556225776672,
      "learning_rate": 0.0008535533905932737,
      "loss": 0.7432,
      "step": 90
    },
    {
      "epoch": 25.0,
      "grad_norm": 0.5033867359161377,
      "learning_rate": 0.0008213938048432696,
      "loss": 0.6756,
      "step": 100
    },
    {
      "epoch": 27.5,
      "grad_norm": 0.5322891473770142,
      "learning_rate": 0.0007867882181755231,
      "loss": 0.6123,
      "step": 110
    },
    {
      "epoch": 30.0,
      "grad_norm": 0.5658751726150513,
      "learning_rate": 0.00075,
      "loss": 0.567,
      "step": 120
    },
    {
      "epoch": 32.5,
      "grad_norm": 0.5688820481300354,
      "learning_rate": 0.0007113091308703497,
      "loss": 0.5171,
      "step": 130
    },
    {
      "epoch": 35.0,
      "grad_norm": 0.48720425367355347,
      "learning_rate": 0.0006710100716628344,
      "loss": 0.4825,
      "step": 140
    },
    {
      "epoch": 37.5,
      "grad_norm": 0.5325213074684143,
      "learning_rate": 0.0006294095225512603,
      "loss": 0.4498,
      "step": 150
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.527807891368866,
      "learning_rate": 0.0005868240888334653,
      "loss": 0.4184,
      "step": 160
    },
    {
      "epoch": 42.5,
      "grad_norm": 0.4882418215274811,
      "learning_rate": 0.0005435778713738292,
      "loss": 0.3946,
      "step": 170
    },
    {
      "epoch": 45.0,
      "grad_norm": 0.49641069769859314,
      "learning_rate": 0.0005,
      "loss": 0.3706,
      "step": 180
    },
    {
      "epoch": 47.5,
      "grad_norm": 0.5216576457023621,
      "learning_rate": 0.00045642212862617086,
      "loss": 0.3529,
      "step": 190
    },
    {
      "epoch": 50.0,
      "grad_norm": 0.5289739966392517,
      "learning_rate": 0.00041317591116653486,
      "loss": 0.3334,
      "step": 200
    },
    {
      "epoch": 52.5,
      "grad_norm": 0.49065396189689636,
      "learning_rate": 0.0003705904774487396,
      "loss": 0.3166,
      "step": 210
    },
    {
      "epoch": 55.0,
      "grad_norm": 0.4922383725643158,
      "learning_rate": 0.0003289899283371657,
      "loss": 0.3086,
      "step": 220
    },
    {
      "epoch": 57.5,
      "grad_norm": 0.42886731028556824,
      "learning_rate": 0.0002886908691296504,
      "loss": 0.2894,
      "step": 230
    },
    {
      "epoch": 60.0,
      "grad_norm": 0.42780551314353943,
      "learning_rate": 0.0002500000000000001,
      "loss": 0.286,
      "step": 240
    },
    {
      "epoch": 62.5,
      "grad_norm": 0.4010085463523865,
      "learning_rate": 0.00021321178182447708,
      "loss": 0.2775,
      "step": 250
    },
    {
      "epoch": 65.0,
      "grad_norm": 0.4402850866317749,
      "learning_rate": 0.0001786061951567303,
      "loss": 0.2714,
      "step": 260
    },
    {
      "epoch": 67.5,
      "grad_norm": 0.4067270755767822,
      "learning_rate": 0.00014644660940672628,
      "loss": 0.2664,
      "step": 270
    },
    {
      "epoch": 70.0,
      "grad_norm": 0.43572983145713806,
      "learning_rate": 0.00011697777844051105,
      "loss": 0.2582,
      "step": 280
    },
    {
      "epoch": 72.5,
      "grad_norm": 0.3881024420261383,
      "learning_rate": 9.042397785550405e-05,
      "loss": 0.2549,
      "step": 290
    },
    {
      "epoch": 75.0,
      "grad_norm": 0.3768531382083893,
      "learning_rate": 6.698729810778065e-05,
      "loss": 0.251,
      "step": 300
    },
    {
      "epoch": 77.5,
      "grad_norm": 0.3921433389186859,
      "learning_rate": 4.684610648167503e-05,
      "loss": 0.2509,
      "step": 310
    },
    {
      "epoch": 80.0,
      "grad_norm": 0.38551065325737,
      "learning_rate": 3.0153689607045842e-05,
      "loss": 0.2466,
      "step": 320
    },
    {
      "epoch": 82.5,
      "grad_norm": 0.3801429867744446,
      "learning_rate": 1.70370868554659e-05,
      "loss": 0.2462,
      "step": 330
    },
    {
      "epoch": 85.0,
      "grad_norm": 0.39348262548446655,
      "learning_rate": 7.59612349389599e-06,
      "loss": 0.2468,
      "step": 340
    },
    {
      "epoch": 87.5,
      "grad_norm": 0.3822017014026642,
      "learning_rate": 1.9026509541272275e-06,
      "loss": 0.2477,
      "step": 350
    },
    {
      "epoch": 90.0,
      "grad_norm": 0.3794040381908417,
      "learning_rate": 0.0,
      "loss": 0.2449,
      "step": 360
    }
  ],
  "logging_steps": 10,
  "max_steps": 360,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 90,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.43272328822784e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}