{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9972602739726028,
  "eval_steps": 500,
  "global_step": 182,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005479452054794521,
      "grad_norm": 26.082090377807617,
      "learning_rate": 1.0526315789473684e-05,
      "loss": 1.7702,
      "step": 1
    },
    {
      "epoch": 0.0273972602739726,
      "grad_norm": 1.9692896604537964,
      "learning_rate": 5.2631578947368424e-05,
      "loss": 1.7256,
      "step": 5
    },
    {
      "epoch": 0.0547945205479452,
      "grad_norm": 1.138301968574524,
      "learning_rate": 0.00010526315789473685,
      "loss": 1.6367,
      "step": 10
    },
    {
      "epoch": 0.0821917808219178,
      "grad_norm": 1.4542856216430664,
      "learning_rate": 0.00015789473684210527,
      "loss": 1.5304,
      "step": 15
    },
    {
      "epoch": 0.1095890410958904,
      "grad_norm": 1.3035956621170044,
      "learning_rate": 0.0001999814270411335,
      "loss": 1.4406,
      "step": 20
    },
    {
      "epoch": 0.136986301369863,
      "grad_norm": 0.91375333070755,
      "learning_rate": 0.00019933209759891317,
      "loss": 1.3465,
      "step": 25
    },
    {
      "epoch": 0.1643835616438356,
      "grad_norm": 0.6928793787956238,
      "learning_rate": 0.00019776100779878345,
      "loss": 1.2954,
      "step": 30
    },
    {
      "epoch": 0.1917808219178082,
      "grad_norm": 0.412971556186676,
      "learning_rate": 0.00019528273669757972,
      "loss": 1.2706,
      "step": 35
    },
    {
      "epoch": 0.2191780821917808,
      "grad_norm": 0.44875410199165344,
      "learning_rate": 0.00019192028161552847,
      "loss": 1.2453,
      "step": 40
    },
    {
      "epoch": 0.2465753424657534,
      "grad_norm": 0.2584894001483917,
      "learning_rate": 0.0001877048447307252,
      "loss": 1.2217,
      "step": 45
    },
    {
      "epoch": 0.273972602739726,
      "grad_norm": 0.2488676905632019,
      "learning_rate": 0.00018267554353596025,
      "loss": 1.1993,
      "step": 50
    },
    {
      "epoch": 0.3013698630136986,
      "grad_norm": 0.24647608399391174,
      "learning_rate": 0.00017687904784473188,
      "loss": 1.1941,
      "step": 55
    },
    {
      "epoch": 0.3287671232876712,
      "grad_norm": 0.3073161840438843,
      "learning_rate": 0.00017036914671487852,
      "loss": 1.1967,
      "step": 60
    },
    {
      "epoch": 0.3561643835616438,
      "grad_norm": 0.2503437399864197,
      "learning_rate": 0.00016320624930859904,
      "loss": 1.1889,
      "step": 65
    },
    {
      "epoch": 0.3835616438356164,
      "grad_norm": 0.2762184739112854,
      "learning_rate": 0.00015545682432067067,
      "loss": 1.17,
      "step": 70
    },
    {
      "epoch": 0.410958904109589,
      "grad_norm": 0.25446659326553345,
      "learning_rate": 0.00014719278317673655,
      "loss": 1.1707,
      "step": 75
    },
    {
      "epoch": 0.4383561643835616,
      "grad_norm": 0.2860565483570099,
      "learning_rate": 0.00013849081272532544,
      "loss": 1.1699,
      "step": 80
    },
    {
      "epoch": 0.4657534246575342,
      "grad_norm": 0.26391202211380005,
      "learning_rate": 0.00012943166361594242,
      "loss": 1.1557,
      "step": 85
    },
    {
      "epoch": 0.4931506849315068,
      "grad_norm": 0.27276208996772766,
      "learning_rate": 0.00012009940096678452,
      "loss": 1.1546,
      "step": 90
    },
    {
      "epoch": 0.5205479452054794,
      "grad_norm": 0.3492254912853241,
      "learning_rate": 0.00011058062427557229,
      "loss": 1.1549,
      "step": 95
    },
    {
      "epoch": 0.547945205479452,
      "grad_norm": 0.2584497928619385,
      "learning_rate": 0.00010096366381239808,
      "loss": 1.1406,
      "step": 100
    },
    {
      "epoch": 0.5753424657534246,
      "grad_norm": 0.3101947605609894,
      "learning_rate": 9.133776095173015e-05,
      "loss": 1.1437,
      "step": 105
    },
    {
      "epoch": 0.6027397260273972,
      "grad_norm": 0.33753615617752075,
      "learning_rate": 8.179224004974857e-05,
      "loss": 1.131,
      "step": 110
    },
    {
      "epoch": 0.6301369863013698,
      "grad_norm": 0.28552067279815674,
      "learning_rate": 7.24156795516461e-05,
      "loss": 1.1375,
      "step": 115
    },
    {
      "epoch": 0.6575342465753424,
      "grad_norm": 0.2846600413322449,
      "learning_rate": 6.32950900206708e-05,
      "loss": 1.1499,
      "step": 120
    },
    {
      "epoch": 0.684931506849315,
      "grad_norm": 0.2724830210208893,
      "learning_rate": 5.451510671645807e-05,
      "loss": 1.1364,
      "step": 125
    },
    {
      "epoch": 0.7123287671232876,
      "grad_norm": 0.23818813264369965,
      "learning_rate": 4.61572042151878e-05,
      "loss": 1.1307,
      "step": 130
    },
    {
      "epoch": 0.7397260273972602,
      "grad_norm": 0.3082185685634613,
      "learning_rate": 3.829894035956306e-05,
      "loss": 1.1422,
      "step": 135
    },
    {
      "epoch": 0.7671232876712328,
      "grad_norm": 0.26166653633117676,
      "learning_rate": 3.101323655443882e-05,
      "loss": 1.1303,
      "step": 140
    },
    {
      "epoch": 0.7945205479452054,
      "grad_norm": 0.2606567442417145,
      "learning_rate": 2.4367701086656624e-05,
      "loss": 1.1238,
      "step": 145
    },
    {
      "epoch": 0.821917808219178,
      "grad_norm": 0.27404358983039856,
      "learning_rate": 1.8424001748393905e-05,
      "loss": 1.1377,
      "step": 150
    },
    {
      "epoch": 0.8493150684931506,
      "grad_norm": 0.24582931399345398,
      "learning_rate": 1.3237293585821786e-05,
      "loss": 1.1381,
      "step": 155
    },
    {
      "epoch": 0.8767123287671232,
      "grad_norm": 0.25582000613212585,
      "learning_rate": 8.855707083324183e-06,
      "loss": 1.1419,
      "step": 160
    },
    {
      "epoch": 0.9041095890410958,
      "grad_norm": 0.2417668253183365,
      "learning_rate": 5.319901532714877e-06,
      "loss": 1.1263,
      "step": 165
    },
    {
      "epoch": 0.9315068493150684,
      "grad_norm": 0.23573410511016846,
      "learning_rate": 2.66268773199988e-06,
      "loss": 1.1412,
      "step": 170
    },
    {
      "epoch": 0.958904109589041,
      "grad_norm": 0.23247532546520233,
      "learning_rate": 9.087235148824368e-07,
      "loss": 1.1378,
      "step": 175
    },
    {
      "epoch": 0.9863013698630136,
      "grad_norm": 0.2713688015937805,
      "learning_rate": 7.428493637002821e-08,
      "loss": 1.1394,
      "step": 180
    },
    {
      "epoch": 0.9972602739726028,
      "eval_loss": 1.641967535018921,
      "eval_runtime": 0.6331,
      "eval_samples_per_second": 14.215,
      "eval_steps_per_second": 1.579,
      "step": 182
    },
    {
      "epoch": 0.9972602739726028,
      "step": 182,
      "total_flos": 8.060051304564654e+17,
      "train_loss": 1.2164833742183643,
      "train_runtime": 689.7556,
      "train_samples_per_second": 50.685,
      "train_steps_per_second": 0.264
    }
  ],
  "logging_steps": 5,
  "max_steps": 182,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 8.060051304564654e+17,
  "train_batch_size": 12,
  "trial_name": null,
  "trial_params": null
}