{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07948599059415778, "eval_steps": 500, "global_step": 4800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001655958137378287, "grad_norm": 0.19597935676574707, "learning_rate": 0.00019966890756553004, "loss": 2.1972, "step": 100 }, { "epoch": 0.003311916274756574, "grad_norm": 0.25808241963386536, "learning_rate": 0.00019933771577352244, "loss": 1.9677, "step": 200 }, { "epoch": 0.0049678744121348616, "grad_norm": 0.23811133205890656, "learning_rate": 0.00019900652398151486, "loss": 1.9341, "step": 300 }, { "epoch": 0.006623832549513148, "grad_norm": 0.26714324951171875, "learning_rate": 0.00019867533218950728, "loss": 1.915, "step": 400 }, { "epoch": 0.008279790686891435, "grad_norm": 0.23645658791065216, "learning_rate": 0.0001983441403974997, "loss": 1.8916, "step": 500 }, { "epoch": 0.009935748824269723, "grad_norm": 0.2878512740135193, "learning_rate": 0.00019801294860549213, "loss": 1.9003, "step": 600 }, { "epoch": 0.01159170696164801, "grad_norm": 0.2687942087650299, "learning_rate": 0.00019768175681348456, "loss": 1.876, "step": 700 }, { "epoch": 0.013247665099026296, "grad_norm": 0.2722982168197632, "learning_rate": 0.00019735056502147698, "loss": 1.9004, "step": 800 }, { "epoch": 0.014903623236404583, "grad_norm": 0.25342944264411926, "learning_rate": 0.0001970193732294694, "loss": 1.8947, "step": 900 }, { "epoch": 0.01655958137378287, "grad_norm": 0.2900806963443756, "learning_rate": 0.0001966881814374618, "loss": 1.8795, "step": 1000 }, { "epoch": 0.018215539511161158, "grad_norm": 0.24855603277683258, "learning_rate": 0.00019635698964545422, "loss": 1.8657, "step": 1100 }, { "epoch": 0.019871497648539446, "grad_norm": 0.25272709131240845, "learning_rate": 0.00019602579785344665, "loss": 1.8687, "step": 1200 }, { "epoch": 0.02152745578591773, "grad_norm": 0.31408464908599854, "learning_rate": 0.00019569460606143904, "loss": 1.8332, "step": 1300 }, { "epoch": 0.02318341392329602, "grad_norm": 0.26880863308906555, "learning_rate": 0.00019536341426943147, "loss": 1.8603, "step": 1400 }, { "epoch": 0.024839372060674308, "grad_norm": 0.2371913194656372, "learning_rate": 0.0001950322224774239, "loss": 1.8273, "step": 1500 }, { "epoch": 0.026495330198052593, "grad_norm": 0.2510370910167694, "learning_rate": 0.00019470103068541632, "loss": 1.8524, "step": 1600 }, { "epoch": 0.02815128833543088, "grad_norm": 0.26143962144851685, "learning_rate": 0.00019436983889340874, "loss": 1.8543, "step": 1700 }, { "epoch": 0.029807246472809166, "grad_norm": 0.2438499480485916, "learning_rate": 0.00019403864710140116, "loss": 1.8411, "step": 1800 }, { "epoch": 0.03146320461018746, "grad_norm": 0.2666601836681366, "learning_rate": 0.0001937074553093936, "loss": 1.8548, "step": 1900 }, { "epoch": 0.03311916274756574, "grad_norm": 0.2752065062522888, "learning_rate": 0.000193376263517386, "loss": 1.8534, "step": 2000 }, { "epoch": 0.03477512088494403, "grad_norm": 0.24849963188171387, "learning_rate": 0.00019304507172537844, "loss": 1.8476, "step": 2100 }, { "epoch": 0.036431079022322316, "grad_norm": 0.2809307277202606, "learning_rate": 0.00019271387993337083, "loss": 1.8505, "step": 2200 }, { "epoch": 0.038087037159700604, "grad_norm": 0.23209506273269653, "learning_rate": 0.00019238268814136326, "loss": 1.864, "step": 2300 }, { "epoch": 0.03974299529707889, "grad_norm": 0.25108611583709717, "learning_rate": 0.00019205149634935568, "loss": 1.8411, "step": 2400 
}, { "epoch": 0.041398953434457174, "grad_norm": 0.2639986276626587, "learning_rate": 0.0001917203045573481, "loss": 1.8456, "step": 2500 }, { "epoch": 0.04305491157183546, "grad_norm": 0.2927249073982239, "learning_rate": 0.00019138911276534053, "loss": 1.8361, "step": 2600 }, { "epoch": 0.04471086970921375, "grad_norm": 0.2660035192966461, "learning_rate": 0.00019105792097333295, "loss": 1.8352, "step": 2700 }, { "epoch": 0.04636682784659204, "grad_norm": 0.23683211207389832, "learning_rate": 0.00019072672918132538, "loss": 1.824, "step": 2800 }, { "epoch": 0.04802278598397033, "grad_norm": 0.7001804709434509, "learning_rate": 0.00019039553738931777, "loss": 1.82, "step": 2900 }, { "epoch": 0.049678744121348616, "grad_norm": 0.2500315010547638, "learning_rate": 0.0001900643455973102, "loss": 1.862, "step": 3000 }, { "epoch": 0.0513347022587269, "grad_norm": 0.2476750761270523, "learning_rate": 0.00018973315380530262, "loss": 1.8247, "step": 3100 }, { "epoch": 0.052990660396105185, "grad_norm": 0.23064357042312622, "learning_rate": 0.00018940196201329502, "loss": 1.8685, "step": 3200 }, { "epoch": 0.054646618533483474, "grad_norm": 0.2495209276676178, "learning_rate": 0.00018907077022128744, "loss": 1.8214, "step": 3300 }, { "epoch": 0.05630257667086176, "grad_norm": 0.25310614705085754, "learning_rate": 0.00018873957842927987, "loss": 1.84, "step": 3400 }, { "epoch": 0.05795853480824005, "grad_norm": 0.24329665303230286, "learning_rate": 0.0001884083866372723, "loss": 1.7982, "step": 3500 }, { "epoch": 0.05961449294561833, "grad_norm": 0.25951218605041504, "learning_rate": 0.00018807719484526471, "loss": 1.8078, "step": 3600 }, { "epoch": 0.06127045108299662, "grad_norm": 0.24307559430599213, "learning_rate": 0.00018774600305325714, "loss": 1.8181, "step": 3700 }, { "epoch": 0.06292640922037492, "grad_norm": 0.27577558159828186, "learning_rate": 0.00018741481126124956, "loss": 1.8158, "step": 3800 }, { "epoch": 0.06458236735775319, "grad_norm": 0.21584127843379974, "learning_rate": 0.00018708361946924199, "loss": 1.8362, "step": 3900 }, { "epoch": 0.06623832549513148, "grad_norm": 0.2558760344982147, "learning_rate": 0.0001867524276772344, "loss": 1.8425, "step": 4000 }, { "epoch": 0.06789428363250977, "grad_norm": 0.2206682711839676, "learning_rate": 0.0001864212358852268, "loss": 1.8155, "step": 4100 }, { "epoch": 0.06955024176988805, "grad_norm": 0.23946842551231384, "learning_rate": 0.00018609004409321923, "loss": 1.8349, "step": 4200 }, { "epoch": 0.07120619990726634, "grad_norm": 0.22356823086738586, "learning_rate": 0.00018575885230121165, "loss": 1.8013, "step": 4300 }, { "epoch": 0.07286215804464463, "grad_norm": 0.28327444195747375, "learning_rate": 0.00018542766050920408, "loss": 1.8193, "step": 4400 }, { "epoch": 0.07451811618202292, "grad_norm": 0.259748637676239, "learning_rate": 0.0001850964687171965, "loss": 1.8401, "step": 4500 }, { "epoch": 0.07617407431940121, "grad_norm": 0.24509303271770477, "learning_rate": 0.00018476527692518893, "loss": 1.8313, "step": 4600 }, { "epoch": 0.0778300324567795, "grad_norm": 0.2799519896507263, "learning_rate": 0.00018443408513318132, "loss": 1.8437, "step": 4700 }, { "epoch": 0.07948599059415778, "grad_norm": 0.25356101989746094, "learning_rate": 0.00018410289334117375, "loss": 1.7989, "step": 4800 } ], "logging_steps": 100, "max_steps": 60388, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 800, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": 
false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.318952218329088e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }