{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9980636237897649,
  "eval_steps": 23,
  "global_step": 451,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.05089903181189488,
      "grad_norm": 0.7468199729919434,
      "learning_rate": 0.001825741858350554,
      "loss": 1.4222,
      "step": 23
    },
    {
      "epoch": 0.10179806362378976,
      "grad_norm": 0.33098104596138,
      "learning_rate": 0.0008451542547285166,
      "loss": 1.2833,
      "step": 46
    },
    {
      "epoch": 0.15269709543568466,
      "grad_norm": 0.3552784323692322,
      "learning_rate": 0.0006262242910851496,
      "loss": 1.095,
      "step": 69
    },
    {
      "epoch": 0.20359612724757953,
      "grad_norm": 0.32930678129196167,
      "learning_rate": 0.0005198752449100363,
      "loss": 1.0648,
      "step": 92
    },
    {
      "epoch": 0.2544951590594744,
      "grad_norm": 0.32375234365463257,
      "learning_rate": 0.0004540766091864998,
      "loss": 1.0184,
      "step": 115
    },
    {
      "epoch": 0.3053941908713693,
      "grad_norm": 0.37444040179252625,
      "learning_rate": 0.0004082482904638631,
      "loss": 1.0037,
      "step": 138
    },
    {
      "epoch": 0.3562932226832642,
      "grad_norm": 0.398503839969635,
      "learning_rate": 0.00037397879600338285,
      "loss": 0.9613,
      "step": 161
    },
    {
      "epoch": 0.40719225449515906,
      "grad_norm": 0.36333534121513367,
      "learning_rate": 0.00034710506725031166,
      "loss": 0.9395,
      "step": 184
    },
    {
      "epoch": 0.4580912863070539,
      "grad_norm": 0.3362521231174469,
      "learning_rate": 0.0003253000243161777,
      "loss": 0.929,
      "step": 207
    },
    {
      "epoch": 0.5089903181189488,
      "grad_norm": 0.3286592960357666,
      "learning_rate": 0.0003071475584169756,
      "loss": 0.9067,
      "step": 230
    },
    {
      "epoch": 0.5598893499308437,
      "grad_norm": 0.37335479259490967,
      "learning_rate": 0.0002917299829957891,
      "loss": 0.8955,
      "step": 253
    },
    {
      "epoch": 0.6107883817427386,
      "grad_norm": 0.3964427411556244,
      "learning_rate": 0.0002784230231948523,
      "loss": 0.8665,
      "step": 276
    },
    {
      "epoch": 0.6616874135546335,
      "grad_norm": 0.39572906494140625,
      "learning_rate": 0.0002667852642561041,
      "loss": 0.8622,
      "step": 299
    },
    {
      "epoch": 0.7125864453665284,
      "grad_norm": 0.4889402389526367,
      "learning_rate": 0.0002564945880212886,
      "loss": 0.8716,
      "step": 322
    },
    {
      "epoch": 0.7634854771784232,
      "grad_norm": 0.3482915163040161,
      "learning_rate": 0.00024730968341474897,
      "loss": 0.8326,
      "step": 345
    },
    {
      "epoch": 0.8143845089903181,
      "grad_norm": 0.4124608337879181,
      "learning_rate": 0.00023904572186687873,
      "loss": 0.8199,
      "step": 368
    },
    {
      "epoch": 0.865283540802213,
      "grad_norm": 0.3616255819797516,
      "learning_rate": 0.00023155842232374464,
      "loss": 0.8103,
      "step": 391
    },
    {
      "epoch": 0.9161825726141078,
      "grad_norm": 0.42308276891708374,
      "learning_rate": 0.00022473328748774736,
      "loss": 0.8067,
      "step": 414
    },
    {
      "epoch": 0.9670816044260028,
      "grad_norm": 0.4363687038421631,
      "learning_rate": 0.00021847813825958586,
      "loss": 0.8163,
      "step": 437
    },
    {
      "epoch": 0.9980636237897649,
      "step": 451,
      "total_flos": 4.580565166436909e+18,
      "train_loss": 0.9513344627261955,
      "train_runtime": 3338.9072,
      "train_samples_per_second": 17.323,
      "train_steps_per_second": 0.135
    }
  ],
  "logging_steps": 23,
  "max_steps": 451,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 23,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 4.580565166436909e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}