File size: 2,808 Bytes

a9766b3
 
 
bba747b
a9766b3
bba747b
a9766b3
 
 
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
bba747b
 
 
 
a9766b3
 
 
 
bba747b
 
 
 
 
 
a9766b3
 
 
bba747b
a9766b3
 
 
bba747b
a9766b3

{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9993360523570142,
  "eval_steps": 500,
  "global_step": 1317,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.08,
      "grad_norm": 2.6666550636291504,
      "learning_rate": 1.5151515151515153e-05,
      "loss": 1.75,
      "step": 100
    },
    {
      "epoch": 0.15,
      "grad_norm": 1.9126778841018677,
      "learning_rate": 1.983794055463009e-05,
      "loss": 0.9131,
      "step": 200
    },
    {
      "epoch": 0.23,
      "grad_norm": 2.010279655456543,
      "learning_rate": 1.90244256701717e-05,
      "loss": 0.921,
      "step": 300
    },
    {
      "epoch": 0.3,
      "grad_norm": 1.661656141281128,
      "learning_rate": 1.7580334804873595e-05,
      "loss": 0.8664,
      "step": 400
    },
    {
      "epoch": 0.38,
      "grad_norm": 1.8409279584884644,
      "learning_rate": 1.5606572885773613e-05,
      "loss": 0.8562,
      "step": 500
    },
    {
      "epoch": 0.46,
      "grad_norm": 1.8635607957839966,
      "learning_rate": 1.324105526655396e-05,
      "loss": 0.8561,
      "step": 600
    },
    {
      "epoch": 0.53,
      "grad_norm": 1.4191073179244995,
      "learning_rate": 1.0649070980273363e-05,
      "loss": 0.8621,
      "step": 700
    },
    {
      "epoch": 0.61,
      "grad_norm": 1.4062063694000244,
      "learning_rate": 8.011733273733208e-06,
      "loss": 0.8523,
      "step": 800
    },
    {
      "epoch": 0.68,
      "grad_norm": 1.7386995553970337,
      "learning_rate": 5.51332443501349e-06,
      "loss": 0.8277,
      "step": 900
    },
    {
      "epoch": 0.76,
      "grad_norm": 1.6414889097213745,
      "learning_rate": 3.3284191862731585e-06,
      "loss": 0.8406,
      "step": 1000
    },
    {
      "epoch": 0.83,
      "grad_norm": 1.6184569597244263,
      "learning_rate": 1.6096863865200606e-06,
      "loss": 0.8189,
      "step": 1100
    },
    {
      "epoch": 0.91,
      "grad_norm": 1.6727625131607056,
      "learning_rate": 4.772213925798331e-07,
      "loss": 0.837,
      "step": 1200
    },
    {
      "epoch": 0.99,
      "grad_norm": 1.7798255681991577,
      "learning_rate": 1.0154472728808318e-08,
      "loss": 0.8195,
      "step": 1300
    },
    {
      "epoch": 1.0,
      "step": 1317,
      "total_flos": 2.5585123528237056e+16,
      "train_loss": 0.9235503957773034,
      "train_runtime": 6177.2242,
      "train_samples_per_second": 1.707,
      "train_steps_per_second": 0.213
    }
  ],
  "logging_steps": 100,
  "max_steps": 1317,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "total_flos": 2.5585123528237056e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}