File size: 1,997 Bytes
73dfccd
 
 
 
 
 
 
 
 
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
0fbc3db
73dfccd
0fbc3db
73dfccd
 
 
 
 
0fbc3db
 
 
 
 
73dfccd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0fbc3db
73dfccd
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.5555555555555554,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 1.454345464706421,
      "learning_rate": 0.0001666666666666667,
      "loss": 9.486,
      "step": 2
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 1.290387511253357,
      "learning_rate": 0.00013333333333333334,
      "loss": 9.1386,
      "step": 4
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 2.150188684463501,
      "learning_rate": 0.0001,
      "loss": 8.7163,
      "step": 6
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 2.306529998779297,
      "learning_rate": 6.666666666666667e-05,
      "loss": 8.2646,
      "step": 8
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 1.6873130798339844,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 7.9025,
      "step": 10
    },
    {
      "epoch": 3.5555555555555554,
      "grad_norm": 1.427616000175476,
      "learning_rate": 0.0,
      "loss": 7.7933,
      "step": 12
    },
    {
      "epoch": 3.5555555555555554,
      "step": 12,
      "total_flos": 55764488285424.0,
      "train_loss": 8.55021588007609,
      "train_runtime": 312.1882,
      "train_samples_per_second": 0.692,
      "train_steps_per_second": 0.038
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 55764488285424.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}