File size: 2,275 Bytes
a3bf387
 
 
 
 
 
 
 
 
 
 
 
f231137
a3bf387
 
 
 
 
 
f231137
a3bf387
 
 
 
 
 
f231137
a3bf387
55cf540
a3bf387
 
 
 
f231137
a3bf387
f231137
a3bf387
 
 
 
f231137
a3bf387
f231137
a3bf387
 
 
 
f231137
a3bf387
f231137
a3bf387
 
 
 
f231137
a3bf387
f231137
a3bf387
 
 
 
f231137
a3bf387
f231137
a3bf387
 
 
 
 
 
f231137
 
 
 
a3bf387
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.764705882352941,
  "eval_steps": 25,
  "global_step": 8,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 1.1788603067398071,
      "learning_rate": 0.000175,
      "loss": 9.5575,
      "step": 1
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 1.169764518737793,
      "learning_rate": 0.00015000000000000001,
      "loss": 9.4221,
      "step": 2
    },
    {
      "epoch": 1.4117647058823528,
      "grad_norm": 1.1544612646102905,
      "learning_rate": 0.000125,
      "loss": 9.2271,
      "step": 3
    },
    {
      "epoch": 1.8823529411764706,
      "grad_norm": 1.3188167810440063,
      "learning_rate": 0.0001,
      "loss": 8.996,
      "step": 4
    },
    {
      "epoch": 2.3529411764705883,
      "grad_norm": 1.6397812366485596,
      "learning_rate": 7.500000000000001e-05,
      "loss": 8.8136,
      "step": 5
    },
    {
      "epoch": 2.8235294117647056,
      "grad_norm": 1.7697429656982422,
      "learning_rate": 5e-05,
      "loss": 8.699,
      "step": 6
    },
    {
      "epoch": 3.2941176470588234,
      "grad_norm": 2.157055616378784,
      "learning_rate": 2.5e-05,
      "loss": 8.4982,
      "step": 7
    },
    {
      "epoch": 3.764705882352941,
      "grad_norm": 2.2286105155944824,
      "learning_rate": 0.0,
      "loss": 8.4287,
      "step": 8
    },
    {
      "epoch": 3.764705882352941,
      "step": 8,
      "total_flos": 3464493556500.0,
      "train_loss": 8.955286264419556,
      "train_runtime": 43.6635,
      "train_samples_per_second": 3.023,
      "train_steps_per_second": 0.183
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 8,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3464493556500.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}