File size: 2,391 Bytes
6a36b64
 
 
 
 
 
 
 
 
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
6a36b64
 
 
 
 
7c4ca9d
 
 
 
 
6a36b64
 
 
 
 
 
 
7c4ca9d
6a36b64
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 290,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 1.724137931034483e-06,
      "loss": 13.6499,
      "step": 20
    },
    {
      "epoch": 0.14,
      "learning_rate": 3.448275862068966e-06,
      "loss": 12.8629,
      "step": 40
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.999083215558211e-06,
      "loss": 10.3507,
      "step": 60
    },
    {
      "epoch": 0.28,
      "learning_rate": 4.88988035667903e-06,
      "loss": 8.248,
      "step": 80
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.606455184041623e-06,
      "loss": 7.2653,
      "step": 100
    },
    {
      "epoch": 0.41,
      "learning_rate": 4.169469396971739e-06,
      "loss": 7.0064,
      "step": 120
    },
    {
      "epoch": 0.48,
      "learning_rate": 3.6107792658847597e-06,
      "loss": 6.6082,
      "step": 140
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.971113309695796e-06,
      "loss": 6.5262,
      "step": 160
    },
    {
      "epoch": 0.62,
      "learning_rate": 2.2971031861814225e-06,
      "loss": 6.1328,
      "step": 180
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.6378842434300746e-06,
      "loss": 6.1506,
      "step": 200
    },
    {
      "epoch": 0.76,
      "learning_rate": 1.041513552231265e-06,
      "loss": 5.8993,
      "step": 220
    },
    {
      "epoch": 0.83,
      "learning_rate": 5.51466544896021e-07,
      "loss": 6.1471,
      "step": 240
    },
    {
      "epoch": 0.9,
      "learning_rate": 2.0346765559094566e-07,
      "loss": 6.2593,
      "step": 260
    },
    {
      "epoch": 0.97,
      "learning_rate": 2.2886008552983064e-08,
      "loss": 6.0709,
      "step": 280
    },
    {
      "epoch": 1.0,
      "step": 290,
      "total_flos": 8536543131303936.0,
      "train_loss": 7.7484286078091325,
      "train_runtime": 226.3779,
      "train_samples_per_second": 2.558,
      "train_steps_per_second": 1.281
    }
  ],
  "logging_steps": 20,
  "max_steps": 290,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 8536543131303936.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}