File size: 2,595 Bytes
ce30918
 
 
0e2e36c
ce30918
0e2e36c
ce30918
 
 
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
521622c
 
 
 
ce30918
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
521622c
 
 
 
ce30918
 
 
0e2e36c
 
521622c
ce30918
 
 
0e2e36c
 
521622c
0e2e36c
 
 
 
521622c
 
 
 
0e2e36c
 
 
 
 
521622c
0e2e36c
 
 
 
 
521622c
0e2e36c
 
 
 
521622c
 
 
 
0e2e36c
 
 
 
 
 
521622c
 
 
0e2e36c
ce30918
 
 
0e2e36c
ce30918
 
 
0e2e36c
 
ce30918
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9905660377358491,
  "eval_steps": 10,
  "global_step": 42,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.9930094929529506e-05,
      "loss": 1.2348,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.827184371610511e-05,
      "loss": 1.2258,
      "step": 5
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.332629679574566e-05,
      "loss": 1.2298,
      "step": 10
    },
    {
      "epoch": 0.24,
      "eval_loss": 1.1997809410095215,
      "eval_runtime": 24.4233,
      "eval_samples_per_second": 5.282,
      "eval_steps_per_second": 1.761,
      "step": 10
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.5847093477938956e-05,
      "loss": 1.2091,
      "step": 15
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.686825233966061e-05,
      "loss": 1.1885,
      "step": 20
    },
    {
      "epoch": 0.47,
      "eval_loss": 1.1652644872665405,
      "eval_runtime": 24.4242,
      "eval_samples_per_second": 5.282,
      "eval_steps_per_second": 1.761,
      "step": 20
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.7631120639727393e-05,
      "loss": 1.1647,
      "step": 25
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.412754953531663e-06,
      "loss": 1.1674,
      "step": 30
    },
    {
      "epoch": 0.71,
      "eval_loss": 1.1437482833862305,
      "eval_runtime": 24.4259,
      "eval_samples_per_second": 5.281,
      "eval_steps_per_second": 1.76,
      "step": 30
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.3493649053890326e-06,
      "loss": 1.151,
      "step": 35
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.7922934437178695e-07,
      "loss": 1.1425,
      "step": 40
    },
    {
      "epoch": 0.94,
      "eval_loss": 1.1380583047866821,
      "eval_runtime": 24.4167,
      "eval_samples_per_second": 5.283,
      "eval_steps_per_second": 1.761,
      "step": 40
    },
    {
      "epoch": 0.99,
      "step": 42,
      "total_flos": 6.447644673468006e+16,
      "train_loss": 1.1835048993428547,
      "train_runtime": 3217.2275,
      "train_samples_per_second": 1.58,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 5,
  "max_steps": 42,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 6.447644673468006e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}