rjac committed on
Commit
8e05c1b
1 Parent(s): c1ed44d

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +5 -5
  2. train_results.json +5 -5
  3. trainer_state.json +49 -229
all_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 4.03,
3
- "train_loss": 1.0273077850341796,
4
- "train_runtime": 12431.9662,
5
- "train_samples_per_second": 10.296,
6
- "train_steps_per_second": 0.161
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.3475898265838624,
4
+ "train_runtime": 419.0595,
5
+ "train_samples_per_second": 7.636,
6
+ "train_steps_per_second": 0.119
7
  }
train_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 4.03,
3
- "train_loss": 1.0273077850341796,
4
- "train_runtime": 12431.9662,
5
- "train_samples_per_second": 10.296,
6
- "train_steps_per_second": 0.161
7
  }
 
1
  {
2
+ "epoch": 1.0,
3
+ "train_loss": 1.3475898265838624,
4
+ "train_runtime": 419.0595,
5
+ "train_samples_per_second": 7.636,
6
+ "train_steps_per_second": 0.119
7
  }
trainer_state.json CHANGED
@@ -1,265 +1,85 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.03,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 0.03,
12
- "learning_rate": 2.7416666666666667e-08,
13
- "loss": 1.3507,
14
- "step": 50
15
- },
16
- {
17
- "epoch": 0.05,
18
- "learning_rate": 5.658333333333333e-08,
19
- "loss": 1.3311,
20
- "step": 100
21
- },
22
- {
23
- "epoch": 0.07,
24
- "learning_rate": 8.575e-08,
25
- "loss": 1.3252,
26
- "step": 150
27
- },
28
  {
29
  "epoch": 0.1,
30
- "learning_rate": 1.1491666666666665e-07,
31
- "loss": 1.2374,
32
- "step": 200
33
- },
34
- {
35
- "epoch": 0.12,
36
- "learning_rate": 1.4408333333333332e-07,
37
- "loss": 1.2842,
38
- "step": 250
39
- },
40
- {
41
- "epoch": 0.15,
42
- "learning_rate": 1.7325e-07,
43
- "loss": 1.1666,
44
- "step": 300
45
- },
46
- {
47
- "epoch": 0.17,
48
- "learning_rate": 2.0241666666666666e-07,
49
- "loss": 1.0935,
50
- "step": 350
51
  },
52
  {
53
  "epoch": 0.2,
54
- "learning_rate": 2.315833333333333e-07,
55
- "loss": 1.0436,
56
- "step": 400
57
- },
58
- {
59
- "epoch": 0.23,
60
- "learning_rate": 2.6075e-07,
61
- "loss": 1.0156,
62
- "step": 450
63
- },
64
- {
65
- "epoch": 1.01,
66
- "learning_rate": 2.8991666666666667e-07,
67
- "loss": 1.1327,
68
- "step": 500
69
- },
70
- {
71
- "epoch": 1.03,
72
- "learning_rate": 3.190833333333333e-07,
73
- "loss": 1.3554,
74
- "step": 550
75
- },
76
- {
77
- "epoch": 1.06,
78
- "learning_rate": 3.4824999999999996e-07,
79
- "loss": 1.2085,
80
- "step": 600
81
- },
82
- {
83
- "epoch": 1.08,
84
- "learning_rate": 3.3825e-07,
85
- "loss": 1.2987,
86
- "step": 650
87
- },
88
- {
89
- "epoch": 1.11,
90
- "learning_rate": 3.2574999999999996e-07,
91
- "loss": 0.9905,
92
- "step": 700
93
- },
94
- {
95
- "epoch": 1.13,
96
- "learning_rate": 3.1325e-07,
97
- "loss": 1.0011,
98
- "step": 750
99
- },
100
- {
101
- "epoch": 1.16,
102
- "learning_rate": 3.0075e-07,
103
- "loss": 0.9573,
104
- "step": 800
105
- },
106
- {
107
- "epoch": 1.18,
108
- "learning_rate": 2.8825e-07,
109
- "loss": 0.9946,
110
- "step": 850
111
  },
112
  {
113
- "epoch": 1.21,
114
- "learning_rate": 2.7574999999999996e-07,
115
- "loss": 0.9934,
116
- "step": 900
117
  },
118
  {
119
- "epoch": 1.23,
120
- "learning_rate": 2.6325e-07,
121
- "loss": 0.9519,
122
- "step": 950
123
  },
124
  {
125
- "epoch": 2.02,
126
- "learning_rate": 2.5075e-07,
127
- "loss": 1.042,
128
- "step": 1000
129
  },
130
  {
131
- "epoch": 2.04,
132
- "learning_rate": 2.3824999999999998e-07,
133
- "loss": 1.0339,
134
- "step": 1050
135
  },
136
  {
137
- "epoch": 2.06,
138
- "learning_rate": 2.2575e-07,
139
- "loss": 1.0101,
140
- "step": 1100
141
  },
142
  {
143
- "epoch": 2.09,
144
- "learning_rate": 2.1325e-07,
145
- "loss": 1.0805,
146
- "step": 1150
147
  },
148
  {
149
- "epoch": 2.12,
150
- "learning_rate": 2.0075000000000002e-07,
151
- "loss": 0.8829,
152
- "step": 1200
153
  },
154
  {
155
- "epoch": 2.14,
156
- "learning_rate": 1.8824999999999998e-07,
157
- "loss": 0.901,
158
- "step": 1250
159
- },
160
- {
161
- "epoch": 2.17,
162
- "learning_rate": 1.7574999999999997e-07,
163
- "loss": 0.8773,
164
- "step": 1300
165
- },
166
- {
167
- "epoch": 2.19,
168
- "learning_rate": 1.6324999999999998e-07,
169
- "loss": 0.921,
170
- "step": 1350
171
- },
172
- {
173
- "epoch": 2.21,
174
- "learning_rate": 1.5075e-07,
175
- "loss": 0.8838,
176
- "step": 1400
177
- },
178
- {
179
- "epoch": 2.24,
180
- "learning_rate": 1.3825e-07,
181
- "loss": 0.9622,
182
- "step": 1450
183
- },
184
- {
185
- "epoch": 3.02,
186
- "learning_rate": 1.2575e-07,
187
- "loss": 0.9518,
188
- "step": 1500
189
- },
190
- {
191
- "epoch": 3.05,
192
- "learning_rate": 1.1325e-07,
193
- "loss": 0.9739,
194
- "step": 1550
195
- },
196
- {
197
- "epoch": 3.07,
198
- "learning_rate": 1.0075e-07,
199
- "loss": 0.9918,
200
- "step": 1600
201
- },
202
- {
203
- "epoch": 3.1,
204
- "learning_rate": 8.825e-08,
205
- "loss": 0.9803,
206
- "step": 1650
207
- },
208
- {
209
- "epoch": 3.12,
210
- "learning_rate": 7.575e-08,
211
- "loss": 0.8518,
212
- "step": 1700
213
- },
214
- {
215
- "epoch": 3.15,
216
- "learning_rate": 6.325e-08,
217
- "loss": 0.8628,
218
- "step": 1750
219
- },
220
- {
221
- "epoch": 3.17,
222
- "learning_rate": 5.0749999999999995e-08,
223
- "loss": 0.864,
224
- "step": 1800
225
- },
226
- {
227
- "epoch": 3.2,
228
- "learning_rate": 3.825e-08,
229
- "loss": 0.9134,
230
- "step": 1850
231
- },
232
- {
233
- "epoch": 3.22,
234
- "learning_rate": 2.5749999999999996e-08,
235
- "loss": 0.828,
236
- "step": 1900
237
- },
238
- {
239
- "epoch": 4.0,
240
- "learning_rate": 1.3250000000000001e-08,
241
- "loss": 0.9324,
242
- "step": 1950
243
- },
244
- {
245
- "epoch": 4.03,
246
- "learning_rate": 7.5e-10,
247
- "loss": 0.6153,
248
- "step": 2000
249
  },
250
  {
251
- "epoch": 4.03,
252
- "step": 2000,
253
- "total_flos": 3.15121729536e+18,
254
- "train_loss": 1.0273077850341796,
255
- "train_runtime": 12431.9662,
256
- "train_samples_per_second": 10.296,
257
- "train_steps_per_second": 0.161
258
  }
259
  ],
260
- "max_steps": 2000,
261
  "num_train_epochs": 9223372036854775807,
262
- "total_flos": 3.15121729536e+18,
263
  "trial_name": null,
264
  "trial_params": null
265
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 50,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  {
11
  "epoch": 0.1,
12
+ "learning_rate": 5e-08,
13
+ "loss": 1.3906,
14
+ "step": 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 0.2,
18
+ "learning_rate": 9.347826086956522e-08,
19
+ "loss": 1.3831,
20
+ "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  },
22
  {
23
+ "epoch": 0.3,
24
+ "learning_rate": 8.26086956521739e-08,
25
+ "loss": 1.406,
26
+ "step": 15
27
  },
28
  {
29
+ "epoch": 0.4,
30
+ "learning_rate": 7.173913043478261e-08,
31
+ "loss": 1.4041,
32
+ "step": 20
33
  },
34
  {
35
+ "epoch": 0.5,
36
+ "learning_rate": 6.086956521739131e-08,
37
+ "loss": 1.3447,
38
+ "step": 25
39
  },
40
  {
41
+ "epoch": 0.6,
42
+ "learning_rate": 5e-08,
43
+ "loss": 1.3263,
44
+ "step": 30
45
  },
46
  {
47
+ "epoch": 0.7,
48
+ "learning_rate": 3.913043478260869e-08,
49
+ "loss": 1.3201,
50
+ "step": 35
51
  },
52
  {
53
+ "epoch": 0.8,
54
+ "learning_rate": 2.8260869565217388e-08,
55
+ "loss": 1.3223,
56
+ "step": 40
57
  },
58
  {
59
+ "epoch": 0.9,
60
+ "learning_rate": 1.7391304347826087e-08,
61
+ "loss": 1.2421,
62
+ "step": 45
63
  },
64
  {
65
+ "epoch": 1.0,
66
+ "learning_rate": 6.521739130434782e-09,
67
+ "loss": 1.3366,
68
+ "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69
  },
70
  {
71
+ "epoch": 1.0,
72
+ "step": 50,
73
+ "total_flos": 7.8780432384e+16,
74
+ "train_loss": 1.3475898265838624,
75
+ "train_runtime": 419.0595,
76
+ "train_samples_per_second": 7.636,
77
+ "train_steps_per_second": 0.119
78
  }
79
  ],
80
+ "max_steps": 50,
81
  "num_train_epochs": 9223372036854775807,
82
+ "total_flos": 7.8780432384e+16,
83
  "trial_name": null,
84
  "trial_params": null
85
  }