pszemraj commited on
Commit
1a4d61f
1 Parent(s): 7c8225f

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +14 -0
  2. eval_results.json +9 -0
  3. train_results.json +8 -0
  4. trainer_state.json +304 -0
all_results.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "eval_f1": 0.6717231571462432,
4
+ "eval_loss": 0.21304987370967865,
5
+ "eval_runtime": 3.3638,
6
+ "eval_samples": 989,
7
+ "eval_samples_per_second": 294.016,
8
+ "eval_steps_per_second": 4.757,
9
+ "train_loss": 0.2572957956662742,
10
+ "train_runtime": 515.2049,
11
+ "train_samples": 7914,
12
+ "train_samples_per_second": 92.165,
13
+ "train_steps_per_second": 0.722
14
+ }
eval_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "eval_f1": 0.6717231571462432,
4
+ "eval_loss": 0.21304987370967865,
5
+ "eval_runtime": 3.3638,
6
+ "eval_samples": 989,
7
+ "eval_samples_per_second": 294.016,
8
+ "eval_steps_per_second": 4.757
9
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6.0,
3
+ "train_loss": 0.2572957956662742,
4
+ "train_runtime": 515.2049,
5
+ "train_samples": 7914,
6
+ "train_samples_per_second": 92.165,
7
+ "train_steps_per_second": 0.722
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,304 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 372,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.16,
13
+ "learning_rate": 2.6666666666666667e-05,
14
+ "loss": 0.6871,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.32,
19
+ "learning_rate": 3.943977591036415e-05,
20
+ "loss": 0.506,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.48,
25
+ "learning_rate": 3.8319327731092444e-05,
26
+ "loss": 0.386,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.65,
31
+ "learning_rate": 3.719887955182073e-05,
32
+ "loss": 0.3467,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.81,
37
+ "learning_rate": 3.6078431372549025e-05,
38
+ "loss": 0.3235,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.97,
43
+ "learning_rate": 3.495798319327731e-05,
44
+ "loss": 0.3118,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 1.0,
49
+ "eval_f1": 0.336166194523135,
50
+ "eval_loss": 0.2885044813156128,
51
+ "eval_runtime": 3.3771,
52
+ "eval_samples_per_second": 292.857,
53
+ "eval_steps_per_second": 4.738,
54
+ "step": 62
55
+ },
56
+ {
57
+ "epoch": 1.13,
58
+ "learning_rate": 3.383753501400561e-05,
59
+ "loss": 0.3046,
60
+ "step": 70
61
+ },
62
+ {
63
+ "epoch": 1.29,
64
+ "learning_rate": 3.2717086834733894e-05,
65
+ "loss": 0.2906,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 1.45,
70
+ "learning_rate": 3.159663865546219e-05,
71
+ "loss": 0.2896,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 1.61,
76
+ "learning_rate": 3.047619047619048e-05,
77
+ "loss": 0.2826,
78
+ "step": 100
79
+ },
80
+ {
81
+ "epoch": 1.77,
82
+ "learning_rate": 2.935574229691877e-05,
83
+ "loss": 0.2749,
84
+ "step": 110
85
+ },
86
+ {
87
+ "epoch": 1.94,
88
+ "learning_rate": 2.8235294117647063e-05,
89
+ "loss": 0.2676,
90
+ "step": 120
91
+ },
92
+ {
93
+ "epoch": 2.0,
94
+ "eval_f1": 0.4882154882154882,
95
+ "eval_loss": 0.25112977623939514,
96
+ "eval_runtime": 3.3888,
97
+ "eval_samples_per_second": 291.842,
98
+ "eval_steps_per_second": 4.721,
99
+ "step": 124
100
+ },
101
+ {
102
+ "epoch": 2.1,
103
+ "learning_rate": 2.7114845938375354e-05,
104
+ "loss": 0.2534,
105
+ "step": 130
106
+ },
107
+ {
108
+ "epoch": 2.26,
109
+ "learning_rate": 2.5994397759103644e-05,
110
+ "loss": 0.2491,
111
+ "step": 140
112
+ },
113
+ {
114
+ "epoch": 2.42,
115
+ "learning_rate": 2.4873949579831935e-05,
116
+ "loss": 0.2452,
117
+ "step": 150
118
+ },
119
+ {
120
+ "epoch": 2.58,
121
+ "learning_rate": 2.3753501400560226e-05,
122
+ "loss": 0.2373,
123
+ "step": 160
124
+ },
125
+ {
126
+ "epoch": 2.74,
127
+ "learning_rate": 2.2633053221288516e-05,
128
+ "loss": 0.2355,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 2.9,
133
+ "learning_rate": 2.1512605042016807e-05,
134
+ "loss": 0.2325,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 3.0,
139
+ "eval_f1": 0.6093467596178673,
140
+ "eval_loss": 0.22724518179893494,
141
+ "eval_runtime": 3.3851,
142
+ "eval_samples_per_second": 292.16,
143
+ "eval_steps_per_second": 4.727,
144
+ "step": 186
145
+ },
146
+ {
147
+ "epoch": 3.06,
148
+ "learning_rate": 2.0392156862745097e-05,
149
+ "loss": 0.231,
150
+ "step": 190
151
+ },
152
+ {
153
+ "epoch": 3.23,
154
+ "learning_rate": 1.927170868347339e-05,
155
+ "loss": 0.2195,
156
+ "step": 200
157
+ },
158
+ {
159
+ "epoch": 3.39,
160
+ "learning_rate": 1.8151260504201682e-05,
161
+ "loss": 0.2156,
162
+ "step": 210
163
+ },
164
+ {
165
+ "epoch": 3.55,
166
+ "learning_rate": 1.7030812324929973e-05,
167
+ "loss": 0.2184,
168
+ "step": 220
169
+ },
170
+ {
171
+ "epoch": 3.71,
172
+ "learning_rate": 1.5910364145658263e-05,
173
+ "loss": 0.2109,
174
+ "step": 230
175
+ },
176
+ {
177
+ "epoch": 3.87,
178
+ "learning_rate": 1.4789915966386557e-05,
179
+ "loss": 0.2127,
180
+ "step": 240
181
+ },
182
+ {
183
+ "epoch": 4.0,
184
+ "eval_f1": 0.6591346153846154,
185
+ "eval_loss": 0.21806256473064423,
186
+ "eval_runtime": 3.3784,
187
+ "eval_samples_per_second": 292.738,
188
+ "eval_steps_per_second": 4.736,
189
+ "step": 248
190
+ },
191
+ {
192
+ "epoch": 4.03,
193
+ "learning_rate": 1.3669467787114848e-05,
194
+ "loss": 0.2067,
195
+ "step": 250
196
+ },
197
+ {
198
+ "epoch": 4.19,
199
+ "learning_rate": 1.2549019607843138e-05,
200
+ "loss": 0.1984,
201
+ "step": 260
202
+ },
203
+ {
204
+ "epoch": 4.35,
205
+ "learning_rate": 1.1428571428571429e-05,
206
+ "loss": 0.1957,
207
+ "step": 270
208
+ },
209
+ {
210
+ "epoch": 4.52,
211
+ "learning_rate": 1.030812324929972e-05,
212
+ "loss": 0.1967,
213
+ "step": 280
214
+ },
215
+ {
216
+ "epoch": 4.68,
217
+ "learning_rate": 9.187675070028012e-06,
218
+ "loss": 0.1975,
219
+ "step": 290
220
+ },
221
+ {
222
+ "epoch": 4.84,
223
+ "learning_rate": 8.067226890756303e-06,
224
+ "loss": 0.194,
225
+ "step": 300
226
+ },
227
+ {
228
+ "epoch": 5.0,
229
+ "learning_rate": 6.946778711484594e-06,
230
+ "loss": 0.1978,
231
+ "step": 310
232
+ },
233
+ {
234
+ "epoch": 5.0,
235
+ "eval_f1": 0.6685687113647171,
236
+ "eval_loss": 0.2140355408191681,
237
+ "eval_runtime": 3.3785,
238
+ "eval_samples_per_second": 292.736,
239
+ "eval_steps_per_second": 4.736,
240
+ "step": 310
241
+ },
242
+ {
243
+ "epoch": 5.16,
244
+ "learning_rate": 5.826330532212886e-06,
245
+ "loss": 0.1877,
246
+ "step": 320
247
+ },
248
+ {
249
+ "epoch": 5.32,
250
+ "learning_rate": 4.705882352941177e-06,
251
+ "loss": 0.1877,
252
+ "step": 330
253
+ },
254
+ {
255
+ "epoch": 5.48,
256
+ "learning_rate": 3.585434173669468e-06,
257
+ "loss": 0.1803,
258
+ "step": 340
259
+ },
260
+ {
261
+ "epoch": 5.65,
262
+ "learning_rate": 2.4649859943977594e-06,
263
+ "loss": 0.1874,
264
+ "step": 350
265
+ },
266
+ {
267
+ "epoch": 5.81,
268
+ "learning_rate": 1.3445378151260504e-06,
269
+ "loss": 0.1911,
270
+ "step": 360
271
+ },
272
+ {
273
+ "epoch": 5.97,
274
+ "learning_rate": 2.2408963585434175e-07,
275
+ "loss": 0.1817,
276
+ "step": 370
277
+ },
278
+ {
279
+ "epoch": 6.0,
280
+ "eval_f1": 0.6717231571462432,
281
+ "eval_loss": 0.21304987370967865,
282
+ "eval_runtime": 3.3805,
283
+ "eval_samples_per_second": 292.556,
284
+ "eval_steps_per_second": 4.733,
285
+ "step": 372
286
+ },
287
+ {
288
+ "epoch": 6.0,
289
+ "step": 372,
290
+ "total_flos": 1.2495360147628032e+16,
291
+ "train_loss": 0.2572957956662742,
292
+ "train_runtime": 515.2049,
293
+ "train_samples_per_second": 92.165,
294
+ "train_steps_per_second": 0.722
295
+ }
296
+ ],
297
+ "logging_steps": 10,
298
+ "max_steps": 372,
299
+ "num_train_epochs": 6,
300
+ "save_steps": 500,
301
+ "total_flos": 1.2495360147628032e+16,
302
+ "trial_name": null,
303
+ "trial_params": null
304
+ }