Jeska committed on
Commit
828013b
1 Parent(s): c48fbb0

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8756855726242065,
4
- "eval_loss": 0.810522198677063,
5
- "eval_runtime": 3.6004,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 303.856,
8
- "eval_steps_per_second": 38.051,
9
- "train_loss": 1.72243186873619,
10
- "train_runtime": 2003.3488,
11
  "train_samples": 10556,
12
- "train_samples_per_second": 79.038,
13
- "train_steps_per_second": 2.471
14
  }
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.9031078815460205,
4
+ "eval_loss": 0.4978463649749756,
5
+ "eval_runtime": 3.9767,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 275.1,
8
+ "eval_steps_per_second": 34.45,
9
+ "train_loss": 0.7703812461429173,
10
+ "train_runtime": 4144.3399,
11
  "train_samples": 10556,
12
+ "train_samples_per_second": 76.413,
13
+ "train_steps_per_second": 2.389
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.8756855726242065,
4
- "eval_loss": 0.810522198677063,
5
- "eval_runtime": 3.6004,
6
  "eval_samples": 1094,
7
- "eval_samples_per_second": 303.856,
8
- "eval_steps_per_second": 38.051
9
  }
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.9031078815460205,
4
+ "eval_loss": 0.4978463649749756,
5
+ "eval_runtime": 3.9767,
6
  "eval_samples": 1094,
7
+ "eval_samples_per_second": 275.1,
8
+ "eval_steps_per_second": 34.45
9
  }
runs/Dec15_15-40-52_jbuhmann/events.out.tfevents.1639582865.jbuhmann.28904.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63cc129a444bc7a72e82f2e0c1d16935e1ce7f330009fe3d1239bfbc9ed68996
3
- size 29107
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d63d734834def9213d353aa33c52fadcbcf76e337c8773c5868054e65c635ecc
3
+ size 29461
runs/Dec15_15-40-52_jbuhmann/events.out.tfevents.1639587014.jbuhmann.28904.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c9c33649a9e22d0ae15626e70632ae0dab2f12afe88208258091716a08f29b
3
+ size 363
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 15.0,
3
- "train_loss": 1.72243186873619,
4
- "train_runtime": 2003.3488,
5
  "train_samples": 10556,
6
- "train_samples_per_second": 79.038,
7
- "train_steps_per_second": 2.471
8
  }
1
  {
2
+ "epoch": 30.0,
3
+ "train_loss": 0.7703812461429173,
4
+ "train_runtime": 4144.3399,
5
  "train_samples": 10556,
6
+ "train_samples_per_second": 76.413,
7
+ "train_steps_per_second": 2.389
8
  }
trainer_state.json CHANGED
@@ -1,214 +1,409 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 4950,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "eval_accuracy": 0.22303473949432373,
13
- "eval_loss": 3.9924213886260986,
14
- "eval_runtime": 3.5667,
15
- "eval_samples_per_second": 306.729,
16
- "eval_steps_per_second": 38.411,
17
  "step": 330
18
  },
19
  {
20
  "epoch": 1.52,
21
- "learning_rate": 8.98989898989899e-06,
22
- "loss": 4.3795,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 2.0,
27
- "eval_accuracy": 0.3957952558994293,
28
- "eval_loss": 3.1812195777893066,
29
- "eval_runtime": 3.5362,
30
- "eval_samples_per_second": 309.369,
31
- "eval_steps_per_second": 38.742,
32
  "step": 660
33
  },
34
  {
35
  "epoch": 3.0,
36
- "eval_accuracy": 0.5511882901191711,
37
- "eval_loss": 2.5903849601745605,
38
- "eval_runtime": 3.5231,
39
- "eval_samples_per_second": 310.519,
40
- "eval_steps_per_second": 38.886,
41
  "step": 990
42
  },
43
  {
44
  "epoch": 3.03,
45
- "learning_rate": 7.97979797979798e-06,
46
- "loss": 3.2046,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 4.0,
51
- "eval_accuracy": 0.6407678127288818,
52
- "eval_loss": 2.1536314487457275,
53
- "eval_runtime": 3.5444,
54
- "eval_samples_per_second": 308.659,
55
- "eval_steps_per_second": 38.653,
56
  "step": 1320
57
  },
58
  {
59
  "epoch": 4.55,
60
- "learning_rate": 6.969696969696971e-06,
61
- "loss": 2.3683,
62
  "step": 1500
63
  },
64
  {
65
  "epoch": 5.0,
66
- "eval_accuracy": 0.7129799127578735,
67
- "eval_loss": 1.8079293966293335,
68
- "eval_runtime": 3.555,
69
- "eval_samples_per_second": 307.739,
70
- "eval_steps_per_second": 38.538,
71
  "step": 1650
72
  },
73
  {
74
  "epoch": 6.0,
75
- "eval_accuracy": 0.7687385678291321,
76
- "eval_loss": 1.5419940948486328,
77
- "eval_runtime": 3.5056,
78
- "eval_samples_per_second": 312.075,
79
- "eval_steps_per_second": 39.081,
80
  "step": 1980
81
  },
82
  {
83
  "epoch": 6.06,
84
- "learning_rate": 5.95959595959596e-06,
85
- "loss": 1.8065,
86
  "step": 2000
87
  },
88
  {
89
  "epoch": 7.0,
90
- "eval_accuracy": 0.8053016662597656,
91
- "eval_loss": 1.3433690071105957,
92
- "eval_runtime": 3.4889,
93
- "eval_samples_per_second": 313.564,
94
- "eval_steps_per_second": 39.267,
95
  "step": 2310
96
  },
97
  {
98
  "epoch": 7.58,
99
- "learning_rate": 4.94949494949495e-06,
100
- "loss": 1.373,
101
  "step": 2500
102
  },
103
  {
104
  "epoch": 8.0,
105
- "eval_accuracy": 0.825411319732666,
106
- "eval_loss": 1.1881896257400513,
107
- "eval_runtime": 3.5056,
108
- "eval_samples_per_second": 312.068,
109
- "eval_steps_per_second": 39.08,
110
  "step": 2640
111
  },
112
  {
113
  "epoch": 9.0,
114
- "eval_accuracy": 0.8400365710258484,
115
- "eval_loss": 1.0700345039367676,
116
- "eval_runtime": 3.4951,
117
- "eval_samples_per_second": 313.014,
118
- "eval_steps_per_second": 39.198,
119
  "step": 2970
120
  },
121
  {
122
  "epoch": 9.09,
123
- "learning_rate": 3.93939393939394e-06,
124
- "loss": 1.0931,
125
  "step": 3000
126
  },
127
  {
128
  "epoch": 10.0,
129
- "eval_accuracy": 0.851005494594574,
130
- "eval_loss": 0.9790602326393127,
131
- "eval_runtime": 3.4892,
132
- "eval_samples_per_second": 313.542,
133
- "eval_steps_per_second": 39.264,
134
  "step": 3300
135
  },
136
  {
137
  "epoch": 10.61,
138
- "learning_rate": 2.9292929292929295e-06,
139
- "loss": 0.8714,
140
  "step": 3500
141
  },
142
  {
143
  "epoch": 11.0,
144
- "eval_accuracy": 0.8619744181632996,
145
- "eval_loss": 0.9201710224151611,
146
- "eval_runtime": 3.4883,
147
- "eval_samples_per_second": 313.616,
148
- "eval_steps_per_second": 39.274,
149
  "step": 3630
150
  },
151
  {
152
  "epoch": 12.0,
153
- "eval_accuracy": 0.868372917175293,
154
- "eval_loss": 0.8640827536582947,
155
- "eval_runtime": 3.5075,
156
- "eval_samples_per_second": 311.902,
157
- "eval_steps_per_second": 39.059,
158
  "step": 3960
159
  },
160
  {
161
  "epoch": 12.12,
162
- "learning_rate": 1.9191919191919192e-06,
163
- "loss": 0.7428,
164
  "step": 4000
165
  },
166
  {
167
  "epoch": 13.0,
168
- "eval_accuracy": 0.8747714757919312,
169
- "eval_loss": 0.8372448086738586,
170
- "eval_runtime": 3.5209,
171
- "eval_samples_per_second": 310.718,
172
- "eval_steps_per_second": 38.911,
173
  "step": 4290
174
  },
175
  {
176
  "epoch": 13.64,
177
- "learning_rate": 9.090909090909091e-07,
178
- "loss": 0.6531,
179
  "step": 4500
180
  },
181
  {
182
  "epoch": 14.0,
183
- "eval_accuracy": 0.8765996098518372,
184
- "eval_loss": 0.8168175220489502,
185
- "eval_runtime": 3.5294,
186
- "eval_samples_per_second": 309.965,
187
- "eval_steps_per_second": 38.816,
188
  "step": 4620
189
  },
190
  {
191
  "epoch": 15.0,
192
- "eval_accuracy": 0.8756855726242065,
193
- "eval_loss": 0.810522198677063,
194
- "eval_runtime": 3.5261,
195
- "eval_samples_per_second": 310.256,
196
- "eval_steps_per_second": 38.853,
197
  "step": 4950
198
  },
199
  {
200
- "epoch": 15.0,
201
- "step": 4950,
202
- "total_flos": 1.043199019279872e+16,
203
- "train_loss": 1.72243186873619,
204
- "train_runtime": 2003.3488,
205
- "train_samples_per_second": 79.038,
206
- "train_steps_per_second": 2.471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  }
208
  ],
209
- "max_steps": 4950,
210
- "num_train_epochs": 15,
211
- "total_flos": 1.043199019279872e+16,
212
  "trial_name": null,
213
  "trial_params": null
214
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 30.0,
5
+ "global_step": 9900,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "eval_accuracy": 0.22486288845539093,
13
+ "eval_loss": 3.9691851139068604,
14
+ "eval_runtime": 3.5766,
15
+ "eval_samples_per_second": 305.88,
16
+ "eval_steps_per_second": 38.305,
17
  "step": 330
18
  },
19
  {
20
  "epoch": 1.52,
21
+ "learning_rate": 9.494949494949497e-06,
22
+ "loss": 4.3672,
23
  "step": 500
24
  },
25
  {
26
  "epoch": 2.0,
27
+ "eval_accuracy": 0.4031078517436981,
28
+ "eval_loss": 3.1312456130981445,
29
+ "eval_runtime": 3.5565,
30
+ "eval_samples_per_second": 307.607,
31
+ "eval_steps_per_second": 38.521,
32
  "step": 660
33
  },
34
  {
35
  "epoch": 3.0,
36
+ "eval_accuracy": 0.5658135414123535,
37
+ "eval_loss": 2.5068106651306152,
38
+ "eval_runtime": 3.6412,
39
+ "eval_samples_per_second": 300.452,
40
+ "eval_steps_per_second": 37.625,
41
  "step": 990
42
  },
43
  {
44
  "epoch": 3.03,
45
+ "learning_rate": 8.98989898989899e-06,
46
+ "loss": 3.1495,
47
  "step": 1000
48
  },
49
  {
50
  "epoch": 4.0,
51
+ "eval_accuracy": 0.6599634289741516,
52
+ "eval_loss": 2.029958724975586,
53
+ "eval_runtime": 3.6051,
54
+ "eval_samples_per_second": 303.46,
55
+ "eval_steps_per_second": 38.002,
56
  "step": 1320
57
  },
58
  {
59
  "epoch": 4.55,
60
+ "learning_rate": 8.484848484848486e-06,
61
+ "loss": 2.2491,
62
  "step": 1500
63
  },
64
  {
65
  "epoch": 5.0,
66
+ "eval_accuracy": 0.7449725866317749,
67
+ "eval_loss": 1.651670217514038,
68
+ "eval_runtime": 3.5564,
69
+ "eval_samples_per_second": 307.618,
70
+ "eval_steps_per_second": 38.523,
71
  "step": 1650
72
  },
73
  {
74
  "epoch": 6.0,
75
+ "eval_accuracy": 0.79433274269104,
76
+ "eval_loss": 1.3604321479797363,
77
+ "eval_runtime": 3.5912,
78
+ "eval_samples_per_second": 304.631,
79
+ "eval_steps_per_second": 38.148,
80
  "step": 1980
81
  },
82
  {
83
  "epoch": 6.06,
84
+ "learning_rate": 7.97979797979798e-06,
85
+ "loss": 1.622,
86
  "step": 2000
87
  },
88
  {
89
  "epoch": 7.0,
90
+ "eval_accuracy": 0.8327239751815796,
91
+ "eval_loss": 1.132811427116394,
92
+ "eval_runtime": 3.5858,
93
+ "eval_samples_per_second": 305.092,
94
+ "eval_steps_per_second": 38.206,
95
  "step": 2310
96
  },
97
  {
98
  "epoch": 7.58,
99
+ "learning_rate": 7.474747474747476e-06,
100
+ "loss": 1.1252,
101
  "step": 2500
102
  },
103
  {
104
  "epoch": 8.0,
105
+ "eval_accuracy": 0.8610603213310242,
106
+ "eval_loss": 0.9484481811523438,
107
+ "eval_runtime": 3.5674,
108
+ "eval_samples_per_second": 306.664,
109
+ "eval_steps_per_second": 38.403,
110
  "step": 2640
111
  },
112
  {
113
  "epoch": 9.0,
114
+ "eval_accuracy": 0.8756855726242065,
115
+ "eval_loss": 0.8212197422981262,
116
+ "eval_runtime": 3.5581,
117
+ "eval_samples_per_second": 307.464,
118
+ "eval_steps_per_second": 38.503,
119
  "step": 2970
120
  },
121
  {
122
  "epoch": 9.09,
123
+ "learning_rate": 6.969696969696971e-06,
124
+ "loss": 0.7969,
125
  "step": 3000
126
  },
127
  {
128
  "epoch": 10.0,
129
+ "eval_accuracy": 0.8829981684684753,
130
+ "eval_loss": 0.7243201732635498,
131
+ "eval_runtime": 3.5763,
132
+ "eval_samples_per_second": 305.906,
133
+ "eval_steps_per_second": 38.308,
134
  "step": 3300
135
  },
136
  {
137
  "epoch": 10.61,
138
+ "learning_rate": 6.464646464646466e-06,
139
+ "loss": 0.5348,
140
  "step": 3500
141
  },
142
  {
143
  "epoch": 11.0,
144
+ "eval_accuracy": 0.8866544961929321,
145
+ "eval_loss": 0.6596779227256775,
146
+ "eval_runtime": 3.5885,
147
+ "eval_samples_per_second": 304.866,
148
+ "eval_steps_per_second": 38.178,
149
  "step": 3630
150
  },
151
  {
152
  "epoch": 12.0,
153
+ "eval_accuracy": 0.8857403993606567,
154
+ "eval_loss": 0.598337709903717,
155
+ "eval_runtime": 3.6039,
156
+ "eval_samples_per_second": 303.56,
157
+ "eval_steps_per_second": 38.014,
158
  "step": 3960
159
  },
160
  {
161
  "epoch": 12.12,
162
+ "learning_rate": 5.95959595959596e-06,
163
+ "loss": 0.3744,
164
  "step": 4000
165
  },
166
  {
167
  "epoch": 13.0,
168
+ "eval_accuracy": 0.8976234197616577,
169
+ "eval_loss": 0.5634561777114868,
170
+ "eval_runtime": 3.5353,
171
+ "eval_samples_per_second": 309.453,
172
+ "eval_steps_per_second": 38.752,
173
  "step": 4290
174
  },
175
  {
176
  "epoch": 13.64,
177
+ "learning_rate": 5.4545454545454545e-06,
178
+ "loss": 0.2564,
179
  "step": 4500
180
  },
181
  {
182
  "epoch": 14.0,
183
+ "eval_accuracy": 0.8985374569892883,
184
+ "eval_loss": 0.5437070727348328,
185
+ "eval_runtime": 3.5598,
186
+ "eval_samples_per_second": 307.319,
187
+ "eval_steps_per_second": 38.485,
188
  "step": 4620
189
  },
190
  {
191
  "epoch": 15.0,
192
+ "eval_accuracy": 0.9012796878814697,
193
+ "eval_loss": 0.5124027132987976,
194
+ "eval_runtime": 3.5534,
195
+ "eval_samples_per_second": 307.874,
196
+ "eval_steps_per_second": 38.555,
197
  "step": 4950
198
  },
199
  {
200
+ "epoch": 15.15,
201
+ "learning_rate": 4.94949494949495e-06,
202
+ "loss": 0.1862,
203
+ "step": 5000
204
+ },
205
+ {
206
+ "epoch": 16.0,
207
+ "eval_accuracy": 0.9021937847137451,
208
+ "eval_loss": 0.5074306130409241,
209
+ "eval_runtime": 3.5582,
210
+ "eval_samples_per_second": 307.456,
211
+ "eval_steps_per_second": 38.502,
212
+ "step": 5280
213
+ },
214
+ {
215
+ "epoch": 16.67,
216
+ "learning_rate": 4.444444444444444e-06,
217
+ "loss": 0.1349,
218
+ "step": 5500
219
+ },
220
+ {
221
+ "epoch": 17.0,
222
+ "eval_accuracy": 0.9049360156059265,
223
+ "eval_loss": 0.5027920007705688,
224
+ "eval_runtime": 3.5652,
225
+ "eval_samples_per_second": 306.859,
226
+ "eval_steps_per_second": 38.427,
227
+ "step": 5610
228
+ },
229
+ {
230
+ "epoch": 18.0,
231
+ "eval_accuracy": 0.9076782464981079,
232
+ "eval_loss": 0.4876061677932739,
233
+ "eval_runtime": 3.5354,
234
+ "eval_samples_per_second": 309.439,
235
+ "eval_steps_per_second": 38.751,
236
+ "step": 5940
237
+ },
238
+ {
239
+ "epoch": 18.18,
240
+ "learning_rate": 3.93939393939394e-06,
241
+ "loss": 0.0979,
242
+ "step": 6000
243
+ },
244
+ {
245
+ "epoch": 19.0,
246
+ "eval_accuracy": 0.9049360156059265,
247
+ "eval_loss": 0.4970707595348358,
248
+ "eval_runtime": 3.5724,
249
+ "eval_samples_per_second": 306.234,
250
+ "eval_steps_per_second": 38.349,
251
+ "step": 6270
252
+ },
253
+ {
254
+ "epoch": 19.7,
255
+ "learning_rate": 3.4343434343434347e-06,
256
+ "loss": 0.0763,
257
+ "step": 6500
258
+ },
259
+ {
260
+ "epoch": 20.0,
261
+ "eval_accuracy": 0.9021937847137451,
262
+ "eval_loss": 0.4940781891345978,
263
+ "eval_runtime": 3.5051,
264
+ "eval_samples_per_second": 312.116,
265
+ "eval_steps_per_second": 39.086,
266
+ "step": 6600
267
+ },
268
+ {
269
+ "epoch": 21.0,
270
+ "eval_accuracy": 0.9049360156059265,
271
+ "eval_loss": 0.4956739842891693,
272
+ "eval_runtime": 3.5501,
273
+ "eval_samples_per_second": 308.164,
274
+ "eval_steps_per_second": 38.591,
275
+ "step": 6930
276
+ },
277
+ {
278
+ "epoch": 21.21,
279
+ "learning_rate": 2.9292929292929295e-06,
280
+ "loss": 0.0602,
281
+ "step": 7000
282
+ },
283
+ {
284
+ "epoch": 22.0,
285
+ "eval_accuracy": 0.9049360156059265,
286
+ "eval_loss": 0.4989473819732666,
287
+ "eval_runtime": 3.5171,
288
+ "eval_samples_per_second": 311.053,
289
+ "eval_steps_per_second": 38.953,
290
+ "step": 7260
291
+ },
292
+ {
293
+ "epoch": 22.73,
294
+ "learning_rate": 2.4242424242424244e-06,
295
+ "loss": 0.0504,
296
+ "step": 7500
297
+ },
298
+ {
299
+ "epoch": 23.0,
300
+ "eval_accuracy": 0.9040219187736511,
301
+ "eval_loss": 0.49591735005378723,
302
+ "eval_runtime": 3.5103,
303
+ "eval_samples_per_second": 311.654,
304
+ "eval_steps_per_second": 39.028,
305
+ "step": 7590
306
+ },
307
+ {
308
+ "epoch": 24.0,
309
+ "eval_accuracy": 0.9031078815460205,
310
+ "eval_loss": 0.4943903982639313,
311
+ "eval_runtime": 3.7255,
312
+ "eval_samples_per_second": 293.651,
313
+ "eval_steps_per_second": 36.774,
314
+ "step": 7920
315
+ },
316
+ {
317
+ "epoch": 24.24,
318
+ "learning_rate": 1.9191919191919192e-06,
319
+ "loss": 0.0422,
320
+ "step": 8000
321
+ },
322
+ {
323
+ "epoch": 25.0,
324
+ "eval_accuracy": 0.9040219187736511,
325
+ "eval_loss": 0.4985043704509735,
326
+ "eval_runtime": 3.6733,
327
+ "eval_samples_per_second": 297.824,
328
+ "eval_steps_per_second": 37.296,
329
+ "step": 8250
330
+ },
331
+ {
332
+ "epoch": 25.76,
333
+ "learning_rate": 1.4141414141414143e-06,
334
+ "loss": 0.0379,
335
+ "step": 8500
336
+ },
337
+ {
338
+ "epoch": 26.0,
339
+ "eval_accuracy": 0.9049360156059265,
340
+ "eval_loss": 0.4970065951347351,
341
+ "eval_runtime": 3.7215,
342
+ "eval_samples_per_second": 293.971,
343
+ "eval_steps_per_second": 36.814,
344
+ "step": 8580
345
+ },
346
+ {
347
+ "epoch": 27.0,
348
+ "eval_accuracy": 0.9040219187736511,
349
+ "eval_loss": 0.4949011206626892,
350
+ "eval_runtime": 3.6072,
351
+ "eval_samples_per_second": 303.279,
352
+ "eval_steps_per_second": 37.979,
353
+ "step": 8910
354
+ },
355
+ {
356
+ "epoch": 27.27,
357
+ "learning_rate": 9.090909090909091e-07,
358
+ "loss": 0.0351,
359
+ "step": 9000
360
+ },
361
+ {
362
+ "epoch": 28.0,
363
+ "eval_accuracy": 0.9040219187736511,
364
+ "eval_loss": 0.49713513255119324,
365
+ "eval_runtime": 3.5069,
366
+ "eval_samples_per_second": 311.955,
367
+ "eval_steps_per_second": 39.066,
368
+ "step": 9240
369
+ },
370
+ {
371
+ "epoch": 28.79,
372
+ "learning_rate": 4.040404040404041e-07,
373
+ "loss": 0.0321,
374
+ "step": 9500
375
+ },
376
+ {
377
+ "epoch": 29.0,
378
+ "eval_accuracy": 0.9031078815460205,
379
+ "eval_loss": 0.49672842025756836,
380
+ "eval_runtime": 3.506,
381
+ "eval_samples_per_second": 312.034,
382
+ "eval_steps_per_second": 39.076,
383
+ "step": 9570
384
+ },
385
+ {
386
+ "epoch": 30.0,
387
+ "eval_accuracy": 0.9031078815460205,
388
+ "eval_loss": 0.4978463649749756,
389
+ "eval_runtime": 3.6907,
390
+ "eval_samples_per_second": 296.421,
391
+ "eval_steps_per_second": 37.12,
392
+ "step": 9900
393
+ },
394
+ {
395
+ "epoch": 30.0,
396
+ "step": 9900,
397
+ "total_flos": 2.086398038559744e+16,
398
+ "train_loss": 0.7703812461429173,
399
+ "train_runtime": 4144.3399,
400
+ "train_samples_per_second": 76.413,
401
+ "train_steps_per_second": 2.389
402
  }
403
  ],
404
+ "max_steps": 9900,
405
+ "num_train_epochs": 30,
406
+ "total_flos": 2.086398038559744e+16,
407
  "trial_name": null,
408
  "trial_params": null
409
  }