mountaincreek12 committed on
Commit
226fdc2
1 Parent(s): 3879522

Training in progress, epoch 0

Browse files
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.49,
3
- "total_flos": 4.447730655960883e+17,
4
- "train_loss": 0.31461275049618315,
5
- "train_runtime": 310.8231,
6
- "train_samples_per_second": 60.645,
7
- "train_steps_per_second": 0.45
8
  }
 
1
  {
2
+ "epoch": 28.57,
3
+ "total_flos": 1.4797115229312e+18,
4
+ "train_loss": 0.175064886448284,
5
+ "train_runtime": 1076.0914,
6
+ "train_samples_per_second": 18.205,
7
+ "train_steps_per_second": 0.139
8
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:906024750d8d4fc61fb267c5ef650bafa6d3a7f15b568fc2f56be4112895decc
3
  size 110348984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4bf131fb2e50399ddd95fe34bae231d6b74ce7241f4f8f97c5f209d228d5ece
3
  size 110348984
runs/Apr08_03-52-04_pytorch-1-10-gpu-py-ml-g4dn-xlarge-bd81d1f288c5d63d0571c57fb0b0/events.out.tfevents.1712548468.pytorch-1-10-gpu-py-ml-g4dn-xlarge-bd81d1f288c5d63d0571c57fb0b0.746.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:167405626ed62bd5ecc4744f235518f54fd21eb515afd7b6f9ecfcf5a774666e
3
+ size 5450
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.49,
3
- "total_flos": 4.447730655960883e+17,
4
- "train_loss": 0.31461275049618315,
5
- "train_runtime": 310.8231,
6
- "train_samples_per_second": 60.645,
7
- "train_steps_per_second": 0.45
8
  }
 
1
  {
2
+ "epoch": 28.57,
3
+ "total_flos": 1.4797115229312e+18,
4
+ "train_loss": 0.175064886448284,
5
+ "train_runtime": 1076.0914,
6
+ "train_samples_per_second": 18.205,
7
+ "train_steps_per_second": 0.139
8
  }
trainer_state.json CHANGED
@@ -1,217 +1,395 @@
1
  {
2
- "best_metric": 0.921760391198044,
3
- "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-sealv1/checkpoint-118",
4
- "epoch": 9.491525423728813,
5
  "eval_steps": 500,
6
- "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.68,
13
- "grad_norm": 8.243694305419922,
14
- "learning_rate": 3.571428571428572e-05,
15
- "loss": 1.1068,
 
 
 
 
 
 
 
 
 
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.95,
20
- "eval_accuracy": 0.706601466992665,
21
- "eval_loss": 0.6517727375030518,
22
- "eval_runtime": 2.518,
23
- "eval_samples_per_second": 162.428,
24
- "eval_steps_per_second": 5.163,
25
- "step": 14
26
- },
27
- {
28
- "epoch": 1.36,
29
- "grad_norm": 11.443496704101562,
30
- "learning_rate": 4.761904761904762e-05,
31
- "loss": 0.4912,
 
 
 
 
 
 
 
 
 
32
  "step": 20
33
  },
34
  {
35
- "epoch": 1.97,
36
- "eval_accuracy": 0.843520782396088,
37
- "eval_loss": 0.46679040789604187,
38
- "eval_runtime": 2.5032,
39
- "eval_samples_per_second": 163.39,
40
- "eval_steps_per_second": 5.193,
41
- "step": 29
 
 
 
 
 
 
 
 
 
42
  },
43
  {
44
- "epoch": 2.03,
45
- "grad_norm": 9.79934024810791,
46
- "learning_rate": 4.3650793650793655e-05,
47
- "loss": 0.3366,
48
  "step": 30
49
  },
50
  {
51
- "epoch": 2.71,
52
- "grad_norm": 8.352068901062012,
53
- "learning_rate": 3.968253968253968e-05,
54
- "loss": 0.2749,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  "step": 40
56
  },
57
  {
58
- "epoch": 2.98,
59
- "eval_accuracy": 0.8704156479217604,
60
- "eval_loss": 0.41268301010131836,
61
- "eval_runtime": 2.4728,
62
- "eval_samples_per_second": 165.399,
63
- "eval_steps_per_second": 5.257,
64
- "step": 44
 
 
 
 
 
 
 
 
 
65
  },
66
  {
67
- "epoch": 3.39,
68
- "grad_norm": 7.004426002502441,
69
- "learning_rate": 3.571428571428572e-05,
70
- "loss": 0.3189,
71
  "step": 50
72
  },
73
  {
74
- "epoch": 4.0,
75
- "eval_accuracy": 0.8875305623471883,
76
- "eval_loss": 0.36258456110954285,
77
- "eval_runtime": 2.4887,
78
- "eval_samples_per_second": 164.344,
79
- "eval_steps_per_second": 5.224,
80
- "step": 59
81
- },
82
- {
83
- "epoch": 4.07,
84
- "grad_norm": 4.621389865875244,
85
- "learning_rate": 3.1746031746031745e-05,
86
- "loss": 0.2412,
 
 
 
 
 
 
 
 
 
87
  "step": 60
88
  },
89
  {
90
- "epoch": 4.75,
91
- "grad_norm": 6.152388572692871,
92
- "learning_rate": 2.777777777777778e-05,
93
- "loss": 0.2226,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
94
  "step": 70
95
  },
96
  {
97
- "epoch": 4.95,
98
- "eval_accuracy": 0.9046454767726161,
99
- "eval_loss": 0.26376911997795105,
100
- "eval_runtime": 2.4808,
101
- "eval_samples_per_second": 164.864,
102
- "eval_steps_per_second": 5.24,
103
  "step": 73
104
  },
105
  {
106
- "epoch": 5.42,
107
- "grad_norm": 6.929192066192627,
108
- "learning_rate": 2.380952380952381e-05,
109
- "loss": 0.2394,
 
 
 
 
 
 
 
 
 
110
  "step": 80
111
  },
112
  {
113
- "epoch": 5.97,
114
- "eval_accuracy": 0.8801955990220048,
115
- "eval_loss": 0.358397513628006,
116
- "eval_runtime": 2.4802,
117
- "eval_samples_per_second": 164.908,
118
- "eval_steps_per_second": 5.242,
119
- "step": 88
 
 
 
 
 
 
 
 
 
120
  },
121
  {
122
- "epoch": 6.1,
123
- "grad_norm": 6.410078525543213,
124
- "learning_rate": 1.984126984126984e-05,
125
- "loss": 0.2156,
126
  "step": 90
127
  },
128
  {
129
- "epoch": 6.78,
130
- "grad_norm": 6.870254039764404,
131
- "learning_rate": 1.5873015873015872e-05,
132
- "loss": 0.2241,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  "step": 100
134
  },
135
  {
136
- "epoch": 6.98,
137
- "eval_accuracy": 0.9046454767726161,
138
- "eval_loss": 0.2820737659931183,
139
- "eval_runtime": 2.4944,
140
- "eval_samples_per_second": 163.967,
141
- "eval_steps_per_second": 5.212,
142
- "step": 103
143
  },
144
  {
145
- "epoch": 7.46,
146
- "grad_norm": 20.99500274658203,
147
- "learning_rate": 1.1904761904761905e-05,
148
- "loss": 0.1815,
149
  "step": 110
150
  },
151
  {
152
- "epoch": 8.0,
153
- "eval_accuracy": 0.921760391198044,
154
- "eval_loss": 0.21383894979953766,
155
- "eval_runtime": 2.4781,
156
- "eval_samples_per_second": 165.045,
157
- "eval_steps_per_second": 5.246,
158
- "step": 118
159
- },
160
- {
161
- "epoch": 8.14,
162
- "grad_norm": 10.073310852050781,
163
- "learning_rate": 7.936507936507936e-06,
164
- "loss": 0.1715,
 
 
 
 
 
 
 
 
 
165
  "step": 120
166
  },
167
  {
168
- "epoch": 8.81,
169
- "grad_norm": 5.79032564163208,
170
- "learning_rate": 3.968253968253968e-06,
171
- "loss": 0.1862,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  "step": 130
173
  },
174
  {
175
- "epoch": 8.95,
176
- "eval_accuracy": 0.9046454767726161,
177
- "eval_loss": 0.27377307415008545,
178
- "eval_runtime": 2.4752,
179
- "eval_samples_per_second": 165.24,
180
- "eval_steps_per_second": 5.252,
181
- "step": 132
182
  },
183
  {
184
- "epoch": 9.49,
185
- "grad_norm": 9.280794143676758,
186
- "learning_rate": 0.0,
187
- "loss": 0.1942,
188
- "step": 140
 
 
189
  },
190
  {
191
- "epoch": 9.49,
192
- "eval_accuracy": 0.9119804400977995,
193
- "eval_loss": 0.25534436106681824,
194
- "eval_runtime": 2.4962,
195
- "eval_samples_per_second": 163.851,
196
- "eval_steps_per_second": 5.208,
197
  "step": 140
198
  },
199
  {
200
- "epoch": 9.49,
201
- "step": 140,
202
- "total_flos": 4.447730655960883e+17,
203
- "train_loss": 0.31461275049618315,
204
- "train_runtime": 310.8231,
205
- "train_samples_per_second": 60.645,
206
- "train_steps_per_second": 0.45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  }
208
  ],
209
  "logging_steps": 10,
210
- "max_steps": 140,
211
  "num_input_tokens_seen": 0,
212
- "num_train_epochs": 10,
213
  "save_steps": 500,
214
- "total_flos": 4.447730655960883e+17,
215
  "train_batch_size": 32,
216
  "trial_name": null,
217
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.9862068965517241,
3
+ "best_model_checkpoint": "swin-tiny-patch4-window7-224-finetuned-sealv1/checkpoint-94",
4
+ "epoch": 28.571428571428573,
5
  "eval_steps": 500,
6
+ "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.95,
13
+ "eval_accuracy": 0.496551724137931,
14
+ "eval_loss": 1.2919867038726807,
15
+ "eval_runtime": 3.1223,
16
+ "eval_samples_per_second": 46.44,
17
+ "eval_steps_per_second": 1.601,
18
+ "step": 5
19
+ },
20
+ {
21
+ "epoch": 1.9,
22
+ "grad_norm": 29.579647064208984,
23
+ "learning_rate": 3.3333333333333335e-05,
24
+ "loss": 1.1379,
25
  "step": 10
26
  },
27
  {
28
+ "epoch": 1.9,
29
+ "eval_accuracy": 0.496551724137931,
30
+ "eval_loss": 1.017675518989563,
31
+ "eval_runtime": 3.0482,
32
+ "eval_samples_per_second": 47.569,
33
+ "eval_steps_per_second": 1.64,
34
+ "step": 10
35
+ },
36
+ {
37
+ "epoch": 2.86,
38
+ "eval_accuracy": 0.8758620689655172,
39
+ "eval_loss": 0.7626163959503174,
40
+ "eval_runtime": 3.0592,
41
+ "eval_samples_per_second": 47.398,
42
+ "eval_steps_per_second": 1.634,
43
+ "step": 15
44
+ },
45
+ {
46
+ "epoch": 3.81,
47
+ "grad_norm": 11.914603233337402,
48
+ "learning_rate": 4.814814814814815e-05,
49
+ "loss": 0.6784,
50
  "step": 20
51
  },
52
  {
53
+ "epoch": 4.0,
54
+ "eval_accuracy": 0.9310344827586207,
55
+ "eval_loss": 0.5387857556343079,
56
+ "eval_runtime": 3.0489,
57
+ "eval_samples_per_second": 47.558,
58
+ "eval_steps_per_second": 1.64,
59
+ "step": 21
60
+ },
61
+ {
62
+ "epoch": 4.95,
63
+ "eval_accuracy": 0.9103448275862069,
64
+ "eval_loss": 0.4190843105316162,
65
+ "eval_runtime": 3.0202,
66
+ "eval_samples_per_second": 48.01,
67
+ "eval_steps_per_second": 1.656,
68
+ "step": 26
69
  },
70
  {
71
+ "epoch": 5.71,
72
+ "grad_norm": 7.094911575317383,
73
+ "learning_rate": 4.4444444444444447e-05,
74
+ "loss": 0.3269,
75
  "step": 30
76
  },
77
  {
78
+ "epoch": 5.9,
79
+ "eval_accuracy": 0.8896551724137931,
80
+ "eval_loss": 0.39897656440734863,
81
+ "eval_runtime": 3.0245,
82
+ "eval_samples_per_second": 47.942,
83
+ "eval_steps_per_second": 1.653,
84
+ "step": 31
85
+ },
86
+ {
87
+ "epoch": 6.86,
88
+ "eval_accuracy": 0.9517241379310345,
89
+ "eval_loss": 0.20898482203483582,
90
+ "eval_runtime": 3.0505,
91
+ "eval_samples_per_second": 47.534,
92
+ "eval_steps_per_second": 1.639,
93
+ "step": 36
94
+ },
95
+ {
96
+ "epoch": 7.62,
97
+ "grad_norm": 6.531408786773682,
98
+ "learning_rate": 4.074074074074074e-05,
99
+ "loss": 0.2068,
100
  "step": 40
101
  },
102
  {
103
+ "epoch": 8.0,
104
+ "eval_accuracy": 0.9586206896551724,
105
+ "eval_loss": 0.18190069496631622,
106
+ "eval_runtime": 3.053,
107
+ "eval_samples_per_second": 47.494,
108
+ "eval_steps_per_second": 1.638,
109
+ "step": 42
110
+ },
111
+ {
112
+ "epoch": 8.95,
113
+ "eval_accuracy": 0.9655172413793104,
114
+ "eval_loss": 0.11921437084674835,
115
+ "eval_runtime": 3.0392,
116
+ "eval_samples_per_second": 47.71,
117
+ "eval_steps_per_second": 1.645,
118
+ "step": 47
119
  },
120
  {
121
+ "epoch": 9.52,
122
+ "grad_norm": 5.500025272369385,
123
+ "learning_rate": 3.7037037037037037e-05,
124
+ "loss": 0.1104,
125
  "step": 50
126
  },
127
  {
128
+ "epoch": 9.9,
129
+ "eval_accuracy": 0.9724137931034482,
130
+ "eval_loss": 0.06815902888774872,
131
+ "eval_runtime": 3.0143,
132
+ "eval_samples_per_second": 48.105,
133
+ "eval_steps_per_second": 1.659,
134
+ "step": 52
135
+ },
136
+ {
137
+ "epoch": 10.86,
138
+ "eval_accuracy": 0.9724137931034482,
139
+ "eval_loss": 0.08536545187234879,
140
+ "eval_runtime": 3.0425,
141
+ "eval_samples_per_second": 47.658,
142
+ "eval_steps_per_second": 1.643,
143
+ "step": 57
144
+ },
145
+ {
146
+ "epoch": 11.43,
147
+ "grad_norm": 2.2279155254364014,
148
+ "learning_rate": 3.3333333333333335e-05,
149
+ "loss": 0.0571,
150
  "step": 60
151
  },
152
  {
153
+ "epoch": 12.0,
154
+ "eval_accuracy": 0.9655172413793104,
155
+ "eval_loss": 0.08157803863286972,
156
+ "eval_runtime": 3.0775,
157
+ "eval_samples_per_second": 47.116,
158
+ "eval_steps_per_second": 1.625,
159
+ "step": 63
160
+ },
161
+ {
162
+ "epoch": 12.95,
163
+ "eval_accuracy": 0.9793103448275862,
164
+ "eval_loss": 0.053488027304410934,
165
+ "eval_runtime": 3.0426,
166
+ "eval_samples_per_second": 47.656,
167
+ "eval_steps_per_second": 1.643,
168
+ "step": 68
169
+ },
170
+ {
171
+ "epoch": 13.33,
172
+ "grad_norm": 3.4508020877838135,
173
+ "learning_rate": 2.962962962962963e-05,
174
+ "loss": 0.0382,
175
  "step": 70
176
  },
177
  {
178
+ "epoch": 13.9,
179
+ "eval_accuracy": 0.9793103448275862,
180
+ "eval_loss": 0.04910058155655861,
181
+ "eval_runtime": 3.0539,
182
+ "eval_samples_per_second": 47.48,
183
+ "eval_steps_per_second": 1.637,
184
  "step": 73
185
  },
186
  {
187
+ "epoch": 14.86,
188
+ "eval_accuracy": 0.9793103448275862,
189
+ "eval_loss": 0.05336721986532211,
190
+ "eval_runtime": 3.0215,
191
+ "eval_samples_per_second": 47.989,
192
+ "eval_steps_per_second": 1.655,
193
+ "step": 78
194
+ },
195
+ {
196
+ "epoch": 15.24,
197
+ "grad_norm": 0.396097868680954,
198
+ "learning_rate": 2.5925925925925925e-05,
199
+ "loss": 0.0158,
200
  "step": 80
201
  },
202
  {
203
+ "epoch": 16.0,
204
+ "eval_accuracy": 0.9793103448275862,
205
+ "eval_loss": 0.03685032203793526,
206
+ "eval_runtime": 3.0204,
207
+ "eval_samples_per_second": 48.006,
208
+ "eval_steps_per_second": 1.655,
209
+ "step": 84
210
+ },
211
+ {
212
+ "epoch": 16.95,
213
+ "eval_accuracy": 0.9724137931034482,
214
+ "eval_loss": 0.11105308681726456,
215
+ "eval_runtime": 3.0328,
216
+ "eval_samples_per_second": 47.81,
217
+ "eval_steps_per_second": 1.649,
218
+ "step": 89
219
  },
220
  {
221
+ "epoch": 17.14,
222
+ "grad_norm": 1.3435662984848022,
223
+ "learning_rate": 2.2222222222222223e-05,
224
+ "loss": 0.0082,
225
  "step": 90
226
  },
227
  {
228
+ "epoch": 17.9,
229
+ "eval_accuracy": 0.9862068965517241,
230
+ "eval_loss": 0.0514516718685627,
231
+ "eval_runtime": 3.0168,
232
+ "eval_samples_per_second": 48.065,
233
+ "eval_steps_per_second": 1.657,
234
+ "step": 94
235
+ },
236
+ {
237
+ "epoch": 18.86,
238
+ "eval_accuracy": 0.9793103448275862,
239
+ "eval_loss": 0.07132605463266373,
240
+ "eval_runtime": 3.0424,
241
+ "eval_samples_per_second": 47.66,
242
+ "eval_steps_per_second": 1.643,
243
+ "step": 99
244
+ },
245
+ {
246
+ "epoch": 19.05,
247
+ "grad_norm": 1.5692905187606812,
248
+ "learning_rate": 1.8518518518518518e-05,
249
+ "loss": 0.0105,
250
  "step": 100
251
  },
252
  {
253
+ "epoch": 20.0,
254
+ "eval_accuracy": 0.9793103448275862,
255
+ "eval_loss": 0.059764981269836426,
256
+ "eval_runtime": 3.0213,
257
+ "eval_samples_per_second": 47.993,
258
+ "eval_steps_per_second": 1.655,
259
+ "step": 105
260
  },
261
  {
262
+ "epoch": 20.95,
263
+ "grad_norm": 0.6745745539665222,
264
+ "learning_rate": 1.4814814814814815e-05,
265
+ "loss": 0.009,
266
  "step": 110
267
  },
268
  {
269
+ "epoch": 20.95,
270
+ "eval_accuracy": 0.9724137931034482,
271
+ "eval_loss": 0.07587717473506927,
272
+ "eval_runtime": 3.0482,
273
+ "eval_samples_per_second": 47.569,
274
+ "eval_steps_per_second": 1.64,
275
+ "step": 110
276
+ },
277
+ {
278
+ "epoch": 21.9,
279
+ "eval_accuracy": 0.9793103448275862,
280
+ "eval_loss": 0.07691250741481781,
281
+ "eval_runtime": 3.0353,
282
+ "eval_samples_per_second": 47.771,
283
+ "eval_steps_per_second": 1.647,
284
+ "step": 115
285
+ },
286
+ {
287
+ "epoch": 22.86,
288
+ "grad_norm": 5.058462142944336,
289
+ "learning_rate": 1.1111111111111112e-05,
290
+ "loss": 0.0134,
291
  "step": 120
292
  },
293
  {
294
+ "epoch": 22.86,
295
+ "eval_accuracy": 0.9793103448275862,
296
+ "eval_loss": 0.070177361369133,
297
+ "eval_runtime": 3.0247,
298
+ "eval_samples_per_second": 47.938,
299
+ "eval_steps_per_second": 1.653,
300
+ "step": 120
301
+ },
302
+ {
303
+ "epoch": 24.0,
304
+ "eval_accuracy": 0.9793103448275862,
305
+ "eval_loss": 0.060489553958177567,
306
+ "eval_runtime": 3.0525,
307
+ "eval_samples_per_second": 47.501,
308
+ "eval_steps_per_second": 1.638,
309
+ "step": 126
310
+ },
311
+ {
312
+ "epoch": 24.76,
313
+ "grad_norm": 1.4174549579620361,
314
+ "learning_rate": 7.4074074074074075e-06,
315
+ "loss": 0.0042,
316
  "step": 130
317
  },
318
  {
319
+ "epoch": 24.95,
320
+ "eval_accuracy": 0.9793103448275862,
321
+ "eval_loss": 0.06210276484489441,
322
+ "eval_runtime": 3.0244,
323
+ "eval_samples_per_second": 47.944,
324
+ "eval_steps_per_second": 1.653,
325
+ "step": 131
326
  },
327
  {
328
+ "epoch": 25.9,
329
+ "eval_accuracy": 0.9793103448275862,
330
+ "eval_loss": 0.06535915285348892,
331
+ "eval_runtime": 3.0417,
332
+ "eval_samples_per_second": 47.671,
333
+ "eval_steps_per_second": 1.644,
334
+ "step": 136
335
  },
336
  {
337
+ "epoch": 26.67,
338
+ "grad_norm": 0.7513312101364136,
339
+ "learning_rate": 3.7037037037037037e-06,
340
+ "loss": 0.0027,
 
 
341
  "step": 140
342
  },
343
  {
344
+ "epoch": 26.86,
345
+ "eval_accuracy": 0.9724137931034482,
346
+ "eval_loss": 0.06658191233873367,
347
+ "eval_runtime": 3.0308,
348
+ "eval_samples_per_second": 47.843,
349
+ "eval_steps_per_second": 1.65,
350
+ "step": 141
351
+ },
352
+ {
353
+ "epoch": 28.0,
354
+ "eval_accuracy": 0.9793103448275862,
355
+ "eval_loss": 0.06654076278209686,
356
+ "eval_runtime": 3.0747,
357
+ "eval_samples_per_second": 47.159,
358
+ "eval_steps_per_second": 1.626,
359
+ "step": 147
360
+ },
361
+ {
362
+ "epoch": 28.57,
363
+ "grad_norm": 2.4569530487060547,
364
+ "learning_rate": 0.0,
365
+ "loss": 0.0065,
366
+ "step": 150
367
+ },
368
+ {
369
+ "epoch": 28.57,
370
+ "eval_accuracy": 0.9793103448275862,
371
+ "eval_loss": 0.06503264605998993,
372
+ "eval_runtime": 3.0856,
373
+ "eval_samples_per_second": 46.992,
374
+ "eval_steps_per_second": 1.62,
375
+ "step": 150
376
+ },
377
+ {
378
+ "epoch": 28.57,
379
+ "step": 150,
380
+ "total_flos": 1.4797115229312e+18,
381
+ "train_loss": 0.175064886448284,
382
+ "train_runtime": 1076.0914,
383
+ "train_samples_per_second": 18.205,
384
+ "train_steps_per_second": 0.139
385
  }
386
  ],
387
  "logging_steps": 10,
388
+ "max_steps": 150,
389
  "num_input_tokens_seen": 0,
390
+ "num_train_epochs": 30,
391
  "save_steps": 500,
392
+ "total_flos": 1.4797115229312e+18,
393
  "train_batch_size": 32,
394
  "trial_name": null,
395
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa7f6a4d27b095d4d9346004b0f7772126cf0335c77cd7dcbfa4d65e7f0a3ecc
3
  size 4527
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218eb8bf534d4d2f1bf19a89b978665296be37a95beb535a7cade1f692029704
3
  size 4527