JulienRPA committed on
Commit
c1945c4
1 Parent(s): 0b2eb0e

Training in progress, step 4000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95248b59e7dac0d789b6b077fbe09ce5175324e63b5e9cdd37a3a30c2cde9027
3
  size 2000137067
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f8804038bfc8c4fff09781205b46f28d914f21115e30a8813fb451cdb64ad17
3
  size 2000137067
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4238518d766751ead10635c197669c039a8c46869571a7f8fc96716f256600df
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ccb2c6910f34be7a138f85235698d8f0ec58283fab1416ef6a153c170b333ed
3
  size 1002469625
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dda8e14cf65113c4145f87b0ecbda755c0d32ab5bbb56548e3c45d7ecd14a2c9
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:047a205ba65d4143fae2458ea498dbf8e5a685cd3e2e670c6951c8a299de8ae5
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9c68e8f52d353c005549c69e33b5a29ace5f59d7300d7ea3a17b4a529d455d0
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5789a0b7645be9b850d95a6b32fdc2dd662d33e25aea9a3e9fcb4a7309e592af
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.397624039133473,
5
- "global_step": 2000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -137,11 +137,142 @@
137
  "eval_samples_per_second": 0.751,
138
  "eval_steps_per_second": 0.094,
139
  "step": 2000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
  }
141
  ],
142
  "max_steps": 11448,
143
  "num_train_epochs": 8,
144
- "total_flos": 1328908826910720.0,
145
  "trial_name": null,
146
  "trial_params": null
147
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.795248078266946,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
137
  "eval_samples_per_second": 0.751,
138
  "eval_steps_per_second": 0.094,
139
  "step": 2000
140
+ },
141
+ {
142
+ "epoch": 1.47,
143
+ "learning_rate": 4.2e-05,
144
+ "loss": 6.2524,
145
+ "step": 2100
146
+ },
147
+ {
148
+ "epoch": 1.54,
149
+ "learning_rate": 4.4000000000000006e-05,
150
+ "loss": 5.8029,
151
+ "step": 2200
152
+ },
153
+ {
154
+ "epoch": 1.61,
155
+ "learning_rate": 4.600000000000001e-05,
156
+ "loss": 5.606,
157
+ "step": 2300
158
+ },
159
+ {
160
+ "epoch": 1.68,
161
+ "learning_rate": 4.8e-05,
162
+ "loss": 5.3757,
163
+ "step": 2400
164
+ },
165
+ {
166
+ "epoch": 1.75,
167
+ "learning_rate": 5e-05,
168
+ "loss": 5.1043,
169
+ "step": 2500
170
+ },
171
+ {
172
+ "epoch": 1.82,
173
+ "learning_rate": 4.944121591417077e-05,
174
+ "loss": 4.9414,
175
+ "step": 2600
176
+ },
177
+ {
178
+ "epoch": 1.89,
179
+ "learning_rate": 4.888243182834153e-05,
180
+ "loss": 4.7381,
181
+ "step": 2700
182
+ },
183
+ {
184
+ "epoch": 1.96,
185
+ "learning_rate": 4.8323647742512295e-05,
186
+ "loss": 4.6214,
187
+ "step": 2800
188
+ },
189
+ {
190
+ "epoch": 2.03,
191
+ "learning_rate": 4.776486365668306e-05,
192
+ "loss": 4.2971,
193
+ "step": 2900
194
+ },
195
+ {
196
+ "epoch": 2.1,
197
+ "learning_rate": 4.720607957085382e-05,
198
+ "loss": 4.1602,
199
+ "step": 3000
200
+ },
201
+ {
202
+ "epoch": 2.17,
203
+ "learning_rate": 4.664729548502459e-05,
204
+ "loss": 4.0391,
205
+ "step": 3100
206
+ },
207
+ {
208
+ "epoch": 2.24,
209
+ "learning_rate": 4.6088511399195353e-05,
210
+ "loss": 3.9211,
211
+ "step": 3200
212
+ },
213
+ {
214
+ "epoch": 2.31,
215
+ "learning_rate": 4.552972731336611e-05,
216
+ "loss": 3.7642,
217
+ "step": 3300
218
+ },
219
+ {
220
+ "epoch": 2.38,
221
+ "learning_rate": 4.497094322753688e-05,
222
+ "loss": 3.6698,
223
+ "step": 3400
224
+ },
225
+ {
226
+ "epoch": 2.45,
227
+ "learning_rate": 4.4412159141707646e-05,
228
+ "loss": 3.5409,
229
+ "step": 3500
230
+ },
231
+ {
232
+ "epoch": 2.52,
233
+ "learning_rate": 4.385337505587841e-05,
234
+ "loss": 3.4016,
235
+ "step": 3600
236
+ },
237
+ {
238
+ "epoch": 2.59,
239
+ "learning_rate": 4.329459097004918e-05,
240
+ "loss": 3.2761,
241
+ "step": 3700
242
+ },
243
+ {
244
+ "epoch": 2.66,
245
+ "learning_rate": 4.2735806884219945e-05,
246
+ "loss": 3.1708,
247
+ "step": 3800
248
+ },
249
+ {
250
+ "epoch": 2.73,
251
+ "learning_rate": 4.2177022798390704e-05,
252
+ "loss": 3.0849,
253
+ "step": 3900
254
+ },
255
+ {
256
+ "epoch": 2.8,
257
+ "learning_rate": 4.161823871256147e-05,
258
+ "loss": 3.0222,
259
+ "step": 4000
260
+ },
261
+ {
262
+ "epoch": 2.8,
263
+ "eval_bleu": 27.8543,
264
+ "eval_em": 0.0,
265
+ "eval_gen_len": 36.8,
266
+ "eval_loss": 2.8796441555023193,
267
+ "eval_runtime": 501.7382,
268
+ "eval_samples_per_second": 2.402,
269
+ "eval_steps_per_second": 0.301,
270
+ "step": 4000
271
  }
272
  ],
273
  "max_steps": 11448,
274
  "num_train_epochs": 8,
275
+ "total_flos": 2632028349156096.0,
276
  "trial_name": null,
277
  "trial_params": null
278
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4238518d766751ead10635c197669c039a8c46869571a7f8fc96716f256600df
3
  size 1002469625
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ccb2c6910f34be7a138f85235698d8f0ec58283fab1416ef6a153c170b333ed
3
  size 1002469625
runs/Jun05_10-45-59_0a95bf9de5ac/events.out.tfevents.1685962630.0a95bf9de5ac.3272.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:098e0e6b4e1c013ebd1598cfcedd0c3450868df3bf0b760afd29eede229bcb12
3
- size 12098
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbc2bf10f2e8697285e6c7d205571262251279a84d779b5e1565cd93e4f856da
3
+ size 15654