mprzibilla committed on
Commit
50c347b
·
1 Parent(s): 2001a24

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a08673e36b5589851a89e97585a3867293b7bfc29762c0d241bad5a459e2c98
3
- size 162155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8676f9764c07e6b933f4fa352af9b98b3b044de2eff759af6bf5baa43998241a
3
+ size 721649669
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd2694752b23d09cc0476bcd99fca84c77c41869f0d0641d2ed74aed026ce263
3
  size 377640289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3321050bfb777e92d4a9535bacf26233d6d93c177f40afb72e9a4edb621469ad
3
  size 377640289
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df299414f56d3cee0b6d3d0811a00f61147a37acc97a9fa7bcc500e7d68f6f17
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02a382c9fbd249cde2e9b0ca8a6981276f1d798e47ccb2d4445b039ddbd74cae
3
  size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df7dc3303bdc1a2737159ab5ce6ac6dfaf84d16b8f82e60421bc66305d6aac5e
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af1f41381fb3909e7aa415aa1d1f2d9db8344ae4f5d0ae7b0cdf50527d59f2cb
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b0cf379358077b3b304ca0deebe51217ffd230c794737401ceae6e68e81a0c3
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00e56ff1f9aeef202866d18c52f49cf630af4e847aa9aca70191fe768f73377a
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 4830,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.840923948572674e-05,
13
- "loss": 110.9133,
14
  "step": 322
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
- "eval_loss": 12.292933464050293,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
- "eval_runtime": 7.4878,
23
- "eval_samples_per_second": 28.046,
24
- "eval_steps_per_second": 3.606,
25
  "step": 322
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.139246023098715e-05,
30
- "loss": 5.7752,
31
- "step": 644
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 1.0,
36
- "eval_loss": 3.7167434692382812,
37
- "eval_new_wer": 1.0,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 7.7609,
40
- "eval_samples_per_second": 27.059,
41
- "eval_steps_per_second": 3.479,
42
- "step": 644
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.437568097624755e-05,
47
- "loss": 3.4632,
48
- "step": 966
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 1.0,
53
- "eval_loss": 3.6892776489257812,
54
- "eval_new_wer": 1.0,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 8.762,
57
- "eval_samples_per_second": 23.967,
58
- "eval_steps_per_second": 3.081,
59
- "step": 966
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.735890172150796e-05,
64
- "loss": 3.3883,
65
- "step": 1288
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 1.0,
70
- "eval_loss": 3.6128814220428467,
71
- "eval_new_wer": 1.0,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 8.0803,
74
- "eval_samples_per_second": 25.989,
75
- "eval_steps_per_second": 3.341,
76
- "step": 1288
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.034212246676837e-05,
81
- "loss": 3.3461,
82
- "step": 1610
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 1.0,
87
- "eval_loss": 3.5834929943084717,
88
- "eval_new_wer": 1.0,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 8.5699,
91
- "eval_samples_per_second": 24.504,
92
- "eval_steps_per_second": 3.151,
93
- "step": 1610
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.332534321202876e-05,
98
- "loss": 3.3228,
99
- "step": 1932
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 1.0,
104
- "eval_loss": 3.564229726791382,
105
- "eval_new_wer": 1.0,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 7.6036,
108
- "eval_samples_per_second": 27.619,
109
- "eval_steps_per_second": 3.551,
110
- "step": 1932
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.6308563957289177e-05,
115
- "loss": 3.3009,
116
- "step": 2254
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 1.0,
121
- "eval_loss": 3.5358943939208984,
122
- "eval_new_wer": 1.0,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 7.829,
125
- "eval_samples_per_second": 26.824,
126
- "eval_steps_per_second": 3.449,
127
- "step": 2254
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.9291784702549575e-05,
132
- "loss": 3.2842,
133
- "step": 2576
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 1.0,
138
- "eval_loss": 3.5163114070892334,
139
- "eval_new_wer": 1.0,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 7.665,
142
- "eval_samples_per_second": 27.397,
143
- "eval_steps_per_second": 3.523,
144
- "step": 2576
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.2275005447809986e-05,
149
- "loss": 3.2793,
150
- "step": 2898
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 1.0,
155
- "eval_loss": 3.4834606647491455,
156
- "eval_new_wer": 1.0,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 8.9806,
159
- "eval_samples_per_second": 23.384,
160
- "eval_steps_per_second": 3.006,
161
- "step": 2898
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.5258226193070384e-05,
166
- "loss": 3.2629,
167
- "step": 3220
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 1.0,
172
- "eval_loss": 3.4674203395843506,
173
- "eval_new_wer": 1.0,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 9.6156,
176
- "eval_samples_per_second": 21.839,
177
- "eval_steps_per_second": 2.808,
178
- "step": 3220
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.824144693833079e-05,
183
- "loss": 3.2617,
184
- "step": 3542
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 1.0,
189
- "eval_loss": 3.4477264881134033,
190
- "eval_new_wer": 1.0,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 8.5536,
193
- "eval_samples_per_second": 24.551,
194
- "eval_steps_per_second": 3.157,
195
- "step": 3542
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1224667683591198e-05,
200
- "loss": 3.251,
201
- "step": 3864
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 1.0,
206
- "eval_loss": 3.451367139816284,
207
- "eval_new_wer": 1.0,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 7.3215,
210
- "eval_samples_per_second": 28.683,
211
- "eval_steps_per_second": 3.688,
212
- "step": 3864
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4207888428851601e-05,
217
- "loss": 3.2499,
218
- "step": 4186
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 1.0,
223
- "eval_loss": 3.4498093128204346,
224
- "eval_new_wer": 1.0,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 8.0836,
227
- "eval_samples_per_second": 25.979,
228
- "eval_steps_per_second": 3.34,
229
- "step": 4186
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.191109174112007e-06,
234
- "loss": 3.2462,
235
- "step": 4508
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 1.0,
240
- "eval_loss": 3.4502668380737305,
241
- "eval_new_wer": 1.0,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 7.7485,
244
- "eval_samples_per_second": 27.102,
245
- "eval_steps_per_second": 3.485,
246
- "step": 4508
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 1.743299193724123e-07,
251
- "loss": 3.2407,
252
- "step": 4830
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 1.0,
257
- "eval_loss": 3.4475154876708984,
258
- "eval_new_wer": 1.0,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 7.7552,
261
- "eval_samples_per_second": 27.079,
262
- "eval_steps_per_second": 3.482,
263
- "step": 4830
264
  }
265
  ],
266
  "max_steps": 4830,
267
  "num_train_epochs": 15,
268
- "total_flos": 3.496646357064e+18,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 322,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.83874482458052e-05,
13
+ "loss": 20.722,
14
  "step": 322
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
+ "eval_loss": 3.2618589401245117,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
+ "eval_runtime": 7.6687,
23
+ "eval_samples_per_second": 27.384,
24
+ "eval_steps_per_second": 3.521,
25
  "step": 322
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 4830,
29
  "num_train_epochs": 15,
30
+ "total_flos": 2.331097571376e+17,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82d8a5336a4bd974fb46749e5c97487a8c34caeee0663a03cc169408151233dd
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec767e1aae092c1f11b99196ba7b3d812c3d6f0dd8fc08a3236cbdaed665c876
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd2694752b23d09cc0476bcd99fca84c77c41869f0d0641d2ed74aed026ce263
3
  size 377640289
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3321050bfb777e92d4a9535bacf26233d6d93c177f40afb72e9a4edb621469ad
3
  size 377640289
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82d8a5336a4bd974fb46749e5c97487a8c34caeee0663a03cc169408151233dd
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec767e1aae092c1f11b99196ba7b3d812c3d6f0dd8fc08a3236cbdaed665c876
3
  size 3387