mprzibilla committed on
Commit
15bb3d2
1 Parent(s): d0a0fcb

Training in progress, epoch 1

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5c83da8a036d69efc8024b00e44d6a00123cfc886a3fa289af79285418564b80
3
- size 168299
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4da6ad10fe5ddfee1403f949f31c1ef5eea2feda23160bf58ce1088f375e0be4
3
+ size 721655813
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92fe0edec4ebcac6b9bfef5c8f50a1f58b8ce7a05e14cb0975d8c1b760c74879
3
  size 377643361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e977485d43ce175909714a990eaf32d0340768b859b648a3e806d55dfcda5f9
3
  size 377643361
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ad40faf567f1a7fc0c84303ebf9c04650cca482149b50a1f10be4e8b10e81ed1
3
  size 14639
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87e59c420ac8f254a85b7998827f0de34b640bffc0c9b34d43149710f0b80a4a
3
  size 14639
last-checkpoint/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72c25de5caa05be3ade9c581fb6e8d0990d8379d446f63ba53da2351bc6bb8bc
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3364651fd5a9e628b0b6f0c628dfc49d08e7dd9bf653f061f7a1e5bea9536652
3
  size 557
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:603fd8cfbe6a3a7249f97ae827d8bf30db6a5a28fc7f5478df2bb53acdb8f061
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33f8658c69038edd1bc039ffb6bc31d7c511183dc762692b5ed25c6a8b5f8c59
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,271 +1,33 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
- "global_step": 4140,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
- "learning_rate": 9.844902110348335e-05,
13
- "loss": 134.3311,
14
  "step": 276
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
- "eval_loss": 40.12137985229492,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
- "eval_runtime": 6.4824,
23
- "eval_samples_per_second": 27.767,
24
- "eval_steps_per_second": 3.548,
25
  "step": 276
26
- },
27
- {
28
- "epoch": 2.0,
29
- "learning_rate": 9.143147724383423e-05,
30
- "loss": 12.6593,
31
- "step": 552
32
- },
33
- {
34
- "epoch": 2.0,
35
- "eval_cer": 1.0,
36
- "eval_loss": 4.015556335449219,
37
- "eval_new_wer": 1.0,
38
- "eval_old_wer": 1.0,
39
- "eval_runtime": 6.4651,
40
- "eval_samples_per_second": 27.842,
41
- "eval_steps_per_second": 3.558,
42
- "step": 552
43
- },
44
- {
45
- "epoch": 3.0,
46
- "learning_rate": 8.441393338418511e-05,
47
- "loss": 3.5481,
48
- "step": 828
49
- },
50
- {
51
- "epoch": 3.0,
52
- "eval_cer": 1.0,
53
- "eval_loss": 3.8011059761047363,
54
- "eval_new_wer": 1.0,
55
- "eval_old_wer": 1.0,
56
- "eval_runtime": 6.4663,
57
- "eval_samples_per_second": 27.837,
58
- "eval_steps_per_second": 3.557,
59
- "step": 828
60
- },
61
- {
62
- "epoch": 4.0,
63
- "learning_rate": 7.739638952453599e-05,
64
- "loss": 3.4168,
65
- "step": 1104
66
- },
67
- {
68
- "epoch": 4.0,
69
- "eval_cer": 1.0,
70
- "eval_loss": 3.76212739944458,
71
- "eval_new_wer": 1.0,
72
- "eval_old_wer": 1.0,
73
- "eval_runtime": 6.4555,
74
- "eval_samples_per_second": 27.883,
75
- "eval_steps_per_second": 3.563,
76
- "step": 1104
77
- },
78
- {
79
- "epoch": 5.0,
80
- "learning_rate": 7.037884566488685e-05,
81
- "loss": 3.3804,
82
- "step": 1380
83
- },
84
- {
85
- "epoch": 5.0,
86
- "eval_cer": 1.0,
87
- "eval_loss": 3.722968816757202,
88
- "eval_new_wer": 1.0,
89
- "eval_old_wer": 1.0,
90
- "eval_runtime": 6.5104,
91
- "eval_samples_per_second": 27.648,
92
- "eval_steps_per_second": 3.533,
93
- "step": 1380
94
- },
95
- {
96
- "epoch": 6.0,
97
- "learning_rate": 6.336130180523773e-05,
98
- "loss": 3.3596,
99
- "step": 1656
100
- },
101
- {
102
- "epoch": 6.0,
103
- "eval_cer": 1.0,
104
- "eval_loss": 3.714810609817505,
105
- "eval_new_wer": 1.0,
106
- "eval_old_wer": 1.0,
107
- "eval_runtime": 6.4471,
108
- "eval_samples_per_second": 27.919,
109
- "eval_steps_per_second": 3.567,
110
- "step": 1656
111
- },
112
- {
113
- "epoch": 7.0,
114
- "learning_rate": 5.6343757945588615e-05,
115
- "loss": 3.3422,
116
- "step": 1932
117
- },
118
- {
119
- "epoch": 7.0,
120
- "eval_cer": 1.0,
121
- "eval_loss": 3.6891369819641113,
122
- "eval_new_wer": 1.0,
123
- "eval_old_wer": 1.0,
124
- "eval_runtime": 6.4647,
125
- "eval_samples_per_second": 27.843,
126
- "eval_steps_per_second": 3.558,
127
- "step": 1932
128
- },
129
- {
130
- "epoch": 8.0,
131
- "learning_rate": 4.932621408593949e-05,
132
- "loss": 3.3317,
133
- "step": 2208
134
- },
135
- {
136
- "epoch": 8.0,
137
- "eval_cer": 1.0,
138
- "eval_loss": 3.7060208320617676,
139
- "eval_new_wer": 1.0,
140
- "eval_old_wer": 1.0,
141
- "eval_runtime": 6.4783,
142
- "eval_samples_per_second": 27.785,
143
- "eval_steps_per_second": 3.55,
144
- "step": 2208
145
- },
146
- {
147
- "epoch": 9.0,
148
- "learning_rate": 4.230867022629036e-05,
149
- "loss": 3.3247,
150
- "step": 2484
151
- },
152
- {
153
- "epoch": 9.0,
154
- "eval_cer": 1.0,
155
- "eval_loss": 3.6837430000305176,
156
- "eval_new_wer": 1.0,
157
- "eval_old_wer": 1.0,
158
- "eval_runtime": 6.5103,
159
- "eval_samples_per_second": 27.649,
160
- "eval_steps_per_second": 3.533,
161
- "step": 2484
162
- },
163
- {
164
- "epoch": 10.0,
165
- "learning_rate": 3.529112636664124e-05,
166
- "loss": 3.3178,
167
- "step": 2760
168
- },
169
- {
170
- "epoch": 10.0,
171
- "eval_cer": 1.0,
172
- "eval_loss": 3.660386562347412,
173
- "eval_new_wer": 1.0,
174
- "eval_old_wer": 1.0,
175
- "eval_runtime": 6.4496,
176
- "eval_samples_per_second": 27.909,
177
- "eval_steps_per_second": 3.566,
178
- "step": 2760
179
- },
180
- {
181
- "epoch": 11.0,
182
- "learning_rate": 2.827358250699212e-05,
183
- "loss": 3.3115,
184
- "step": 3036
185
- },
186
- {
187
- "epoch": 11.0,
188
- "eval_cer": 1.0,
189
- "eval_loss": 3.6680169105529785,
190
- "eval_new_wer": 1.0,
191
- "eval_old_wer": 1.0,
192
- "eval_runtime": 6.5154,
193
- "eval_samples_per_second": 27.627,
194
- "eval_steps_per_second": 3.53,
195
- "step": 3036
196
- },
197
- {
198
- "epoch": 12.0,
199
- "learning_rate": 2.1256038647342997e-05,
200
- "loss": 3.3048,
201
- "step": 3312
202
- },
203
- {
204
- "epoch": 12.0,
205
- "eval_cer": 1.0,
206
- "eval_loss": 3.662872314453125,
207
- "eval_new_wer": 1.0,
208
- "eval_old_wer": 1.0,
209
- "eval_runtime": 6.4674,
210
- "eval_samples_per_second": 27.832,
211
- "eval_steps_per_second": 3.556,
212
- "step": 3312
213
- },
214
- {
215
- "epoch": 13.0,
216
- "learning_rate": 1.4238494787693874e-05,
217
- "loss": 3.3012,
218
- "step": 3588
219
- },
220
- {
221
- "epoch": 13.0,
222
- "eval_cer": 1.0,
223
- "eval_loss": 3.665947198867798,
224
- "eval_new_wer": 1.0,
225
- "eval_old_wer": 1.0,
226
- "eval_runtime": 6.509,
227
- "eval_samples_per_second": 27.654,
228
- "eval_steps_per_second": 3.534,
229
- "step": 3588
230
- },
231
- {
232
- "epoch": 14.0,
233
- "learning_rate": 7.2209509280447494e-06,
234
- "loss": 3.298,
235
- "step": 3864
236
- },
237
- {
238
- "epoch": 14.0,
239
- "eval_cer": 1.0,
240
- "eval_loss": 3.6620442867279053,
241
- "eval_new_wer": 1.0,
242
- "eval_old_wer": 1.0,
243
- "eval_runtime": 6.4873,
244
- "eval_samples_per_second": 27.747,
245
- "eval_steps_per_second": 3.545,
246
- "step": 3864
247
- },
248
- {
249
- "epoch": 15.0,
250
- "learning_rate": 2.0340706839562675e-07,
251
- "loss": 3.2999,
252
- "step": 4140
253
- },
254
- {
255
- "epoch": 15.0,
256
- "eval_cer": 1.0,
257
- "eval_loss": 3.6636345386505127,
258
- "eval_new_wer": 1.0,
259
- "eval_old_wer": 1.0,
260
- "eval_runtime": 6.4572,
261
- "eval_samples_per_second": 27.876,
262
- "eval_steps_per_second": 3.562,
263
- "step": 4140
264
  }
265
  ],
266
  "max_steps": 4140,
267
  "num_train_epochs": 15,
268
- "total_flos": 3.049755441937728e+18,
269
  "trial_name": null,
270
  "trial_params": null
271
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0,
5
+ "global_step": 276,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.0,
12
+ "learning_rate": 9.842359521993389e-05,
13
+ "loss": 27.18,
14
  "step": 276
15
  },
16
  {
17
  "epoch": 1.0,
18
  "eval_cer": 1.0,
19
+ "eval_loss": 3.3241915702819824,
20
  "eval_new_wer": 1.0,
21
  "eval_old_wer": 1.0,
22
+ "eval_runtime": 6.7757,
23
+ "eval_samples_per_second": 26.565,
24
+ "eval_steps_per_second": 3.394,
25
  "step": 276
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
  ],
28
  "max_steps": 4140,
29
  "num_train_epochs": 15,
30
+ "total_flos": 2.00879993971008e+17,
31
  "trial_name": null,
32
  "trial_params": null
33
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9698411e7a1146b016a2d4859cac51b281a6688b094cb726a6d30b60b24f1bac
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb9c0f892450f00fa996999ceb9f160bb8cf2b07756f1ef89cb049e366cb472
3
  size 3387
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92fe0edec4ebcac6b9bfef5c8f50a1f58b8ce7a05e14cb0975d8c1b760c74879
3
  size 377643361
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e977485d43ce175909714a990eaf32d0340768b859b648a3e806d55dfcda5f9
3
  size 377643361
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9698411e7a1146b016a2d4859cac51b281a6688b094cb726a6d30b60b24f1bac
3
  size 3387
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:deb9c0f892450f00fa996999ceb9f160bb8cf2b07756f1ef89cb049e366cb472
3
  size 3387