marcel committed on
Commit
b11366b
1 Parent(s): c00c85b
Files changed (4) hide show
  1. optimizer.pt +1 -1
  2. pytorch_model.bin +1 -1
  3. scheduler.pt +1 -1
  4. trainer_state.json +115 -3
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:415f789cebf94dc975e2af37856a750f6a78c4e12af0b81cbeef753992d90419
3
  size 2490339591
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6843bd4f71c5da6f742fb97ed4629615bebb3e739a7469bf65e972cec15e6ffb
3
  size 2490339591
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0dc27a79ad93085f47b570fdaf3825b555caa80cf07cabe1ba820b1fd6247b8a
3
  size 1262065048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46921802d4224cbc87382a360359b0b60338b51f7ee0688a8fef26156d9c6937
3
  size 1262065048
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69fca0da0f8fa986f151fcf8b07e0888193c079c13d2d8004abe9580c45c5584
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a9d74c7b8404ba33c7dc27d3f935915c03a1e67d4dcd0c32769deff9f16fe75
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.8484231943031535,
5
- "global_step": 2800,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -104,11 +104,123 @@
104
  "eval_samples_per_second": 7.953,
105
  "eval_wer": 0.39256756756756755,
106
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
108
  ],
109
  "max_steps": 29490,
110
  "num_train_epochs": 30,
111
- "total_flos": 1.5548475390462444e+19,
112
  "trial_name": null,
113
  "trial_params": null
114
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.103763987792472,
5
+ "global_step": 6000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
104
  "eval_samples_per_second": 7.953,
105
  "eval_wer": 0.39256756756756755,
106
  "step": 2800
107
+ },
108
+ {
109
+ "epoch": 3.26,
110
+ "learning_rate": 0.0002720593308037254,
111
+ "loss": 0.2208,
112
+ "step": 3200
113
+ },
114
+ {
115
+ "epoch": 3.26,
116
+ "eval_loss": 0.40793511271476746,
117
+ "eval_runtime": 233.2565,
118
+ "eval_samples_per_second": 8.021,
119
+ "eval_wer": 0.3984029484029484,
120
+ "step": 3200
121
+ },
122
+ {
123
+ "epoch": 3.66,
124
+ "learning_rate": 0.0002679199724042773,
125
+ "loss": 0.2168,
126
+ "step": 3600
127
+ },
128
+ {
129
+ "epoch": 3.66,
130
+ "eval_loss": 0.40217384696006775,
131
+ "eval_runtime": 233.5419,
132
+ "eval_samples_per_second": 8.011,
133
+ "eval_wer": 0.40958230958230957,
134
+ "step": 3600
135
+ },
136
+ {
137
+ "epoch": 4.07,
138
+ "learning_rate": 0.0002637806140048292,
139
+ "loss": 0.2059,
140
+ "step": 4000
141
+ },
142
+ {
143
+ "epoch": 4.07,
144
+ "eval_loss": 0.42218777537345886,
145
+ "eval_runtime": 233.047,
146
+ "eval_samples_per_second": 8.028,
147
+ "eval_wer": 0.40128992628992627,
148
+ "step": 4000
149
+ },
150
+ {
151
+ "epoch": 4.48,
152
+ "learning_rate": 0.0002596412556053811,
153
+ "loss": 0.1808,
154
+ "step": 4400
155
+ },
156
+ {
157
+ "epoch": 4.48,
158
+ "eval_loss": 0.43475455045700073,
159
+ "eval_runtime": 235.2511,
160
+ "eval_samples_per_second": 7.953,
161
+ "eval_wer": 0.39213759213759214,
162
+ "step": 4400
163
+ },
164
+ {
165
+ "epoch": 4.88,
166
+ "learning_rate": 0.0002555018972059331,
167
+ "loss": 0.1874,
168
+ "step": 4800
169
+ },
170
+ {
171
+ "epoch": 4.88,
172
+ "eval_loss": 0.40788909792900085,
173
+ "eval_runtime": 237.3453,
174
+ "eval_samples_per_second": 7.883,
175
+ "eval_wer": 0.3885749385749386,
176
+ "step": 4800
177
+ },
178
+ {
179
+ "epoch": 5.29,
180
+ "learning_rate": 0.000251362538806485,
181
+ "loss": 0.1671,
182
+ "step": 5200
183
+ },
184
+ {
185
+ "epoch": 5.29,
186
+ "eval_loss": 0.42425239086151123,
187
+ "eval_runtime": 235.3303,
188
+ "eval_samples_per_second": 7.951,
189
+ "eval_wer": 0.3864864864864865,
190
+ "step": 5200
191
+ },
192
+ {
193
+ "epoch": 5.7,
194
+ "learning_rate": 0.0002472231804070369,
195
+ "loss": 0.168,
196
+ "step": 5600
197
+ },
198
+ {
199
+ "epoch": 5.7,
200
+ "eval_loss": 0.39308613538742065,
201
+ "eval_runtime": 234.7832,
202
+ "eval_samples_per_second": 7.969,
203
+ "eval_wer": 0.38114250614250617,
204
+ "step": 5600
205
+ },
206
+ {
207
+ "epoch": 6.1,
208
+ "learning_rate": 0.0002430838220075888,
209
+ "loss": 0.1603,
210
+ "step": 6000
211
+ },
212
+ {
213
+ "epoch": 6.1,
214
+ "eval_loss": 0.4091956615447998,
215
+ "eval_runtime": 234.4154,
216
+ "eval_samples_per_second": 7.982,
217
+ "eval_wer": 0.38175675675675674,
218
+ "step": 6000
219
  }
220
  ],
221
  "max_steps": 29490,
222
  "num_train_epochs": 30,
223
+ "total_flos": 3.3303502305486766e+19,
224
  "trial_name": null,
225
  "trial_params": null
226
  }