Zaid committed on
Commit
a446a18
1 Parent(s): 0e08250

upload better model

Browse files
Files changed (4) hide show
  1. config.json +1 -1
  2. pytorch_model.bin +1 -1
  3. trainer_state.json +40 -292
  4. training_args.bin +2 -2
config.json CHANGED
@@ -3,7 +3,7 @@
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
- "CustomClassificationModel"
7
  ],
8
  "attention_dropout": 0.01,
9
  "bos_token_id": 1,
3
  "activation_dropout": 0.0,
4
  "apply_spec_augment": true,
5
  "architectures": [
6
+ "Wav2Vec2ClassificationModel"
7
  ],
8
  "attention_dropout": 0.01,
9
  "bos_token_id": 1,
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f64da00b3a836544cfce6c19393da385760943de3f28c8ce89db05aa7953464a
3
  size 1265016809
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f1a423dcfecf848e5700f9b66b836612e15de92a9b5cf653d5277d12b382648
3
  size 1265016809
trainer_state.json CHANGED
@@ -1,324 +1,72 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.412280701754386,
5
- "global_step": 2200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.11,
12
- "learning_rate": 2.973626373626374e-05,
13
- "loss": 1.622,
14
- "step": 100
15
- },
16
- {
17
- "epoch": 0.11,
18
- "eval_accuracy": 0.24010217113665389,
19
- "eval_loss": 1.6581733226776123,
20
- "eval_runtime": 381.1384,
21
- "eval_samples_per_second": 4.109,
22
- "step": 100
23
- },
24
- {
25
- "epoch": 0.22,
26
- "learning_rate": 2.9406593406593407e-05,
27
- "loss": 1.3721,
28
- "step": 200
29
- },
30
- {
31
- "epoch": 0.22,
32
- "eval_accuracy": 0.2835249042145594,
33
- "eval_loss": 1.9170576333999634,
34
- "eval_runtime": 378.5115,
35
- "eval_samples_per_second": 4.137,
36
- "step": 200
37
- },
38
- {
39
- "epoch": 0.33,
40
- "learning_rate": 2.907692307692308e-05,
41
- "loss": 1.1269,
42
- "step": 300
43
- },
44
- {
45
- "epoch": 0.33,
46
- "eval_accuracy": 0.36845466155810985,
47
- "eval_loss": 1.7032861709594727,
48
- "eval_runtime": 385.3173,
49
- "eval_samples_per_second": 4.064,
50
- "step": 300
51
- },
52
- {
53
- "epoch": 0.44,
54
- "learning_rate": 2.8747252747252748e-05,
55
- "loss": 0.9152,
56
- "step": 400
57
- },
58
- {
59
- "epoch": 0.44,
60
- "eval_accuracy": 0.388250319284802,
61
- "eval_loss": 1.7423878908157349,
62
- "eval_runtime": 388.0594,
63
- "eval_samples_per_second": 4.035,
64
- "step": 400
65
- },
66
- {
67
- "epoch": 0.55,
68
- "learning_rate": 2.8417582417582416e-05,
69
- "loss": 0.732,
70
- "step": 500
71
- },
72
- {
73
- "epoch": 0.55,
74
- "eval_accuracy": 0.4616858237547893,
75
- "eval_loss": 1.5253428220748901,
76
- "eval_runtime": 386.9842,
77
- "eval_samples_per_second": 4.047,
78
- "step": 500
79
- },
80
- {
81
- "epoch": 0.66,
82
- "learning_rate": 2.8087912087912088e-05,
83
- "loss": 0.684,
84
- "step": 600
85
- },
86
- {
87
- "epoch": 0.66,
88
- "eval_accuracy": 0.4367816091954023,
89
- "eval_loss": 1.6441736221313477,
90
- "eval_runtime": 388.1257,
91
- "eval_samples_per_second": 4.035,
92
- "step": 600
93
- },
94
- {
95
- "epoch": 0.77,
96
- "learning_rate": 2.775824175824176e-05,
97
- "loss": 0.5538,
98
- "step": 700
99
- },
100
- {
101
- "epoch": 0.77,
102
- "eval_accuracy": 0.5108556832694764,
103
- "eval_loss": 1.450364351272583,
104
- "eval_runtime": 389.4611,
105
- "eval_samples_per_second": 4.021,
106
- "step": 700
107
- },
108
- {
109
- "epoch": 0.88,
110
- "learning_rate": 2.7428571428571428e-05,
111
- "loss": 0.5425,
112
- "step": 800
113
- },
114
- {
115
- "epoch": 0.88,
116
- "eval_accuracy": 0.49936143039591313,
117
- "eval_loss": 1.4108269214630127,
118
- "eval_runtime": 389.7612,
119
- "eval_samples_per_second": 4.018,
120
- "step": 800
121
- },
122
- {
123
- "epoch": 0.99,
124
- "learning_rate": 2.70989010989011e-05,
125
- "loss": 0.514,
126
- "step": 900
127
- },
128
- {
129
- "epoch": 0.99,
130
- "eval_accuracy": 0.5057471264367817,
131
- "eval_loss": 1.4206087589263916,
132
- "eval_runtime": 396.4756,
133
- "eval_samples_per_second": 3.95,
134
- "step": 900
135
- },
136
- {
137
- "epoch": 1.1,
138
- "learning_rate": 2.6769230769230772e-05,
139
- "loss": 0.3068,
140
  "step": 1000
141
  },
142
  {
143
- "epoch": 1.1,
144
- "eval_accuracy": 0.5159642401021711,
145
- "eval_loss": 1.6294729709625244,
146
- "eval_runtime": 394.6782,
147
- "eval_samples_per_second": 3.968,
148
  "step": 1000
149
  },
150
  {
151
- "epoch": 1.21,
152
- "learning_rate": 2.643956043956044e-05,
153
- "loss": 0.2865,
154
- "step": 1100
155
- },
156
- {
157
- "epoch": 1.21,
158
- "eval_accuracy": 0.5076628352490421,
159
- "eval_loss": 1.733625888824463,
160
- "eval_runtime": 393.6145,
161
- "eval_samples_per_second": 3.979,
162
- "step": 1100
163
- },
164
- {
165
- "epoch": 1.32,
166
- "learning_rate": 2.6109890109890112e-05,
167
- "loss": 0.301,
168
- "step": 1200
169
- },
170
- {
171
- "epoch": 1.32,
172
- "eval_accuracy": 0.6277139208173691,
173
- "eval_loss": 1.2089332342147827,
174
- "eval_runtime": 389.8374,
175
- "eval_samples_per_second": 4.017,
176
- "step": 1200
177
- },
178
- {
179
- "epoch": 1.43,
180
- "learning_rate": 2.578021978021978e-05,
181
- "loss": 0.2517,
182
- "step": 1300
183
- },
184
- {
185
- "epoch": 1.43,
186
- "eval_accuracy": 0.5983397190293742,
187
- "eval_loss": 1.3427765369415283,
188
- "eval_runtime": 388.9975,
189
- "eval_samples_per_second": 4.026,
190
- "step": 1300
191
- },
192
- {
193
- "epoch": 1.54,
194
- "learning_rate": 2.545054945054945e-05,
195
- "loss": 0.2819,
196
- "step": 1400
197
- },
198
- {
199
- "epoch": 1.54,
200
- "eval_accuracy": 0.5600255427841635,
201
- "eval_loss": 1.5083413124084473,
202
- "eval_runtime": 390.3746,
203
- "eval_samples_per_second": 4.012,
204
- "step": 1400
205
- },
206
- {
207
- "epoch": 1.64,
208
- "learning_rate": 2.512087912087912e-05,
209
- "loss": 0.2706,
210
- "step": 1500
211
- },
212
- {
213
- "epoch": 1.64,
214
- "eval_accuracy": 0.6047254150702427,
215
- "eval_loss": 1.5413047075271606,
216
- "eval_runtime": 389.1952,
217
- "eval_samples_per_second": 4.024,
218
- "step": 1500
219
- },
220
- {
221
- "epoch": 1.75,
222
- "learning_rate": 2.479120879120879e-05,
223
- "loss": 0.1991,
224
- "step": 1600
225
- },
226
- {
227
- "epoch": 1.75,
228
- "eval_accuracy": 0.598978288633461,
229
- "eval_loss": 1.5572402477264404,
230
- "eval_runtime": 389.1243,
231
- "eval_samples_per_second": 4.024,
232
- "step": 1600
233
- },
234
- {
235
- "epoch": 1.86,
236
- "learning_rate": 2.446153846153846e-05,
237
- "loss": 0.2726,
238
- "step": 1700
239
- },
240
- {
241
- "epoch": 1.86,
242
- "eval_accuracy": 0.6021711366538953,
243
- "eval_loss": 1.4013671875,
244
- "eval_runtime": 392.7301,
245
- "eval_samples_per_second": 3.987,
246
- "step": 1700
247
- },
248
- {
249
- "epoch": 1.97,
250
- "learning_rate": 2.4131868131868133e-05,
251
- "loss": 0.2015,
252
- "step": 1800
253
- },
254
- {
255
- "epoch": 1.97,
256
- "eval_accuracy": 0.5676883780332056,
257
- "eval_loss": 2.248255729675293,
258
- "eval_runtime": 389.7999,
259
- "eval_samples_per_second": 4.017,
260
- "step": 1800
261
- },
262
- {
263
- "epoch": 2.08,
264
- "learning_rate": 2.38021978021978e-05,
265
- "loss": 0.173,
266
- "step": 1900
267
- },
268
- {
269
- "epoch": 2.08,
270
- "eval_accuracy": 0.644316730523627,
271
- "eval_loss": 1.5393086671829224,
272
- "eval_runtime": 390.3149,
273
- "eval_samples_per_second": 4.012,
274
- "step": 1900
275
- },
276
- {
277
- "epoch": 2.19,
278
- "learning_rate": 2.3472527472527474e-05,
279
- "loss": 0.1177,
280
  "step": 2000
281
  },
282
  {
283
- "epoch": 2.19,
284
- "eval_accuracy": 0.6028097062579821,
285
- "eval_loss": 1.9018601179122925,
286
- "eval_runtime": 390.5708,
287
- "eval_samples_per_second": 4.01,
288
  "step": 2000
289
  },
290
  {
291
- "epoch": 2.3,
292
- "learning_rate": 2.3142857142857145e-05,
293
- "loss": 0.0907,
294
- "step": 2100
295
  },
296
  {
297
- "epoch": 2.3,
298
- "eval_accuracy": 0.6194125159642401,
299
- "eval_loss": 1.8655920028686523,
300
- "eval_runtime": 392.1244,
301
- "eval_samples_per_second": 3.994,
302
- "step": 2100
303
  },
304
  {
305
- "epoch": 2.41,
306
- "learning_rate": 2.2813186813186814e-05,
307
- "loss": 0.1878,
308
- "step": 2200
309
  },
310
  {
311
- "epoch": 2.41,
312
- "eval_accuracy": 0.6296296296296297,
313
- "eval_loss": 1.998379111289978,
314
- "eval_runtime": 390.0846,
315
- "eval_samples_per_second": 4.015,
316
- "step": 2200
317
  }
318
  ],
319
- "max_steps": 9120,
320
  "num_train_epochs": 10,
321
- "total_flos": 2.137068831744e+19,
322
  "trial_name": null,
323
  "trial_params": null
324
  }
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 5.089058524173028,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.27,
12
+ "learning_rate": 2.625e-05,
13
+ "loss": 1.5969,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  "step": 1000
15
  },
16
  {
17
+ "epoch": 1.27,
18
+ "eval_accuracy": 0.2975871313672922,
19
+ "eval_loss": 1.9108707904815674,
20
+ "eval_runtime": 414.1526,
21
+ "eval_samples_per_second": 3.603,
22
  "step": 1000
23
  },
24
  {
25
+ "epoch": 2.54,
26
+ "learning_rate": 2.2423469387755103e-05,
27
+ "loss": 0.6315,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  "step": 2000
29
  },
30
  {
31
+ "epoch": 2.54,
32
+ "eval_accuracy": 0.7607238605898123,
33
+ "eval_loss": 0.705723762512207,
34
+ "eval_runtime": 417.2946,
35
+ "eval_samples_per_second": 3.575,
36
  "step": 2000
37
  },
38
  {
39
+ "epoch": 3.82,
40
+ "learning_rate": 1.8596938775510206e-05,
41
+ "loss": 0.2271,
42
+ "step": 3000
43
  },
44
  {
45
+ "epoch": 3.82,
46
+ "eval_accuracy": 0.7774798927613941,
47
+ "eval_loss": 0.7536256909370422,
48
+ "eval_runtime": 427.7625,
49
+ "eval_samples_per_second": 3.488,
50
+ "step": 3000
51
  },
52
  {
53
+ "epoch": 5.09,
54
+ "learning_rate": 1.4770408163265308e-05,
55
+ "loss": 0.089,
56
+ "step": 4000
57
  },
58
  {
59
+ "epoch": 5.09,
60
+ "eval_accuracy": 0.8378016085790885,
61
+ "eval_loss": 0.7497373819351196,
62
+ "eval_runtime": 419.2295,
63
+ "eval_samples_per_second": 3.559,
64
+ "step": 4000
65
  }
66
  ],
67
+ "max_steps": 7860,
68
  "num_train_epochs": 10,
69
+ "total_flos": 3.882544084944e+19,
70
  "trial_name": null,
71
  "trial_params": null
72
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75111b63f66ceef5558bccfa756b147f9bb7703645a7b27ee22101f1a5addd97
3
- size 2287
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1b797ae8b891be0b59321ccb7440a09f91031db885acf8932b1425f364f100
3
+ size 2351