ihanif commited on
Commit
9f0a12a
1 Parent(s): 91c5694

Training in progress, step 300

Browse files
checkpoint-300/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a29af7e42a8cc291c7dc41a87ab9bec819ca5e7407672e72ed2528611450ee9
3
  size 1934161093
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:22e62120437d4a5d43ed19f559839cbc39250df020c1097fa01f356bcbf4bba8
3
  size 1934161093
checkpoint-300/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3659cbd57caabfa6834081314e1044f720d4f82db5a36a341158bdc9fc0cf4f2
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18928fdec4a6f78c294cc9f60f25b8acac276650309efc21ceb0ecf42f321cb0
3
  size 967102601
checkpoint-300/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0968b0e39b19cec9553e062832c01be226fad3fe583411f84809dfd1a9c08ee1
3
- size 14511
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ab614b521b578676b3402f6dd6f0907952e16f2d208b34af9d84266a360fc37
3
+ size 14639
checkpoint-300/scaler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ef418a6ac4669ffd3c7cffe1d7df12a1bb116658f99f16e613cb28c6cbef5cf
3
  size 557
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7f2249620974c2bcc6e52f5d48799672ec47f32baf09849b58c0ab5addde0f
3
  size 557
checkpoint-300/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6c1d870d593d2993fc02f7df3f7be41e6fce04eb6216560256b783d35551c95
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbb349d7867b639670f1b127be8892c37cb67d6956f365801519932598abf657
3
  size 627
checkpoint-300/trainer_state.json CHANGED
@@ -1,226 +1,226 @@
1
  {
2
- "best_metric": 53.62439467312349,
3
  "best_model_checkpoint": "./checkpoint-300",
4
- "epoch": 3.566371681415929,
5
  "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.12,
12
- "learning_rate": 3e-06,
13
- "loss": 4.1495,
14
  "step": 10
15
  },
16
  {
17
- "epoch": 0.24,
18
- "learning_rate": 6.333333333333333e-06,
19
- "loss": 2.9287,
20
  "step": 20
21
  },
22
  {
23
- "epoch": 0.35,
24
- "learning_rate": 9.666666666666667e-06,
25
- "loss": 2.0462,
26
  "step": 30
27
  },
28
  {
29
- "epoch": 0.47,
30
- "learning_rate": 9.666666666666667e-06,
31
- "loss": 1.6138,
32
  "step": 40
33
  },
34
  {
35
- "epoch": 0.59,
36
- "learning_rate": 9.296296296296296e-06,
37
- "loss": 1.3862,
38
  "step": 50
39
  },
40
  {
41
- "epoch": 0.71,
42
- "learning_rate": 8.925925925925927e-06,
43
- "loss": 1.2604,
44
  "step": 60
45
  },
46
  {
47
- "epoch": 0.83,
48
- "learning_rate": 8.555555555555556e-06,
49
- "loss": 1.1436,
50
  "step": 70
51
  },
52
  {
53
- "epoch": 0.94,
54
- "learning_rate": 8.185185185185187e-06,
55
- "loss": 1.168,
56
  "step": 80
57
  },
58
  {
59
- "epoch": 1.07,
60
- "learning_rate": 7.814814814814816e-06,
61
- "loss": 1.1041,
62
  "step": 90
63
  },
64
  {
65
- "epoch": 1.19,
66
- "learning_rate": 7.444444444444445e-06,
67
- "loss": 0.9683,
68
  "step": 100
69
  },
70
  {
71
- "epoch": 1.19,
72
- "eval_cer": 131.61659035460045,
73
- "eval_loss": 0.8811978697776794,
74
- "eval_runtime": 593.3196,
75
- "eval_samples_per_second": 0.863,
76
- "eval_steps_per_second": 0.431,
77
- "eval_wer": 139.37651331719127,
78
  "step": 100
79
  },
80
  {
81
- "epoch": 1.31,
82
- "learning_rate": 7.074074074074074e-06,
83
- "loss": 0.909,
84
  "step": 110
85
  },
86
  {
87
- "epoch": 1.42,
88
- "learning_rate": 6.703703703703704e-06,
89
- "loss": 0.9213,
90
  "step": 120
91
  },
92
  {
93
- "epoch": 1.54,
94
- "learning_rate": 6.333333333333333e-06,
95
- "loss": 0.9092,
96
  "step": 130
97
  },
98
  {
99
- "epoch": 1.66,
100
- "learning_rate": 5.962962962962963e-06,
101
- "loss": 0.8481,
102
  "step": 140
103
  },
104
  {
105
- "epoch": 1.78,
106
- "learning_rate": 5.5925925925925926e-06,
107
- "loss": 0.8471,
108
  "step": 150
109
  },
110
  {
111
- "epoch": 1.9,
112
- "learning_rate": 5.2222222222222226e-06,
113
- "loss": 0.8504,
114
  "step": 160
115
  },
116
  {
117
- "epoch": 2.02,
118
- "learning_rate": 4.851851851851852e-06,
119
- "loss": 0.8264,
120
  "step": 170
121
  },
122
  {
123
- "epoch": 2.14,
124
- "learning_rate": 4.481481481481482e-06,
125
- "loss": 0.7236,
126
  "step": 180
127
  },
128
  {
129
- "epoch": 2.26,
130
- "learning_rate": 4.111111111111111e-06,
131
- "loss": 0.6898,
132
  "step": 190
133
  },
134
  {
135
- "epoch": 2.38,
136
- "learning_rate": 3.740740740740741e-06,
137
- "loss": 0.6848,
138
  "step": 200
139
  },
140
  {
141
- "epoch": 2.38,
142
- "eval_cer": 151.33685371478225,
143
- "eval_loss": 0.7542899250984192,
144
- "eval_runtime": 551.6472,
145
- "eval_samples_per_second": 0.928,
146
- "eval_steps_per_second": 0.464,
147
- "eval_wer": 145.9972760290557,
148
  "step": 200
149
  },
150
  {
151
- "epoch": 2.5,
152
- "learning_rate": 3.3703703703703705e-06,
153
- "loss": 0.7021,
154
  "step": 210
155
  },
156
  {
157
- "epoch": 2.61,
158
- "learning_rate": 3e-06,
159
- "loss": 0.6956,
160
  "step": 220
161
  },
162
  {
163
- "epoch": 2.73,
164
- "learning_rate": 2.6296296296296297e-06,
165
- "loss": 0.629,
166
  "step": 230
167
  },
168
  {
169
- "epoch": 2.85,
170
- "learning_rate": 2.2592592592592592e-06,
171
- "loss": 0.6661,
172
  "step": 240
173
  },
174
  {
175
- "epoch": 2.97,
176
- "learning_rate": 1.888888888888889e-06,
177
- "loss": 0.6251,
178
  "step": 250
179
  },
180
  {
181
- "epoch": 3.09,
182
- "learning_rate": 1.5185185185185186e-06,
183
- "loss": 0.6852,
184
  "step": 260
185
  },
186
  {
187
- "epoch": 3.21,
188
- "learning_rate": 1.1481481481481482e-06,
189
- "loss": 0.5772,
190
  "step": 270
191
  },
192
  {
193
- "epoch": 3.33,
194
- "learning_rate": 7.777777777777779e-07,
195
- "loss": 0.5592,
196
  "step": 280
197
  },
198
  {
199
- "epoch": 3.45,
200
- "learning_rate": 4.074074074074075e-07,
201
- "loss": 0.5845,
202
  "step": 290
203
  },
204
  {
205
- "epoch": 3.57,
206
- "learning_rate": 3.703703703703704e-08,
207
- "loss": 0.5548,
208
  "step": 300
209
  },
210
  {
211
- "epoch": 3.57,
212
- "eval_cer": 22.68473647271147,
213
- "eval_loss": 0.6978507041931152,
214
- "eval_runtime": 832.954,
215
- "eval_samples_per_second": 0.615,
216
- "eval_steps_per_second": 0.307,
217
- "eval_wer": 53.62439467312349,
218
  "step": 300
219
  }
220
  ],
221
- "max_steps": 300,
222
- "num_train_epochs": 4,
223
- "total_flos": 2.79033223569408e+18,
224
  "trial_name": null,
225
  "trial_params": null
226
  }
 
1
  {
2
+ "best_metric": 50.13619854721549,
3
  "best_model_checkpoint": "./checkpoint-300",
4
+ "epoch": 7.141176470588236,
5
  "global_step": 300,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.24,
12
+ "learning_rate": 2.666666666666667e-06,
13
+ "loss": 4.3134,
14
  "step": 10
15
  },
16
  {
17
+ "epoch": 0.47,
18
+ "learning_rate": 6e-06,
19
+ "loss": 2.9329,
20
  "step": 20
21
  },
22
  {
23
+ "epoch": 0.71,
24
+ "learning_rate": 9.333333333333334e-06,
25
+ "loss": 2.0584,
26
  "step": 30
27
  },
28
  {
29
+ "epoch": 0.94,
30
+ "learning_rate": 9.829787234042554e-06,
31
+ "loss": 1.566,
32
  "step": 40
33
  },
34
  {
35
+ "epoch": 1.19,
36
+ "learning_rate": 9.617021276595745e-06,
37
+ "loss": 1.3777,
38
  "step": 50
39
  },
40
  {
41
+ "epoch": 1.42,
42
+ "learning_rate": 9.404255319148937e-06,
43
+ "loss": 1.1469,
44
  "step": 60
45
  },
46
  {
47
+ "epoch": 1.66,
48
+ "learning_rate": 9.191489361702128e-06,
49
+ "loss": 1.0638,
50
  "step": 70
51
  },
52
  {
53
+ "epoch": 1.89,
54
+ "learning_rate": 8.97872340425532e-06,
55
+ "loss": 0.9974,
56
  "step": 80
57
  },
58
  {
59
+ "epoch": 2.14,
60
+ "learning_rate": 8.765957446808512e-06,
61
+ "loss": 0.9615,
62
  "step": 90
63
  },
64
  {
65
+ "epoch": 2.38,
66
+ "learning_rate": 8.553191489361703e-06,
67
+ "loss": 0.8262,
68
  "step": 100
69
  },
70
  {
71
+ "epoch": 2.38,
72
+ "eval_cer": 149.33157314260887,
73
+ "eval_loss": 0.8188337683677673,
74
+ "eval_runtime": 459.9395,
75
+ "eval_samples_per_second": 1.113,
76
+ "eval_steps_per_second": 0.139,
77
+ "eval_wer": 146.85230024213075,
78
  "step": 100
79
  },
80
  {
81
+ "epoch": 2.61,
82
+ "learning_rate": 8.340425531914894e-06,
83
+ "loss": 0.7986,
84
  "step": 110
85
  },
86
  {
87
+ "epoch": 2.85,
88
+ "learning_rate": 8.127659574468085e-06,
89
+ "loss": 0.7707,
90
  "step": 120
91
  },
92
  {
93
+ "epoch": 3.09,
94
+ "learning_rate": 7.914893617021278e-06,
95
+ "loss": 0.7623,
96
  "step": 130
97
  },
98
  {
99
+ "epoch": 3.33,
100
+ "learning_rate": 7.702127659574469e-06,
101
+ "loss": 0.6451,
102
  "step": 140
103
  },
104
  {
105
+ "epoch": 3.56,
106
+ "learning_rate": 7.48936170212766e-06,
107
+ "loss": 0.6205,
108
  "step": 150
109
  },
110
  {
111
+ "epoch": 3.8,
112
+ "learning_rate": 7.2765957446808524e-06,
113
+ "loss": 0.6097,
114
  "step": 160
115
  },
116
  {
117
+ "epoch": 4.05,
118
+ "learning_rate": 7.0638297872340434e-06,
119
+ "loss": 0.6217,
120
  "step": 170
121
  },
122
  {
123
+ "epoch": 4.28,
124
+ "learning_rate": 6.8510638297872344e-06,
125
+ "loss": 0.4584,
126
  "step": 180
127
  },
128
  {
129
+ "epoch": 4.52,
130
+ "learning_rate": 6.6382978723404254e-06,
131
+ "loss": 0.4929,
132
  "step": 190
133
  },
134
  {
135
+ "epoch": 4.75,
136
+ "learning_rate": 6.425531914893618e-06,
137
+ "loss": 0.4843,
138
  "step": 200
139
  },
140
  {
141
+ "epoch": 4.75,
142
+ "eval_cer": 22.099862972494236,
143
+ "eval_loss": 0.6699215173721313,
144
+ "eval_runtime": 1062.3991,
145
+ "eval_samples_per_second": 0.482,
146
+ "eval_steps_per_second": 0.06,
147
+ "eval_wer": 52.66343825665859,
148
  "step": 200
149
  },
150
  {
151
+ "epoch": 4.99,
152
+ "learning_rate": 6.212765957446809e-06,
153
+ "loss": 0.4678,
154
  "step": 210
155
  },
156
  {
157
+ "epoch": 5.24,
158
+ "learning_rate": 6e-06,
159
+ "loss": 0.4014,
160
  "step": 220
161
  },
162
  {
163
+ "epoch": 5.47,
164
+ "learning_rate": 5.787234042553191e-06,
165
+ "loss": 0.3926,
166
  "step": 230
167
  },
168
  {
169
+ "epoch": 5.71,
170
+ "learning_rate": 5.574468085106384e-06,
171
+ "loss": 0.3806,
172
  "step": 240
173
  },
174
  {
175
+ "epoch": 5.94,
176
+ "learning_rate": 5.361702127659575e-06,
177
+ "loss": 0.3855,
178
  "step": 250
179
  },
180
  {
181
+ "epoch": 6.19,
182
+ "learning_rate": 5.148936170212766e-06,
183
+ "loss": 0.3478,
184
  "step": 260
185
  },
186
  {
187
+ "epoch": 6.42,
188
+ "learning_rate": 4.936170212765958e-06,
189
+ "loss": 0.3078,
190
  "step": 270
191
  },
192
  {
193
+ "epoch": 6.66,
194
+ "learning_rate": 4.7234042553191496e-06,
195
+ "loss": 0.3147,
196
  "step": 280
197
  },
198
  {
199
+ "epoch": 6.89,
200
+ "learning_rate": 4.5106382978723406e-06,
201
+ "loss": 0.3213,
202
  "step": 290
203
  },
204
  {
205
+ "epoch": 7.14,
206
+ "learning_rate": 4.297872340425532e-06,
207
+ "loss": 0.287,
208
  "step": 300
209
  },
210
  {
211
+ "epoch": 7.14,
212
+ "eval_cer": 20.677784833394604,
213
+ "eval_loss": 0.6913720369338989,
214
+ "eval_runtime": 1003.603,
215
+ "eval_samples_per_second": 0.51,
216
+ "eval_steps_per_second": 0.064,
217
+ "eval_wer": 50.13619854721549,
218
  "step": 300
219
  }
220
  ],
221
+ "max_steps": 500,
222
+ "num_train_epochs": 12,
223
+ "total_flos": 5.58730193559552e+18,
224
  "trial_name": null,
225
  "trial_params": null
226
  }
checkpoint-300/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcf3a24325bb49c25270193816ed6035a253ca3ae300c31ffcec0afeb4229266
3
- size 3579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5518cd575512706c66c89476ae9c64328c1bcac98ec06151bdc9afebf6c503fb
3
+ size 3643
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:187c1d7674c7d7e27b2030f3c8b472ab80d8863925d52fe41abcfd44c6675a3a
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18928fdec4a6f78c294cc9f60f25b8acac276650309efc21ceb0ecf42f321cb0
3
  size 967102601
runs/Dec20_20-28-50_129-146-32-172/events.out.tfevents.1671568135.129-146-32-172.151517.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb9fad7bb496cbe780fc5b737f82d64309e3290a5409c43485e045f47471fed
3
- size 8114
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9f730aa019c3fdfb3bb29b6c3bdc8df372194567bace669c0490525c9864f8b
3
+ size 10049
whisper_small_ps_augmented.py CHANGED
@@ -287,6 +287,7 @@ trainer = Seq2SeqTrainer(
287
  tokenizer=processor.feature_extractor
288
 
289
  )
 
290
 
291
  """We'll save the processor object once before starting training. Since the processor is not trainable, it won't change over the course of training:"""
292
 
 
287
  tokenizer=processor.feature_extractor
288
 
289
  )
290
+ trainer.train(resume_from_checkpoint = True)
291
 
292
  """We'll save the processor object once before starting training. Since the processor is not trainable, it won't change over the course of training:"""
293