xezpeleta committed
Commit 5d1981e
Parent: af47934

Training in progress, step 1000

checkpoint-1000/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:07ceaafff6dfa572e5b63e54f0d02c51a7f7062534e6b38aa9e601ddb6888a11
+ oid sha256:e445adc1c3c18c87959585f1c753c47cddbfe8321f2096b28ccdc0105f64d216
  size 6111428695
checkpoint-1000/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:85fff927f86a1224f3364d93a1923c8b597b5ae4054ce50e4e6367f876338da3
+ oid sha256:ae163c1a2a53962024eae4041f8b164051aa5f9d0be4db7fd4035ffc54fcc2f7
  size 3055754841
checkpoint-1000/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c457058d9706972e5066ee37d0cdebd1bec14ec4a839fe2833426578f2bc6224
+ oid sha256:337db3bde69262529f84673e0c1688a8983313ba44ed7b47639b3c14c033c21e
  size 14575
checkpoint-1000/scaler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:15056addf4be2ba630e63bf371888824481831c339ee213b5ce99a63a72cb007
+ oid sha256:8a52443f13b1ebc1f37fc5e2f4410a0fd95d757755d28ce4ca3377896a36d719
  size 557
checkpoint-1000/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:3ca970d66f7f07c0e8752869b05b946fd6e8bf2f6a38832ab3db1935c1c221fd
+ oid sha256:b864fef9e9930a8de94a02300fc770f05cb6e9d2328db736b4330cf0cb6352fb
  size 627
checkpoint-1000/trainer_state.json CHANGED
@@ -1,14 +1,14 @@
  {
- "best_metric": 28.34865729677184,
+ "best_metric": 28.265624367127064,
  "best_model_checkpoint": "./checkpoint-1000",
- "epoch": 0.2,
+ "epoch": 0.14285714285714285,
  "global_step": 1000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
  {
- "epoch": 0.01,
+ "epoch": 0.0,
  "learning_rate": 4.6000000000000004e-07,
  "loss": 1.4182,
  "step": 25
@@ -26,238 +26,238 @@
  "step": 75
  },
  {
- "epoch": 0.02,
+ "epoch": 0.01,
  "learning_rate": 1.94e-06,
  "loss": 0.7765,
  "step": 100
  },
  {
- "epoch": 0.03,
+ "epoch": 0.02,
  "learning_rate": 2.4400000000000004e-06,
  "loss": 0.7103,
  "step": 125
  },
  {
- "epoch": 0.03,
+ "epoch": 0.02,
  "learning_rate": 2.9400000000000002e-06,
  "loss": 0.6597,
  "step": 150
  },
  {
- "epoch": 0.04,
+ "epoch": 0.03,
  "learning_rate": 3.44e-06,
  "loss": 0.6657,
  "step": 175
  },
  {
- "epoch": 0.04,
+ "epoch": 0.03,
  "learning_rate": 3.94e-06,
  "loss": 0.5853,
  "step": 200
  },
  {
- "epoch": 0.04,
+ "epoch": 0.03,
  "learning_rate": 4.440000000000001e-06,
  "loss": 0.5273,
  "step": 225
  },
  {
- "epoch": 0.05,
+ "epoch": 0.04,
  "learning_rate": 4.94e-06,
  "loss": 0.5979,
  "step": 250
  },
  {
- "epoch": 0.06,
+ "epoch": 0.04,
  "learning_rate": 5.4400000000000004e-06,
  "loss": 0.5861,
  "step": 275
  },
  {
- "epoch": 0.06,
+ "epoch": 0.04,
  "learning_rate": 5.94e-06,
  "loss": 0.5085,
  "step": 300
  },
  {
- "epoch": 0.07,
+ "epoch": 0.05,
  "learning_rate": 6.440000000000001e-06,
  "loss": 0.4827,
  "step": 325
  },
  {
- "epoch": 0.07,
+ "epoch": 0.05,
  "learning_rate": 6.9400000000000005e-06,
  "loss": 0.4909,
  "step": 350
  },
  {
- "epoch": 0.07,
+ "epoch": 0.05,
  "learning_rate": 7.440000000000001e-06,
  "loss": 0.4651,
  "step": 375
  },
  {
- "epoch": 0.08,
+ "epoch": 0.06,
  "learning_rate": 7.94e-06,
  "loss": 0.494,
  "step": 400
  },
  {
- "epoch": 0.09,
+ "epoch": 0.06,
  "learning_rate": 8.44e-06,
  "loss": 0.4188,
  "step": 425
  },
  {
- "epoch": 0.09,
+ "epoch": 0.06,
  "learning_rate": 8.94e-06,
  "loss": 0.3849,
  "step": 450
  },
  {
- "epoch": 0.1,
+ "epoch": 0.07,
  "learning_rate": 9.440000000000001e-06,
  "loss": 0.4577,
  "step": 475
  },
  {
- "epoch": 0.1,
+ "epoch": 0.07,
  "learning_rate": 9.940000000000001e-06,
  "loss": 0.4415,
  "step": 500
  },
  {
- "epoch": 0.1,
- "learning_rate": 9.951111111111111e-06,
- "loss": 0.4615,
+ "epoch": 0.07,
+ "learning_rate": 9.966153846153847e-06,
+ "loss": 0.4614,
  "step": 525
  },
  {
- "epoch": 0.11,
- "learning_rate": 9.895555555555557e-06,
- "loss": 0.4282,
+ "epoch": 0.08,
+ "learning_rate": 9.927692307692309e-06,
+ "loss": 0.4283,
  "step": 550
  },
  {
- "epoch": 0.12,
- "learning_rate": 9.842222222222223e-06,
- "loss": 0.4481,
+ "epoch": 0.08,
+ "learning_rate": 9.88923076923077e-06,
+ "loss": 0.4486,
  "step": 575
  },
  {
- "epoch": 0.12,
- "learning_rate": 9.786666666666667e-06,
- "loss": 0.4441,
+ "epoch": 0.09,
+ "learning_rate": 9.850769230769231e-06,
+ "loss": 0.4434,
  "step": 600
  },
  {
- "epoch": 0.12,
- "learning_rate": 9.731111111111113e-06,
- "loss": 0.4238,
+ "epoch": 0.09,
+ "learning_rate": 9.812307692307694e-06,
+ "loss": 0.4245,
  "step": 625
  },
  {
- "epoch": 0.13,
- "learning_rate": 9.675555555555555e-06,
- "loss": 0.4245,
+ "epoch": 0.09,
+ "learning_rate": 9.773846153846154e-06,
+ "loss": 0.4254,
  "step": 650
  },
  {
- "epoch": 0.14,
- "learning_rate": 9.620000000000001e-06,
- "loss": 0.4118,
+ "epoch": 0.1,
+ "learning_rate": 9.735384615384616e-06,
+ "loss": 0.4143,
  "step": 675
  },
  {
- "epoch": 0.14,
- "learning_rate": 9.564444444444445e-06,
- "loss": 0.4111,
+ "epoch": 0.1,
+ "learning_rate": 9.696923076923078e-06,
+ "loss": 0.4115,
  "step": 700
  },
  {
- "epoch": 0.14,
- "learning_rate": 9.508888888888889e-06,
- "loss": 0.3642,
+ "epoch": 0.1,
+ "learning_rate": 9.658461538461539e-06,
+ "loss": 0.3656,
  "step": 725
  },
  {
- "epoch": 0.15,
- "learning_rate": 9.453333333333335e-06,
- "loss": 0.401,
+ "epoch": 0.11,
+ "learning_rate": 9.620000000000001e-06,
+ "loss": 0.4004,
  "step": 750
  },
  {
- "epoch": 0.15,
- "learning_rate": 9.397777777777779e-06,
- "loss": 0.3855,
+ "epoch": 0.11,
+ "learning_rate": 9.581538461538462e-06,
+ "loss": 0.388,
  "step": 775
  },
  {
- "epoch": 0.16,
- "learning_rate": 9.342222222222223e-06,
- "loss": 0.3668,
+ "epoch": 0.11,
+ "learning_rate": 9.543076923076924e-06,
+ "loss": 0.3665,
  "step": 800
  },
  {
- "epoch": 0.17,
- "learning_rate": 9.286666666666667e-06,
- "loss": 0.3794,
+ "epoch": 0.12,
+ "learning_rate": 9.504615384615386e-06,
+ "loss": 0.3777,
  "step": 825
  },
  {
- "epoch": 0.17,
- "learning_rate": 9.231111111111111e-06,
- "loss": 0.4296,
+ "epoch": 0.12,
+ "learning_rate": 9.466153846153846e-06,
+ "loss": 0.4338,
  "step": 850
  },
  {
- "epoch": 0.17,
- "learning_rate": 9.175555555555557e-06,
- "loss": 0.4003,
+ "epoch": 0.12,
+ "learning_rate": 9.427692307692309e-06,
+ "loss": 0.4017,
  "step": 875
  },
  {
- "epoch": 0.18,
- "learning_rate": 9.12e-06,
- "loss": 0.374,
+ "epoch": 0.13,
+ "learning_rate": 9.38923076923077e-06,
+ "loss": 0.3757,
  "step": 900
  },
  {
- "epoch": 0.18,
- "learning_rate": 9.064444444444447e-06,
- "loss": 0.4051,
+ "epoch": 0.13,
+ "learning_rate": 9.350769230769231e-06,
+ "loss": 0.4084,
  "step": 925
  },
  {
- "epoch": 0.19,
- "learning_rate": 9.008888888888889e-06,
- "loss": 0.3806,
+ "epoch": 0.14,
+ "learning_rate": 9.312307692307693e-06,
+ "loss": 0.3821,
  "step": 950
  },
  {
- "epoch": 0.2,
- "learning_rate": 8.953333333333335e-06,
- "loss": 0.4161,
+ "epoch": 0.14,
+ "learning_rate": 9.273846153846154e-06,
+ "loss": 0.421,
  "step": 975
  },
  {
- "epoch": 0.2,
- "learning_rate": 8.897777777777779e-06,
- "loss": 0.4198,
+ "epoch": 0.14,
+ "learning_rate": 9.235384615384616e-06,
+ "loss": 0.4203,
  "step": 1000
  },
  {
- "epoch": 0.2,
- "eval_loss": 0.41016528010368347,
- "eval_runtime": 1814.0427,
- "eval_samples_per_second": 3.633,
- "eval_steps_per_second": 0.454,
- "eval_wer": 28.34865729677184,
+ "epoch": 0.14,
+ "eval_loss": 0.4127572178840637,
+ "eval_runtime": 1802.5775,
+ "eval_samples_per_second": 3.656,
+ "eval_steps_per_second": 0.457,
+ "eval_wer": 28.265624367127064,
  "step": 1000
  }
  ],
- "max_steps": 5000,
+ "max_steps": 7000,
  "num_train_epochs": 9223372036854775807,
  "total_flos": 4.08241963008e+18,
  "trial_name": null,
checkpoint-1000/training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a13c314f78e95fbdb83f90ffb2bc7a882d83957078ec2bb7e6c5baa43309eafa
+ oid sha256:e1ca27d9d1bef820c87cfd52d5adb15af5250e1ac4555cd1703a088ad0dad1a4
  size 3643
nohup.out CHANGED
The diff for this file is too large to render. See the raw diff.
 
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1b7dc0a0327257c9da0cde7a0b6d43f71479af9744f2a9ed0cc123594c0ef9a0
+ oid sha256:ae163c1a2a53962024eae4041f8b164051aa5f9d0be4db7fd4035ffc54fcc2f7
  size 3055754841
run.sh CHANGED
@@ -6,7 +6,7 @@ python run_speech_recognition_seq2seq_streaming.py \
  --train_split_name="train+validation" \
  --eval_split_name="test" \
  --model_index_name="Whisper Small Basque" \
- --max_steps="5000" \
+ --max_steps="7000" \
  --output_dir="./" \
  --per_device_train_batch_size="4" \
  --per_device_eval_batch_size="8" \
runs/Jul23_08-44-43_tknadmin-System-Product-Name/1690094707.0659819/events.out.tfevents.1690094707.tknadmin-System-Product-Name.1702135.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:002f14b404a900b4f14b38c6a0f3808710076a3da91b9e16fc4438ede5e61cf0
+ size 5884
runs/Jul23_08-44-43_tknadmin-System-Product-Name/events.out.tfevents.1690094707.tknadmin-System-Product-Name.1702135.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7a7de503a23764aa98cc19218d9e67fa85df970a739e2868bd0d343b0cfe71c3
+ size 10864
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a13c314f78e95fbdb83f90ffb2bc7a882d83957078ec2bb7e6c5baa43309eafa
+ oid sha256:e1ca27d9d1bef820c87cfd52d5adb15af5250e1ac4555cd1703a088ad0dad1a4
  size 3643