danielizham commited on
Commit
b0da69b
1 Parent(s): f54a820

Training in progress, step 1000

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  wandb/run-20230506_113337-ysywp688/run-ysywp688.wandb filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20230507_103405-9zf5xxpu/run-9zf5xxpu.wandb filter=lfs diff=lfs merge=lfs -text
 
 
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
  wandb/run-20230506_113337-ysywp688/run-ysywp688.wandb filter=lfs diff=lfs merge=lfs -text
36
  wandb/run-20230507_103405-9zf5xxpu/run-9zf5xxpu.wandb filter=lfs diff=lfs merge=lfs -text
37
+ wandb/run-20230522_132945-qz4vsw4s/run-qz4vsw4s.wandb filter=lfs diff=lfs merge=lfs -text
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2e2f9ffcf3e4f0917bfddf6abbdf0e45eacb86241c0acc0051e9b851df9dc04c
3
  size 967102601
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2adfb14f0919f6c0396d0ccccf79cdf94565b1b6f3aded7891339d7b5defde9
3
  size 967102601
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f41ee1723077d9367104b059cfecdb53a548d49fcfc2a3cd253a6e7408db5fbf
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fd312eb781c0cfa722f1157f6072c4883251e20fea77e50bc4737586659df6f
3
  size 4027
wandb/debug-internal.log CHANGED
@@ -1 +1 @@
1
- run-20230507_103405-9zf5xxpu/logs/debug-internal.log
 
1
+ run-20230522_132945-qz4vsw4s/logs/debug-internal.log
wandb/debug.log CHANGED
@@ -1 +1 @@
1
- run-20230507_103405-9zf5xxpu/logs/debug.log
 
1
+ run-20230522_132945-qz4vsw4s/logs/debug.log
wandb/latest-run CHANGED
@@ -1 +1 @@
1
- run-20230507_103405-9zf5xxpu
 
1
+ run-20230522_132945-qz4vsw4s
wandb/run-20230522_132945-qz4vsw4s/files/conda-environment.yaml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: whisper
2
+ channels:
3
+ - conda-forge
4
+ - defaults
5
+ dependencies:
6
+ - _libgcc_mutex=0.1=conda_forge
7
+ - _openmp_mutex=4.5=2_gnu
8
+ - bzip2=1.0.8=h7f98852_4
9
+ - ca-certificates=2022.12.7=ha878542_0
10
+ - git-lfs=3.3.0=ha770c72_0
11
+ - ld_impl_linux-64=2.40=h41732ed_0
12
+ - libffi=3.4.2=h7f98852_5
13
+ - libgcc-ng=12.2.0=h65d4601_19
14
+ - libgomp=12.2.0=h65d4601_19
15
+ - libnsl=2.0.0=h7f98852_0
16
+ - libsqlite=3.40.0=h753d276_1
17
+ - libuuid=2.38.1=h0b41bf4_0
18
+ - libzlib=1.2.13=h166bdaf_4
19
+ - ncurses=6.3=h27087fc_1
20
+ - openssl=3.1.0=hd590300_3
21
+ - pip=23.1.2=pyhd8ed1ab_0
22
+ - python=3.9.16=h2782a2a_0_cpython
23
+ - readline=8.2=h8228510_1
24
+ - setuptools=67.7.2=pyhd8ed1ab_0
25
+ - tk=8.6.12=h27826a3_0
26
+ - wheel=0.40.0=pyhd8ed1ab_0
27
+ - xz=5.2.6=h166bdaf_0
28
+ - pip:
29
+ - absl-py==1.4.0
30
+ - accelerate==0.18.0
31
+ - aiohttp==3.8.4
32
+ - aiosignal==1.3.1
33
+ - appdirs==1.4.4
34
+ - async-timeout==4.0.2
35
+ - attrs==23.1.0
36
+ - audioread==3.0.0
37
+ - cachetools==5.3.0
38
+ - certifi==2022.12.7
39
+ - cffi==1.15.1
40
+ - charset-normalizer==3.1.0
41
+ - click==8.1.3
42
+ - cmake==3.26.3
43
+ - datasets==2.12.1.dev0
44
+ - decorator==5.1.1
45
+ - dill==0.3.6
46
+ - docker-pycreds==0.4.0
47
+ - evaluate==0.4.0
48
+ - filelock==3.12.0
49
+ - frozenlist==1.3.3
50
+ - fsspec==2023.4.0
51
+ - gitdb==4.0.10
52
+ - gitpython==3.1.31
53
+ - google-auth==2.17.3
54
+ - google-auth-oauthlib==1.0.0
55
+ - grpcio==1.54.0
56
+ - huggingface-hub==0.14.1
57
+ - idna==3.4
58
+ - importlib-metadata==6.6.0
59
+ - jinja2==3.1.2
60
+ - jiwer==3.0.1
61
+ - joblib==1.2.0
62
+ - lazy-loader==0.2
63
+ - librosa==0.10.0.post2
64
+ - lit==16.0.2
65
+ - llvmlite==0.40.0
66
+ - markdown==3.4.3
67
+ - markupsafe==2.1.2
68
+ - more-itertools==9.1.0
69
+ - mpmath==1.3.0
70
+ - msgpack==1.0.5
71
+ - multidict==6.0.4
72
+ - multiprocess==0.70.14
73
+ - networkx==3.1
74
+ - numba==0.57.0
75
+ - numpy==1.24.3
76
+ - nvidia-cublas-cu11==11.10.3.66
77
+ - nvidia-cuda-cupti-cu11==11.7.101
78
+ - nvidia-cuda-nvrtc-cu11==11.7.99
79
+ - nvidia-cuda-runtime-cu11==11.7.99
80
+ - nvidia-cudnn-cu11==8.5.0.96
81
+ - nvidia-cufft-cu11==10.9.0.58
82
+ - nvidia-curand-cu11==10.2.10.91
83
+ - nvidia-cusolver-cu11==11.4.0.1
84
+ - nvidia-cusparse-cu11==11.7.4.91
85
+ - nvidia-nccl-cu11==2.14.3
86
+ - nvidia-nvtx-cu11==11.7.91
87
+ - oauthlib==3.2.2
88
+ - packaging==23.1
89
+ - pandas==2.0.1
90
+ - pathtools==0.1.2
91
+ - pooch==1.6.0
92
+ - protobuf==4.22.3
93
+ - psutil==5.9.5
94
+ - pyarrow==12.0.0
95
+ - pyasn1==0.5.0
96
+ - pyasn1-modules==0.3.0
97
+ - pycparser==2.21
98
+ - python-dateutil==2.8.2
99
+ - pytz==2023.3
100
+ - pyyaml==6.0
101
+ - rapidfuzz==2.13.7
102
+ - regex==2023.5.4
103
+ - requests==2.29.0
104
+ - requests-oauthlib==1.3.1
105
+ - responses==0.18.0
106
+ - rsa==4.9
107
+ - scikit-learn==1.2.2
108
+ - scipy==1.10.1
109
+ - sentry-sdk==1.22.1
110
+ - setproctitle==1.3.2
111
+ - six==1.16.0
112
+ - smmap==5.0.0
113
+ - soundfile==0.12.1
114
+ - soxr==0.3.5
115
+ - sympy==1.11.1
116
+ - tensorboard==2.12.3
117
+ - tensorboard-data-server==0.7.0
118
+ - threadpoolctl==3.1.0
119
+ - tokenizers==0.13.3
120
+ - torch==2.0.0
121
+ - torchaudio==2.0.1
122
+ - tqdm==4.65.0
123
+ - transformers==4.29.0.dev0
124
+ - triton==2.0.0
125
+ - typing-extensions==4.5.0
126
+ - tzdata==2023.3
127
+ - urllib3==1.26.15
128
+ - wandb==0.15.2
129
+ - werkzeug==2.3.3
130
+ - xxhash==3.2.0
131
+ - yarl==1.9.2
132
+ - zipp==3.15.0
133
+ prefix: /home/local/QCRI/dizham/miniconda3/envs/whisper
wandb/run-20230522_132945-qz4vsw4s/files/config.yaml ADDED
@@ -0,0 +1,688 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.9.16
7
+ cli_version: 0.15.2
8
+ framework: huggingface
9
+ huggingface_version: 4.29.0.dev0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1684751385.694775
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 5
17
+ - 11
18
+ - 49
19
+ - 51
20
+ - 53
21
+ - 55
22
+ - 71
23
+ 2:
24
+ - 1
25
+ - 5
26
+ - 11
27
+ - 49
28
+ - 51
29
+ - 53
30
+ - 55
31
+ - 71
32
+ 3:
33
+ - 7
34
+ - 23
35
+ 4: 3.9.16
36
+ 5: 0.15.2
37
+ 6: 4.29.0.dev0
38
+ 8:
39
+ - 5
40
+ m:
41
+ - 1: train/global_step
42
+ 6:
43
+ - 3
44
+ - 1: train/loss
45
+ 5: 1
46
+ 6:
47
+ - 1
48
+ - 1: train/learning_rate
49
+ 5: 1
50
+ 6:
51
+ - 1
52
+ - 1: train/epoch
53
+ 5: 1
54
+ 6:
55
+ - 1
56
+ - 1: eval/loss
57
+ 5: 1
58
+ 6:
59
+ - 1
60
+ - 1: eval/wer
61
+ 5: 1
62
+ 6:
63
+ - 1
64
+ - 1: eval/runtime
65
+ 5: 1
66
+ 6:
67
+ - 1
68
+ - 1: eval/samples_per_second
69
+ 5: 1
70
+ 6:
71
+ - 1
72
+ - 1: eval/steps_per_second
73
+ 5: 1
74
+ 6:
75
+ - 1
76
+ vocab_size:
77
+ desc: null
78
+ value: 51865
79
+ num_mel_bins:
80
+ desc: null
81
+ value: 80
82
+ d_model:
83
+ desc: null
84
+ value: 768
85
+ encoder_layers:
86
+ desc: null
87
+ value: 12
88
+ encoder_attention_heads:
89
+ desc: null
90
+ value: 12
91
+ decoder_layers:
92
+ desc: null
93
+ value: 12
94
+ decoder_attention_heads:
95
+ desc: null
96
+ value: 12
97
+ decoder_ffn_dim:
98
+ desc: null
99
+ value: 3072
100
+ encoder_ffn_dim:
101
+ desc: null
102
+ value: 3072
103
+ dropout:
104
+ desc: null
105
+ value: 0.0
106
+ attention_dropout:
107
+ desc: null
108
+ value: 0.0
109
+ activation_dropout:
110
+ desc: null
111
+ value: 0.0
112
+ activation_function:
113
+ desc: null
114
+ value: gelu
115
+ init_std:
116
+ desc: null
117
+ value: 0.02
118
+ encoder_layerdrop:
119
+ desc: null
120
+ value: 0.0
121
+ decoder_layerdrop:
122
+ desc: null
123
+ value: 0.0
124
+ use_cache:
125
+ desc: null
126
+ value: false
127
+ num_hidden_layers:
128
+ desc: null
129
+ value: 12
130
+ scale_embedding:
131
+ desc: null
132
+ value: false
133
+ max_source_positions:
134
+ desc: null
135
+ value: 1500
136
+ max_target_positions:
137
+ desc: null
138
+ value: 448
139
+ classifier_proj_size:
140
+ desc: null
141
+ value: 256
142
+ use_weighted_layer_sum:
143
+ desc: null
144
+ value: false
145
+ apply_spec_augment:
146
+ desc: null
147
+ value: false
148
+ mask_time_prob:
149
+ desc: null
150
+ value: 0.05
151
+ mask_time_length:
152
+ desc: null
153
+ value: 10
154
+ mask_time_min_masks:
155
+ desc: null
156
+ value: 2
157
+ mask_feature_prob:
158
+ desc: null
159
+ value: 0.0
160
+ mask_feature_length:
161
+ desc: null
162
+ value: 10
163
+ mask_feature_min_masks:
164
+ desc: null
165
+ value: 0
166
+ return_dict:
167
+ desc: null
168
+ value: true
169
+ output_hidden_states:
170
+ desc: null
171
+ value: false
172
+ output_attentions:
173
+ desc: null
174
+ value: false
175
+ torchscript:
176
+ desc: null
177
+ value: false
178
+ torch_dtype:
179
+ desc: null
180
+ value: float32
181
+ use_bfloat16:
182
+ desc: null
183
+ value: false
184
+ tf_legacy_loss:
185
+ desc: null
186
+ value: false
187
+ pruned_heads:
188
+ desc: null
189
+ value: {}
190
+ tie_word_embeddings:
191
+ desc: null
192
+ value: true
193
+ is_encoder_decoder:
194
+ desc: null
195
+ value: true
196
+ is_decoder:
197
+ desc: null
198
+ value: false
199
+ cross_attention_hidden_size:
200
+ desc: null
201
+ value: null
202
+ add_cross_attention:
203
+ desc: null
204
+ value: false
205
+ tie_encoder_decoder:
206
+ desc: null
207
+ value: false
208
+ max_length:
209
+ desc: null
210
+ value: 448
211
+ min_length:
212
+ desc: null
213
+ value: 0
214
+ do_sample:
215
+ desc: null
216
+ value: false
217
+ early_stopping:
218
+ desc: null
219
+ value: false
220
+ num_beams:
221
+ desc: null
222
+ value: 1
223
+ num_beam_groups:
224
+ desc: null
225
+ value: 1
226
+ diversity_penalty:
227
+ desc: null
228
+ value: 0.0
229
+ temperature:
230
+ desc: null
231
+ value: 1.0
232
+ top_k:
233
+ desc: null
234
+ value: 50
235
+ top_p:
236
+ desc: null
237
+ value: 1.0
238
+ typical_p:
239
+ desc: null
240
+ value: 1.0
241
+ repetition_penalty:
242
+ desc: null
243
+ value: 1.0
244
+ length_penalty:
245
+ desc: null
246
+ value: 1.0
247
+ no_repeat_ngram_size:
248
+ desc: null
249
+ value: 0
250
+ encoder_no_repeat_ngram_size:
251
+ desc: null
252
+ value: 0
253
+ bad_words_ids:
254
+ desc: null
255
+ value: null
256
+ num_return_sequences:
257
+ desc: null
258
+ value: 1
259
+ chunk_size_feed_forward:
260
+ desc: null
261
+ value: 0
262
+ output_scores:
263
+ desc: null
264
+ value: false
265
+ return_dict_in_generate:
266
+ desc: null
267
+ value: false
268
+ forced_bos_token_id:
269
+ desc: null
270
+ value: null
271
+ forced_eos_token_id:
272
+ desc: null
273
+ value: null
274
+ remove_invalid_values:
275
+ desc: null
276
+ value: false
277
+ exponential_decay_length_penalty:
278
+ desc: null
279
+ value: null
280
+ suppress_tokens:
281
+ desc: null
282
+ value: null
283
+ begin_suppress_tokens:
284
+ desc: null
285
+ value:
286
+ - 220
287
+ - 50257
288
+ architectures:
289
+ desc: null
290
+ value:
291
+ - WhisperForConditionalGeneration
292
+ finetuning_task:
293
+ desc: null
294
+ value: null
295
+ id2label:
296
+ desc: null
297
+ value:
298
+ '0': LABEL_0
299
+ '1': LABEL_1
300
+ label2id:
301
+ desc: null
302
+ value:
303
+ LABEL_0: 0
304
+ LABEL_1: 1
305
+ tokenizer_class:
306
+ desc: null
307
+ value: null
308
+ prefix:
309
+ desc: null
310
+ value: null
311
+ bos_token_id:
312
+ desc: null
313
+ value: 50257
314
+ pad_token_id:
315
+ desc: null
316
+ value: 50257
317
+ eos_token_id:
318
+ desc: null
319
+ value: 50257
320
+ sep_token_id:
321
+ desc: null
322
+ value: null
323
+ decoder_start_token_id:
324
+ desc: null
325
+ value: 50258
326
+ task_specific_params:
327
+ desc: null
328
+ value: null
329
+ problem_type:
330
+ desc: null
331
+ value: null
332
+ _name_or_path:
333
+ desc: null
334
+ value: openai/whisper-small
335
+ transformers_version:
336
+ desc: null
337
+ value: 4.29.0.dev0
338
+ forced_decoder_ids:
339
+ desc: null
340
+ value: null
341
+ model_type:
342
+ desc: null
343
+ value: whisper
344
+ output_dir:
345
+ desc: null
346
+ value: ./
347
+ overwrite_output_dir:
348
+ desc: null
349
+ value: true
350
+ do_train:
351
+ desc: null
352
+ value: true
353
+ do_eval:
354
+ desc: null
355
+ value: true
356
+ do_predict:
357
+ desc: null
358
+ value: false
359
+ evaluation_strategy:
360
+ desc: null
361
+ value: steps
362
+ prediction_loss_only:
363
+ desc: null
364
+ value: false
365
+ per_device_train_batch_size:
366
+ desc: null
367
+ value: 32
368
+ per_device_eval_batch_size:
369
+ desc: null
370
+ value: 32
371
+ per_gpu_train_batch_size:
372
+ desc: null
373
+ value: None
374
+ per_gpu_eval_batch_size:
375
+ desc: null
376
+ value: None
377
+ gradient_accumulation_steps:
378
+ desc: null
379
+ value: 8
380
+ eval_accumulation_steps:
381
+ desc: null
382
+ value: None
383
+ eval_delay:
384
+ desc: null
385
+ value: 0
386
+ learning_rate:
387
+ desc: null
388
+ value: 1.75e-05
389
+ weight_decay:
390
+ desc: null
391
+ value: 0.0
392
+ adam_beta1:
393
+ desc: null
394
+ value: 0.9
395
+ adam_beta2:
396
+ desc: null
397
+ value: 0.999
398
+ adam_epsilon:
399
+ desc: null
400
+ value: 1.0e-08
401
+ max_grad_norm:
402
+ desc: null
403
+ value: 1.0
404
+ num_train_epochs:
405
+ desc: null
406
+ value: 3.0
407
+ max_steps:
408
+ desc: null
409
+ value: 25000
410
+ lr_scheduler_type:
411
+ desc: null
412
+ value: linear
413
+ warmup_ratio:
414
+ desc: null
415
+ value: 0.0
416
+ warmup_steps:
417
+ desc: null
418
+ value: 5000
419
+ log_level:
420
+ desc: null
421
+ value: passive
422
+ log_level_replica:
423
+ desc: null
424
+ value: warning
425
+ log_on_each_node:
426
+ desc: null
427
+ value: true
428
+ logging_dir:
429
+ desc: null
430
+ value: ./runs/May22_13-29-14_crimv3mgpu016
431
+ logging_strategy:
432
+ desc: null
433
+ value: steps
434
+ logging_first_step:
435
+ desc: null
436
+ value: false
437
+ logging_steps:
438
+ desc: null
439
+ value: 25
440
+ logging_nan_inf_filter:
441
+ desc: null
442
+ value: true
443
+ save_strategy:
444
+ desc: null
445
+ value: steps
446
+ save_steps:
447
+ desc: null
448
+ value: 1000
449
+ save_total_limit:
450
+ desc: null
451
+ value: None
452
+ save_safetensors:
453
+ desc: null
454
+ value: false
455
+ save_on_each_node:
456
+ desc: null
457
+ value: false
458
+ no_cuda:
459
+ desc: null
460
+ value: false
461
+ use_mps_device:
462
+ desc: null
463
+ value: false
464
+ seed:
465
+ desc: null
466
+ value: 42
467
+ data_seed:
468
+ desc: null
469
+ value: None
470
+ jit_mode_eval:
471
+ desc: null
472
+ value: false
473
+ use_ipex:
474
+ desc: null
475
+ value: false
476
+ bf16:
477
+ desc: null
478
+ value: false
479
+ fp16:
480
+ desc: null
481
+ value: true
482
+ fp16_opt_level:
483
+ desc: null
484
+ value: O1
485
+ half_precision_backend:
486
+ desc: null
487
+ value: cuda_amp
488
+ bf16_full_eval:
489
+ desc: null
490
+ value: false
491
+ fp16_full_eval:
492
+ desc: null
493
+ value: false
494
+ tf32:
495
+ desc: null
496
+ value: None
497
+ local_rank:
498
+ desc: null
499
+ value: 0
500
+ ddp_backend:
501
+ desc: null
502
+ value: None
503
+ tpu_num_cores:
504
+ desc: null
505
+ value: None
506
+ tpu_metrics_debug:
507
+ desc: null
508
+ value: false
509
+ debug:
510
+ desc: null
511
+ value: '[]'
512
+ dataloader_drop_last:
513
+ desc: null
514
+ value: false
515
+ eval_steps:
516
+ desc: null
517
+ value: 1000
518
+ dataloader_num_workers:
519
+ desc: null
520
+ value: 0
521
+ past_index:
522
+ desc: null
523
+ value: -1
524
+ run_name:
525
+ desc: null
526
+ value: ./
527
+ disable_tqdm:
528
+ desc: null
529
+ value: false
530
+ remove_unused_columns:
531
+ desc: null
532
+ value: true
533
+ label_names:
534
+ desc: null
535
+ value: None
536
+ load_best_model_at_end:
537
+ desc: null
538
+ value: true
539
+ metric_for_best_model:
540
+ desc: null
541
+ value: wer
542
+ greater_is_better:
543
+ desc: null
544
+ value: false
545
+ ignore_data_skip:
546
+ desc: null
547
+ value: false
548
+ sharded_ddp:
549
+ desc: null
550
+ value: '[]'
551
+ fsdp:
552
+ desc: null
553
+ value: '[]'
554
+ fsdp_min_num_params:
555
+ desc: null
556
+ value: 0
557
+ fsdp_config:
558
+ desc: null
559
+ value: '{''fsdp_min_num_params'': 0, ''xla'': False, ''xla_fsdp_grad_ckpt'': False}'
560
+ fsdp_transformer_layer_cls_to_wrap:
561
+ desc: null
562
+ value: None
563
+ deepspeed:
564
+ desc: null
565
+ value: None
566
+ label_smoothing_factor:
567
+ desc: null
568
+ value: 0.0
569
+ optim:
570
+ desc: null
571
+ value: adamw_hf
572
+ optim_args:
573
+ desc: null
574
+ value: None
575
+ adafactor:
576
+ desc: null
577
+ value: false
578
+ group_by_length:
579
+ desc: null
580
+ value: false
581
+ length_column_name:
582
+ desc: null
583
+ value: input_length
584
+ report_to:
585
+ desc: null
586
+ value: '[''wandb'']'
587
+ ddp_find_unused_parameters:
588
+ desc: null
589
+ value: None
590
+ ddp_bucket_cap_mb:
591
+ desc: null
592
+ value: None
593
+ dataloader_pin_memory:
594
+ desc: null
595
+ value: true
596
+ skip_memory_metrics:
597
+ desc: null
598
+ value: true
599
+ use_legacy_prediction_loop:
600
+ desc: null
601
+ value: false
602
+ push_to_hub:
603
+ desc: null
604
+ value: true
605
+ resume_from_checkpoint:
606
+ desc: null
607
+ value: None
608
+ hub_model_id:
609
+ desc: null
610
+ value: None
611
+ hub_strategy:
612
+ desc: null
613
+ value: every_save
614
+ hub_token:
615
+ desc: null
616
+ value: <HUB_TOKEN>
617
+ hub_private_repo:
618
+ desc: null
619
+ value: false
620
+ gradient_checkpointing:
621
+ desc: null
622
+ value: true
623
+ include_inputs_for_metrics:
624
+ desc: null
625
+ value: false
626
+ fp16_backend:
627
+ desc: null
628
+ value: auto
629
+ push_to_hub_model_id:
630
+ desc: null
631
+ value: None
632
+ push_to_hub_organization:
633
+ desc: null
634
+ value: None
635
+ push_to_hub_token:
636
+ desc: null
637
+ value: <PUSH_TO_HUB_TOKEN>
638
+ mp_parameters:
639
+ desc: null
640
+ value: ''
641
+ auto_find_batch_size:
642
+ desc: null
643
+ value: false
644
+ full_determinism:
645
+ desc: null
646
+ value: false
647
+ torchdynamo:
648
+ desc: null
649
+ value: None
650
+ ray_scope:
651
+ desc: null
652
+ value: last
653
+ ddp_timeout:
654
+ desc: null
655
+ value: 1800
656
+ torch_compile:
657
+ desc: null
658
+ value: false
659
+ torch_compile_backend:
660
+ desc: null
661
+ value: None
662
+ torch_compile_mode:
663
+ desc: null
664
+ value: None
665
+ xpu_backend:
666
+ desc: null
667
+ value: None
668
+ sortish_sampler:
669
+ desc: null
670
+ value: false
671
+ predict_with_generate:
672
+ desc: null
673
+ value: true
674
+ generation_max_length:
675
+ desc: null
676
+ value: 225
677
+ generation_num_beams:
678
+ desc: null
679
+ value: None
680
+ generation_config:
681
+ desc: null
682
+ value: None
683
+ train_batch_size:
684
+ desc: null
685
+ value: 64
686
+ eval_batch_size:
687
+ desc: null
688
+ value: 64
wandb/run-20230522_132945-qz4vsw4s/files/requirements.txt ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ absl-py==1.4.0
2
+ accelerate==0.18.0
3
+ aiohttp==3.8.4
4
+ aiosignal==1.3.1
5
+ appdirs==1.4.4
6
+ async-timeout==4.0.2
7
+ attrs==23.1.0
8
+ audioread==3.0.0
9
+ cachetools==5.3.0
10
+ certifi==2022.12.7
11
+ cffi==1.15.1
12
+ charset-normalizer==3.1.0
13
+ click==8.1.3
14
+ cmake==3.26.3
15
+ datasets==2.12.1.dev0
16
+ decorator==5.1.1
17
+ dill==0.3.6
18
+ docker-pycreds==0.4.0
19
+ evaluate==0.4.0
20
+ filelock==3.12.0
21
+ frozenlist==1.3.3
22
+ fsspec==2023.4.0
23
+ gitdb==4.0.10
24
+ gitpython==3.1.31
25
+ google-auth-oauthlib==1.0.0
26
+ google-auth==2.17.3
27
+ grpcio==1.54.0
28
+ huggingface-hub==0.14.1
29
+ idna==3.4
30
+ importlib-metadata==6.6.0
31
+ jinja2==3.1.2
32
+ jiwer==3.0.1
33
+ joblib==1.2.0
34
+ lazy-loader==0.2
35
+ librosa==0.10.0.post2
36
+ lit==16.0.2
37
+ llvmlite==0.40.0
38
+ markdown==3.4.3
39
+ markupsafe==2.1.2
40
+ more-itertools==9.1.0
41
+ mpmath==1.3.0
42
+ msgpack==1.0.5
43
+ multidict==6.0.4
44
+ multiprocess==0.70.14
45
+ networkx==3.1
46
+ numba==0.57.0
47
+ numpy==1.24.3
48
+ nvidia-cublas-cu11==11.10.3.66
49
+ nvidia-cuda-cupti-cu11==11.7.101
50
+ nvidia-cuda-nvrtc-cu11==11.7.99
51
+ nvidia-cuda-runtime-cu11==11.7.99
52
+ nvidia-cudnn-cu11==8.5.0.96
53
+ nvidia-cufft-cu11==10.9.0.58
54
+ nvidia-curand-cu11==10.2.10.91
55
+ nvidia-cusolver-cu11==11.4.0.1
56
+ nvidia-cusparse-cu11==11.7.4.91
57
+ nvidia-nccl-cu11==2.14.3
58
+ nvidia-nvtx-cu11==11.7.91
59
+ oauthlib==3.2.2
60
+ packaging==23.1
61
+ pandas==2.0.1
62
+ pathtools==0.1.2
63
+ pip==23.1.2
64
+ pooch==1.6.0
65
+ protobuf==4.22.3
66
+ psutil==5.9.5
67
+ pyarrow==12.0.0
68
+ pyasn1-modules==0.3.0
69
+ pyasn1==0.5.0
70
+ pycparser==2.21
71
+ python-dateutil==2.8.2
72
+ pytz==2023.3
73
+ pyyaml==6.0
74
+ rapidfuzz==2.13.7
75
+ regex==2023.5.4
76
+ requests-oauthlib==1.3.1
77
+ requests==2.29.0
78
+ responses==0.18.0
79
+ rsa==4.9
80
+ scikit-learn==1.2.2
81
+ scipy==1.10.1
82
+ sentry-sdk==1.22.1
83
+ setproctitle==1.3.2
84
+ setuptools==67.7.2
85
+ six==1.16.0
86
+ smmap==5.0.0
87
+ soundfile==0.12.1
88
+ soxr==0.3.5
89
+ sympy==1.11.1
90
+ tensorboard-data-server==0.7.0
91
+ tensorboard==2.12.3
92
+ threadpoolctl==3.1.0
93
+ tokenizers==0.13.3
94
+ torch==2.0.0
95
+ torchaudio==2.0.1
96
+ tqdm==4.65.0
97
+ transformers==4.29.0.dev0
98
+ triton==2.0.0
99
+ typing-extensions==4.5.0
100
+ tzdata==2023.3
101
+ urllib3==1.26.15
102
+ wandb==0.15.2
103
+ werkzeug==2.3.3
104
+ wheel==0.40.0
105
+ xxhash==3.2.0
106
+ yarl==1.9.2
107
+ zipp==3.15.0
wandb/run-20230522_132945-qz4vsw4s/files/wandb-metadata.json ADDED
@@ -0,0 +1,369 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-3.10.0-957.27.2.el7.x86_64-x86_64-with-glibc2.17",
3
+ "python": "3.9.16",
4
+ "heartbeatAt": "2023-05-22T10:29:47.270303",
5
+ "startedAt": "2023-05-22T10:29:45.602790",
6
+ "docker": null,
7
+ "cuda": null,
8
+ "args": [
9
+ "--model_name_or_path=openai/whisper-small",
10
+ "--dataset_name=mozilla-foundation/common_voice_11_0",
11
+ "--dataset_config_name=ar",
12
+ "--language=arabic",
13
+ "--train_split_name=train+validation",
14
+ "--eval_split_name=test",
15
+ "--model_index_name=Whisper Small Arabic",
16
+ "--max_steps=25000",
17
+ "--output_dir=./",
18
+ "--per_device_train_batch_size=32",
19
+ "--gradient_accumulation_steps=8",
20
+ "--per_device_eval_batch_size=32",
21
+ "--logging_steps=25",
22
+ "--learning_rate=1.75e-5",
23
+ "--warmup_steps=5000",
24
+ "--evaluation_strategy=steps",
25
+ "--eval_steps=1000",
26
+ "--save_strategy=steps",
27
+ "--save_steps=1000",
28
+ "--generation_max_length=225",
29
+ "--length_column_name=input_length",
30
+ "--max_duration_in_seconds=30",
31
+ "--text_column_name=sentence",
32
+ "--freeze_feature_encoder=False",
33
+ "--report_to=tensorboard",
34
+ "--report_to=wandb",
35
+ "--metric_for_best_model=wer",
36
+ "--greater_is_better=False",
37
+ "--load_best_model_at_end",
38
+ "--gradient_checkpointing",
39
+ "--fp16",
40
+ "--overwrite_output_dir",
41
+ "--do_train",
42
+ "--do_eval",
43
+ "--predict_with_generate",
44
+ "--do_normalize_eval",
45
+ "--streaming",
46
+ "--use_auth_token",
47
+ "--push_to_hub"
48
+ ],
49
+ "state": "running",
50
+ "program": "/home/local/QCRI/dizham/kanari/whisper/whisper-small-ar/run_speech_recognition_seq2seq_streaming.py",
51
+ "codePath": "run_speech_recognition_seq2seq_streaming.py",
52
+ "git": {
53
+ "remote": "https://huggingface.co/danielizham/whisper-small-ar",
54
+ "commit": "f54a820bed3c4227672afb672e1bc7dba4a9df72"
55
+ },
56
+ "email": "daniel.izham@gmail.com",
57
+ "root": "/home/local/QCRI/dizham/kanari/whisper/whisper-small-ar",
58
+ "host": "crimv3mgpu016",
59
+ "username": "dizham",
60
+ "executable": "/home/local/QCRI/dizham/miniconda3/envs/whisper/bin/python",
61
+ "cpu_count": 28,
62
+ "cpu_count_logical": 56,
63
+ "cpu_freq": {
64
+ "current": 2201.0,
65
+ "min": 1000.0,
66
+ "max": 2201.0
67
+ },
68
+ "cpu_freq_per_core": [
69
+ {
70
+ "current": 2201.0,
71
+ "min": 1000.0,
72
+ "max": 2201.0
73
+ },
74
+ {
75
+ "current": 2201.0,
76
+ "min": 1000.0,
77
+ "max": 2201.0
78
+ },
79
+ {
80
+ "current": 2201.0,
81
+ "min": 1000.0,
82
+ "max": 2201.0
83
+ },
84
+ {
85
+ "current": 2201.0,
86
+ "min": 1000.0,
87
+ "max": 2201.0
88
+ },
89
+ {
90
+ "current": 2201.0,
91
+ "min": 1000.0,
92
+ "max": 2201.0
93
+ },
94
+ {
95
+ "current": 2201.0,
96
+ "min": 1000.0,
97
+ "max": 2201.0
98
+ },
99
+ {
100
+ "current": 2201.0,
101
+ "min": 1000.0,
102
+ "max": 2201.0
103
+ },
104
+ {
105
+ "current": 2201.0,
106
+ "min": 1000.0,
107
+ "max": 2201.0
108
+ },
109
+ {
110
+ "current": 2201.0,
111
+ "min": 1000.0,
112
+ "max": 2201.0
113
+ },
114
+ {
115
+ "current": 2201.0,
116
+ "min": 1000.0,
117
+ "max": 2201.0
118
+ },
119
+ {
120
+ "current": 2201.0,
121
+ "min": 1000.0,
122
+ "max": 2201.0
123
+ },
124
+ {
125
+ "current": 2201.0,
126
+ "min": 1000.0,
127
+ "max": 2201.0
128
+ },
129
+ {
130
+ "current": 2201.0,
131
+ "min": 1000.0,
132
+ "max": 2201.0
133
+ },
134
+ {
135
+ "current": 2201.0,
136
+ "min": 1000.0,
137
+ "max": 2201.0
138
+ },
139
+ {
140
+ "current": 2201.0,
141
+ "min": 1000.0,
142
+ "max": 2201.0
143
+ },
144
+ {
145
+ "current": 2201.0,
146
+ "min": 1000.0,
147
+ "max": 2201.0
148
+ },
149
+ {
150
+ "current": 2201.0,
151
+ "min": 1000.0,
152
+ "max": 2201.0
153
+ },
154
+ {
155
+ "current": 2201.0,
156
+ "min": 1000.0,
157
+ "max": 2201.0
158
+ },
159
+ {
160
+ "current": 2201.0,
161
+ "min": 1000.0,
162
+ "max": 2201.0
163
+ },
164
+ {
165
+ "current": 2201.0,
166
+ "min": 1000.0,
167
+ "max": 2201.0
168
+ },
169
+ {
170
+ "current": 2201.0,
171
+ "min": 1000.0,
172
+ "max": 2201.0
173
+ },
174
+ {
175
+ "current": 2201.0,
176
+ "min": 1000.0,
177
+ "max": 2201.0
178
+ },
179
+ {
180
+ "current": 2201.0,
181
+ "min": 1000.0,
182
+ "max": 2201.0
183
+ },
184
+ {
185
+ "current": 2201.0,
186
+ "min": 1000.0,
187
+ "max": 2201.0
188
+ },
189
+ {
190
+ "current": 2201.0,
191
+ "min": 1000.0,
192
+ "max": 2201.0
193
+ },
194
+ {
195
+ "current": 2201.0,
196
+ "min": 1000.0,
197
+ "max": 2201.0
198
+ },
199
+ {
200
+ "current": 2201.0,
201
+ "min": 1000.0,
202
+ "max": 2201.0
203
+ },
204
+ {
205
+ "current": 2201.0,
206
+ "min": 1000.0,
207
+ "max": 2201.0
208
+ },
209
+ {
210
+ "current": 2201.0,
211
+ "min": 1000.0,
212
+ "max": 2201.0
213
+ },
214
+ {
215
+ "current": 2201.0,
216
+ "min": 1000.0,
217
+ "max": 2201.0
218
+ },
219
+ {
220
+ "current": 2201.0,
221
+ "min": 1000.0,
222
+ "max": 2201.0
223
+ },
224
+ {
225
+ "current": 2201.0,
226
+ "min": 1000.0,
227
+ "max": 2201.0
228
+ },
229
+ {
230
+ "current": 2201.0,
231
+ "min": 1000.0,
232
+ "max": 2201.0
233
+ },
234
+ {
235
+ "current": 2201.0,
236
+ "min": 1000.0,
237
+ "max": 2201.0
238
+ },
239
+ {
240
+ "current": 2201.0,
241
+ "min": 1000.0,
242
+ "max": 2201.0
243
+ },
244
+ {
245
+ "current": 2201.0,
246
+ "min": 1000.0,
247
+ "max": 2201.0
248
+ },
249
+ {
250
+ "current": 2201.0,
251
+ "min": 1000.0,
252
+ "max": 2201.0
253
+ },
254
+ {
255
+ "current": 2201.0,
256
+ "min": 1000.0,
257
+ "max": 2201.0
258
+ },
259
+ {
260
+ "current": 2201.0,
261
+ "min": 1000.0,
262
+ "max": 2201.0
263
+ },
264
+ {
265
+ "current": 2201.0,
266
+ "min": 1000.0,
267
+ "max": 2201.0
268
+ },
269
+ {
270
+ "current": 2201.0,
271
+ "min": 1000.0,
272
+ "max": 2201.0
273
+ },
274
+ {
275
+ "current": 2201.0,
276
+ "min": 1000.0,
277
+ "max": 2201.0
278
+ },
279
+ {
280
+ "current": 2201.0,
281
+ "min": 1000.0,
282
+ "max": 2201.0
283
+ },
284
+ {
285
+ "current": 2201.0,
286
+ "min": 1000.0,
287
+ "max": 2201.0
288
+ },
289
+ {
290
+ "current": 2201.0,
291
+ "min": 1000.0,
292
+ "max": 2201.0
293
+ },
294
+ {
295
+ "current": 2201.0,
296
+ "min": 1000.0,
297
+ "max": 2201.0
298
+ },
299
+ {
300
+ "current": 2201.0,
301
+ "min": 1000.0,
302
+ "max": 2201.0
303
+ },
304
+ {
305
+ "current": 2201.0,
306
+ "min": 1000.0,
307
+ "max": 2201.0
308
+ },
309
+ {
310
+ "current": 2201.0,
311
+ "min": 1000.0,
312
+ "max": 2201.0
313
+ },
314
+ {
315
+ "current": 2201.0,
316
+ "min": 1000.0,
317
+ "max": 2201.0
318
+ },
319
+ {
320
+ "current": 2201.0,
321
+ "min": 1000.0,
322
+ "max": 2201.0
323
+ },
324
+ {
325
+ "current": 2201.0,
326
+ "min": 1000.0,
327
+ "max": 2201.0
328
+ },
329
+ {
330
+ "current": 2201.0,
331
+ "min": 1000.0,
332
+ "max": 2201.0
333
+ },
334
+ {
335
+ "current": 2201.0,
336
+ "min": 1000.0,
337
+ "max": 2201.0
338
+ },
339
+ {
340
+ "current": 2201.0,
341
+ "min": 1000.0,
342
+ "max": 2201.0
343
+ },
344
+ {
345
+ "current": 2201.0,
346
+ "min": 1000.0,
347
+ "max": 2201.0
348
+ }
349
+ ],
350
+ "disk": {
351
+ "total": 99.951171875,
352
+ "used": 20.10973358154297
353
+ },
354
+ "gpu": "Tesla V100-SXM2-32GB",
355
+ "gpu_count": 2,
356
+ "gpu_devices": [
357
+ {
358
+ "name": "Tesla V100-SXM2-32GB",
359
+ "memory_total": 34359738368
360
+ },
361
+ {
362
+ "name": "Tesla V100-SXM2-32GB",
363
+ "memory_total": 34359738368
364
+ }
365
+ ],
366
+ "memory": {
367
+ "total": 251.55353164672852
368
+ }
369
+ }
wandb/run-20230522_132945-qz4vsw4s/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"eval/loss": 0.42629721760749817, "eval/wer": 55.989333333333335, "eval/runtime": 2156.9645, "eval/samples_per_second": 4.84, "eval/steps_per_second": 0.076, "_timestamp": 1684864537.7000473, "_runtime": 113152.00527215004, "_step": 40}
wandb/run-20230522_132945-qz4vsw4s/run-qz4vsw4s.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c424f8ba221b1f85d0c8dae2b695a875c923fb6fac4411b2b506f77e123eae3
3
+ size 9963106