anonymoussubmitter222 commited on
Commit
0fdcdc4
1 Parent(s): be9098b
TunisianASR/results/14epoch_tunisian/1234/app.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import logging
5
+ import speechbrain as sb
6
+ from speechbrain.utils.distributed import run_on_main
7
+ from hyperpyyaml import load_hyperpyyaml
8
+ from pathlib import Path
9
+ import torchaudio.transforms as T
10
+ from cv_train import ASRCV
11
+ import torchaudio
12
+ import numpy as np
13
+ import kenlm
14
+ from pyctcdecode import build_ctcdecoder
15
+ import re
16
+ from torch.nn.utils.rnn import pad_sequence
17
+ import torch.optim as optim
18
+ import torch.nn as nn
19
+
20
+
21
+ # Commented out IPython magic to ensure Python compatibility.
22
# Load the hyperparameters of the Tunisian ASR model. The YAML path is given
# explicitly instead of being read from the command line.
hparams_file, run_opts, overrides = sb.parse_arguments(
    ["TunisianASR/semi_trained.yaml"]
)

# If distributed_launch=True, create the DDP group with the right
# communication protocol.
sb.utils.distributed.ddp_init_group(run_opts)

with open(hparams_file) as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# Create the experiment directory and save the hyperparameters used.
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)
37
+ # Dataset preparation (CSV manifests named in the hyperparameters)
38
+
39
def dataio_prepare(hparams):
    """Prepare the datasets and the data-processing pipelines.

    Arguments
    ---------
    hparams : dict
        Loaded hyperparameters. Keys read here: ``data_folder``,
        ``train_csv``, ``valid_csv``, ``test_csv``, ``sorting``,
        ``avoid_if_longer_than``, ``dataloader_options``, ``sample_rate``,
        ``save_folder``, ``blank_index`` and ``unk_index``.

    Returns
    -------
    tuple
        ``(train_data, valid_data, test_datasets, label_encoder)`` where
        ``test_datasets`` maps the stem of each test CSV to its dataset.

    Raises
    ------
    NotImplementedError
        If ``hparams["sorting"]`` is not ``random``, ``ascending`` or
        ``descending``.
    """
    data_folder = hparams["data_folder"]

    def load_csv(csv_path):
        # One place for the CSV -> dataset conversion shared by all splits.
        return sb.dataio.dataset.DynamicItemDataset.from_csv(
            csv_path=csv_path, replacements={"data_root": data_folder},
        )

    # 1. Define datasets
    train_data = load_csv(hparams["train_csv"])

    sorting = hparams["sorting"]
    if sorting in ("ascending", "descending"):
        # Sorting the training data speeds up training; overly long
        # utterances are filtered out at the same time.
        train_data = train_data.filtered_sorted(
            sort_key="duration",
            reverse=(sorting == "descending"),
            key_max_value={"duration": hparams["avoid_if_longer_than"]},
        )
        # When sorting, do not shuffle in the dataloader: it would be pointless.
        hparams["dataloader_options"]["shuffle"] = False
    elif sorting != "random":
        raise NotImplementedError(
            "sorting must be random, ascending or descending"
        )

    # The validation and test data are sorted as well so evaluation is faster.
    valid_data = load_csv(hparams["valid_csv"]).filtered_sorted(
        sort_key="duration"
    )
    test_datasets = {}
    for csv_file in hparams["test_csv"]:
        name = Path(csv_file).stem
        test_datasets[name] = load_csv(csv_file).filtered_sorted(
            sort_key="duration"
        )

    datasets = [train_data, valid_data] + list(test_datasets.values())

    # 2. Define audio pipeline:
    @sb.utils.data_pipeline.takes("wav")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline(wav):
        info = torchaudio.info(wav)
        sig = sb.dataio.dataio.read_audio(wav)
        if len(sig.shape) > 1:
            # Multi-channel audio is averaged down to a single channel.
            sig = torch.mean(sig, dim=1)
        resampled = torchaudio.transforms.Resample(
            info.sample_rate, hparams["sample_rate"],
        )(sig)
        return resampled

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
    label_encoder = sb.dataio.encoder.CTCTextEncoder()

    # 3. Define text pipeline:
    @sb.utils.data_pipeline.takes("wrd")
    @sb.utils.data_pipeline.provides(
        "wrd", "char_list", "tokens_list", "tokens"
    )
    def text_pipeline(wrd):
        yield wrd
        char_list = list(wrd)
        yield char_list
        tokens_list = label_encoder.encode_sequence(char_list)
        yield tokens_list
        tokens = torch.LongTensor(tokens_list)
        yield tokens

    sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)

    # The character vocabulary is loaded from save_folder when the file
    # exists, otherwise it is built from the training transcripts.
    lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
    special_labels = {
        "blank_label": hparams["blank_index"],
        "unk_label": hparams["unk_index"],
    }
    label_encoder.load_or_create(
        path=lab_enc_file,
        from_didatasets=[train_data],
        output_key="char_list",
        special_labels=special_labels,
        sequence_input=True,
    )

    # 4. Set output:
    sb.dataio.dataset.set_output_keys(
        datasets, ["id", "sig", "wrd", "char_list", "tokens"],
    )
    return train_data, valid_data, test_datasets, label_encoder
143
+
144
class ASR(sb.core.Brain):
    """wav2vec2 + CTC brain for the Tunisian ASR model.

    The wav2vec2 encoder and the downstream model use two separate
    optimizers; the wav2vec2 one only exists when the encoder is not frozen.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been flattened -- confirm against the upstream recipe.
    """

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output probabilities."""
        batch = batch.to(self.device)
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)

        # Augmentation is applied in training only, and only if configured.
        if stage == sb.Stage.TRAIN:
            if hasattr(self.hparams, "augmentation"):
                wavs = self.hparams.augmentation(wavs, wav_lens)

        # Forward pass
        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return p_ctc, wav_lens

    def custom_encode(self, wavs, wav_lens):
        """Return the wav2vec2 features and CTC log-probabilities of ``wavs``.

        Arguments
        ---------
        wavs : torch.Tensor
            Batch of waveforms; it is moved to the CPU before encoding.
        wav_lens : torch.Tensor or None
            Lengths passed to the wav2vec2 module; ``None`` is forwarded
            unchanged.

        Returns
        -------
        tuple
            ``(feats, p_ctc)``: the wav2vec2 output and the log-softmax of
            the CTC layer output.
        """
        wavs = wavs.to("cpu")
        if wav_lens is not None:
            # Tensor.to is not in-place: the result must be rebound, otherwise
            # the transfer is silently lost.
            wav_lens = wav_lens.to(self.device)

        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return feats, p_ctc

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets."""
        p_ctc, wav_lens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage != sb.Stage.TRAIN:
            predicted_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
            # Decode token terms to words
            if self.hparams.use_language_modelling:
                # `decoder` is the module-level CTC decoder built elsewhere
                # in this file.
                predicted_words = []
                for logs in p_ctc:
                    text = decoder.decode(logs.detach().cpu().numpy())
                    predicted_words.append(text.split(" "))
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]
            # Convert indices to words
            target_words = [wrd.split(" ") for wrd in batch.wrd]

            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input.

        The optimizers only step every ``grad_accumulation_factor`` batches.
        The wav2vec2 optimizer additionally waits until ``optimizer_step``
        reaches ``hparams.warmup_steps``.
        """
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision
        # TOFIX: CTC fine-tuning currently is unstable
        # This is certainly due to CTC being done in fp16 instead of fp32
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:
                if not self.hparams.wav2vec2.freeze:
                    self.scaler.unscale_(self.wav2vec_optimizer)
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.scaler.step(self.wav2vec_optimizer)
                    self.scaler.step(self.model_optimizer)
                self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.wav2vec_optimizer.step()
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches"""
        predictions = self.compute_forward(batch, stage=stage)
        with torch.no_grad():
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch"""
        # Fresh error-rate trackers for every validation/test stage.
        if stage != sb.Stage.TRAIN:
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch."""
        # Compute/store important stats
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            if not self.hparams.wav2vec2.freeze:
                sb.nnet.schedulers.update_learning_rate(
                    self.wav2vec_optimizer, new_lr_wav2vec
                )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                    "lr_wav2vec": old_lr_wav2vec,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Only the checkpoint with the lowest WER is kept.
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            with open(self.hparams.wer_file, "w") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        "Initializes the wav2vec2 optimizer and model optimizer"

        # If the wav2vec encoder is unfrozen, we create the optimizer
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
                self.modules.wav2vec2.parameters()
            )
            if self.checkpointer is not None:
                self.checkpointer.add_recoverable(
                    "wav2vec_opt", self.wav2vec_optimizer
                )

        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Reset the gradients of every optimizer that exists."""
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer.zero_grad(set_to_none)
        self.model_optimizer.zero_grad(set_to_none)
342
+
343
+
344
from speechbrain.pretrained import EncoderASR,EncoderDecoderASR

# --- French model: pretrained wav2vec2 CommonVoice encoder ASR -------------
french_asr_model = EncoderASR.from_hparams(
    source="asr-wav2vec2-commonvoice-fr",
    savedir="pretrained_models/asr-wav2vec2-commonvoice-fr",
)
french_asr_model.to("cpu")

# --- English model: brain rebuilt from its own hyperparameter file ---------
cvhparams_file, cvrun_opts, cvoverrides = sb.parse_arguments(
    ["EnglishCV/train_en_with_wav2vec.yaml"]
)
with open(cvhparams_file) as cvfin:
    cvhparams = load_hyperpyyaml(cvfin, cvoverrides)
cvrun_opts["device"] = "cpu"
english_asr_model = ASRCV(
    modules=cvhparams["modules"],
    hparams=cvhparams,
    run_opts=cvrun_opts,
    checkpointer=cvhparams["checkpointer"],
)
english_asr_model.modules.to("cpu")
english_asr_model.device = "cpu"
english_asr_model.checkpointer.recover_if_possible()

# --- Tunisian model: the ASR brain defined in this file --------------------
run_opts["device"] = "cpu"
print("moving to tunisian model")
asr_brain = ASR(
    modules=hparams["modules"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)
asr_brain.modules.to("cpu")
asr_brain.checkpointer.recover_if_possible()

# All three models are only used for inference, on the CPU.
asr_brain.modules.eval()
english_asr_model.modules.eval()
french_asr_model.mods.eval()
asr_brain.modules.to("cpu")
374
+
375
+ # Commented out IPython magic to ensure Python compatibility.
376
+ # %ls
377
+
378
+ # UTILS FUNCTIONS
379
def get_size_dimensions(arr):
    """Return the length of each nesting level of a nested list.

    Only the first element of every level is followed, so ragged lists are
    described by their first branch. A non-list input yields ``[]``.

    Arguments
    ---------
    arr : list or object
        Possibly nested list.

    Returns
    -------
    list of int
        Length of each level, outermost first.
    """
    size_dimensions = []
    while isinstance(arr, list):
        size_dimensions.append(len(arr))
        if not arr:
            # An empty level has no first element to descend into.
            break
        arr = arr[0]
    return size_dimensions
385
+
386
def scale_array(batch, n):
    """Stretch every row of ``batch`` to length ``n`` by repeating elements.

    Each element is repeated ``round(n / len(row)) + 1`` times in order and
    the result is cut at ``n`` items, so trailing elements of a row may be
    dropped.

    Arguments
    ---------
    batch : iterable of sequences
        Rows to stretch.
    n : int
        Target length of every row.

    Returns
    -------
    torch.Tensor
        Tensor built from the stretched rows.

    Raises
    ------
    ValueError
        If a row is longer than ``n`` or is empty.
    """
    scaled_batch = []

    for array in batch:
        if n < len(array):
            raise ValueError("Cannot scale Array down")
        if len(array) == 0:
            # Previously surfaced as an unrelated ZeroDivisionError.
            raise ValueError("Cannot scale an empty Array")

        repeat = round(n / len(array)) + 1
        # len(array) * repeat is always >= n, so the slice yields n items.
        stretched = [item for item in array for _ in range(repeat)]
        scaled_batch.append(stretched[:n])

    return torch.tensor(scaled_batch)
403
+
404
+
405
def load_paths(wavs_path):
    """Load several audio files into one zero-padded batch.

    Arguments
    ---------
    wavs_path : iterable of str
        Paths of the audio files to load.

    Returns
    -------
    torch.Tensor
        One row per file, right-padded with zeros to the longest signal.
    """
    waveforms = []
    for path in wavs_path:
        waveform, _ = torchaudio.load(path)
        if waveform.shape[0] == 1:
            waveform = waveform.squeeze(0)
        else:
            # Average the channels, as audio_pipeline does, so every item is
            # one-dimensional and can be padded along time.
            # assumes torchaudio.load returns (channels, time) -- TODO confirm
            waveform = waveform.mean(dim=0)
        waveforms.append(waveform)
    # Normalize the lengths to the longest signal by padding with zeros.
    # pad_sequence already returns a new tensor, so it is returned directly
    # rather than being copied again through torch.tensor().
    return pad_sequence(waveforms, batch_first=True)
413
+
414
+
415
+
416
device = 'cpu'
verbose = 0


# FLOW LEVEL FUNCTIONS
def merge_strategy(embeddings1, embeddings2, embeddings3, post1, post2, post3):
    """Concatenate three posteriograms and three embeddings along dim 2.

    The output layout along the last dimension is
    ``post1 | post2 | post3 | embeddings1 | embeddings2 | embeddings3``.
    All inputs are moved to the module-level ``device`` first.

    Arguments
    ---------
    embeddings1, embeddings2, embeddings3 : torch.Tensor
        Embedding tensors with at least three dimensions.
    post1, post2, post3 : torch.Tensor
        Posteriogram tensors with at least three dimensions.

    Returns
    -------
    torch.Tensor
        The concatenation described above, on ``device``.
    """
    posteriograms = [t.to(device) for t in (post1, post2, post3)]
    embeddings = [
        t.to(device) for t in (embeddings1, embeddings2, embeddings3)
    ]

    # A single concatenation gives the same tensor as merging each group
    # first, without allocating the two intermediate results.
    merged = torch.cat(posteriograms + embeddings, dim=2)

    if verbose != 0:
        post_width = sum(p.shape[2] for p in posteriograms)
        print('MERGED POST ', merged[:, :, :post_width].shape)
        print('MERGED emb ', merged[:, :, post_width:].shape)

    return merged
437
+
438
def decode(model, wavs, wav_lens):
    """Encode a batch with ``model`` and run its decoding function.

    Gradients are disabled for the whole call.

    Arguments
    ---------
    model : object
        Must expose ``device``, ``encode_batch`` and ``decoding_function``.
    wavs : torch.Tensor
        Batch of waveforms.
    wav_lens : torch.Tensor
        Lengths of the waveforms; moved to ``model.device`` before use.

    Returns
    -------
    object
        Whatever ``model.decoding_function`` returns.
    """
    with torch.no_grad():
        lens_on_device = wav_lens.to(model.device)
        encoded = model.encode_batch(wavs, lens_on_device)
        return model.decoding_function(encoded, lens_on_device)
445
+
446
def middle_layer(batch, lens):
    """Run the Tunisian, French and English models and merge their outputs.

    Arguments
    ---------
    batch : torch.Tensor
        Batch of waveforms fed to all three models.
    lens : torch.Tensor
        Lengths passed to the French and English models. The Tunisian model
        is called with ``None`` instead.

    Returns
    -------
    torch.Tensor
        Output of ``merge_strategy``, called with the French, English and
        Tunisian embeddings followed by their posteriograms.
    """
    # Tunisian: wav2vec2 features and CTC log-probabilities.
    tn_embeddings, tn_posteriogram = asr_brain.custom_encode(batch, None)

    # French: wav2vec2 features, then the encoder output.
    fr_embeddings = french_asr_model.mods.encoder.wav2vec2(batch)
    fr_posteriogram = french_asr_model.encode_batch(batch, lens)

    # English: wav2vec2 features, then the CTC layer output.
    # NOTE(review): no log-softmax is applied here, unlike the Tunisian branch.
    en_embeddings = english_asr_model.modules.wav2vec2(batch, lens)
    en_encoded = english_asr_model.modules.enc(en_embeddings)
    en_posteriogram = english_asr_model.modules.ctc_lin(en_encoded)

    if verbose != 0:
        print('[EMBEDDINGS] FR:', fr_embeddings.shape, "EN:", en_embeddings.shape, "TN:", tn_embeddings.shape)
        print('[POSTERIOGRAM] FR:', fr_posteriogram.shape, "EN:", en_posteriogram.shape, "TN:", tn_posteriogram.shape)

    return merge_strategy(
        fr_embeddings,
        en_embeddings,
        tn_embeddings,
        fr_posteriogram,
        en_posteriogram,
        tn_posteriogram,
    )
463
+
464
class Mixer(sb.core.Brain):
    """Brain trained on the merged outputs of the three monolingual models.

    ``middle_layer`` produces the concatenated French/English/Tunisian
    features; this brain's ``enc`` and ``ctc_lin`` modules turn them into
    CTC log-probabilities. Only ``hparams.model`` is optimized.

    NOTE(review): block nesting was reconstructed from a listing whose
    indentation had been flattened -- confirm against the original file.
    """

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output probabilities."""
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)

        if stage == sb.Stage.TRAIN:
            if hasattr(self.hparams, "augmentation"):
                wavs = self.hparams.augmentation(wavs, wav_lens)

        multi_langual_feats = middle_layer(wavs, wav_lens)
        multi_langual_feats = multi_langual_feats.to(device)
        feats, _ = self.modules.enc(multi_langual_feats)
        logits = self.modules.ctc_lin(feats)
        p_ctc = self.hparams.log_softmax(logits)

        # Greedy token sequences are only needed for scoring.
        if stage != sb.Stage.TRAIN:
            p_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
        else:
            p_tokens = None
        return p_ctc, wav_lens, p_tokens

    def treat_wav(self, sig):
        """Transcribe one signal with the module-level CTC ``decoder``.

        Arguments
        ---------
        sig : torch.Tensor
            Waveform batch; only the first row is transcribed.

        Returns
        -------
        str
            Decoded text of the first row.
        """
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            multi_langual_feats = middle_layer(
                sig.to("cpu"), torch.tensor([1]).to("cpu")
            )
            multi_langual_feats = multi_langual_feats.to(device)
            feats, _ = self.modules.enc(multi_langual_feats)
            logits = self.modules.ctc_lin(feats)
            p_ctc = self.hparams.log_softmax(logits)
        # Only the first row is returned, so only that row is decoded.
        text = decoder.decode(p_ctc[0].detach().cpu().numpy())
        return " ".join(text.split(" "))

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets.

        Validation is scored with greedy decoding. Test uses the
        module-level ``decoder`` when ``hparams.language_modelling`` is
        set, and greedy decoding otherwise.
        """
        p_ctc, wav_lens, predicted_tokens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage in (sb.Stage.VALID, sb.Stage.TEST):
            if stage == sb.Stage.TEST and self.hparams.language_modelling:
                predicted_words = []
                for logs in p_ctc:
                    text = decoder.decode(logs.detach().cpu().numpy())
                    predicted_words.append(text.split(" "))
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]

            target_words = [wrd.split(" ") for wrd in batch.wrd]
            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input.

        The optimizer only steps every ``grad_accumulation_factor`` batches.
        """
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision
        # TOFIX: CTC fine-tuning currently is unstable
        # This is certainly due to CTC being done in fp16 instead of fp32
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    self.scaler.step(self.model_optimizer)
                self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches"""
        predictions = self.compute_forward(batch, stage=stage)
        with torch.no_grad():
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch"""
        # Fresh error-rate trackers for every validation/test stage.
        if stage != sb.Stage.TRAIN:
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch."""
        # Compute/store important stats
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Only the checkpoint with the lowest WER is kept.
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            with open(self.hparams.wer_file, "w") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        """Create the model optimizer and register it with the checkpointer."""
        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Reset the gradients held by the model optimizer."""
        self.model_optimizer.zero_grad(set_to_none)
645
+
646
+
647
+
648
+
649
# Load the hyperparameters of the mixer model. This rebinds hparams_file,
# run_opts, overrides and hparams, which previously described the Tunisian
# model.
hparams_file, run_opts, overrides = sb.parse_arguments(["cs.yaml"])

# If distributed_launch=True, create the DDP group with the right
# communication protocol.
sb.utils.distributed.ddp_init_group(run_opts)

with open(hparams_file) as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# Create the experiment directory and save the hyperparameters used.
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)
664
def read_labels_file(labels_file):
    """Read the label list from a ``label => index`` text file.

    Every line before the first line containing ``===`` must have the form
    ``<quoted label> => <integer index>``. Blank lines are ignored.

    Arguments
    ---------
    labels_file : str
        Path of the UTF-8 encoded label file.

    Returns
    -------
    list of str
        Labels ordered by index, i.e. ``result[i]`` is the label of index
        ``i``.

    Raises
    ------
    KeyError
        If the indices are not the contiguous range ``0..len-1``.
    """
    import ast

    with open(labels_file, "r", encoding="utf-8") as lf:
        lines = lf.read().splitlines()
    division = "==="
    numbers = {}
    for line in lines:
        if division in line:
            break
        if not line.strip():
            continue
        # Split on the LAST separator so a label that itself contains "=>"
        # is kept whole.
        string, number = line.rsplit("=>", 1)
        try:
            # Undo the quoting and escaping of the stored label.
            label = ast.literal_eval(string.strip())
        except (ValueError, SyntaxError):
            # Not a valid literal: fall back to dropping the opening quote
            # and the closing quote plus the space before the separator.
            label = string[1:-2]
        numbers[int(number)] = label
    return [numbers[x] for x in range(len(numbers))]
677
+
678
# --- Character vocabulary ---------------------------------------------------
label_encoder = sb.dataio.encoder.CTCTextEncoder()

lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
special_labels = {
    "blank_label": hparams["blank_index"],
    "unk_label": hparams["unk_index"],
}
# No dataset is available here, so an empty one is passed.
label_encoder.load_or_create(
    path=lab_enc_file,
    from_didatasets=[[]],
    output_key="char_list",
    special_labels=special_labels,
    sequence_input=True,
)

# --- Label list for the CTC decoder -----------------------------------------
labels = read_labels_file(
    os.path.join(hparams["save_folder"], "label_encoder.txt")
)
# The first label is replaced by "" and the last one by "1".
labels = [""] + labels[1:-1] + ["1"]
if hparams["language_modelling"]:
    decoder = build_ctcdecoder(
        labels,
        kenlm_model_path=hparams["ngram_lm_path"],  # either .arpa or .bin file
        alpha=0.5,  # tuned on a val set
        beta=1,  # tuned on a val set
    )

# --- Mixer brain, on the CPU and in evaluation mode -------------------------
run_opts["device"] = "cpu"

mixer = Mixer(
    modules=hparams["modules"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)
# The tokenizer is added dynamically to the brain.
mixer.tokenizer = label_encoder
mixer.device = "cpu"
mixer.checkpointer.recover_if_possible()
mixer.modules.eval()

# NOTE(review): this rebinds the module-level name to a fresh, empty encoder;
# mixer.tokenizer still refers to the one loaded above.
label_encoder = sb.dataio.encoder.CTCTextEncoder()

# This decoder is always built, replacing the conditional one above.
decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="arpas/everything.arpa",  # either .arpa or .bin file
    alpha=0.5,  # tuned on a val set
    beta=1,  # tuned on a val set
)

device = "cpu"
mixer.device = "cpu"
mixer.modules.to("cpu")
739
+ from enum import Enum, auto
740
class Stage(Enum):
    """Local enumeration of the three processing stages.

    The values are the ones ``auto()`` would assign: 1, 2 and 3.
    """

    TRAIN = 1
    VALID = 2
    TEST = 3
744
+
745
# Prepare the Tunisian brain for evaluation and put its modules in eval mode.
asr_brain.on_evaluate_start()
asr_brain.modules.eval()
747
+
748
+
749
+ import gradio as gr
750
+
751
def treat_wav_file(file_mic, file_upload, asr=mixer, device="cpu"):
    """Transcribe a recorded or an uploaded audio file.

    Arguments
    ---------
    file_mic : str or None
        Path of the microphone recording. It wins when both inputs are given.
    file_upload : str or None
        Path of the uploaded file.
    asr : object
        Model exposing ``treat_wav(signal)``.
    device : str
        Device the loaded signal is moved to.

    Returns
    -------
    str
        The transcription, an error message when no input was given, or a
        warning followed by the transcription when both inputs were given.
    """
    if file_mic is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if file_mic is not None and file_upload is not None:
        # This warning used to be built and then dropped; it is now returned
        # in front of the transcription.
        warn_output = "WARNING: You've uploaded an audio file and used the microphone. The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
    wav = file_mic if file_mic is not None else file_upload

    sig, sr = torchaudio.load(wav)
    if sig.dim() > 1 and sig.shape[0] > 1:
        # Downmix multi-channel audio so the channels are not treated as
        # separate batch items.
        # assumes torchaudio.load returns (channels, time) -- TODO confirm
        sig = sig.mean(dim=0, keepdim=True)
    tensor_wav = sig.to(device)
    resampled = torchaudio.functional.resample(tensor_wav, sr, 16000)
    sentence = asr.treat_wav(resampled)
    return warn_output + sentence
766
+
767
# Build and start the web demo: one microphone input, one file-upload input
# (both optional) and a text output with the transcription.
gr.Interface(
    fn=treat_wav_file,
    inputs=[
        gr.Audio(
            source="microphone",
            type='filepath',
            label="record",
            optional=True,
        ),
        gr.Audio(
            source="upload",
            type='filepath',
            label="filein",
            optional=True,
        ),
    ],
    outputs="text",
).launch()
772
+
TunisianASR/results/14epoch_tunisian/1234/env.log CHANGED
@@ -5,343 +5,475 @@ Python version:
5
  [GCC 7.3.0]
6
  ==============================
7
  Installed Python packages:
8
- absl-py==1.2.0
9
- aiohttp==3.8.1
 
 
10
  aiosignal==1.2.0
11
  alabaster==0.7.12
12
- anaconda-client==1.7.2
13
- anaconda-navigator==1.10.0
14
- anaconda-project==0.8.3
15
  antlr4-python3-runtime==4.9.3
 
16
  appdirs==1.4.4
17
- argh==0.26.2
18
- argon2-cffi @ file:///tmp/build/80754af9/argon2-cffi_1596828493937/work
19
- asn1crypto @ file:///tmp/build/80754af9/asn1crypto_1596577642040/work
20
- astroid @ file:///tmp/build/80754af9/astroid_1592495912941/work
21
- astropy==4.0.2
 
22
  async-generator==1.10
23
- async-timeout==4.0.2
24
- atomicwrites==1.4.0
25
- attrs @ file:///tmp/build/80754af9/attrs_1604765588209/work
 
 
 
 
 
26
  audioread==2.1.9
27
- autopep8 @ file:///tmp/build/80754af9/autopep8_1596578164842/work
28
- Babel @ file:///tmp/build/80754af9/babel_1605108370292/work
 
 
 
29
  backcall==0.2.0
30
- backports.functools-lru-cache==1.6.1
31
- backports.shutil-get-terminal-size==1.0.0
32
- backports.tempfile==1.0
33
- backports.weakref==1.0.post1
34
- beautifulsoup4 @ file:///tmp/build/80754af9/beautifulsoup4_1601924105527/work
35
- bitarray @ file:///tmp/build/80754af9/bitarray_1605065113847/work
36
- bkcharts==0.2
37
- black==22.12.0
38
- bleach @ file:///tmp/build/80754af9/bleach_1600439572647/work
39
- bokeh @ file:///tmp/build/80754af9/bokeh_1603297833684/work
40
- boto==2.49.0
41
- boto3==1.28.43
42
- botocore==1.31.43
43
- Bottleneck==1.3.2
44
  bpemb==0.3.4
45
- brotlipy==0.7.0
46
- cachetools==5.2.0
47
- certifi==2020.6.20
48
- cffi @ file:///tmp/build/80754af9/cffi_1600699146221/work
 
49
  chardet==3.0.4
50
- charset-normalizer==2.0.12
51
- click==8.1.3
52
- cloudpickle @ file:///tmp/build/80754af9/cloudpickle_1598884132938/work
53
- clyent==1.2.2
54
- colorama @ file:///tmp/build/80754af9/colorama_1603211150991/work
55
- coloredlogs==15.0.1
56
- conda==4.9.2
57
- conda-build==3.20.5
58
- conda-package-handling @ file:///tmp/build/80754af9/conda-package-handling_1603018141399/work
59
- conda-verify==3.4.2
 
60
  conllu==4.5.3
61
- contextlib2==0.6.0.post1
62
- cryptography @ file:///tmp/build/80754af9/cryptography_1601046815590/work
 
 
63
  cycler==0.10.0
64
- Cython @ file:///tmp/build/80754af9/cython_1594831566883/work
65
- cytoolz==0.11.0
66
- dask @ file:///tmp/build/80754af9/dask-core_1602083700509/work
67
- datasets==1.18.3
68
  decorator==4.4.2
69
- defusedxml==0.6.0
 
 
70
  Deprecated==1.2.14
71
- diff-match-patch @ file:///tmp/build/80754af9/diff-match-patch_1594828741838/work
72
- dill==0.3.4
73
- distributed @ file:///tmp/build/80754af9/distributed_1605066520644/work
 
 
 
 
 
 
 
 
74
  docutils==0.16
 
 
 
75
  easyocr==1.2.1
76
- einops==0.3.0
 
 
77
  entrypoints==0.3
78
- et-xmlfile==1.0.1
 
79
  farasapy==0.0.14
80
- fastcache==1.1.0
 
 
81
  ffmpeg-python==0.2.0
 
82
  filelock==3.0.12
83
  flair==0.12.2
84
- flake8 @ file:///tmp/build/80754af9/flake8_1601911421857/work
85
- Flask==1.1.2
86
- flatbuffers==22.9.24
87
- frozenlist==1.3.0
88
- fsspec==2022.3.0
89
  ftfy==6.1.1
90
  future==0.18.2
 
 
91
  gdown==4.4.0
92
- gensim==4.1.2
93
- gevent @ file:///tmp/build/80754af9/gevent_1601397537062/work
94
- glob2==0.7
95
- gmpy2==2.0.8
96
- google-auth==2.12.0
97
- google-auth-oauthlib==0.4.6
98
- greenlet @ file:///tmp/build/80754af9/greenlet_1600874013538/work
99
- grpcio==1.49.1
 
 
 
 
 
 
 
 
 
100
  h5py==2.10.0
101
- HeapDict==1.0.1
102
- html5lib @ file:///tmp/build/80754af9/html5lib_1593446221756/work
103
- huggingface-hub==0.16.4
104
- humanfriendly==10.0
 
 
 
 
 
105
  hyperopt==0.2.7
106
- idna @ file:///tmp/build/80754af9/idna_1593446292537/work
107
- imageio @ file:///tmp/build/80754af9/imageio_1594161405741/work
 
 
 
108
  imagesize==1.2.0
109
- imhist==0.0.4
110
- importlib-metadata==5.0.0
111
- imWatermark==0.0.2
112
- iniconfig @ file:///tmp/build/80754af9/iniconfig_1602780191262/work
113
- install==1.3.5
114
- intervaltree @ file:///tmp/build/80754af9/intervaltree_1598376443606/work
115
- invisible-watermark==0.1.5
116
- ipykernel @ file:///tmp/build/80754af9/ipykernel_1596207638929/work/dist/ipykernel-5.3.4-py3-none-any.whl
117
- ipython @ file:///tmp/build/80754af9/ipython_1604101197014/work
118
  ipython-genutils==0.2.0
119
- ipywidgets @ file:///tmp/build/80754af9/ipywidgets_1601490159889/work
120
- isort @ file:///tmp/build/80754af9/isort_1602603989581/work
121
- itsdangerous==1.1.0
 
 
 
122
  Janome==0.5.0
123
- jdcal==1.4.1
124
- jedi @ file:///tmp/build/80754af9/jedi_1592841866100/work
125
- jeepney @ file:///tmp/build/80754af9/jeepney_1605069705079/work
126
- Jinja2==2.11.2
127
- jiwer==2.3.0
128
- jmespath==1.0.1
129
- joblib @ file:///tmp/build/80754af9/joblib_1601912903842/work
130
- json5==0.9.5
131
- jsonschema @ file:///tmp/build/80754af9/jsonschema_1602607155483/work
132
- jupyter==1.0.0
133
- jupyter-client @ file:///tmp/build/80754af9/jupyter_client_1601311786391/work
134
- jupyter-console @ file:///tmp/build/80754af9/jupyter_console_1598884538475/work
135
- jupyter-core==4.6.3
136
- jupyterlab==2.2.6
137
- jupyterlab-pygments @ file:///tmp/build/80754af9/jupyterlab_pygments_1601490720602/work
138
- jupyterlab-server @ file:///tmp/build/80754af9/jupyterlab_server_1594164409481/work
139
- keyring @ file:///tmp/build/80754af9/keyring_1601490835422/work
140
- kiwisolver @ file:///tmp/build/80754af9/kiwisolver_1604014535162/work
 
 
 
141
  langdetect==1.0.9
142
- lazy-object-proxy==1.4.3
143
- libarchive-c==2.9
144
- librosa==0.9.1
145
- llvmlite==0.34.0
146
- locket==0.2.0
147
- lxml @ file:///tmp/build/80754af9/lxml_1603216285000/work
148
- Markdown==3.4.1
149
- MarkupSafe==1.1.1
150
- matplotlib @ file:///tmp/build/80754af9/matplotlib-base_1603378225747/work
 
 
 
 
 
 
151
  mccabe==0.6.1
 
 
 
 
 
 
152
  mido==1.2.10
153
  mistune==0.8.4
154
- mkl-fft==1.2.0
155
- mkl-random==1.1.1
156
- mkl-service==2.3.0
157
- mock==4.0.2
158
- more-itertools @ file:///tmp/build/80754af9/more-itertools_1605111547926/work
159
  mpld3==0.3
160
- mpmath==1.1.0
161
- msgpack==1.0.0
162
- multidict==6.0.2
163
- multipledispatch==0.6.0
164
- multiprocess==0.70.12.2
165
- mypy-extensions==0.4.3
166
- navigator-updater==0.2.1
167
- nbclient @ file:///tmp/build/80754af9/nbclient_1602783176460/work
168
- nbconvert @ file:///tmp/build/80754af9/nbconvert_1601914830498/work
169
- nbformat @ file:///tmp/build/80754af9/nbformat_1602783287752/work
170
- nest-asyncio @ file:///tmp/build/80754af9/nest-asyncio_1605115881283/work
171
- networkx @ file:///tmp/build/80754af9/networkx_1598376031484/work
172
- nltk @ file:///tmp/build/80754af9/nltk_1592496090529/work
173
- nose==1.3.7
174
- notebook @ file:///tmp/build/80754af9/notebook_1601501575118/work
175
- numba @ file:///tmp/build/80754af9/numba_1600100669015/work
176
- numexpr==2.7.1
177
- numpy @ file:///tmp/build/80754af9/numpy_and_numpy_base_1603570489231/work
178
- numpydoc @ file:///tmp/build/80754af9/numpydoc_1605117425582/work
179
- oauthlib==3.2.1
180
- olefile==0.46
181
- omegaconf==2.2.3
182
- onnx==1.12.0
183
- onnxruntime==1.12.1
 
 
 
 
 
 
 
184
  opencv-python==4.4.0.46
185
- openpyxl @ file:///tmp/build/80754af9/openpyxl_1598113097404/work
186
- packaging==20.9
187
- pandas @ file:///tmp/build/80754af9/pandas_1602088120436/work
188
- pandocfilters @ file:///tmp/build/80754af9/pandocfilters_1605120460739/work
189
- parso==0.7.0
190
- partd==1.1.0
191
- path @ file:///tmp/build/80754af9/path_1598376507494/work
192
- pathlib2==2.3.5
193
- pathspec==0.10.3
 
 
 
 
 
 
194
  pathtools==0.1.2
195
- patsy==0.5.1
196
- pep8==1.7.1
 
 
197
  pexpect==4.8.0
 
198
  pickleshare==0.7.5
199
- Pillow @ file:///tmp/build/80754af9/pillow_1603822255246/work
200
- pkginfo==1.6.1
201
- platformdirs==2.6.0
202
  pluggy==0.13.1
203
- ply==3.11
204
- pooch==1.6.0
205
  pptree==3.1
 
 
206
  pretty-midi==0.2.9
207
- prometheus-client==0.8.0
208
- prompt-toolkit @ file:///tmp/build/80754af9/prompt-toolkit_1602688806899/work
209
- protobuf==3.19.6
210
- psutil @ file:///tmp/build/80754af9/psutil_1598370257551/work
 
 
 
 
211
  ptyprocess==0.6.0
212
- py @ file:///tmp/build/80754af9/py_1593446248552/work
213
  py-espeak-ng==0.1.8
214
  py4j==0.10.9.7
 
 
 
 
 
 
215
  PyArabic==0.6.15
216
- pyarrow==7.0.0
217
  pyasn1==0.4.8
218
  pyasn1-modules==0.2.8
219
- pycodestyle==2.6.0
220
- pycosat==0.6.3
221
- pycparser @ file:///tmp/build/80754af9/pycparser_1594388511720/work
222
- pycurl==7.43.0.6
 
 
 
 
223
  pyDeprecate==0.3.1
224
- pydocstyle @ file:///tmp/build/80754af9/pydocstyle_1598885001695/work
225
- pyflakes==2.2.0
226
- Pygments @ file:///tmp/build/80754af9/pygments_1604103097372/work
227
- pylint @ file:///tmp/build/80754af9/pylint_1598623985952/work
228
- pyodbc===4.0.0-unsupported
229
- pyOpenSSL @ file:///tmp/build/80754af9/pyopenssl_1594392929924/work
230
  pyparsing==2.4.7
231
- pyrsistent @ file:///tmp/build/80754af9/pyrsistent_1600141720057/work
 
 
 
232
  PySocks==1.7.1
233
- pytest==0.0.0
 
 
234
  python-bidi==0.4.2
235
  python-crfsuite==0.9.7
236
- python-dateutil==2.8.1
237
- python-jsonrpc-server @ file:///tmp/build/80754af9/python-jsonrpc-server_1600278539111/work
238
- python-language-server @ file:///tmp/build/80754af9/python-language-server_1600454544709/work
239
  python-Levenshtein==0.12.2
240
- pytorch-lightning==1.4.2
 
 
 
241
  pytorch-revgrad==0.2.0
242
- pytz==2020.1
243
- PyWavelets @ file:///tmp/build/80754af9/pywavelets_1601658317819/work
244
- pyxdg @ file:///tmp/build/80754af9/pyxdg_1603822279816/work
245
- PyYAML==5.3.1
246
- pyzmq==19.0.2
247
- QDarkStyle==2.8.1
248
- QtAwesome @ file:///tmp/build/80754af9/qtawesome_1602272867890/work
249
- qtconsole @ file:///tmp/build/80754af9/qtconsole_1600870028330/work
250
- QtPy==1.9.0
251
- regex @ file:///tmp/build/80754af9/regex_1602786672676/work
252
- requests @ file:///tmp/build/80754af9/requests_1592841827918/work
253
- requests-oauthlib==1.3.1
254
  resampy==0.2.2
255
- rope @ file:///tmp/build/80754af9/rope_1602264064449/work
256
- rsa==4.9
257
- Rtree==0.9.4
258
- ruamel-yaml==0.15.87
259
- s3transfer==0.6.2
260
- sacremoses==0.0.49
261
- safetensors==0.3.3
262
- scikit-image==0.17.2
263
- scikit-learn @ file:///tmp/build/80754af9/scikit-learn_1598376899566/work
264
- scipy @ file:///tmp/build/80754af9/scipy_1597686649129/work
265
- seaborn @ file:///tmp/build/80754af9/seaborn_1600553570093/work
266
- SecretStorage==3.1.2
 
 
 
 
 
 
267
  segtok==1.5.11
 
 
268
  Send2Trash==1.5.0
269
- sentencepiece==0.1.97
270
- simplegeneric==0.8.1
271
- singledispatch @ file:///tmp/build/80754af9/singledispatch_1602523705405/work
272
- sip==4.19.13
273
- six @ file:///tmp/build/80754af9/six_1605205327372/work
274
- smart-open==5.2.1
 
 
 
 
 
275
  snowballstemmer==2.0.0
276
- sortedcollections==1.2.1
277
- sortedcontainers==2.2.2
 
278
  SoundFile==0.10.3.post1
279
- soupsieve==2.0.1
 
 
 
280
  sphfile==1.0.3
281
- Sphinx @ file:///tmp/build/80754af9/sphinx_1597428793432/work
 
282
  sphinxcontrib-applehelp==1.0.2
 
283
  sphinxcontrib-devhelp==1.0.2
284
  sphinxcontrib-htmlhelp==1.0.3
285
  sphinxcontrib-jsmath==1.0.1
286
  sphinxcontrib-qthelp==1.0.3
287
  sphinxcontrib-serializinghtml==1.1.4
288
- sphinxcontrib-websupport @ file:///tmp/build/80754af9/sphinxcontrib-websupport_1597081412696/work
289
- spyder @ file:///tmp/build/80754af9/spyder_1599056981321/work
290
- spyder-kernels @ file:///tmp/build/80754af9/spyder-kernels_1599056754858/work
291
- SQLAlchemy @ file:///tmp/build/80754af9/sqlalchemy_1603397987316/work
292
  sqlitedict==2.1.0
293
- statsmodels @ file:///tmp/build/80754af9/statsmodels_1602280205159/work
294
- sympy @ file:///tmp/build/80754af9/sympy_1605119542615/work
295
- tables==3.6.1
296
- tabulate==0.9.0
297
- tblib @ file:///tmp/build/80754af9/tblib_1597928476713/work
298
- tensorboard==2.10.1
299
- tensorboard-data-server==0.6.1
300
- tensorboard-plugin-wit==1.8.1
301
- terminado==0.9.1
 
 
 
 
 
 
302
  testpath==0.4.4
303
- threadpoolctl @ file:///tmp/tmp9twdgx9k/threadpoolctl-2.1.0-py3-none-any.whl
304
- tifffile==2020.10.1
 
 
305
  tkseem==0.0.3
306
  tokenizers==0.13.3
307
- toml @ file:///tmp/build/80754af9/toml_1592853716807/work
308
- tomli==2.0.1
309
- toolz @ file:///tmp/build/80754af9/toolz_1601054250827/work
310
- torch==1.11.0
311
- torchaudio==0.11.0
312
- torchmetrics==0.6.0
313
- torchvision==0.8.2
314
- tornado==6.0.4
315
- tqdm==4.64.0
316
- traitlets @ file:///tmp/build/80754af9/traitlets_1602787416690/work
 
 
 
317
  transformer-smaller-training-vocab==0.3.1
318
- transformers==4.33.1
 
 
 
319
  typing-extensions==4.4.0
320
- ujson @ file:///tmp/build/80754af9/ujson_1602523317881/work
321
- unicodecsv==0.14.1
322
- urllib3 @ file:///tmp/build/80754af9/urllib3_1603305693037/work
323
- watchdog @ file:///tmp/build/80754af9/watchdog_1593447344699/work
324
- wcwidth @ file:///tmp/build/80754af9/wcwidth_1593447189090/work
 
 
 
 
 
325
  webencodings==0.5.1
 
 
326
  Werkzeug==1.0.1
 
327
  widgetsnbextension==3.5.1
328
  Wikipedia-API==0.6.0
329
- wrapt==1.11.2
330
- wurlitzer @ file:///tmp/build/80754af9/wurlitzer_1594753850195/work
331
- xlrd==1.2.0
332
- XlsxWriter @ file:///tmp/build/80754af9/xlsxwriter_1602692860603/work
333
- xlwt==1.3.0
334
- xmltodict==0.12.0
335
- xxhash==3.0.0
336
- yapf @ file:///tmp/build/80754af9/yapf_1593528177422/work
337
  yarl==1.7.2
338
- zict==2.0.0
339
- zipp @ file:///tmp/build/80754af9/zipp_1604001098328/work
340
- zope.event==4.5.0
341
- zope.interface @ file:///tmp/build/80754af9/zope.interface_1602002420968/work
342
  ==============================
343
  Git revision:
344
- 8a51838
345
  ==============================
346
  CUDA version:
347
  11.7
 
5
  [GCC 7.3.0]
6
  ==============================
7
  Installed Python packages:
8
+ abkhazia==1.0
9
+ absl-py==0.11.0
10
+ aiofiles==23.2.1
11
+ aiohttp==3.8.0
12
  aiosignal==1.2.0
13
  alabaster==0.7.12
14
+ alembic==1.7.4
15
+ altair==4.2.0
16
+ altgraph==0.17
17
  antlr4-python3-runtime==4.9.3
18
+ anyio==3.6.2
19
  appdirs==1.4.4
20
+ argcomplete==1.12.2
21
+ argon2-cffi==20.1.0
22
+ arrow==1.2.3
23
+ asgiref==3.6.0
24
+ asteroid-filterbanks==0.4.0
25
+ astunparse==1.6.3
26
  async-generator==1.10
27
+ async-timeout==4.0.0
28
+ attrdict==2.0.1
29
+ attrs==20.3.0
30
+ audeer==1.16.0
31
+ audformat==0.11.5
32
+ audinterface==0.7.0
33
+ audiofile==1.0.0
34
+ audiomentations==0.25.0
35
  audioread==2.1.9
36
+ audobject==0.4.14
37
+ audresample==0.1.6
38
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
39
+ autopage==0.4.0
40
+ Babel==2.9.0
41
  backcall==0.2.0
42
+ backports.cached-property==1.0.2
43
+ beautifulsoup4==4.10.0
44
+ black==19.10b0
45
+ bleach==3.3.0
46
+ blessed==1.20.0
47
+ boto3==1.20.2
48
+ botocore==1.23.2
 
 
 
 
 
 
 
49
  bpemb==0.3.4
50
+ braceexpand==0.1.7
51
+ cachetools==4.2.0
52
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
53
+ cffi==1.14.3
54
+ cfgv==3.2.0
55
  chardet==3.0.4
56
+ charset-normalizer==2.0.7
57
+ click==7.1.2
58
+ cliff==3.9.0
59
+ clldutils==3.5.4
60
+ cloudpickle==2.2.1
61
+ cmaes==0.8.2
62
+ cmake==3.18.4.post1
63
+ cmd2==2.2.0
64
+ colorama==0.4.4
65
+ colorlog==4.6.2
66
+ configparser==5.1.0
67
  conllu==4.5.3
68
+ croniter==1.3.15
69
+ cryptography==38.0.4
70
+ csrgraph==0.1.28
71
+ csvw==1.8.1
72
  cycler==0.10.0
73
+ Cython==0.29.21
74
+ dataclasses==0.6
75
+ dateutils==0.6.12
 
76
  decorator==4.4.2
77
+ deepdiff==6.3.0
78
+ deepspeech==0.9.1
79
+ defusedxml==0.7.1
80
  Deprecated==1.2.14
81
+ dill==0.3.3
82
+ Distance==0.1.3
83
+ distlib==0.3.1
84
+ Django==3.2.16
85
+ django-auditlog==2.2.1
86
+ django-filter==22.1
87
+ django-js-asset==1.2.2
88
+ django-mptt==0.14.0
89
+ djangorestframework==3.14.0
90
+ docker-pycreds==0.4.0
91
+ docopt==0.6.2
92
  docutils==0.16
93
+ drf-excel==2.2.0
94
+ drf-flex-fields==1.0.0
95
+ drf-renderer-xlsx==0.4.1
96
  easyocr==1.2.1
97
+ editdistance==0.6.0
98
+ einops==0.3.2
99
+ emoji==2.2.0
100
  entrypoints==0.3
101
+ et-xmlfile==1.1.0
102
+ exceptiongroup==1.1.0
103
  farasapy==0.0.14
104
+ fastapi==0.98.0
105
+ fastjsonschema==2.17.1
106
+ fasttext==0.9.2
107
  ffmpeg-python==0.2.0
108
+ ffmpy==0.3.0
109
  filelock==3.0.12
110
  flair==0.12.2
111
+ flake8==3.7.9
112
+ flatbuffers==1.12
113
+ frozendict==2.0.7
114
+ frozenlist==1.2.0
115
+ fsspec==2021.11.0
116
  ftfy==6.1.1
117
  future==0.18.2
118
+ g2p-en==2.1.0
119
+ gast==0.3.3
120
  gdown==4.4.0
121
+ gdrive==0.1.5
122
+ gensim==4.0.1
123
+ gitdb==4.0.9
124
+ GitPython==3.1.24
125
+ google-api-core==2.11.1
126
+ google-api-python-client==2.43.0
127
+ google-auth==1.24.0
128
+ google-auth-httplib2==0.1.0
129
+ google-auth-oauthlib==0.5.3
130
+ google-pasta==0.2.0
131
+ googleapis-common-protos==1.59.1
132
+ gradio==3.44.4
133
+ gradio-client==0.5.1
134
+ greenlet==1.1.2
135
+ grpcio==1.32.0
136
+ h11==0.14.0
137
+ h5features==1.3.2
138
  h5py==2.10.0
139
+ hierarchy==0.4.0
140
+ hmmlearn==0.2.8
141
+ htk-io==0.5
142
+ httpcore==0.16.3
143
+ httplib2==0.22.0
144
+ httpx==0.23.3
145
+ huggingface-hub==0.15.1
146
+ hydra-colorlog==0.1.4
147
+ hydra-core==1.3.2
148
  hyperopt==0.2.7
149
+ HyperPyYAML==1.1.0
150
+ hypothesis==6.61.2
151
+ identify==1.5.10
152
+ idna==2.10
153
+ imageio==2.9.0
154
  imagesize==1.2.0
155
+ importlib-metadata==4.8.1
156
+ importlib-resources==5.2.2
157
+ inflect==5.3.0
158
+ inquirer==3.1.3
159
+ ipadic==1.0.0
160
+ ipyevents==2.0.1
161
+ ipykernel==5.3.4
162
+ ipython==7.19.0
 
163
  ipython-genutils==0.2.0
164
+ ipywebrtc==0.6.0
165
+ ipywidgets==7.6.3
166
+ iso-639==0.4.5
167
+ isodate==0.6.0
168
+ isort==4.3.21
169
+ itsdangerous==2.1.2
170
  Janome==0.5.0
171
+ jedi==0.17.2
172
+ jeepney==0.8.0
173
+ jieba==0.42.1
174
+ Jinja2==3.0.3
175
+ jiwer==2.2.0
176
+ jmespath==0.10.0
177
+ joblib==0.17.0
178
+ jsonschema==3.2.0
179
+ julius==0.2.7
180
+ jupyter-client==6.1.7
181
+ jupyter-core==4.7.0
182
+ jupyterlab-pygments==0.1.2
183
+ jupyterlab-widgets==1.0.0
184
+ kaitaistruct==0.9
185
+ kaldi-io==0.9.4
186
+ kaldi-python-io==1.2.2
187
+ kaldiio==2.17.2
188
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
189
+ Keras-Preprocessing==1.1.2
190
+ kiwisolver==1.3.1
191
+ lang-trans==0.6.0
192
  langdetect==1.0.9
193
+ latexcodec==2.0.1
194
+ ldap3==2.9.1
195
+ librosa==0.9.0
196
+ lightning-cloud==0.5.37
197
+ lightning-utilities==0.8.0
198
+ linkify-it-py==1.0.3
199
+ lit==16.0.6
200
+ llvmlite==0.35.0
201
+ lxml==4.9.0
202
+ Mako==1.1.5
203
+ Markdown==3.3.3
204
+ markdown-it-py==3.0.0
205
+ MarkupSafe==2.1.3
206
+ marshmallow==3.14.0
207
+ matplotlib==3.3.3
208
  mccabe==0.6.1
209
+ mcd==0.4
210
+ mdit-py-plugins==0.3.3
211
+ mdurl==0.1.2
212
+ mecab-python3==1.0.3
213
+ megatron-lm==2.2.0
214
+ metrics==0.3.3
215
  mido==1.2.10
216
  mistune==0.8.4
217
+ more-itertools==8.6.0
 
 
 
 
218
  mpld3==0.3
219
+ mpmath==1.2.1
220
+ multidict==5.2.0
221
+ multiprocess==0.70.11.1
222
+ nbclient==0.5.3
223
+ nbconvert==5.6.1
224
+ nbformat==5.9.0
225
+ NEMO==4.3.2
226
+ nemo-toolkit==1.4.0
227
+ nest-asyncio==1.5.1
228
+ networkx==2.8.8
229
+ nltk==3.2.4
230
+ nodeenv==1.5.0
231
+ normalize==2.0.2
232
+ notebook==6.3.0
233
+ numba==0.52.0
234
+ numpy==1.19.4
235
+ nvidia-cublas-cu11==11.10.3.66
236
+ nvidia-cuda-cupti-cu11==11.7.101
237
+ nvidia-cuda-nvrtc-cu11==11.7.99
238
+ nvidia-cuda-runtime-cu11==11.7.99
239
+ nvidia-cudnn-cu11==8.5.0.96
240
+ nvidia-cufft-cu11==10.9.0.58
241
+ nvidia-curand-cu11==10.2.10.91
242
+ nvidia-cusolver-cu11==11.4.0.1
243
+ nvidia-cusparse-cu11==11.7.4.91
244
+ nvidia-nccl-cu11==2.14.3
245
+ nvidia-nvtx-cu11==11.7.91
246
+ oauthlib==3.1.0
247
+ omegaconf==2.3.0
248
+ onnx==1.10.2
249
+ OpenCC==1.1.2
250
  opencv-python==4.4.0.46
251
+ openpyxl==3.0.9
252
+ opensmile==2.2.0
253
+ opt-einsum==3.3.0
254
+ optuna==2.10.0
255
+ ordered-set==4.1.0
256
+ orjson==3.8.4
257
+ oyaml==1.0
258
+ packaging==22.0
259
+ pandas==1.2.5
260
+ pandocfilters==1.4.3
261
+ pangu==4.0.6.1
262
+ parameterized==0.8.1
263
+ parso==0.7.1
264
+ pathlib2==2.3.7.post1
265
+ pathspec==0.5.5
266
  pathtools==0.1.2
267
+ pbr==5.6.0
268
+ pefile==2019.4.18
269
+ pescador==2.1.0
270
+ pesq==0.0.3
271
  pexpect==4.8.0
272
+ phonemizer==2.2.1
273
  pickleshare==0.7.5
274
+ Pillow==9.3.0
275
+ pip-api==0.0.23
276
+ pipreqs==0.4.11
277
  pluggy==0.13.1
278
+ pooch==1.3.0
279
+ portalocker==2.3.2
280
  pptree==3.1
281
+ pre-commit==2.9.0
282
+ preprocessing==0.1.13
283
  pretty-midi==0.2.9
284
+ prettytable==2.2.1
285
+ primePy==1.3
286
+ progressbar2==3.53.1
287
+ prometheus-client==0.10.1
288
+ promise==2.3
289
+ prompt-toolkit==3.0.8
290
+ protobuf==3.20.3
291
+ psutil==5.6.6
292
  ptyprocess==0.6.0
293
+ py==1.9.0
294
  py-espeak-ng==0.1.8
295
  py4j==0.10.9.7
296
+ pyannote.audio==2.1.1
297
+ pyannote.core==4.5
298
+ pyannote.database==4.1.3
299
+ pyannote.metrics==3.2.1
300
+ pyannote.pipeline==2.3
301
+ pyannotebook==0.1.0.dev0
302
  PyArabic==0.6.15
303
+ pyarrow==3.0.0
304
  pyasn1==0.4.8
305
  pyasn1-modules==0.2.8
306
+ pybind11==2.8.1
307
+ pybtex==0.24.0
308
+ pybtex-docutils==1.0.1
309
+ pycodestyle==2.5.0
310
+ pycparser==2.20
311
+ pycryptodome==3.16.0
312
+ pyctcdecode==0.4.0
313
+ pydantic==1.10.4
314
  pyDeprecate==0.3.1
315
+ pydub==0.25.1
316
+ pyflakes==2.1.1
317
+ Pygments==2.15.1
318
+ pygtrie==2.5.0
319
+ PyJWT==2.7.0
320
+ pymodbus==2.5.3
321
  pyparsing==2.4.7
322
+ pyperclip==1.8.2
323
+ pypinyin==0.43.0
324
+ pyrsistent==0.17.3
325
+ pyserial==3.5
326
  PySocks==1.7.1
327
+ pystoi==0.3.3
328
+ pytest==5.4.1
329
+ pytest-runner==5.3.1
330
  python-bidi==0.4.2
331
  python-crfsuite==0.9.7
332
+ python-dateutil==2.8.2
333
+ python-editor==1.0.4
 
334
  python-Levenshtein==0.12.2
335
+ python-multipart==0.0.5
336
+ python-utils==2.4.0
337
+ pytorch-lightning==1.6.5
338
+ pytorch-metric-learning==1.7.3
339
  pytorch-revgrad==0.2.0
340
+ pytube==11.0.1
341
+ pytz==2022.6
342
+ PyWavelets==1.1.1
343
+ PyYAML==6.0
344
+ pyzmq==20.0.0
345
+ rapidfuzz==1.8.2
346
+ readchar==4.0.5
347
+ regex==2020.11.13
348
+ requests==2.28.1
349
+ requests-oauthlib==1.3.0
 
 
350
  resampy==0.2.2
351
+ rfc3986==1.4.0
352
+ rich==13.4.2
353
+ richenum==1.3.1
354
+ rsa==4.7
355
+ ruamel.yaml==0.17.21
356
+ ruamel.yaml.clib==0.2.7
357
+ s3m==1.1.0
358
+ s3transfer==0.5.0
359
+ sacrebleu==2.0.0
360
+ sacremoses==0.0.44
361
+ safetensors==0.3.1
362
+ scikit-image==0.18.1
363
+ scikit-learn==0.23.2
364
+ scipy==1.5.4
365
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
366
+ seaborn==0.11.1
367
+ SecretStorage==3.3.3
368
+ segments==2.1.3
369
  segtok==1.5.11
370
+ semantic-version==2.10.0
371
+ semver==2.13.0
372
  Send2Trash==1.5.0
373
+ sentencepiece==0.1.99
374
+ sentry-sdk==1.4.3
375
+ shellingham==1.4.0
376
+ shortuuid==1.0.7
377
+ SIDEKIT==1.3.8.5.2
378
+ simplejson==3.17.5
379
+ singledispatchmethod==1.0
380
+ six==1.15.0
381
+ smart-open==5.0.0
382
+ smmap==5.0.0
383
+ sniffio==1.3.0
384
  snowballstemmer==2.0.0
385
+ sortedcollections==2.1.0
386
+ sortedcontainers==2.4.0
387
+ sounddevice==0.4.5
388
  SoundFile==0.10.3.post1
389
+ soupsieve==2.3
390
+ sox==1.4.1
391
+ sparsemax==0.1.9
392
+ speechbrain==0.5.14
393
  sphfile==1.0.3
394
+ Sphinx==3.3.1
395
+ sphinx-rtd-theme==0.2.4
396
  sphinxcontrib-applehelp==1.0.2
397
+ sphinxcontrib-bibtex==2.4.1
398
  sphinxcontrib-devhelp==1.0.2
399
  sphinxcontrib-htmlhelp==1.0.3
400
  sphinxcontrib-jsmath==1.0.1
401
  sphinxcontrib-qthelp==1.0.3
402
  sphinxcontrib-serializinghtml==1.1.4
403
+ SQLAlchemy==1.4.25
 
 
 
404
  sqlitedict==2.1.0
405
+ sqlparse==0.4.2
406
+ stanza==1.4.2
407
+ starlette==0.27.0
408
+ starsessions==1.3.0
409
+ stevedore==3.4.0
410
+ subprocess32==3.5.4
411
+ sympy==1.9
412
+ tabulate==0.8.9
413
+ tensorboard==2.4.0
414
+ tensorboard-plugin-wit==1.7.0
415
+ tensorboardX==2.6.1
416
+ tensorflow==2.4.0
417
+ tensorflow-estimator==2.4.0
418
+ termcolor==1.1.0
419
+ terminado==0.9.4
420
  testpath==0.4.4
421
+ threadpoolctl==2.1.0
422
+ tifffile==2020.12.8
423
+ tikzplotlib==0.9.8
424
+ tinycss2==1.2.1
425
  tkseem==0.0.3
426
  tokenizers==0.13.3
427
+ toml==0.10.2
428
+ toolz==0.12.0
429
+ torch==1.13.1
430
+ torch-audiomentations==0.11.0
431
+ torch-pitch-shift==1.2.4
432
+ torch-stft==0.1.4
433
+ torchaudio==0.13.1
434
+ torchmetrics==0.11.4
435
+ torchvision==0.14.1
436
+ tornado==6.1
437
+ tqdm==4.61.1
438
+ trackrip==1.2.1
439
+ traitlets==5.9.0
440
  transformer-smaller-training-vocab==0.3.1
441
+ transformers==4.30.2
442
+ triton==2.0.0
443
+ typed-ast==1.4.1
444
+ typer==0.4.0
445
  typing-extensions==4.4.0
446
+ uc-micro-py==1.0.1
447
+ Unidecode==1.3.2
448
+ uritemplate==3.0.1
449
+ urllib3==1.26.2
450
+ uvicorn==0.20.0
451
+ versioneer==0.28
452
+ virtualenv==20.2.1
453
+ wandb==0.12.6
454
+ wcwidth==0.2.5
455
+ webdataset==0.1.62
456
  webencodings==0.5.1
457
+ websocket-client==1.6.1
458
+ websockets==10.4
459
  Werkzeug==1.0.1
460
+ wget==3.2
461
  widgetsnbextension==3.5.1
462
  Wikipedia-API==0.6.0
463
+ wordninja==2.0.0
464
+ wrapt==1.12.1
465
+ xmltodict==0.13.0
466
+ xxhash==2.0.0
467
+ yamllint==1.23.0
468
+ yarg==0.1.9
 
 
469
  yarl==1.7.2
470
+ yaspin==2.1.0
471
+ youtokentome==1.0.6
472
+ youtube-dl==2021.6.6
473
+ zipp==3.6.0
474
  ==============================
475
  Git revision:
476
+ be9098b
477
  ==============================
478
  CUDA version:
479
  11.7
TunisianASR/results/14epoch_tunisian/1234/hyperparams.yaml CHANGED
@@ -1,5 +1,5 @@
1
- # Generated 2023-09-20 from:
2
- # /home/salah/Code_Switched_Tunisian_Speech_Recognition/TunisianASR/semi_trained.yaml
3
  # yamllint disable
4
  # ################################
5
  # Model: wav2vec2 + DNN + CTC
 
1
+ # Generated 2023-09-25 from:
2
+ # /home/salah/Code-Switched-Tunisian-SpeechToText/TunisianASR/semi_trained.yaml
3
  # yamllint disable
4
  # ################################
5
  # Model: wav2vec2 + DNN + CTC
TunisianASR/results/14epoch_tunisian/1234/log.txt CHANGED
@@ -357,3 +357,494 @@ zope.interface @ file:///tmp/build/80754af9/zope.interface_1602002420968/work
357
  2023-09-20 16:24:00,139 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
358
  2023-09-20 16:24:00,967 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
359
  2023-09-20 16:24:49,007 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
357
  2023-09-20 16:24:00,139 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
358
  2023-09-20 16:24:00,967 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
359
  2023-09-20 16:24:49,007 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
360
+ 2023-09-25 11:12:54,556 - speechbrain.core - INFO - Beginning experiment!
361
+ 2023-09-25 11:12:54,556 - speechbrain.core - INFO - Experiment folder: TunisianASR/results/14epoch_tunisian/1234/
362
+ 2023-09-25 11:12:55,141 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
363
+ absl-py==0.11.0
364
+ aiofiles==23.2.1
365
+ aiohttp==3.8.0
366
+ aiosignal==1.2.0
367
+ alabaster==0.7.12
368
+ alembic==1.7.4
369
+ altair==4.2.0
370
+ altgraph==0.17
371
+ antlr4-python3-runtime==4.9.3
372
+ anyio==3.6.2
373
+ appdirs==1.4.4
374
+ argcomplete==1.12.2
375
+ argon2-cffi==20.1.0
376
+ arrow==1.2.3
377
+ asgiref==3.6.0
378
+ asteroid-filterbanks==0.4.0
379
+ astunparse==1.6.3
380
+ async-generator==1.10
381
+ async-timeout==4.0.0
382
+ attrdict==2.0.1
383
+ attrs==20.3.0
384
+ audeer==1.16.0
385
+ audformat==0.11.5
386
+ audinterface==0.7.0
387
+ audiofile==1.0.0
388
+ audiomentations==0.25.0
389
+ audioread==2.1.9
390
+ audobject==0.4.14
391
+ audresample==0.1.6
392
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
393
+ autopage==0.4.0
394
+ Babel==2.9.0
395
+ backcall==0.2.0
396
+ backports.cached-property==1.0.2
397
+ beautifulsoup4==4.10.0
398
+ black==19.10b0
399
+ bleach==3.3.0
400
+ blessed==1.20.0
401
+ boto3==1.20.2
402
+ botocore==1.23.2
403
+ bpemb==0.3.4
404
+ braceexpand==0.1.7
405
+ cachetools==4.2.0
406
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
407
+ cffi==1.14.3
408
+ cfgv==3.2.0
409
+ chardet==3.0.4
410
+ charset-normalizer==2.0.7
411
+ click==7.1.2
412
+ cliff==3.9.0
413
+ clldutils==3.5.4
414
+ cloudpickle==2.2.1
415
+ cmaes==0.8.2
416
+ cmake==3.18.4.post1
417
+ cmd2==2.2.0
418
+ colorama==0.4.4
419
+ colorlog==4.6.2
420
+ configparser==5.1.0
421
+ conllu==4.5.3
422
+ croniter==1.3.15
423
+ cryptography==38.0.4
424
+ csrgraph==0.1.28
425
+ csvw==1.8.1
426
+ cycler==0.10.0
427
+ Cython==0.29.21
428
+ dataclasses==0.6
429
+ dateutils==0.6.12
430
+ decorator==4.4.2
431
+ deepdiff==6.3.0
432
+ deepspeech==0.9.1
433
+ defusedxml==0.7.1
434
+ Deprecated==1.2.14
435
+ dill==0.3.3
436
+ Distance==0.1.3
437
+ distlib==0.3.1
438
+ Django==3.2.16
439
+ django-auditlog==2.2.1
440
+ django-filter==22.1
441
+ django-js-asset==1.2.2
442
+ django-mptt==0.14.0
443
+ djangorestframework==3.14.0
444
+ docker-pycreds==0.4.0
445
+ docopt==0.6.2
446
+ docutils==0.16
447
+ drf-excel==2.2.0
448
+ drf-flex-fields==1.0.0
449
+ drf-renderer-xlsx==0.4.1
450
+ easyocr==1.2.1
451
+ editdistance==0.6.0
452
+ einops==0.3.2
453
+ emoji==2.2.0
454
+ entrypoints==0.3
455
+ et-xmlfile==1.1.0
456
+ exceptiongroup==1.1.0
457
+ farasapy==0.0.14
458
+ fastapi==0.98.0
459
+ fastjsonschema==2.17.1
460
+ fasttext==0.9.2
461
+ ffmpeg-python==0.2.0
462
+ ffmpy==0.3.0
463
+ filelock==3.0.12
464
+ flair==0.12.2
465
+ flake8==3.7.9
466
+ flatbuffers==1.12
467
+ frozendict==2.0.7
468
+ frozenlist==1.2.0
469
+ fsspec==2021.11.0
470
+ ftfy==6.1.1
471
+ future==0.18.2
472
+ g2p-en==2.1.0
473
+ gast==0.3.3
474
+ gdown==4.4.0
475
+ gdrive==0.1.5
476
+ gensim==4.0.1
477
+ gitdb==4.0.9
478
+ GitPython==3.1.24
479
+ google-api-core==2.11.1
480
+ google-api-python-client==2.43.0
481
+ google-auth==1.24.0
482
+ google-auth-httplib2==0.1.0
483
+ google-auth-oauthlib==0.5.3
484
+ google-pasta==0.2.0
485
+ googleapis-common-protos==1.59.1
486
+ gradio==3.44.4
487
+ gradio-client==0.5.1
488
+ greenlet==1.1.2
489
+ grpcio==1.32.0
490
+ h11==0.14.0
491
+ h5features==1.3.2
492
+ h5py==2.10.0
493
+ hierarchy==0.4.0
494
+ hmmlearn==0.2.8
495
+ htk-io==0.5
496
+ httpcore==0.16.3
497
+ httplib2==0.22.0
498
+ httpx==0.23.3
499
+ huggingface-hub==0.15.1
500
+ hydra-colorlog==0.1.4
501
+ hydra-core==1.3.2
502
+ hyperopt==0.2.7
503
+ HyperPyYAML==1.1.0
504
+ hypothesis==6.61.2
505
+ identify==1.5.10
506
+ idna==2.10
507
+ imageio==2.9.0
508
+ imagesize==1.2.0
509
+ importlib-metadata==4.8.1
510
+ importlib-resources==5.2.2
511
+ inflect==5.3.0
512
+ inquirer==3.1.3
513
+ ipadic==1.0.0
514
+ ipyevents==2.0.1
515
+ ipykernel==5.3.4
516
+ ipython==7.19.0
517
+ ipython-genutils==0.2.0
518
+ ipywebrtc==0.6.0
519
+ ipywidgets==7.6.3
520
+ iso-639==0.4.5
521
+ isodate==0.6.0
522
+ isort==4.3.21
523
+ itsdangerous==2.1.2
524
+ Janome==0.5.0
525
+ jedi==0.17.2
526
+ jeepney==0.8.0
527
+ jieba==0.42.1
528
+ Jinja2==3.0.3
529
+ jiwer==2.2.0
530
+ jmespath==0.10.0
531
+ joblib==0.17.0
532
+ jsonschema==3.2.0
533
+ julius==0.2.7
534
+ jupyter-client==6.1.7
535
+ jupyter-core==4.7.0
536
+ jupyterlab-pygments==0.1.2
537
+ jupyterlab-widgets==1.0.0
538
+ kaitaistruct==0.9
539
+ kaldi-io==0.9.4
540
+ kaldi-python-io==1.2.2
541
+ kaldiio==2.17.2
542
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
543
+ Keras-Preprocessing==1.1.2
544
+ kiwisolver==1.3.1
545
+ lang-trans==0.6.0
546
+ langdetect==1.0.9
547
+ latexcodec==2.0.1
548
+ ldap3==2.9.1
549
+ librosa==0.9.0
550
+ lightning-cloud==0.5.37
551
+ lightning-utilities==0.8.0
552
+ linkify-it-py==1.0.3
553
+ lit==16.0.6
554
+ llvmlite==0.35.0
555
+ lxml==4.9.0
556
+ Mako==1.1.5
557
+ Markdown==3.3.3
558
+ markdown-it-py==3.0.0
559
+ MarkupSafe==2.1.3
560
+ marshmallow==3.14.0
561
+ matplotlib==3.3.3
562
+ mccabe==0.6.1
563
+ mcd==0.4
564
+ mdit-py-plugins==0.3.3
565
+ mdurl==0.1.2
566
+ mecab-python3==1.0.3
567
+ megatron-lm==2.2.0
568
+ metrics==0.3.3
569
+ mido==1.2.10
570
+ mistune==0.8.4
571
+ more-itertools==8.6.0
572
+ mpld3==0.3
573
+ mpmath==1.2.1
574
+ multidict==5.2.0
575
+ multiprocess==0.70.11.1
576
+ nbclient==0.5.3
577
+ nbconvert==5.6.1
578
+ nbformat==5.9.0
579
+ NEMO==4.3.2
580
+ nemo-toolkit==1.4.0
581
+ nest-asyncio==1.5.1
582
+ networkx==2.8.8
583
+ nltk==3.2.4
584
+ nodeenv==1.5.0
585
+ normalize==2.0.2
586
+ notebook==6.3.0
587
+ numba==0.52.0
588
+ numpy==1.19.4
589
+ nvidia-cublas-cu11==11.10.3.66
590
+ nvidia-cuda-cupti-cu11==11.7.101
591
+ nvidia-cuda-nvrtc-cu11==11.7.99
592
+ nvidia-cuda-runtime-cu11==11.7.99
593
+ nvidia-cudnn-cu11==8.5.0.96
594
+ nvidia-cufft-cu11==10.9.0.58
595
+ nvidia-curand-cu11==10.2.10.91
596
+ nvidia-cusolver-cu11==11.4.0.1
597
+ nvidia-cusparse-cu11==11.7.4.91
598
+ nvidia-nccl-cu11==2.14.3
599
+ nvidia-nvtx-cu11==11.7.91
600
+ oauthlib==3.1.0
601
+ omegaconf==2.3.0
602
+ onnx==1.10.2
603
+ OpenCC==1.1.2
604
+ opencv-python==4.4.0.46
605
+ openpyxl==3.0.9
606
+ opensmile==2.2.0
607
+ opt-einsum==3.3.0
608
+ optuna==2.10.0
609
+ ordered-set==4.1.0
610
+ orjson==3.8.4
611
+ oyaml==1.0
612
+ packaging==22.0
613
+ pandas==1.2.5
614
+ pandocfilters==1.4.3
615
+ pangu==4.0.6.1
616
+ parameterized==0.8.1
617
+ parso==0.7.1
618
+ pathlib2==2.3.7.post1
619
+ pathspec==0.5.5
620
+ pathtools==0.1.2
621
+ pbr==5.6.0
622
+ pefile==2019.4.18
623
+ pescador==2.1.0
624
+ pesq==0.0.3
625
+ pexpect==4.8.0
626
+ phonemizer==2.2.1
627
+ pickleshare==0.7.5
628
+ Pillow==9.3.0
629
+ pip-api==0.0.23
630
+ pipreqs==0.4.11
631
+ pluggy==0.13.1
632
+ pooch==1.3.0
633
+ portalocker==2.3.2
634
+ pptree==3.1
635
+ pre-commit==2.9.0
636
+ preprocessing==0.1.13
637
+ pretty-midi==0.2.9
638
+ prettytable==2.2.1
639
+ primePy==1.3
640
+ progressbar2==3.53.1
641
+ prometheus-client==0.10.1
642
+ promise==2.3
643
+ prompt-toolkit==3.0.8
644
+ protobuf==3.20.3
645
+ psutil==5.6.6
646
+ ptyprocess==0.6.0
647
+ py==1.9.0
648
+ py-espeak-ng==0.1.8
649
+ py4j==0.10.9.7
650
+ pyannote.audio==2.1.1
651
+ pyannote.core==4.5
652
+ pyannote.database==4.1.3
653
+ pyannote.metrics==3.2.1
654
+ pyannote.pipeline==2.3
655
+ pyannotebook==0.1.0.dev0
656
+ PyArabic==0.6.15
657
+ pyarrow==3.0.0
658
+ pyasn1==0.4.8
659
+ pyasn1-modules==0.2.8
660
+ pybind11==2.8.1
661
+ pybtex==0.24.0
662
+ pybtex-docutils==1.0.1
663
+ pycodestyle==2.5.0
664
+ pycparser==2.20
665
+ pycryptodome==3.16.0
666
+ pyctcdecode==0.4.0
667
+ pydantic==1.10.4
668
+ pyDeprecate==0.3.1
669
+ pydub==0.25.1
670
+ pyflakes==2.1.1
671
+ Pygments==2.15.1
672
+ pygtrie==2.5.0
673
+ PyJWT==2.7.0
674
+ pymodbus==2.5.3
675
+ pyparsing==2.4.7
676
+ pyperclip==1.8.2
677
+ pypinyin==0.43.0
678
+ pyrsistent==0.17.3
679
+ pyserial==3.5
680
+ PySocks==1.7.1
681
+ pystoi==0.3.3
682
+ pytest==5.4.1
683
+ pytest-runner==5.3.1
684
+ python-bidi==0.4.2
685
+ python-crfsuite==0.9.7
686
+ python-dateutil==2.8.2
687
+ python-editor==1.0.4
688
+ python-Levenshtein==0.12.2
689
+ python-multipart==0.0.5
690
+ python-utils==2.4.0
691
+ pytorch-lightning==1.6.5
692
+ pytorch-metric-learning==1.7.3
693
+ pytorch-revgrad==0.2.0
694
+ pytube==11.0.1
695
+ pytz==2022.6
696
+ PyWavelets==1.1.1
697
+ PyYAML==6.0
698
+ pyzmq==20.0.0
699
+ rapidfuzz==1.8.2
700
+ readchar==4.0.5
701
+ regex==2020.11.13
702
+ requests==2.28.1
703
+ requests-oauthlib==1.3.0
704
+ resampy==0.2.2
705
+ rfc3986==1.4.0
706
+ rich==13.4.2
707
+ richenum==1.3.1
708
+ rsa==4.7
709
+ ruamel.yaml==0.17.21
710
+ ruamel.yaml.clib==0.2.7
711
+ s3m==1.1.0
712
+ s3transfer==0.5.0
713
+ sacrebleu==2.0.0
714
+ sacremoses==0.0.44
715
+ safetensors==0.3.1
716
+ scikit-image==0.18.1
717
+ scikit-learn==0.23.2
718
+ scipy==1.5.4
719
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
720
+ seaborn==0.11.1
721
+ SecretStorage==3.3.3
722
+ segments==2.1.3
723
+ segtok==1.5.11
724
+ semantic-version==2.10.0
725
+ semver==2.13.0
726
+ Send2Trash==1.5.0
727
+ sentencepiece==0.1.99
728
+ sentry-sdk==1.4.3
729
+ shellingham==1.4.0
730
+ shortuuid==1.0.7
731
+ SIDEKIT==1.3.8.5.2
732
+ simplejson==3.17.5
733
+ singledispatchmethod==1.0
734
+ six==1.15.0
735
+ smart-open==5.0.0
736
+ smmap==5.0.0
737
+ sniffio==1.3.0
738
+ snowballstemmer==2.0.0
739
+ sortedcollections==2.1.0
740
+ sortedcontainers==2.4.0
741
+ sounddevice==0.4.5
742
+ SoundFile==0.10.3.post1
743
+ soupsieve==2.3
744
+ sox==1.4.1
745
+ sparsemax==0.1.9
746
+ speechbrain==0.5.14
747
+ sphfile==1.0.3
748
+ Sphinx==3.3.1
749
+ sphinx-rtd-theme==0.2.4
750
+ sphinxcontrib-applehelp==1.0.2
751
+ sphinxcontrib-bibtex==2.4.1
752
+ sphinxcontrib-devhelp==1.0.2
753
+ sphinxcontrib-htmlhelp==1.0.3
754
+ sphinxcontrib-jsmath==1.0.1
755
+ sphinxcontrib-qthelp==1.0.3
756
+ sphinxcontrib-serializinghtml==1.1.4
757
+ SQLAlchemy==1.4.25
758
+ sqlitedict==2.1.0
759
+ sqlparse==0.4.2
760
+ stanza==1.4.2
761
+ starlette==0.27.0
762
+ starsessions==1.3.0
763
+ stevedore==3.4.0
764
+ subprocess32==3.5.4
765
+ sympy==1.9
766
+ tabulate==0.8.9
767
+ tensorboard==2.4.0
768
+ tensorboard-plugin-wit==1.7.0
769
+ tensorboardX==2.6.1
770
+ tensorflow==2.4.0
771
+ tensorflow-estimator==2.4.0
772
+ termcolor==1.1.0
773
+ terminado==0.9.4
774
+ testpath==0.4.4
775
+ threadpoolctl==2.1.0
776
+ tifffile==2020.12.8
777
+ tikzplotlib==0.9.8
778
+ tinycss2==1.2.1
779
+ tkseem==0.0.3
780
+ tokenizers==0.13.3
781
+ toml==0.10.2
782
+ toolz==0.12.0
783
+ torch==1.13.1
784
+ torch-audiomentations==0.11.0
785
+ torch-pitch-shift==1.2.4
786
+ torch-stft==0.1.4
787
+ torchaudio==0.13.1
788
+ torchmetrics==0.11.4
789
+ torchvision==0.14.1
790
+ tornado==6.1
791
+ tqdm==4.61.1
792
+ trackrip==1.2.1
793
+ traitlets==5.9.0
794
+ transformer-smaller-training-vocab==0.3.1
795
+ transformers==4.30.2
796
+ triton==2.0.0
797
+ typed-ast==1.4.1
798
+ typer==0.4.0
799
+ typing-extensions==4.4.0
800
+ uc-micro-py==1.0.1
801
+ Unidecode==1.3.2
802
+ uritemplate==3.0.1
803
+ urllib3==1.26.2
804
+ uvicorn==0.20.0
805
+ versioneer==0.28
806
+ virtualenv==20.2.1
807
+ wandb==0.12.6
808
+ wcwidth==0.2.5
809
+ webdataset==0.1.62
810
+ webencodings==0.5.1
811
+ websocket-client==1.6.1
812
+ websockets==10.4
813
+ Werkzeug==1.0.1
814
+ wget==3.2
815
+ widgetsnbextension==3.5.1
816
+ Wikipedia-API==0.6.0
817
+ wordninja==2.0.0
818
+ wrapt==1.12.1
819
+ xmltodict==0.13.0
820
+ xxhash==2.0.0
821
+ yamllint==1.23.0
822
+ yarg==0.1.9
823
+ yarl==1.7.2
824
+ yaspin==2.1.0
825
+ youtokentome==1.0.6
826
+ youtube-dl==2021.6.6
827
+ zipp==3.6.0
828
+
829
+
830
+ 2023-09-25 11:12:55,173 - speechbrain.utils.superpowers - DEBUG - be9098b
831
+
832
+
833
+ 2023-09-25 11:12:55,216 - speechbrain.pretrained.fetching - INFO - Fetch hyperparams.yaml: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/hyperparams.yaml.
834
+ 2023-09-25 11:12:55,217 - speechbrain.pretrained.fetching - INFO - Fetch custom.py: Linking to local file in /home/salah/Code-Switched-Tunisian-SpeechToText/asr-wav2vec2-commonvoice-fr/custom.py.
835
+ 2023-09-25 11:12:58,078 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 is frozen.
836
+ 2023-09-25 11:12:58,080 - speechbrain.utils.parameter_transfer - DEBUG - Collecting files (or symlinks) for pretraining in pretrained_models/asr-wav2vec2-commonvoice-fr.
837
+ 2023-09-25 11:12:58,087 - speechbrain.pretrained.fetching - INFO - Fetch wav2vec2.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/wav2vec2.ckpt.
838
+ 2023-09-25 11:12:58,087 - speechbrain.pretrained.fetching - INFO - Fetch asr.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/asr.ckpt.
839
+ 2023-09-25 11:12:58,087 - speechbrain.pretrained.fetching - INFO - Fetch tokenizer.ckpt: Using existing file/symlink in pretrained_models/asr-wav2vec2-commonvoice-fr/tokenizer.ckpt.
840
+ 2023-09-25 11:12:58,087 - speechbrain.utils.parameter_transfer - INFO - Loading pretrained files for: wav2vec2, asr, tokenizer
841
+ 2023-09-25 11:13:01,875 - speechbrain.lobes.models.huggingface_wav2vec - WARNING - speechbrain.lobes.models.huggingface_wav2vec - wav2vec 2.0 feature extractor is frozen.
842
+ 2023-09-25 11:13:01,877 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
843
+ 2023-09-25 11:13:01,877 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
844
+ 2023-09-25 11:13:01,880 - speechbrain.core - INFO - 314.4M trainable parameters in ASRCV
845
+ 2023-09-25 11:13:01,885 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from EnglishCV/results/wav2vec2_ctc_en/1234/save/CKPT+2023-09-06+22-56-31+00
846
+ 2023-09-25 11:13:04,505 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
847
+ 2023-09-25 11:13:04,505 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
848
+ 2023-09-25 11:13:04,509 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
849
+ 2023-09-25 11:13:04,513 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from TunisianASR/results/14epoch_tunisian/1234/save/CKPT+2023-08-03+01-38-38+00
850
+ 2023-09-25 11:13:05,900 - speechbrain.utils.distributed - INFO - distributed_launch flag is disabled, this experiment will be executed without DDP.
__pycache__/cv_train.cpython-38.pyc CHANGED
Binary files a/__pycache__/cv_train.cpython-38.pyc and b/__pycache__/cv_train.cpython-38.pyc differ
 
app.py CHANGED
@@ -356,7 +356,7 @@ english_asr_model = ASRCV(
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
- #english_asr_model.checkpointer.recover_if_possible()
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
@@ -366,7 +366,7 @@ asr_brain = ASR(
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
- #asr_brain.checkpointer.recover_if_possible()
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
@@ -713,7 +713,7 @@ mixer = Mixer(
713
  )
714
  mixer.tokenizer = label_encoder
715
  mixer.device = "cpu"
716
- #mixer.checkpointer.recover_if_possible()
717
  mixer.modules.eval()
718
 
719
 
 
356
  )
357
  english_asr_model.modules.to("cpu")
358
  english_asr_model.device="cpu"
359
+ english_asr_model.checkpointer.recover_if_possible(device="cpu")
360
  run_opts["device"]="cpu"
361
  print("moving to tunisian model")
362
  asr_brain = ASR(
 
366
  checkpointer=hparams["checkpointer"],
367
  )
368
  asr_brain.modules.to("cpu")
369
+ asr_brain.checkpointer.recover_if_possible(device="cpu")
370
  asr_brain.modules.eval()
371
  english_asr_model.modules.eval()
372
  french_asr_model.mods.eval()
 
713
  )
714
  mixer.tokenizer = label_encoder
715
  mixer.device = "cpu"
716
+ mixer.checkpointer.recover_if_possible(device="cpu")
717
  mixer.modules.eval()
718
 
719
 
pretrained_models/asr-wav2vec2-commonvoice-fr/custom.py CHANGED
@@ -1 +1 @@
1
- /home/salah/Code_Switched_Tunisian_Speech_Recognition/asr-wav2vec2-commonvoice-fr/custom.py
 
1
+ /home/salah/Code-Switched-Tunisian-SpeechToText/asr-wav2vec2-commonvoice-fr/custom.py
results/non_semi_final_stac/app.py ADDED
@@ -0,0 +1,772 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ import torch
4
+ import logging
5
+ import speechbrain as sb
6
+ from speechbrain.utils.distributed import run_on_main
7
+ from hyperpyyaml import load_hyperpyyaml
8
+ from pathlib import Path
9
+ import torchaudio.transforms as T
10
+ from cv_train import ASRCV
11
+ import torchaudio
12
+ import numpy as np
13
+ import kenlm
14
+ from pyctcdecode import build_ctcdecoder
15
+ import re
16
+ from torch.nn.utils.rnn import pad_sequence
17
+ import torch.optim as optim
18
+ import torch.nn as nn
19
+
20
+
21
# Resolve the hyperparameter file, runtime options and overrides for the
# Tunisian ASR model; the YAML path is passed in place of CLI arguments.
hparams_file, run_opts, overrides = sb.parse_arguments(["TunisianASR/semi_trained.yaml"])

# If distributed_launch=True then
# create ddp_group with the right communication protocol
sb.utils.distributed.ddp_init_group(run_opts)

# Load the hyperparameters, applying any overrides returned above.
with open(hparams_file) as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# Create experiment directory
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)
# Dataset preparation
38
+
39
def dataio_prepare(hparams):
    """Build the train/valid/test datasets and their processing pipelines.

    Reads the CSV manifests named in ``hparams``, optionally sorts the training
    set by duration, attaches the audio and text pipelines, and loads or
    creates the character-level CTC label encoder.

    Returns:
        ``(train_data, valid_data, test_datasets, label_encoder)`` where
        ``test_datasets`` maps each test CSV stem to its dataset.

    Raises:
        NotImplementedError: if ``hparams["sorting"]`` is not one of
            ``"random"``, ``"ascending"`` or ``"descending"``.
    """
    from_csv = sb.dataio.dataset.DynamicItemDataset.from_csv
    data_folder = hparams["data_folder"]
    root_replacement = {"data_root": data_folder}

    # 1. Datasets
    train_data = from_csv(
        csv_path=hparams["train_csv"], replacements=root_replacement,
    )

    sorting = hparams["sorting"]
    if sorting not in ("ascending", "descending", "random"):
        raise NotImplementedError(
            "sorting must be random, ascending or descending"
        )
    if sorting != "random":
        sort_kwargs = {
            "sort_key": "duration",
            "key_max_value": {"duration": hparams["avoid_if_longer_than"]},
        }
        if sorting == "descending":
            sort_kwargs["reverse"] = True
        train_data = train_data.filtered_sorted(**sort_kwargs)
        # Shuffling in the dataloader would undo the sort.
        hparams["dataloader_options"]["shuffle"] = False

    # Validation and test sets are always sorted by duration.
    valid_data = from_csv(
        csv_path=hparams["valid_csv"], replacements=root_replacement,
    ).filtered_sorted(sort_key="duration")

    test_datasets = {}
    for csv_file in hparams["test_csv"]:
        test_datasets[Path(csv_file).stem] = from_csv(
            csv_path=csv_file, replacements=root_replacement
        ).filtered_sorted(sort_key="duration")

    datasets = [train_data, valid_data, *test_datasets.values()]

    # 2. Audio pipeline: read, average the second axis if present, resample.
    @sb.utils.data_pipeline.takes("wav")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline(wav):
        source_rate = torchaudio.info(wav).sample_rate
        sig = sb.dataio.dataio.read_audio(wav)
        if sig.dim() > 1:
            sig = sig.mean(dim=1)
        resampler = torchaudio.transforms.Resample(
            source_rate, hparams["sample_rate"],
        )
        return resampler(sig)

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
    label_encoder = sb.dataio.encoder.CTCTextEncoder()

    # 3. Text pipeline: transcript -> characters -> token ids -> tensor.
    @sb.utils.data_pipeline.takes("wrd")
    @sb.utils.data_pipeline.provides(
        "wrd", "char_list", "tokens_list", "tokens"
    )
    def text_pipeline(wrd):
        yield wrd
        characters = list(wrd)
        yield characters
        token_ids = label_encoder.encode_sequence(characters)
        yield token_ids
        yield torch.LongTensor(token_ids)

    sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)

    label_encoder.load_or_create(
        path=os.path.join(hparams["save_folder"], "label_encoder.txt"),
        from_didatasets=[train_data],
        output_key="char_list",
        special_labels={
            "blank_label": hparams["blank_index"],
            "unk_label": hparams["unk_index"],
        },
        sequence_input=True,
    )

    # 4. Keys exposed to the training loop
    sb.dataio.dataset.set_output_keys(
        datasets, ["id", "sig", "wrd", "char_list", "tokens"],
    )
    return train_data, valid_data, test_datasets, label_encoder
143
+
144
class ASR(sb.core.Brain):
    """wav2vec2 encoder + CTC head, trained/evaluated through SpeechBrain's Brain."""

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output probabilities.

        Returns ``(p_ctc, wav_lens)``: the log-softmax CTC outputs and the
        length tensor taken from ``batch.sig``.
        """
        batch = batch.to(self.device)
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)

        # Augmentation is applied only in training, and only if configured.
        if stage == sb.Stage.TRAIN:
            if hasattr(self.hparams, "augmentation"):
                wavs = self.hparams.augmentation(wavs, wav_lens)

        # Forward pass
        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return p_ctc, wav_lens

    def custom_encode(self, wavs, wav_lens):
        """Encode raw waveforms and return ``(feats, p_ctc)``.

        ``feats`` is the wav2vec2 output and ``p_ctc`` the log-softmax CTC
        output. ``wav_lens`` may be ``None``.
        """
        wavs = wavs.to("cpu")
        if wav_lens is not None:
            # Tensor.to() is not in-place: the result must be re-bound,
            # otherwise the move is silently discarded.
            wav_lens = wav_lens.to(self.device)

        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return feats, p_ctc

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets.

        Outside of training, also decodes the predictions and feeds the
        WER/CER metric trackers.
        """
        p_ctc, wav_lens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage != sb.Stage.TRAIN:
            predicted_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
            # Decode token terms to words
            if self.hparams.use_language_modelling:
                # NOTE(review): `decoder` is a module-level name defined
                # outside this class -- confirm it exists before this runs.
                predicted_words = []
                for logs in p_ctc:
                    text = decoder.decode(logs.detach().cpu().numpy())
                    predicted_words.append(text.split(" "))
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]
            # Split the reference transcripts into words
            target_words = [wrd.split(" ") for wrd in batch.wrd]

            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input"""
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision
        # TOFIX: CTC fine-tuning currently is unstable
        # This is certainly due to CTC being done in fp16 instead of fp32
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:

                if not self.hparams.wav2vec2.freeze:
                    self.scaler.unscale_(self.wav2vec_optimizer)
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        # The wav2vec2 optimizer only steps after warm-up.
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.scaler.step(self.wav2vec_optimizer)
                    self.scaler.step(self.model_optimizer)
                self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.wav2vec_optimizer.step()
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches"""
        predictions = self.compute_forward(batch, stage=stage)
        with torch.no_grad():
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch"""
        # Fresh metric trackers for every validation/test pass.
        if stage != sb.Stage.TRAIN:
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch."""
        # Compute/store important stats
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            if not self.hparams.wav2vec2.freeze:
                sb.nnet.schedulers.update_learning_rate(
                    self.wav2vec_optimizer, new_lr_wav2vec
                )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                    "lr_wav2vec": old_lr_wav2vec,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Checkpoint selection is driven by the lowest validation WER.
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            with open(self.hparams.wer_file, "w") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        "Initializes the wav2vec2 optimizer and model optimizer"

        # If the wav2vec encoder is unfrozen, we create the optimizer
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
                self.modules.wav2vec2.parameters()
            )
            if self.checkpointer is not None:
                self.checkpointer.add_recoverable(
                    "wav2vec_opt", self.wav2vec_optimizer
                )

        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Reset the gradients of every optimizer that is in use."""
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer.zero_grad(set_to_none)
        self.model_optimizer.zero_grad(set_to_none)
342
+
343
+
344
from speechbrain.pretrained import EncoderASR,EncoderDecoderASR

# French model: pretrained SpeechBrain EncoderASR, kept on CPU.
french_asr_model = EncoderASR.from_hparams(source="asr-wav2vec2-commonvoice-fr", savedir="pretrained_models/asr-wav2vec2-commonvoice-fr")
french_asr_model.to("cpu")

# English model: ASRCV brain built from its own hyperparameter file.
cvhparams_file, cvrun_opts, cvoverrides = sb.parse_arguments(["EnglishCV/train_en_with_wav2vec.yaml"])
with open(cvhparams_file) as cvfin:
    cvhparams = load_hyperpyyaml(cvfin, cvoverrides)
cvrun_opts["device"]="cpu"
english_asr_model = ASRCV(
    modules=cvhparams["modules"],
    hparams=cvhparams,
    run_opts=cvrun_opts,
    checkpointer=cvhparams["checkpointer"],
)
english_asr_model.modules.to("cpu")
english_asr_model.device="cpu"
# Pass the target device explicitly so the checkpoint is loaded onto CPU,
# matching the top-level app.py in this repository.
english_asr_model.checkpointer.recover_if_possible(device="cpu")

# Tunisian model: ASR brain built from the hyperparameters loaded earlier.
run_opts["device"]="cpu"
print("moving to tunisian model")
asr_brain = ASR(
    modules=hparams["modules"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)
asr_brain.modules.to("cpu")
asr_brain.checkpointer.recover_if_possible(device="cpu")

# All three recognisers are used for inference only.
asr_brain.modules.eval()
english_asr_model.modules.eval()
french_asr_model.mods.eval()
asr_brain.modules.to("cpu")
374
+
375
+ # Commented out IPython magic to ensure Python compatibility.
376
+ # %ls
377
+
378
+ # UTILS FUNCTIONS
379
def get_size_dimensions(arr):
    """Return the length of each nesting level of a nested list.

    Follows the first element at every level, so
    ``[[1, 2, 3], [4, 5, 6]]`` gives ``[2, 3]``. A non-list input gives
    ``[]``. An empty list ends the walk with a ``0`` entry instead of
    raising ``IndexError``.
    """
    size_dimensions = []
    while isinstance(arr, list):
        size_dimensions.append(len(arr))
        if not arr:
            # Nothing to descend into.
            break
        arr = arr[0]
    return size_dimensions
385
+
386
def scale_array(batch,n):
    """Stretch every row of ``batch`` to exactly ``n`` items by repetition.

    Each item is repeated ``round(n / len(row)) + 1`` times in order and the
    result is cut at ``n`` items, so trailing items of a row may be
    under-represented or dropped.

    Raises:
        ValueError: if a row is already longer than ``n``.
    """
    stretched_rows = []

    for row in batch:
        if n < len(row):
            raise ValueError("Cannot scale Array down")

        copies = round(n / len(row)) + 1
        repeated = [item for item in row for _ in range(copies)]
        stretched_rows.append(repeated[:n])

    return torch.tensor(stretched_rows)
403
+
404
+
405
def load_paths(wavs_path):
    """Load several audio files and stack them into one padded batch tensor.

    Each file is read with ``torchaudio.load`` and its leading dimension is
    squeezed when it has size 1. Shorter waveforms are padded up to the
    longest one.

    Args:
        wavs_path: iterable of audio file paths.

    Returns:
        The padded batch tensor, batch dimension first.
    """
    waveforms = []
    for path in wavs_path:
        waveform, _ = torchaudio.load(path)
        waveforms.append(waveform.squeeze(0))
    # pad_sequence already returns a fresh tensor; wrapping it in
    # torch.tensor() only made a redundant copy and triggered PyTorch's
    # copy-construct warning.
    return pad_sequence(waveforms, batch_first=True)
413
+
414
+
415
+
416
# Device that merge_strategy and the Mixer move their tensors to.
device = 'cpu'
# Any non-zero value makes the flow-level functions print tensor shapes.
verbose = 0
# FLOW LEVEL FUNCTIONS
419
def merge_strategy(embeddings1, embeddings2, embeddings3,post1, post2,post3):
    """Concatenate three posteriograms and three embeddings along dim 2.

    All inputs are first moved to the module-level ``device``. The result
    holds the merged posteriograms followed by the merged embeddings.
    """
    posteriors = tuple(p.to(device) for p in (post1, post2, post3))
    embeddings = tuple(
        e.to(device) for e in (embeddings1, embeddings2, embeddings3)
    )

    posteriograms_merged = torch.cat(posteriors, dim=2)
    embeddings_merged = torch.cat(embeddings, dim=2)

    if verbose != 0:
        print('MERGED POST ',posteriograms_merged.shape)
        print('MERGED emb ',embeddings_merged.shape)

    merged = torch.cat((posteriograms_merged, embeddings_merged), dim=2)
    return merged.to(device)
437
+
438
def decode(model,wavs,wav_lens):
    """Run ``model``'s encoder and decoding function with gradients disabled.

    ``wav_lens`` is moved to ``model.device`` before both calls.
    """
    with torch.no_grad():
        lens_on_device = wav_lens.to(model.device)
        return model.decoding_function(
            model.encode_batch(wavs, lens_on_device), lens_on_device
        )
445
+
446
def middle_layer(batch, lens):
    """Run the Tunisian, French and English models and merge their outputs.

    For each model an embedding and a posteriogram-like output are computed
    from ``batch``; the six tensors are then combined by ``merge_strategy``.
    """
    # Tunisian branch (called without lengths).
    tn_embeddings, tn_posteriogram = asr_brain.custom_encode(batch, None)

    # French branch.
    fr_embeddings = french_asr_model.mods.encoder.wav2vec2(batch)
    fr_posteriogram = french_asr_model.encode_batch(batch, lens)

    # English branch: ctc_lin output is used as-is, without log_softmax.
    english_modules = english_asr_model.modules
    en_embeddings = english_modules.wav2vec2(batch, lens)
    en_posteriogram = english_modules.ctc_lin(english_modules.enc(en_embeddings))

    if verbose != 0:
        print('[EMBEDDINGS] FR:',fr_embeddings.shape, "EN:",en_embeddings.shape, "TN:", tn_embeddings.shape)
        print('[POSTERIOGRAM] FR:',fr_posteriogram.shape, "EN:",en_posteriogram.shape,"TN:",tn_posteriogram.shape)

    return merge_strategy(
        fr_embeddings,
        en_embeddings,
        tn_embeddings,
        fr_posteriogram,
        en_posteriogram,
        tn_posteriogram,
    )
463
+
464
class Mixer(sb.core.Brain):
    """CTC recogniser trained on top of the multilingual ``middle_layer`` features.

    Waveforms are first passed through the module-level ``middle_layer``
    function (defined elsewhere in this file); its output is fed to
    ``modules.enc`` and ``modules.ctc_lin`` and scored with a CTC loss.
    Language-model decoding relies on the module-level ``decoder`` object.
    """

    def _ctc_log_probs(self, wavs, wav_lens):
        """Return CTC log-probabilities for a batch of waveforms.

        Shared by training/evaluation (``compute_forward``) and by the
        single-utterance inference path (``treat_wav``).
        """
        # Use the brain's own device rather than a module-level ``device``
        # global, so the method does not depend on script execution order.
        multi_langual_feats = middle_layer(wavs, wav_lens).to(self.device)
        feats, _ = self.modules.enc(multi_langual_feats)
        logits = self.modules.ctc_lin(feats)
        return self.hparams.log_softmax(logits)

    def _lm_decode(self, p_ctc):
        """Decode every utterance in ``p_ctc`` with the module-level ``decoder``.

        Returns one list of words (split on single spaces) per utterance.
        """
        return [
            decoder.decode(logs.detach().cpu().numpy()).split(" ")
            for logs in p_ctc
        ]

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output probabilities.

        Returns a ``(p_ctc, wav_lens, p_tokens)`` tuple. ``p_tokens`` holds the
        greedy CTC decoding outside of training and is ``None`` during training.
        """
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)

        # Augmentation is optional and only applied while training.
        if stage == sb.Stage.TRAIN and hasattr(self.hparams, "augmentation"):
            wavs = self.hparams.augmentation(wavs, wav_lens)

        p_ctc = self._ctc_log_probs(wavs, wav_lens)

        p_tokens = None
        if stage != sb.Stage.TRAIN:
            p_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
        return p_ctc, wav_lens, p_tokens

    def treat_wav(self, sig):
        """Transcribe a waveform tensor and return the decoded sentence.

        ``sig`` is treated as a batch holding one utterance: a single relative
        length of 1.0 is supplied and only the first row is decoded.
        """
        # Inference only: no autograd graph is needed. The relative length is
        # a float (1.0 == full length), as SpeechBrain length tensors are.
        with torch.no_grad():
            p_ctc = self._ctc_log_probs(sig.to("cpu"), torch.tensor([1.0]))
        return decoder.decode(p_ctc[0].detach().cpu().numpy())

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets.

        During VALID and TEST the predicted and reference word sequences are
        also appended to ``self.wer_metric`` and ``self.cer_metric``.
        """
        p_ctc, wav_lens, predicted_tokens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage in (sb.Stage.VALID, sb.Stage.TEST):
            # The language-model decoder is only used at TEST time, and only
            # when enabled in the hyperparameters; otherwise the greedy tokens
            # from ``compute_forward`` are mapped back to text.
            if stage == sb.Stage.TEST and self.hparams.language_modelling:
                predicted_words = self._lm_decode(p_ctc)
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]

            target_words = [wrd.split(" ") for wrd in batch.wrd]
            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input"""
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision
        # TOFIX: CTC fine-tuning currently is unstable
        # This is certainly due to CTC being done in fp16 instead of fp32
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:
                # Unscale before the gradient check so it sees true magnitudes.
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    self.scaler.step(self.model_optimizer)
                self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches"""
        predictions = self.compute_forward(batch, stage=stage)
        with torch.no_grad():
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch"""
        # Fresh metric trackers for every non-training stage.
        if stage != sb.Stage.TRAIN:
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch."""
        # Compute/store important stats
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Checkpoints are ranked by WER (lower is better).
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            # Explicit UTF-8 so non-ASCII transcripts are written the same way
            # regardless of the platform's default encoding.
            with open(self.hparams.wer_file, "w", encoding="utf-8") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        """Create the model optimizer and register it with the checkpointer."""
        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Reset the gradients held by the model optimizer."""
        self.model_optimizer.zero_grad(set_to_none)
645
+
646
+
647
+
648
+
649
# Resolve the hyperparameter file, run options and overrides from a fixed
# argument list (["cs.yaml"]) rather than from the process command line.
hparams_file, run_opts, overrides = sb.parse_arguments(["cs.yaml"])

# If distributed_launch=True then
# create ddp_group with the right communication protocol
sb.utils.distributed.ddp_init_group(run_opts)

# Read the YAML as UTF-8 explicitly so loading does not depend on the
# platform's default text encoding (same convention as read_labels_file).
with open(hparams_file, encoding="utf-8") as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# Create experiment directory
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)
664
def read_labels_file(labels_file):
    """Return the labels stored in a label-encoder text file, ordered by index.

    Each line before the first one containing ``===`` is split on ``=>``: the
    right-hand side is parsed as the integer index, and the left-hand side has
    its first character and last two characters removed to obtain the label.
    The result is a list whose position ``i`` holds the label with index ``i``.

    Raises ``ValueError`` if a line does not split into exactly two parts or
    the index is not an integer, and ``KeyError`` if the indices do not cover
    ``0 .. n-1``.
    """
    section_marker = "==="
    index_to_label = {}
    with open(labels_file, "r", encoding="utf-8") as handle:
        for entry in handle.read().splitlines():
            # Everything from the separator line onwards is not a label.
            if section_marker in entry:
                break
            quoted_label, raw_index = entry.split("=>")
            # Drop the opening quote and the closing quote + trailing space.
            index_to_label[int(raw_index)] = quoted_label[1:-2]
    return [index_to_label[position] for position in range(len(index_to_label))]
677
+
678
# Character-level label encoder, loaded from (or created in) the save folder.
label_encoder = sb.dataio.encoder.CTCTextEncoder()

lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
special_labels = {
    "blank_label": hparams["blank_index"],
    "unk_label": hparams["unk_index"],
}
label_encoder.load_or_create(
    path=lab_enc_file,
    from_didatasets=[[]],
    output_key="char_list",
    special_labels=special_labels,
    sequence_input=True,
)

# Vocabulary handed to the CTC decoder: the first entry is replaced by the
# empty string and the last one by "1".
# NOTE(review): presumably these are the blank and unknown symbols - confirm.
labels = read_labels_file(lab_enc_file)
labels = [""] + labels[1:-1] + ["1"]

# The demo always runs on CPU.
run_opts["device"] = "cpu"

mixer = Mixer(
    modules=hparams["modules"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)
mixer.tokenizer = label_encoder
mixer.device = "cpu"
mixer.checkpointer.recover_if_possible()
mixer.modules.eval()

# NOTE(review): this rebinds the module-level name to a fresh, empty encoder;
# `mixer.tokenizer` still refers to the loaded one. Kept as in the original.
label_encoder = sb.dataio.encoder.CTCTextEncoder()

# We dynamically add the tokenizer to our brain class.
# NB: This tokenizer corresponds to the one used for the LM!!

# The decoder is built exactly once. The original script first built it from
# hparams["ngram_lm_path"] when language modelling was enabled and then always
# overwrote it with the decoder below, so only this one was ever used.
decoder = build_ctcdecoder(
    labels,
    kenlm_model_path="arpas/everything.arpa",  # either .arpa or .bin file
    alpha=0.5,  # tuned on a val set
    beta=1,  # tuned on a val set
)

device = "cpu"
mixer.device = "cpu"
mixer.modules.to("cpu")
738
+
739
+ from enum import Enum, auto
740
class Stage(Enum):
    """Local three-member stage enumeration: TRAIN, VALID and TEST."""

    # Explicit values equal to what successive ``auto()`` calls produce.
    TRAIN = 1
    VALID = 2
    TEST = 3
744
+
745
# Prepare `asr_brain` for inference: run its evaluation-start hook, then put
# its modules in eval mode.
# NOTE(review): `asr_brain` is not defined in this part of the file -
# presumably it is created earlier; confirm before relying on it.
asr_brain.on_evaluate_start()
asr_brain.modules.eval()
747
+
748
+
749
+ import gradio as gr
750
+
751
def treat_wav_file(file_mic, file_upload, asr=mixer, device="cpu"):
    """Transcribe the audio supplied through the Gradio interface.

    Args:
        file_mic: path of the microphone recording, or ``None``.
        file_upload: path of the uploaded audio file, or ``None``.
        asr: object exposing ``treat_wav(waveform)``; defaults to ``mixer``.
        device: device the loaded waveform is moved to before resampling.

    Returns:
        The transcription as a string. If both inputs are given, the
        microphone recording is used and a warning line is prepended. If
        neither is given, an error message is returned instead.
    """
    if file_mic is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    # Previously this warning was built but never returned, so the user never
    # saw it; it is now prepended to the transcription.
    warn_output = ""
    if file_mic is not None and file_upload is not None:
        warn_output = "WARNING: You've uploaded an audio file and used the microphone. The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"

    # The microphone recording takes precedence over the uploaded file.
    wav = file_mic if file_mic is not None else file_upload

    sig, sr = torchaudio.load(wav)
    # `treat_wav` handles a single utterance, so multi-channel audio is
    # averaged down to one channel instead of being passed as several rows.
    if sig.shape[0] > 1:
        sig = sig.mean(dim=0, keepdim=True)
    tensor_wav = sig.to(device)
    resampled = torchaudio.functional.resample(tensor_wav, sr, 16000)
    sentence = asr.treat_wav(resampled)
    return warn_output + sentence
766
+
767
# Two optional audio inputs, both handed to `treat_wav_file` as file paths:
# a microphone recording and an uploaded file.
mic_input = gr.Audio(source="microphone", type='filepath', label="record", optional=True)
upload_input = gr.Audio(source="upload", type='filepath', label="filein", optional=True)

# Build the text-output demo and start serving it.
demo = gr.Interface(
    fn=treat_wav_file,
    inputs=[mic_input, upload_input],
    outputs="text",
)
demo.launch()
772
+
results/non_semi_final_stac/env.log ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechBrain system description
2
+ ==============================
3
+ Python version:
4
+ 3.8.5 (default, Sep 4 2020, 07:30:14)
5
+ [GCC 7.3.0]
6
+ ==============================
7
+ Installed Python packages:
8
+ abkhazia==1.0
9
+ absl-py==0.11.0
10
+ aiofiles==23.2.1
11
+ aiohttp==3.8.0
12
+ aiosignal==1.2.0
13
+ alabaster==0.7.12
14
+ alembic==1.7.4
15
+ altair==4.2.0
16
+ altgraph==0.17
17
+ antlr4-python3-runtime==4.9.3
18
+ anyio==3.6.2
19
+ appdirs==1.4.4
20
+ argcomplete==1.12.2
21
+ argon2-cffi==20.1.0
22
+ arrow==1.2.3
23
+ asgiref==3.6.0
24
+ asteroid-filterbanks==0.4.0
25
+ astunparse==1.6.3
26
+ async-generator==1.10
27
+ async-timeout==4.0.0
28
+ attrdict==2.0.1
29
+ attrs==20.3.0
30
+ audeer==1.16.0
31
+ audformat==0.11.5
32
+ audinterface==0.7.0
33
+ audiofile==1.0.0
34
+ audiomentations==0.25.0
35
+ audioread==2.1.9
36
+ audobject==0.4.14
37
+ audresample==0.1.6
38
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
39
+ autopage==0.4.0
40
+ Babel==2.9.0
41
+ backcall==0.2.0
42
+ backports.cached-property==1.0.2
43
+ beautifulsoup4==4.10.0
44
+ black==19.10b0
45
+ bleach==3.3.0
46
+ blessed==1.20.0
47
+ boto3==1.20.2
48
+ botocore==1.23.2
49
+ bpemb==0.3.4
50
+ braceexpand==0.1.7
51
+ cachetools==4.2.0
52
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
53
+ cffi==1.14.3
54
+ cfgv==3.2.0
55
+ chardet==3.0.4
56
+ charset-normalizer==2.0.7
57
+ click==7.1.2
58
+ cliff==3.9.0
59
+ clldutils==3.5.4
60
+ cloudpickle==2.2.1
61
+ cmaes==0.8.2
62
+ cmake==3.18.4.post1
63
+ cmd2==2.2.0
64
+ colorama==0.4.4
65
+ colorlog==4.6.2
66
+ configparser==5.1.0
67
+ conllu==4.5.3
68
+ croniter==1.3.15
69
+ cryptography==38.0.4
70
+ csrgraph==0.1.28
71
+ csvw==1.8.1
72
+ cycler==0.10.0
73
+ Cython==0.29.21
74
+ dataclasses==0.6
75
+ dateutils==0.6.12
76
+ decorator==4.4.2
77
+ deepdiff==6.3.0
78
+ deepspeech==0.9.1
79
+ defusedxml==0.7.1
80
+ Deprecated==1.2.14
81
+ dill==0.3.3
82
+ Distance==0.1.3
83
+ distlib==0.3.1
84
+ Django==3.2.16
85
+ django-auditlog==2.2.1
86
+ django-filter==22.1
87
+ django-js-asset==1.2.2
88
+ django-mptt==0.14.0
89
+ djangorestframework==3.14.0
90
+ docker-pycreds==0.4.0
91
+ docopt==0.6.2
92
+ docutils==0.16
93
+ drf-excel==2.2.0
94
+ drf-flex-fields==1.0.0
95
+ drf-renderer-xlsx==0.4.1
96
+ easyocr==1.2.1
97
+ editdistance==0.6.0
98
+ einops==0.3.2
99
+ emoji==2.2.0
100
+ entrypoints==0.3
101
+ et-xmlfile==1.1.0
102
+ exceptiongroup==1.1.0
103
+ farasapy==0.0.14
104
+ fastapi==0.98.0
105
+ fastjsonschema==2.17.1
106
+ fasttext==0.9.2
107
+ ffmpeg-python==0.2.0
108
+ ffmpy==0.3.0
109
+ filelock==3.0.12
110
+ flair==0.12.2
111
+ flake8==3.7.9
112
+ flatbuffers==1.12
113
+ frozendict==2.0.7
114
+ frozenlist==1.2.0
115
+ fsspec==2021.11.0
116
+ ftfy==6.1.1
117
+ future==0.18.2
118
+ g2p-en==2.1.0
119
+ gast==0.3.3
120
+ gdown==4.4.0
121
+ gdrive==0.1.5
122
+ gensim==4.0.1
123
+ gitdb==4.0.9
124
+ GitPython==3.1.24
125
+ google-api-core==2.11.1
126
+ google-api-python-client==2.43.0
127
+ google-auth==1.24.0
128
+ google-auth-httplib2==0.1.0
129
+ google-auth-oauthlib==0.5.3
130
+ google-pasta==0.2.0
131
+ googleapis-common-protos==1.59.1
132
+ gradio==3.44.4
133
+ gradio-client==0.5.1
134
+ greenlet==1.1.2
135
+ grpcio==1.32.0
136
+ h11==0.14.0
137
+ h5features==1.3.2
138
+ h5py==2.10.0
139
+ hierarchy==0.4.0
140
+ hmmlearn==0.2.8
141
+ htk-io==0.5
142
+ httpcore==0.16.3
143
+ httplib2==0.22.0
144
+ httpx==0.23.3
145
+ huggingface-hub==0.15.1
146
+ hydra-colorlog==0.1.4
147
+ hydra-core==1.3.2
148
+ hyperopt==0.2.7
149
+ HyperPyYAML==1.1.0
150
+ hypothesis==6.61.2
151
+ identify==1.5.10
152
+ idna==2.10
153
+ imageio==2.9.0
154
+ imagesize==1.2.0
155
+ importlib-metadata==4.8.1
156
+ importlib-resources==5.2.2
157
+ inflect==5.3.0
158
+ inquirer==3.1.3
159
+ ipadic==1.0.0
160
+ ipyevents==2.0.1
161
+ ipykernel==5.3.4
162
+ ipython==7.19.0
163
+ ipython-genutils==0.2.0
164
+ ipywebrtc==0.6.0
165
+ ipywidgets==7.6.3
166
+ iso-639==0.4.5
167
+ isodate==0.6.0
168
+ isort==4.3.21
169
+ itsdangerous==2.1.2
170
+ Janome==0.5.0
171
+ jedi==0.17.2
172
+ jeepney==0.8.0
173
+ jieba==0.42.1
174
+ Jinja2==3.0.3
175
+ jiwer==2.2.0
176
+ jmespath==0.10.0
177
+ joblib==0.17.0
178
+ jsonschema==3.2.0
179
+ julius==0.2.7
180
+ jupyter-client==6.1.7
181
+ jupyter-core==4.7.0
182
+ jupyterlab-pygments==0.1.2
183
+ jupyterlab-widgets==1.0.0
184
+ kaitaistruct==0.9
185
+ kaldi-io==0.9.4
186
+ kaldi-python-io==1.2.2
187
+ kaldiio==2.17.2
188
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
189
+ Keras-Preprocessing==1.1.2
190
+ kiwisolver==1.3.1
191
+ lang-trans==0.6.0
192
+ langdetect==1.0.9
193
+ latexcodec==2.0.1
194
+ ldap3==2.9.1
195
+ librosa==0.9.0
196
+ lightning-cloud==0.5.37
197
+ lightning-utilities==0.8.0
198
+ linkify-it-py==1.0.3
199
+ lit==16.0.6
200
+ llvmlite==0.35.0
201
+ lxml==4.9.0
202
+ Mako==1.1.5
203
+ Markdown==3.3.3
204
+ markdown-it-py==3.0.0
205
+ MarkupSafe==2.1.3
206
+ marshmallow==3.14.0
207
+ matplotlib==3.3.3
208
+ mccabe==0.6.1
209
+ mcd==0.4
210
+ mdit-py-plugins==0.3.3
211
+ mdurl==0.1.2
212
+ mecab-python3==1.0.3
213
+ megatron-lm==2.2.0
214
+ metrics==0.3.3
215
+ mido==1.2.10
216
+ mistune==0.8.4
217
+ more-itertools==8.6.0
218
+ mpld3==0.3
219
+ mpmath==1.2.1
220
+ multidict==5.2.0
221
+ multiprocess==0.70.11.1
222
+ nbclient==0.5.3
223
+ nbconvert==5.6.1
224
+ nbformat==5.9.0
225
+ NEMO==4.3.2
226
+ nemo-toolkit==1.4.0
227
+ nest-asyncio==1.5.1
228
+ networkx==2.8.8
229
+ nltk==3.2.4
230
+ nodeenv==1.5.0
231
+ normalize==2.0.2
232
+ notebook==6.3.0
233
+ numba==0.52.0
234
+ numpy==1.19.4
235
+ nvidia-cublas-cu11==11.10.3.66
236
+ nvidia-cuda-cupti-cu11==11.7.101
237
+ nvidia-cuda-nvrtc-cu11==11.7.99
238
+ nvidia-cuda-runtime-cu11==11.7.99
239
+ nvidia-cudnn-cu11==8.5.0.96
240
+ nvidia-cufft-cu11==10.9.0.58
241
+ nvidia-curand-cu11==10.2.10.91
242
+ nvidia-cusolver-cu11==11.4.0.1
243
+ nvidia-cusparse-cu11==11.7.4.91
244
+ nvidia-nccl-cu11==2.14.3
245
+ nvidia-nvtx-cu11==11.7.91
246
+ oauthlib==3.1.0
247
+ omegaconf==2.3.0
248
+ onnx==1.10.2
249
+ OpenCC==1.1.2
250
+ opencv-python==4.4.0.46
251
+ openpyxl==3.0.9
252
+ opensmile==2.2.0
253
+ opt-einsum==3.3.0
254
+ optuna==2.10.0
255
+ ordered-set==4.1.0
256
+ orjson==3.8.4
257
+ oyaml==1.0
258
+ packaging==22.0
259
+ pandas==1.2.5
260
+ pandocfilters==1.4.3
261
+ pangu==4.0.6.1
262
+ parameterized==0.8.1
263
+ parso==0.7.1
264
+ pathlib2==2.3.7.post1
265
+ pathspec==0.5.5
266
+ pathtools==0.1.2
267
+ pbr==5.6.0
268
+ pefile==2019.4.18
269
+ pescador==2.1.0
270
+ pesq==0.0.3
271
+ pexpect==4.8.0
272
+ phonemizer==2.2.1
273
+ pickleshare==0.7.5
274
+ Pillow==9.3.0
275
+ pip-api==0.0.23
276
+ pipreqs==0.4.11
277
+ pluggy==0.13.1
278
+ pooch==1.3.0
279
+ portalocker==2.3.2
280
+ pptree==3.1
281
+ pre-commit==2.9.0
282
+ preprocessing==0.1.13
283
+ pretty-midi==0.2.9
284
+ prettytable==2.2.1
285
+ primePy==1.3
286
+ progressbar2==3.53.1
287
+ prometheus-client==0.10.1
288
+ promise==2.3
289
+ prompt-toolkit==3.0.8
290
+ protobuf==3.20.3
291
+ psutil==5.6.6
292
+ ptyprocess==0.6.0
293
+ py==1.9.0
294
+ py-espeak-ng==0.1.8
295
+ py4j==0.10.9.7
296
+ pyannote.audio==2.1.1
297
+ pyannote.core==4.5
298
+ pyannote.database==4.1.3
299
+ pyannote.metrics==3.2.1
300
+ pyannote.pipeline==2.3
301
+ pyannotebook==0.1.0.dev0
302
+ PyArabic==0.6.15
303
+ pyarrow==3.0.0
304
+ pyasn1==0.4.8
305
+ pyasn1-modules==0.2.8
306
+ pybind11==2.8.1
307
+ pybtex==0.24.0
308
+ pybtex-docutils==1.0.1
309
+ pycodestyle==2.5.0
310
+ pycparser==2.20
311
+ pycryptodome==3.16.0
312
+ pyctcdecode==0.4.0
313
+ pydantic==1.10.4
314
+ pyDeprecate==0.3.1
315
+ pydub==0.25.1
316
+ pyflakes==2.1.1
317
+ Pygments==2.15.1
318
+ pygtrie==2.5.0
319
+ PyJWT==2.7.0
320
+ pymodbus==2.5.3
321
+ pyparsing==2.4.7
322
+ pyperclip==1.8.2
323
+ pypinyin==0.43.0
324
+ pyrsistent==0.17.3
325
+ pyserial==3.5
326
+ PySocks==1.7.1
327
+ pystoi==0.3.3
328
+ pytest==5.4.1
329
+ pytest-runner==5.3.1
330
+ python-bidi==0.4.2
331
+ python-crfsuite==0.9.7
332
+ python-dateutil==2.8.2
333
+ python-editor==1.0.4
334
+ python-Levenshtein==0.12.2
335
+ python-multipart==0.0.5
336
+ python-utils==2.4.0
337
+ pytorch-lightning==1.6.5
338
+ pytorch-metric-learning==1.7.3
339
+ pytorch-revgrad==0.2.0
340
+ pytube==11.0.1
341
+ pytz==2022.6
342
+ PyWavelets==1.1.1
343
+ PyYAML==6.0
344
+ pyzmq==20.0.0
345
+ rapidfuzz==1.8.2
346
+ readchar==4.0.5
347
+ regex==2020.11.13
348
+ requests==2.28.1
349
+ requests-oauthlib==1.3.0
350
+ resampy==0.2.2
351
+ rfc3986==1.4.0
352
+ rich==13.4.2
353
+ richenum==1.3.1
354
+ rsa==4.7
355
+ ruamel.yaml==0.17.21
356
+ ruamel.yaml.clib==0.2.7
357
+ s3m==1.1.0
358
+ s3transfer==0.5.0
359
+ sacrebleu==2.0.0
360
+ sacremoses==0.0.44
361
+ safetensors==0.3.1
362
+ scikit-image==0.18.1
363
+ scikit-learn==0.23.2
364
+ scipy==1.5.4
365
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
366
+ seaborn==0.11.1
367
+ SecretStorage==3.3.3
368
+ segments==2.1.3
369
+ segtok==1.5.11
370
+ semantic-version==2.10.0
371
+ semver==2.13.0
372
+ Send2Trash==1.5.0
373
+ sentencepiece==0.1.99
374
+ sentry-sdk==1.4.3
375
+ shellingham==1.4.0
376
+ shortuuid==1.0.7
377
+ SIDEKIT==1.3.8.5.2
378
+ simplejson==3.17.5
379
+ singledispatchmethod==1.0
380
+ six==1.15.0
381
+ smart-open==5.0.0
382
+ smmap==5.0.0
383
+ sniffio==1.3.0
384
+ snowballstemmer==2.0.0
385
+ sortedcollections==2.1.0
386
+ sortedcontainers==2.4.0
387
+ sounddevice==0.4.5
388
+ SoundFile==0.10.3.post1
389
+ soupsieve==2.3
390
+ sox==1.4.1
391
+ sparsemax==0.1.9
392
+ speechbrain==0.5.14
393
+ sphfile==1.0.3
394
+ Sphinx==3.3.1
395
+ sphinx-rtd-theme==0.2.4
396
+ sphinxcontrib-applehelp==1.0.2
397
+ sphinxcontrib-bibtex==2.4.1
398
+ sphinxcontrib-devhelp==1.0.2
399
+ sphinxcontrib-htmlhelp==1.0.3
400
+ sphinxcontrib-jsmath==1.0.1
401
+ sphinxcontrib-qthelp==1.0.3
402
+ sphinxcontrib-serializinghtml==1.1.4
403
+ SQLAlchemy==1.4.25
404
+ sqlitedict==2.1.0
405
+ sqlparse==0.4.2
406
+ stanza==1.4.2
407
+ starlette==0.27.0
408
+ starsessions==1.3.0
409
+ stevedore==3.4.0
410
+ subprocess32==3.5.4
411
+ sympy==1.9
412
+ tabulate==0.8.9
413
+ tensorboard==2.4.0
414
+ tensorboard-plugin-wit==1.7.0
415
+ tensorboardX==2.6.1
416
+ tensorflow==2.4.0
417
+ tensorflow-estimator==2.4.0
418
+ termcolor==1.1.0
419
+ terminado==0.9.4
420
+ testpath==0.4.4
421
+ threadpoolctl==2.1.0
422
+ tifffile==2020.12.8
423
+ tikzplotlib==0.9.8
424
+ tinycss2==1.2.1
425
+ tkseem==0.0.3
426
+ tokenizers==0.13.3
427
+ toml==0.10.2
428
+ toolz==0.12.0
429
+ torch==1.13.1
430
+ torch-audiomentations==0.11.0
431
+ torch-pitch-shift==1.2.4
432
+ torch-stft==0.1.4
433
+ torchaudio==0.13.1
434
+ torchmetrics==0.11.4
435
+ torchvision==0.14.1
436
+ tornado==6.1
437
+ tqdm==4.61.1
438
+ trackrip==1.2.1
439
+ traitlets==5.9.0
440
+ transformer-smaller-training-vocab==0.3.1
441
+ transformers==4.30.2
442
+ triton==2.0.0
443
+ typed-ast==1.4.1
444
+ typer==0.4.0
445
+ typing-extensions==4.4.0
446
+ uc-micro-py==1.0.1
447
+ Unidecode==1.3.2
448
+ uritemplate==3.0.1
449
+ urllib3==1.26.2
450
+ uvicorn==0.20.0
451
+ versioneer==0.28
452
+ virtualenv==20.2.1
453
+ wandb==0.12.6
454
+ wcwidth==0.2.5
455
+ webdataset==0.1.62
456
+ webencodings==0.5.1
457
+ websocket-client==1.6.1
458
+ websockets==10.4
459
+ Werkzeug==1.0.1
460
+ wget==3.2
461
+ widgetsnbextension==3.5.1
462
+ Wikipedia-API==0.6.0
463
+ wordninja==2.0.0
464
+ wrapt==1.12.1
465
+ xmltodict==0.13.0
466
+ xxhash==2.0.0
467
+ yamllint==1.23.0
468
+ yarg==0.1.9
469
+ yarl==1.7.2
470
+ yaspin==2.1.0
471
+ youtokentome==1.0.6
472
+ youtube-dl==2021.6.6
473
+ zipp==3.6.0
474
+ ==============================
475
+ Git revision:
476
+ be9098b
477
+ ==============================
478
+ CUDA version:
479
+ 11.7
results/non_semi_final_stac/hyperparams.yaml CHANGED
@@ -1,5 +1,5 @@
1
- # Generated 2023-09-20 from:
2
- # /home/salah/Code_Switched_Tunisian_Speech_Recognition/cs.yaml
3
  # yamllint disable
4
  # Generated 2023-08-03 from:
5
  # /home/salah/new_tunisian_model/hparams/train_tunisian_withwavlm.yaml
 
1
+ # Generated 2023-09-25 from:
2
+ # /home/salah/Code-Switched-Tunisian-SpeechToText/cs.yaml
3
  # yamllint disable
4
  # Generated 2023-08-03 from:
5
  # /home/salah/new_tunisian_model/hparams/train_tunisian_withwavlm.yaml
results/non_semi_final_stac/log.txt CHANGED
The diff for this file is too large to render. See raw diff