anonymoussubmitter222 committed on
Commit
8ae3bec
1 Parent(s): 18b985c

first commit

Browse files
Files changed (27) hide show
  1. .gitattributes +1 -0
  2. README.md +84 -13
  3. app.py +430 -0
  4. outdomain.arpa +3 -0
  5. requirements.txt +5 -0
  6. semi_wavlm_large_tunisian_ctc/1234/app.py +430 -0
  7. semi_wavlm_large_tunisian_ctc/1234/env.log +479 -0
  8. semi_wavlm_large_tunisian_ctc/1234/hyperparams.yaml +177 -0
  9. semi_wavlm_large_tunisian_ctc/1234/log.txt +2270 -0
  10. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/CKPT.yaml +4 -0
  11. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/brain.ckpt +3 -0
  12. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/counter.ckpt +3 -0
  13. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/dataloader-TRAIN.ckpt +3 -0
  14. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/model.ckpt +3 -0
  15. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/modelopt.ckpt +3 -0
  16. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/scheduler_model.ckpt +3 -0
  17. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/scheduler_wav2vec.ckpt +3 -0
  18. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/wav2vec2.ckpt +3 -0
  19. semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/wav2vec_opt.ckpt +3 -0
  20. semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt +44 -0
  21. taric_test.csv +381 -0
  22. train_semi.yaml +175 -0
  23. train_with_wavlm.py +399 -0
  24. wavlm-large/README.md +64 -0
  25. wavlm-large/config.json +99 -0
  26. wavlm-large/preprocessor_config.json +9 -0
  27. wavlm-large/pytorch_model.bin +3 -0
.gitattributes CHANGED
@@ -31,5 +31,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
31
  *.wasm filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.arpa filter=lfs diff=lfs merge=lfs -text
35
  *.zst filter=lfs diff=lfs merge=lfs -text
36
  *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,13 +1,84 @@
1
- ---
2
- title: Tunisian Speech Recognition
3
- emoji: 🏢
4
- colorFrom: red
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 3.44.4
8
- app_file: app.py
9
- pinned: false
10
- license: cc-by-4.0
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Overview
2
+
3
+ This project aims to create an Automatic Speech Recognition (ASR) model dedicated for the Tunisian Arabic dialect. The goal is to improve speech recognition technology for underrepresented linguistic communities by transcribing Tunisian dialect speech into written text.
4
+
5
+ ## Dataset
6
+ Part of the audio and text data (The ones we collected) used to train and test the model has been provided to encourage and support research within the community. Please find the dataset [here](https://zenodo.org/record/8370566). This Zenodo record contains labeled and unlabeled Tunisian Arabic audio data, along with textual data for language modelling.
7
+ The folder also contains a 4-gram language model trained with KenLM on data released within the Zenodo record. The .arpa file is called "outdomain.arpa".
8
+
9
+ ## Performance
10
+
11
+ The following table summarizes the performance of the model on various considered test sets :
12
+
13
+ | Dataset | CER | WER |
14
+ | :-------- | :------- | :------------------------- |
15
+ | `TARIC` | `6.22%` | `10.55%` |
16
+ | `IWSLT` | `21.18%` | `39.53%` |
17
+ | `TunSwitch TO` | `9.67%` | `25.54%` |
18
+
19
+ More details about the test sets, and the conditions leading to this performance, are given in the paper.
20
+
21
+
22
+
23
+ ## Team
24
+
25
+ Here are the team members who have contributed to this project
26
+
27
+ * [Salah Zaiem](https://fr.linkedin.com/in/salah-zaiem)
28
+ * [Ahmed Amine Ben Aballah](https://www.linkedin.com/in/aabenz/)
29
+ * [Ata Kaboudi](https://www.linkedin.com/in/ata-kaboudi-63365b1a8)
30
+ * [Amir Kanoun](https://tn.linkedin.com/in/ahmed-amir-kanoun)
31
+
32
+ ## Paper
33
+ More in-depth details and insights are available in a released preprint. Please find the paper [here](https://arxiv.org/abs/2309.11327).
34
+ If you use or refer to this model, please cite :
35
+
36
+ ```
37
+ @misc{abdallah2023leveraging,
38
+ title={Leveraging Data Collection and Unsupervised Learning for Code-switched Tunisian Arabic Automatic Speech Recognition},
39
+ author={Ahmed Amine Ben Abdallah and Ata Kabboudi and Amir Kanoun and Salah Zaiem},
40
+ year={2023},
41
+ eprint={2309.11327},
42
+ archivePrefix={arXiv},
43
+ primaryClass={eess.AS}
44
+ }
45
+ ```
46
+
47
+
48
+ ## Datasets
49
+ This ASR model was trained on :
50
+ * TARIC : The corpus, named TARIC (Tunisian Arabic Railway Interaction Corpus) has a collection of audio recordings and transcriptions from dialogues in the Tunisian Railway Transport Network. - [Taric Corpus](https://aclanthology.org/L14-1385/) -
51
+ * IWSLT : A Tunisian conversational speech - [IWSLT Corpus](https://iwslt.org/2022/dialect)-
52
+ * TunSwitch : Our crowd-collected dataset described in the paper presented above.
53
+
54
+ ## Demo
55
+ Here is a working live demo : [LINK](https://huggingface.co/spaces/SalahZa/Code-Switched-Tunisian-SpeechToText)
56
+
57
+
58
+ ## Inference
59
+
60
+ ### 1. Create a CSV test file
61
+ First, you have to create a csv file that follows SpeechBrain's format, which contains 4 columns:
62
+ * ID: contains a unique identifier for each audio sample in the dataset
63
+ * wav: contains the path to the audio file
64
+ * wrd: contains the text transcription of the spoken content in the audio file, if you have it and use your set for evaluation. Put any placeholder text if you don't have transcriptions. An example is provided in this folder; the file is called: taric_test.csv
65
+ * duration: the duration of the audio in seconds
66
+
67
+
68
+ ### 2. Adjust the hyperparams.yaml file
69
+
70
+ Adjust the path of **test_csv** parameter to your csv file path
71
+
72
+
73
+ To run this recipe, do the following:
74
+ ```
75
+ > python train_with_wavlm.py semi_wavlm_large_tunisian_ctc/1234/hyperparams.yaml --test_csv=path_to_csv
76
+ ```
77
+
78
+ If you want to infer on single files, the space demo offers proper easy-to-use inference code.
79
+
80
+
81
+ ## Contact :
82
+ If you have questions, you can send an email to : zaiemsalah@gmail.com
83
+
84
+
app.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import sys
3
+ import torch
4
+ import logging
5
+ import gradio as gr
6
+ import speechbrain as sb
7
+ from pathlib import Path
8
+ import os
9
+ import torchaudio
10
+ from hyperpyyaml import load_hyperpyyaml
11
+ from speechbrain.tokenizers.SentencePiece import SentencePiece
12
+ from speechbrain.utils.data_utils import undo_padding
13
+ from speechbrain.utils.distributed import run_on_main
14
+
15
+ """Recipe for training a sequence-to-sequence ASR system with CommonVoice.
16
+ The system employs a wav2vec2 encoder and a CTC decoder.
17
+ Decoding is performed with greedy decoding (will be extended to beam search).
18
+
19
+ To run this recipe, do the following:
20
+ > python train_with_wav2vec2.py hparams/train_with_wav2vec2.yaml
21
+
22
+ With the default hyperparameters, the system employs a pretrained wav2vec2 encoder.
23
+ The wav2vec2 model is pretrained following the model given in the hprams file.
24
+ It may be dependent on the language.
25
+
26
+ The neural network is trained with CTC on sub-word units estimated with
27
+ Byte Pairwise Encoding (BPE).
28
+
29
+ The experiment file is flexible enough to support a large variety of
30
+ different systems. By properly changing the parameter files, you can try
31
+ different encoders, decoders, tokens (e.g, characters instead of BPE),
32
+ training languages (all CommonVoice languages), and many
33
+ other possible variations.
34
+
35
+ Authors
36
+ * Titouan Parcollet 2021
37
+ """
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
# Define training procedure
class ASR(sb.core.Brain):
    """CTC-based ASR system: a wav2vec2 SSL encoder followed by a small
    encoder and a linear CTC head.

    Training uses the CTC loss; evaluation decodes either greedily or,
    when ``hparams.use_language_modelling`` is set, with the module-level
    pyctcdecode ``decoder`` (n-gram LM rescoring).
    """

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output
        log-probabilities.

        Returns (p_ctc, wav_lens): per-frame log-probabilities and the
        relative lengths of the input waveforms.
        """
        batch = batch.to(self.device)
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
        # Waveform augmentation is applied only during training, and only
        # if an "augmentation" module is configured in the hparams.
        if stage == sb.Stage.TRAIN:
            if hasattr(self.hparams, "augmentation"):
                wavs = self.hparams.augmentation(wavs, wav_lens)

        # Forward pass: SSL encoder -> encoder -> CTC projection -> log-softmax.
        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return p_ctc, wav_lens

    def treat_wav(self, sig):
        """Transcribe a single waveform tensor to text, on CPU.

        NOTE(review): relies on the module-level ``decoder`` (pyctcdecode)
        being defined; the relative length tensor [1] marks the full signal
        as valid.
        """
        feats = self.modules.wav2vec2(sig.to("cpu"), torch.tensor([1]).to("cpu"))
        feats = self.modules.enc(feats)
        logits = self.modules.ctc_lin(feats)
        p_ctc = self.hparams.log_softmax(logits)
        predicted_words = []
        for logs in p_ctc:
            text = decoder.decode(logs.detach().cpu().numpy())
            predicted_words.append(text.split(" "))
        # Single utterance: return the first (and only) word sequence.
        return " ".join(predicted_words[0])

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets.

        Outside of training, also accumulates WER/CER statistics.
        """
        p_ctc, wav_lens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage != sb.Stage.TRAIN:
            predicted_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
            # Decode token terms to words.
            if self.hparams.use_language_modelling:
                # n-gram LM beam search via the module-level pyctcdecode decoder.
                predicted_words = []
                for logs in p_ctc:
                    text = decoder.decode(logs.detach().cpu().numpy())
                    predicted_words.append(text.split(" "))
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]
            # Convert indices to words.
            target_words = [wrd.split(" ") for wrd in batch.wrd]

            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input.

        Supports gradient accumulation (``grad_accumulation_factor``) and
        automatic mixed precision.
        """
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision.
        # TOFIX: CTC fine-tuning currently is unstable.
        # This is certainly due to CTC being done in fp16 instead of fp32.
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:
                if not self.hparams.wav2vec2.freeze:
                    self.scaler.unscale_(self.wav2vec_optimizer)
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    # The wav2vec2 encoder only starts updating after warmup.
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.scaler.step(self.wav2vec_optimizer)
                    self.scaler.step(self.model_optimizer)
                    self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass.
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.wav2vec_optimizer.step()
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches."""
        # Fix: the original ran compute_forward BEFORE entering no_grad,
        # so the eval forward pass built an autograd graph for nothing.
        # Running both steps under no_grad saves memory and compute and
        # leaves the returned loss value unchanged.
        with torch.no_grad():
            predictions = self.compute_forward(batch, stage=stage)
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch."""
        if stage != sb.Stage.TRAIN:
            # Fresh metric accumulators for each validation/test epoch.
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch: LR annealing, logging and
        checkpointing (VALID), or writing the WER report (TEST)."""
        # Compute/store important stats.
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            if not self.hparams.wav2vec2.freeze:
                sb.nnet.schedulers.update_learning_rate(
                    self.wav2vec_optimizer, new_lr_wav2vec
                )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                    "lr_wav2vec": old_lr_wav2vec,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Keep only the best checkpoint according to WER.
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            with open(self.hparams.wer_file, "w") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        "Initializes the wav2vec2 optimizer and model optimizer."

        # If the wav2vec encoder is unfrozen, we create its optimizer and
        # register it for checkpoint recovery.
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
                self.modules.wav2vec2.parameters()
            )
            if self.checkpointer is not None:
                self.checkpointer.add_recoverable(
                    "wav2vec_opt", self.wav2vec_optimizer
                )

        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Zero the gradients of both optimizers (only the model optimizer
        when the wav2vec2 encoder is frozen)."""
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer.zero_grad(set_to_none)
        self.model_optimizer.zero_grad(set_to_none)
240
# Define custom data procedure
def dataio_prepare(hparams):
    """Create the train/valid/test DynamicItemDatasets and attach their
    audio and text processing pipelines.

    Returns (train_data, valid_data, test_datasets, label_encoder).
    """

    # 1. Build the datasets from their CSV manifests.
    data_folder = hparams["data_folder"]

    train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
        csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
    )

    sorting = hparams["sorting"]
    if sorting in ("ascending", "descending"):
        # Sorting the training data by duration speeds up training and
        # improves results; over-long utterances are filtered out.
        train_data = train_data.filtered_sorted(
            sort_key="duration",
            reverse=(sorting == "descending"),
            key_max_value={"duration": hparams["avoid_if_longer_than"]},
        )
        # Sorted data must not be re-shuffled by the dataloader.
        hparams["dataloader_options"]["shuffle"] = False
    elif sorting != "random":
        raise NotImplementedError(
            "sorting must be random, ascending or descending"
        )

    valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
        csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
    )
    # Sorting the validation data makes validation faster.
    valid_data = valid_data.filtered_sorted(sort_key="duration")

    test_datasets = {}
    for csv_file in hparams["test_csv"]:
        name = Path(csv_file).stem
        dataset = sb.dataio.dataset.DynamicItemDataset.from_csv(
            csv_path=csv_file, replacements={"data_root": data_folder}
        )
        test_datasets[name] = dataset.filtered_sorted(sort_key="duration")

    datasets = [train_data, valid_data] + list(test_datasets.values())

    # 2. Audio pipeline: read the file and resample to the model's rate.
    @sb.utils.data_pipeline.takes("wav")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline(wav):
        info = torchaudio.info(wav)
        sig = sb.dataio.dataio.read_audio(wav)
        return torchaudio.transforms.Resample(
            info.sample_rate, hparams["sample_rate"],
        )(sig)

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
    label_encoder = sb.dataio.encoder.CTCTextEncoder()

    # 3. Text pipeline: characters -> encoded token ids -> LongTensor.
    @sb.utils.data_pipeline.takes("wrd")
    @sb.utils.data_pipeline.provides(
        "wrd", "char_list", "tokens_list", "tokens"
    )
    def text_pipeline(wrd):
        yield wrd
        char_list = list(wrd)
        yield char_list
        tokens_list = label_encoder.encode_sequence(char_list)
        yield tokens_list
        yield torch.LongTensor(tokens_list)

    sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)

    # Load (or build from the training data) the character label encoder.
    lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
    label_encoder.load_or_create(
        path=lab_enc_file,
        from_didatasets=[train_data],
        output_key="char_list",
        special_labels={
            "blank_label": hparams["blank_index"],
            "unk_label": hparams["unk_index"],
        },
        sequence_input=True,
    )

    # 4. Select the keys each batch exposes.
    sb.dataio.dataset.set_output_keys(
        datasets, ["id", "sig", "wrd", "char_list", "tokens"],
    )
    return train_data, valid_data, test_datasets, label_encoder
343
+
344
+
345
+
346
# Load hyperparameters file with command-line overrides.
hparams_file, run_opts, overrides = sb.parse_arguments(["train_semi.yaml"])
with open(hparams_file) as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# If --distributed_launch then
# create ddp_group with the right communication protocol.
sb.utils.distributed.ddp_init_group(run_opts)


# Create experiment directory.
sb.create_experiment_directory(
    experiment_directory=hparams["output_folder"],
    hyperparams_to_save=hparams_file,
    overrides=overrides,
)

# Load the character label encoder saved at training time. Passing an
# empty dataset means the encoder file must already exist on disk.
label_encoder = sb.dataio.encoder.CTCTextEncoder()

lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
special_labels = {
    "blank_label": hparams["blank_index"],
    "unk_label": hparams["unk_index"]
}
label_encoder.load_or_create(
    path=lab_enc_file,
    from_didatasets=[[]],
    output_key="char_list",
    special_labels=special_labels,
    sequence_input=True,
)

from pyctcdecode import build_ctcdecoder
ind2lab = label_encoder.ind2lab
# Fix: debug `print` calls left in the production app are replaced by
# logger.debug so stdout stays clean in the Space.
logger.debug("ind2lab: %s", ind2lab)
labels = [ind2lab[x] for x in range(len(ind2lab))]
# Replace the <blank> token with a blank character, needed for pyctcdecode.
labels = [""] + labels[1:-1] + ["1"]
logger.debug("decoder labels: %s", labels)
decoder = build_ctcdecoder(
    labels,
    kenlm_model_path=hparams["ngram_lm_path"],  # .arpa or .bin
    alpha=0.5,  # Default by KenLM
    beta=1.0,  # Default by KenLM
)
# Trainer initialization — inference in the Space runs on CPU.
run_opts["device"] = "cpu"
asr_brain = ASR(
    modules=hparams["modules"],
    hparams=hparams,
    run_opts=run_opts,
    checkpointer=hparams["checkpointer"],
)

# Adding objects to trainer and restoring the trained weights.
asr_brain.tokenizer = label_encoder
asr_brain.checkpointer.recover_if_possible(device="cpu")
asr_brain.modules.eval()
407
+
408
def treat_wav_file(file_mic, file_upload, asr=asr_brain, device="cpu"):
    """Transcribe audio coming from the microphone and/or an uploaded file.

    When both inputs are given, the microphone recording wins and a warning
    is prefixed to the returned transcription. When neither is given, an
    error message is returned instead.
    """
    # Fix: the original computed warn_output but never used it, so the
    # user never saw the warning. It is now prefixed to the result.
    warn_output = ""
    if (file_mic is not None) and (file_upload is not None):
        warn_output = "WARNING: You've uploaded an audio file and used the microphone. The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        wav = file_mic
    elif (file_mic is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"
    elif file_mic is not None:
        wav = file_mic
    else:
        wav = file_upload
    sig, sr = torchaudio.load(wav)
    tensor_wav = sig.to(device)
    # The model expects 16 kHz input.
    resampled = torchaudio.functional.resample(tensor_wav, sr, 16000)
    sentence = asr.treat_wav(resampled)
    return warn_output + sentence
423
+
424
# Build the Gradio UI: microphone and file-upload inputs, text output.
demo = gr.Interface(
    fn=treat_wav_file,
    inputs=[
        gr.Audio(source="microphone", type='filepath', label="record", optional=True),
        gr.Audio(source="upload", type='filepath', label="filein", optional=True),
    ],
    outputs="text",
)
demo.launch()
429
+
430
+
outdomain.arpa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24654c1d236bb1bd367125131c847c4a734e69914eda71a6786964c20440d8fe
3
+ size 324243244
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ https://github.com/kpu/kenlm/archive/master.zip
2
+ transformers
3
+ speechbrain
4
+ pyctcdecode
5
+
semi_wavlm_large_tunisian_ctc/1234/app.py ADDED
@@ -0,0 +1,430 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import sys
3
+ import torch
4
+ import logging
5
+ import gradio as gr
6
+ import speechbrain as sb
7
+ from pathlib import Path
8
+ import os
9
+ import torchaudio
10
+ from hyperpyyaml import load_hyperpyyaml
11
+ from speechbrain.tokenizers.SentencePiece import SentencePiece
12
+ from speechbrain.utils.data_utils import undo_padding
13
+ from speechbrain.utils.distributed import run_on_main
14
+
15
+ """Recipe for training a sequence-to-sequence ASR system with CommonVoice.
16
+ The system employs a wav2vec2 encoder and a CTC decoder.
17
+ Decoding is performed with greedy decoding (will be extended to beam search).
18
+
19
+ To run this recipe, do the following:
20
+ > python train_with_wav2vec2.py hparams/train_with_wav2vec2.yaml
21
+
22
+ With the default hyperparameters, the system employs a pretrained wav2vec2 encoder.
23
+ The wav2vec2 model is pretrained following the model given in the hprams file.
24
+ It may be dependent on the language.
25
+
26
+ The neural network is trained with CTC on sub-word units estimated with
27
+ Byte Pairwise Encoding (BPE).
28
+
29
+ The experiment file is flexible enough to support a large variety of
30
+ different systems. By properly changing the parameter files, you can try
31
+ different encoders, decoders, tokens (e.g, characters instead of BPE),
32
+ training languages (all CommonVoice languages), and many
33
+ other possible variations.
34
+
35
+ Authors
36
+ * Titouan Parcollet 2021
37
+ """
38
+
39
+ logger = logging.getLogger(__name__)
40
+
41
+
42
# Define training procedure
class ASR(sb.core.Brain):
    """CTC-based ASR system: a wav2vec2 SSL encoder followed by a small
    encoder and a linear CTC head.

    Training uses the CTC loss; evaluation decodes either greedily or,
    when ``hparams.use_language_modelling`` is set, with the module-level
    pyctcdecode ``decoder`` (n-gram LM rescoring).
    """

    def compute_forward(self, batch, stage):
        """Forward computations from the waveform batches to the output
        log-probabilities.

        Returns (p_ctc, wav_lens): per-frame log-probabilities and the
        relative lengths of the input waveforms.
        """
        batch = batch.to(self.device)
        wavs, wav_lens = batch.sig
        wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
        # Waveform augmentation is applied only during training, and only
        # if an "augmentation" module is configured in the hparams.
        if stage == sb.Stage.TRAIN:
            if hasattr(self.hparams, "augmentation"):
                wavs = self.hparams.augmentation(wavs, wav_lens)

        # Forward pass: SSL encoder -> encoder -> CTC projection -> log-softmax.
        feats = self.modules.wav2vec2(wavs, wav_lens)
        x = self.modules.enc(feats)
        logits = self.modules.ctc_lin(x)
        p_ctc = self.hparams.log_softmax(logits)

        return p_ctc, wav_lens

    def treat_wav(self, sig):
        """Transcribe a single waveform tensor to text, on CPU.

        NOTE(review): relies on the module-level ``decoder`` (pyctcdecode)
        being defined; the relative length tensor [1] marks the full signal
        as valid.
        """
        feats = self.modules.wav2vec2(sig.to("cpu"), torch.tensor([1]).to("cpu"))
        feats = self.modules.enc(feats)
        logits = self.modules.ctc_lin(feats)
        p_ctc = self.hparams.log_softmax(logits)
        predicted_words = []
        for logs in p_ctc:
            text = decoder.decode(logs.detach().cpu().numpy())
            predicted_words.append(text.split(" "))
        # Single utterance: return the first (and only) word sequence.
        return " ".join(predicted_words[0])

    def compute_objectives(self, predictions, batch, stage):
        """Computes the loss (CTC) given predictions and targets.

        Outside of training, also accumulates WER/CER statistics.
        """
        p_ctc, wav_lens = predictions

        ids = batch.id
        tokens, tokens_lens = batch.tokens

        loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)

        if stage != sb.Stage.TRAIN:
            predicted_tokens = sb.decoders.ctc_greedy_decode(
                p_ctc, wav_lens, blank_id=self.hparams.blank_index
            )
            # Decode token terms to words.
            if self.hparams.use_language_modelling:
                # n-gram LM beam search via the module-level pyctcdecode decoder.
                predicted_words = []
                for logs in p_ctc:
                    text = decoder.decode(logs.detach().cpu().numpy())
                    predicted_words.append(text.split(" "))
            else:
                predicted_words = [
                    "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
                    for utt_seq in predicted_tokens
                ]
            # Convert indices to words.
            target_words = [wrd.split(" ") for wrd in batch.wrd]

            self.wer_metric.append(ids, predicted_words, target_words)
            self.cer_metric.append(ids, predicted_words, target_words)

        return loss

    def fit_batch(self, batch):
        """Train the parameters given a single batch in input.

        Supports gradient accumulation (``grad_accumulation_factor``) and
        automatic mixed precision.
        """
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision.
        # TOFIX: CTC fine-tuning currently is unstable.
        # This is certainly due to CTC being done in fp16 instead of fp32.
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:
                if not self.hparams.wav2vec2.freeze:
                    self.scaler.unscale_(self.wav2vec_optimizer)
                self.scaler.unscale_(self.model_optimizer)
                if self.check_gradients(loss):
                    # The wav2vec2 encoder only starts updating after warmup.
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.scaler.step(self.wav2vec_optimizer)
                    self.scaler.step(self.model_optimizer)
                    self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass.
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.wav2vec_optimizer.step()
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()

    def evaluate_batch(self, batch, stage):
        """Computations needed for validation/test batches."""
        # Fix: the original ran compute_forward BEFORE entering no_grad,
        # so the eval forward pass built an autograd graph for nothing.
        # Running both steps under no_grad saves memory and compute and
        # leaves the returned loss value unchanged.
        with torch.no_grad():
            predictions = self.compute_forward(batch, stage=stage)
            loss = self.compute_objectives(predictions, batch, stage=stage)
        return loss.detach()

    def on_stage_start(self, stage, epoch):
        """Gets called at the beginning of each epoch."""
        if stage != sb.Stage.TRAIN:
            # Fresh metric accumulators for each validation/test epoch.
            self.cer_metric = self.hparams.cer_computer()
            self.wer_metric = self.hparams.error_rate_computer()

    def on_stage_end(self, stage, stage_loss, epoch):
        """Gets called at the end of an epoch: LR annealing, logging and
        checkpointing (VALID), or writing the WER report (TEST)."""
        # Compute/store important stats.
        stage_stats = {"loss": stage_loss}
        if stage == sb.Stage.TRAIN:
            self.train_stats = stage_stats
        else:
            stage_stats["CER"] = self.cer_metric.summarize("error_rate")
            stage_stats["WER"] = self.wer_metric.summarize("error_rate")

        # Perform end-of-iteration things, like annealing, logging, etc.
        if stage == sb.Stage.VALID:
            old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
                stage_stats["loss"]
            )
            old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
                stage_stats["loss"]
            )
            sb.nnet.schedulers.update_learning_rate(
                self.model_optimizer, new_lr_model
            )
            if not self.hparams.wav2vec2.freeze:
                sb.nnet.schedulers.update_learning_rate(
                    self.wav2vec_optimizer, new_lr_wav2vec
                )
            self.hparams.train_logger.log_stats(
                stats_meta={
                    "epoch": epoch,
                    "lr_model": old_lr_model,
                    "lr_wav2vec": old_lr_wav2vec,
                },
                train_stats=self.train_stats,
                valid_stats=stage_stats,
            )
            # Keep only the best checkpoint according to WER.
            self.checkpointer.save_and_keep_only(
                meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
            )
        elif stage == sb.Stage.TEST:
            self.hparams.train_logger.log_stats(
                stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
                test_stats=stage_stats,
            )
            with open(self.hparams.wer_file, "w") as w:
                self.wer_metric.write_stats(w)

    def init_optimizers(self):
        "Initializes the wav2vec2 optimizer and model optimizer."

        # If the wav2vec encoder is unfrozen, we create its optimizer and
        # register it for checkpoint recovery.
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
                self.modules.wav2vec2.parameters()
            )
            if self.checkpointer is not None:
                self.checkpointer.add_recoverable(
                    "wav2vec_opt", self.wav2vec_optimizer
                )

        self.model_optimizer = self.hparams.model_opt_class(
            self.hparams.model.parameters()
        )

        if self.checkpointer is not None:
            self.checkpointer.add_recoverable("modelopt", self.model_optimizer)

    def zero_grad(self, set_to_none=False):
        """Zero the gradients of both optimizers (only the model optimizer
        when the wav2vec2 encoder is frozen)."""
        if not self.hparams.wav2vec2.freeze:
            self.wav2vec_optimizer.zero_grad(set_to_none)
        self.model_optimizer.zero_grad(set_to_none)
240
+ # Define custom data procedure
241
+ def dataio_prepare(hparams):
242
+ """This function prepares the datasets to be used in the brain class.
243
+ It also defines the data processing pipeline through user-defined functions."""
244
+
245
+ # 1. Define datasets
246
+ data_folder = hparams["data_folder"]
247
+
248
+ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
249
+ csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
250
+ )
251
+
252
+ if hparams["sorting"] == "ascending":
253
+ # we sort training data to speed up training and get better results.
254
+ train_data = train_data.filtered_sorted(
255
+ sort_key="duration",
256
+ key_max_value={"duration": hparams["avoid_if_longer_than"]},
257
+ )
258
+ # when sorting do not shuffle in dataloader ! otherwise is pointless
259
+ hparams["dataloader_options"]["shuffle"] = False
260
+
261
+ elif hparams["sorting"] == "descending":
262
+ train_data = train_data.filtered_sorted(
263
+ sort_key="duration",
264
+ reverse=True,
265
+ key_max_value={"duration": hparams["avoid_if_longer_than"]},
266
+ )
267
+ # when sorting do not shuffle in dataloader ! otherwise is pointless
268
+ hparams["dataloader_options"]["shuffle"] = False
269
+
270
+ elif hparams["sorting"] == "random":
271
+ pass
272
+
273
+ else:
274
+ raise NotImplementedError(
275
+ "sorting must be random, ascending or descending"
276
+ )
277
+
278
+ valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
279
+ csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
280
+ )
281
+ # We also sort the validation data so it is faster to validate
282
+ valid_data = valid_data.filtered_sorted(sort_key="duration")
283
+ test_datasets = {}
284
+ for csv_file in hparams["test_csv"]:
285
+ name = Path(csv_file).stem
286
+ test_datasets[name] = sb.dataio.dataset.DynamicItemDataset.from_csv(
287
+ csv_path=csv_file, replacements={"data_root": data_folder}
288
+ )
289
+ test_datasets[name] = test_datasets[name].filtered_sorted(
290
+ sort_key="duration"
291
+ )
292
+
293
+ datasets = [train_data, valid_data] + [i for k, i in test_datasets.items()]
294
+
295
+
296
+ # 2. Define audio pipeline:
297
+ @sb.utils.data_pipeline.takes("wav")
298
+ @sb.utils.data_pipeline.provides("sig")
299
+ def audio_pipeline(wav):
300
+ info = torchaudio.info(wav)
301
+ sig = sb.dataio.dataio.read_audio(wav)
302
+ resampled = torchaudio.transforms.Resample(
303
+ info.sample_rate, hparams["sample_rate"],
304
+ )(sig)
305
+ return resampled
306
+
307
+ sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
308
+ label_encoder = sb.dataio.encoder.CTCTextEncoder()
309
+
310
+ # 3. Define text pipeline:
311
+ @sb.utils.data_pipeline.takes("wrd")
312
+ @sb.utils.data_pipeline.provides(
313
+ "wrd", "char_list", "tokens_list", "tokens"
314
+ )
315
+ def text_pipeline(wrd):
316
+ yield wrd
317
+ char_list = list(wrd)
318
+ yield char_list
319
+ tokens_list = label_encoder.encode_sequence(char_list)
320
+ yield tokens_list
321
+ tokens = torch.LongTensor(tokens_list)
322
+ yield tokens
323
+
324
+ sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
325
+ lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
326
+ special_labels = {
327
+ "blank_label": hparams["blank_index"],
328
+ "unk_label": hparams["unk_index"]
329
+ }
330
+ label_encoder.load_or_create(
331
+ path=lab_enc_file,
332
+ from_didatasets=[train_data],
333
+ output_key="char_list",
334
+ special_labels=special_labels,
335
+ sequence_input=True,
336
+ )
337
+
338
+ # 4. Set output:
339
+ sb.dataio.dataset.set_output_keys(
340
+ datasets, ["id", "sig", "wrd", "char_list", "tokens"],
341
+ )
342
+ return train_data, valid_data,test_datasets, label_encoder
343
+
344
+
345
+
346
+ # Load hyperparameters file with command-line overrides
347
+ hparams_file, run_opts, overrides = sb.parse_arguments(["train_semi.yaml"])
348
+ with open(hparams_file) as fin:
349
+ hparams = load_hyperpyyaml(fin, overrides)
350
+
351
+ # If --distributed_launch then
352
+ # create ddp_group with the right communication protocol
353
+ sb.utils.distributed.ddp_init_group(run_opts)
354
+
355
+
356
+ # Create experiment directory
357
+ sb.create_experiment_directory(
358
+ experiment_directory=hparams["output_folder"],
359
+ hyperparams_to_save=hparams_file,
360
+ overrides=overrides,
361
+ )
362
+
363
+ # Due to DDP, we do the preparation ONLY on the main python process
364
+ # Defining tokenizer and loading it
365
+ # Create the datasets objects as well as tokenization and encoding :-D
366
+ label_encoder = sb.dataio.encoder.CTCTextEncoder()
367
+
368
+ lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
369
+ special_labels = {
370
+ "blank_label": hparams["blank_index"],
371
+ "unk_label": hparams["unk_index"]
372
+ }
373
+ label_encoder.load_or_create(
374
+ path=lab_enc_file,
375
+ from_didatasets=[[]],
376
+ output_key="char_list",
377
+ special_labels=special_labels,
378
+ sequence_input=True,
379
+ )
380
+
381
+ from pyctcdecode import build_ctcdecoder
382
+ ind2lab = label_encoder.ind2lab
383
+ print(ind2lab)
384
+ labels = [ind2lab[x] for x in range(len(ind2lab))]
385
+ labels = [""] + labels[1:-1] + ["1"]
386
+ # Replace the <blank> token with a blank character, needed for PyCTCdecode
387
+ print(labels)
388
+ decoder = build_ctcdecoder(
389
+ labels,
390
+ kenlm_model_path=hparams["ngram_lm_path"], # .arpa or .bin
391
+ alpha=0.5, # Default by KenLM
392
+ beta=1.0, # Default by KenLM
393
+ )
394
+ # Trainer initialization
395
+ run_opts["device"] = "cpu"
396
+ asr_brain = ASR(
397
+ modules=hparams["modules"],
398
+ hparams=hparams,
399
+ run_opts=run_opts,
400
+ checkpointer=hparams["checkpointer"],
401
+ )
402
+
403
+ # Adding objects to trainer.
404
+ asr_brain.tokenizer = label_encoder
405
+ asr_brain.checkpointer.recover_if_possible(device="cpu")
406
+ asr_brain.modules.eval()
407
+
408
+ def treat_wav_file(file_mic,file_upload ,asr=asr_brain, device="cpu") :
409
+ if (file_mic is not None) and (file_upload is not None):
410
+ warn_output = "WARNING: You've uploaded an audio file and used the microphone. The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
411
+ wav = file_mic
412
+ elif (file_mic is None) and (file_upload is None):
413
+ return "ERROR: You have to either use the microphone or upload an audio file"
414
+ elif file_mic is not None:
415
+ wav = file_mic
416
+ else:
417
+ wav = file_upload
418
+ sig, sr = torchaudio.load(wav)
419
+ tensor_wav = sig.to(device)
420
+ resampled = torchaudio.functional.resample( tensor_wav, sr, 16000)
421
+ sentence = asr.treat_wav(resampled)
422
+ return sentence
423
+
424
+ gr.Interface(
425
+ fn=treat_wav_file,
426
+ inputs=[gr.Audio(source="microphone", type='filepath', label = "record", optional = True),
427
+ gr.Audio(source="upload", type='filepath', label="filein", optional=True)]
428
+ ,outputs="text").launch()
429
+
430
+
semi_wavlm_large_tunisian_ctc/1234/env.log ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechBrain system description
2
+ ==============================
3
+ Python version:
4
+ 3.8.5 (default, Sep 4 2020, 07:30:14)
5
+ [GCC 7.3.0]
6
+ ==============================
7
+ Installed Python packages:
8
+ abkhazia==1.0
9
+ absl-py==0.11.0
10
+ aiofiles==23.2.1
11
+ aiohttp==3.8.0
12
+ aiosignal==1.2.0
13
+ alabaster==0.7.12
14
+ alembic==1.7.4
15
+ altair==4.2.0
16
+ altgraph==0.17
17
+ antlr4-python3-runtime==4.9.3
18
+ anyio==3.6.2
19
+ appdirs==1.4.4
20
+ argcomplete==1.12.2
21
+ argon2-cffi==20.1.0
22
+ arrow==1.2.3
23
+ asgiref==3.6.0
24
+ asteroid-filterbanks==0.4.0
25
+ astunparse==1.6.3
26
+ async-generator==1.10
27
+ async-timeout==4.0.0
28
+ attrdict==2.0.1
29
+ attrs==20.3.0
30
+ audeer==1.16.0
31
+ audformat==0.11.5
32
+ audinterface==0.7.0
33
+ audiofile==1.0.0
34
+ audiomentations==0.25.0
35
+ audioread==2.1.9
36
+ audobject==0.4.14
37
+ audresample==0.1.6
38
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
39
+ autopage==0.4.0
40
+ Babel==2.9.0
41
+ backcall==0.2.0
42
+ backports.cached-property==1.0.2
43
+ beautifulsoup4==4.10.0
44
+ black==19.10b0
45
+ bleach==3.3.0
46
+ blessed==1.20.0
47
+ boto3==1.20.2
48
+ botocore==1.23.2
49
+ bpemb==0.3.4
50
+ braceexpand==0.1.7
51
+ cachetools==4.2.0
52
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
53
+ cffi==1.14.3
54
+ cfgv==3.2.0
55
+ chardet==3.0.4
56
+ charset-normalizer==2.0.7
57
+ click==7.1.2
58
+ cliff==3.9.0
59
+ clldutils==3.5.4
60
+ cloudpickle==2.2.1
61
+ cmaes==0.8.2
62
+ cmake==3.18.4.post1
63
+ cmd2==2.2.0
64
+ colorama==0.4.4
65
+ colorlog==4.6.2
66
+ configparser==5.1.0
67
+ conllu==4.5.3
68
+ croniter==1.3.15
69
+ cryptography==38.0.4
70
+ csrgraph==0.1.28
71
+ csvw==1.8.1
72
+ cycler==0.10.0
73
+ Cython==0.29.21
74
+ dataclasses==0.6
75
+ dateutils==0.6.12
76
+ decorator==4.4.2
77
+ deepdiff==6.3.0
78
+ deepspeech==0.9.1
79
+ defusedxml==0.7.1
80
+ Deprecated==1.2.14
81
+ dill==0.3.3
82
+ Distance==0.1.3
83
+ distlib==0.3.1
84
+ Django==3.2.16
85
+ django-auditlog==2.2.1
86
+ django-filter==22.1
87
+ django-js-asset==1.2.2
88
+ django-mptt==0.14.0
89
+ djangorestframework==3.14.0
90
+ docker-pycreds==0.4.0
91
+ docopt==0.6.2
92
+ docutils==0.16
93
+ drf-excel==2.2.0
94
+ drf-flex-fields==1.0.0
95
+ drf-renderer-xlsx==0.4.1
96
+ easyocr==1.2.1
97
+ editdistance==0.6.0
98
+ einops==0.3.2
99
+ emoji==2.2.0
100
+ entrypoints==0.3
101
+ et-xmlfile==1.1.0
102
+ exceptiongroup==1.1.0
103
+ farasapy==0.0.14
104
+ fastapi==0.98.0
105
+ fastjsonschema==2.17.1
106
+ fasttext==0.9.2
107
+ ffmpeg-python==0.2.0
108
+ ffmpy==0.3.0
109
+ filelock==3.0.12
110
+ flair==0.12.2
111
+ flake8==3.7.9
112
+ flatbuffers==1.12
113
+ frozendict==2.0.7
114
+ frozenlist==1.2.0
115
+ fsspec==2021.11.0
116
+ ftfy==6.1.1
117
+ future==0.18.2
118
+ g2p-en==2.1.0
119
+ gast==0.3.3
120
+ gdown==4.4.0
121
+ gdrive==0.1.5
122
+ gensim==4.0.1
123
+ gitdb==4.0.9
124
+ GitPython==3.1.24
125
+ google-api-core==2.11.1
126
+ google-api-python-client==2.43.0
127
+ google-auth==1.24.0
128
+ google-auth-httplib2==0.1.0
129
+ google-auth-oauthlib==0.5.3
130
+ google-pasta==0.2.0
131
+ googleapis-common-protos==1.59.1
132
+ gradio==3.44.4
133
+ gradio-client==0.5.1
134
+ greenlet==1.1.2
135
+ grpcio==1.32.0
136
+ h11==0.14.0
137
+ h5features==1.3.2
138
+ h5py==2.10.0
139
+ hierarchy==0.4.0
140
+ hmmlearn==0.2.8
141
+ htk-io==0.5
142
+ httpcore==0.16.3
143
+ httplib2==0.22.0
144
+ httpx==0.23.3
145
+ huggingface-hub==0.15.1
146
+ hydra-colorlog==0.1.4
147
+ hydra-core==1.3.2
148
+ hyperopt==0.2.7
149
+ HyperPyYAML==1.1.0
150
+ hypothesis==6.61.2
151
+ identify==1.5.10
152
+ idna==2.10
153
+ imageio==2.9.0
154
+ imagesize==1.2.0
155
+ importlib-metadata==4.8.1
156
+ importlib-resources==5.2.2
157
+ inflect==5.3.0
158
+ inquirer==3.1.3
159
+ ipadic==1.0.0
160
+ ipyevents==2.0.1
161
+ ipykernel==5.3.4
162
+ ipython==7.19.0
163
+ ipython-genutils==0.2.0
164
+ ipywebrtc==0.6.0
165
+ ipywidgets==7.6.3
166
+ iso-639==0.4.5
167
+ isodate==0.6.0
168
+ isort==4.3.21
169
+ itsdangerous==2.1.2
170
+ Janome==0.5.0
171
+ jedi==0.17.2
172
+ jeepney==0.8.0
173
+ jieba==0.42.1
174
+ Jinja2==3.0.3
175
+ jiwer==2.2.0
176
+ jmespath==0.10.0
177
+ joblib==0.17.0
178
+ jsonschema==3.2.0
179
+ julius==0.2.7
180
+ jupyter-client==6.1.7
181
+ jupyter-core==4.7.0
182
+ jupyterlab-pygments==0.1.2
183
+ jupyterlab-widgets==1.0.0
184
+ kaitaistruct==0.9
185
+ kaldi-io==0.9.4
186
+ kaldi-python-io==1.2.2
187
+ kaldiio==2.17.2
188
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
189
+ Keras-Preprocessing==1.1.2
190
+ kiwisolver==1.3.1
191
+ lang-trans==0.6.0
192
+ langdetect==1.0.9
193
+ latexcodec==2.0.1
194
+ ldap3==2.9.1
195
+ librosa==0.9.0
196
+ lightning-cloud==0.5.37
197
+ lightning-utilities==0.8.0
198
+ linkify-it-py==1.0.3
199
+ lit==16.0.6
200
+ llvmlite==0.35.0
201
+ lxml==4.9.0
202
+ Mako==1.1.5
203
+ Markdown==3.3.3
204
+ markdown-it-py==3.0.0
205
+ MarkupSafe==2.1.3
206
+ marshmallow==3.14.0
207
+ matplotlib==3.3.3
208
+ mccabe==0.6.1
209
+ mcd==0.4
210
+ mdit-py-plugins==0.3.3
211
+ mdurl==0.1.2
212
+ mecab-python3==1.0.3
213
+ megatron-lm==2.2.0
214
+ metrics==0.3.3
215
+ mido==1.2.10
216
+ mistune==0.8.4
217
+ more-itertools==8.6.0
218
+ mpld3==0.3
219
+ mpmath==1.2.1
220
+ multidict==5.2.0
221
+ multiprocess==0.70.11.1
222
+ nbclient==0.5.3
223
+ nbconvert==5.6.1
224
+ nbformat==5.9.0
225
+ NEMO==4.3.2
226
+ nemo-toolkit==1.4.0
227
+ nest-asyncio==1.5.1
228
+ networkx==2.8.8
229
+ nltk==3.2.4
230
+ nodeenv==1.5.0
231
+ normalize==2.0.2
232
+ notebook==6.3.0
233
+ numba==0.52.0
234
+ numpy==1.19.4
235
+ nvidia-cublas-cu11==11.10.3.66
236
+ nvidia-cuda-cupti-cu11==11.7.101
237
+ nvidia-cuda-nvrtc-cu11==11.7.99
238
+ nvidia-cuda-runtime-cu11==11.7.99
239
+ nvidia-cudnn-cu11==8.5.0.96
240
+ nvidia-cufft-cu11==10.9.0.58
241
+ nvidia-curand-cu11==10.2.10.91
242
+ nvidia-cusolver-cu11==11.4.0.1
243
+ nvidia-cusparse-cu11==11.7.4.91
244
+ nvidia-nccl-cu11==2.14.3
245
+ nvidia-nvtx-cu11==11.7.91
246
+ oauthlib==3.1.0
247
+ omegaconf==2.3.0
248
+ onnx==1.10.2
249
+ OpenCC==1.1.2
250
+ opencv-python==4.4.0.46
251
+ openpyxl==3.0.9
252
+ opensmile==2.2.0
253
+ opt-einsum==3.3.0
254
+ optuna==2.10.0
255
+ ordered-set==4.1.0
256
+ orjson==3.8.4
257
+ oyaml==1.0
258
+ packaging==22.0
259
+ pandas==1.2.5
260
+ pandocfilters==1.4.3
261
+ pangu==4.0.6.1
262
+ parameterized==0.8.1
263
+ parso==0.7.1
264
+ pathlib2==2.3.7.post1
265
+ pathspec==0.5.5
266
+ pathtools==0.1.2
267
+ pbr==5.6.0
268
+ pefile==2019.4.18
269
+ pescador==2.1.0
270
+ pesq==0.0.3
271
+ pexpect==4.8.0
272
+ phonemizer==2.2.1
273
+ pickleshare==0.7.5
274
+ Pillow==9.3.0
275
+ pip-api==0.0.23
276
+ pipreqs==0.4.11
277
+ pluggy==0.13.1
278
+ pooch==1.3.0
279
+ portalocker==2.3.2
280
+ pptree==3.1
281
+ pre-commit==2.9.0
282
+ preprocessing==0.1.13
283
+ pretty-midi==0.2.9
284
+ prettytable==2.2.1
285
+ primePy==1.3
286
+ progressbar2==3.53.1
287
+ prometheus-client==0.10.1
288
+ promise==2.3
289
+ prompt-toolkit==3.0.8
290
+ protobuf==3.20.3
291
+ psutil==5.6.6
292
+ ptyprocess==0.6.0
293
+ py==1.9.0
294
+ py-espeak-ng==0.1.8
295
+ py4j==0.10.9.7
296
+ pyannote.audio==2.1.1
297
+ pyannote.core==4.5
298
+ pyannote.database==4.1.3
299
+ pyannote.metrics==3.2.1
300
+ pyannote.pipeline==2.3
301
+ pyannotebook==0.1.0.dev0
302
+ PyArabic==0.6.15
303
+ pyarrow==3.0.0
304
+ pyasn1==0.4.8
305
+ pyasn1-modules==0.2.8
306
+ pybind11==2.8.1
307
+ pybtex==0.24.0
308
+ pybtex-docutils==1.0.1
309
+ pycodestyle==2.5.0
310
+ pycparser==2.20
311
+ pycryptodome==3.16.0
312
+ pyctcdecode==0.4.0
313
+ pydantic==1.10.4
314
+ pyDeprecate==0.3.1
315
+ pydub==0.25.1
316
+ pyflakes==2.1.1
317
+ Pygments==2.15.1
318
+ pygtrie==2.5.0
319
+ PyJWT==2.7.0
320
+ pymodbus==2.5.3
321
+ pyparsing==2.4.7
322
+ pyperclip==1.8.2
323
+ pypinyin==0.43.0
324
+ pyrsistent==0.17.3
325
+ pyserial==3.5
326
+ PySocks==1.7.1
327
+ pystoi==0.3.3
328
+ pytest==5.4.1
329
+ pytest-runner==5.3.1
330
+ python-bidi==0.4.2
331
+ python-crfsuite==0.9.7
332
+ python-dateutil==2.8.2
333
+ python-editor==1.0.4
334
+ python-Levenshtein==0.12.2
335
+ python-multipart==0.0.5
336
+ python-utils==2.4.0
337
+ pytorch-lightning==1.6.5
338
+ pytorch-metric-learning==1.7.3
339
+ pytorch-revgrad==0.2.0
340
+ pytube==11.0.1
341
+ pytz==2022.6
342
+ PyWavelets==1.1.1
343
+ PyYAML==6.0
344
+ pyzmq==20.0.0
345
+ rapidfuzz==1.8.2
346
+ readchar==4.0.5
347
+ regex==2020.11.13
348
+ requests==2.28.1
349
+ requests-oauthlib==1.3.0
350
+ resampy==0.2.2
351
+ rfc3986==1.4.0
352
+ rich==13.4.2
353
+ richenum==1.3.1
354
+ rsa==4.7
355
+ ruamel.yaml==0.17.21
356
+ ruamel.yaml.clib==0.2.7
357
+ s3m==1.1.0
358
+ s3transfer==0.5.0
359
+ sacrebleu==2.0.0
360
+ sacremoses==0.0.44
361
+ safetensors==0.3.1
362
+ scikit-image==0.18.1
363
+ scikit-learn==0.23.2
364
+ scipy==1.5.4
365
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
366
+ seaborn==0.11.1
367
+ SecretStorage==3.3.3
368
+ segments==2.1.3
369
+ segtok==1.5.11
370
+ semantic-version==2.10.0
371
+ semver==2.13.0
372
+ Send2Trash==1.5.0
373
+ sentencepiece==0.1.99
374
+ sentry-sdk==1.4.3
375
+ shellingham==1.4.0
376
+ shortuuid==1.0.7
377
+ SIDEKIT==1.3.8.5.2
378
+ simplejson==3.17.5
379
+ singledispatchmethod==1.0
380
+ six==1.15.0
381
+ smart-open==5.0.0
382
+ smmap==5.0.0
383
+ sniffio==1.3.0
384
+ snowballstemmer==2.0.0
385
+ sortedcollections==2.1.0
386
+ sortedcontainers==2.4.0
387
+ sounddevice==0.4.5
388
+ SoundFile==0.10.3.post1
389
+ soupsieve==2.3
390
+ sox==1.4.1
391
+ sparsemax==0.1.9
392
+ speechbrain==0.5.14
393
+ sphfile==1.0.3
394
+ Sphinx==3.3.1
395
+ sphinx-rtd-theme==0.2.4
396
+ sphinxcontrib-applehelp==1.0.2
397
+ sphinxcontrib-bibtex==2.4.1
398
+ sphinxcontrib-devhelp==1.0.2
399
+ sphinxcontrib-htmlhelp==1.0.3
400
+ sphinxcontrib-jsmath==1.0.1
401
+ sphinxcontrib-qthelp==1.0.3
402
+ sphinxcontrib-serializinghtml==1.1.4
403
+ SQLAlchemy==1.4.25
404
+ sqlitedict==2.1.0
405
+ sqlparse==0.4.2
406
+ stanza==1.4.2
407
+ starlette==0.27.0
408
+ starsessions==1.3.0
409
+ stevedore==3.4.0
410
+ subprocess32==3.5.4
411
+ sympy==1.9
412
+ tabulate==0.8.9
413
+ tensorboard==2.4.0
414
+ tensorboard-plugin-wit==1.7.0
415
+ tensorboardX==2.6.1
416
+ tensorflow==2.4.0
417
+ tensorflow-estimator==2.4.0
418
+ termcolor==1.1.0
419
+ terminado==0.9.4
420
+ testpath==0.4.4
421
+ threadpoolctl==2.1.0
422
+ tifffile==2020.12.8
423
+ tikzplotlib==0.9.8
424
+ tinycss2==1.2.1
425
+ tkseem==0.0.3
426
+ tokenizers==0.13.3
427
+ toml==0.10.2
428
+ toolz==0.12.0
429
+ torch==1.13.1
430
+ torch-audiomentations==0.11.0
431
+ torch-pitch-shift==1.2.4
432
+ torch-stft==0.1.4
433
+ torchaudio==0.13.1
434
+ torchmetrics==0.11.4
435
+ torchvision==0.14.1
436
+ tornado==6.1
437
+ tqdm==4.61.1
438
+ trackrip==1.2.1
439
+ traitlets==5.9.0
440
+ transformer-smaller-training-vocab==0.3.1
441
+ transformers==4.30.2
442
+ triton==2.0.0
443
+ typed-ast==1.4.1
444
+ typer==0.4.0
445
+ typing-extensions==4.4.0
446
+ uc-micro-py==1.0.1
447
+ Unidecode==1.3.2
448
+ uritemplate==3.0.1
449
+ urllib3==1.26.2
450
+ uvicorn==0.20.0
451
+ versioneer==0.28
452
+ virtualenv==20.2.1
453
+ wandb==0.12.6
454
+ wcwidth==0.2.5
455
+ webdataset==0.1.62
456
+ webencodings==0.5.1
457
+ websocket-client==1.6.1
458
+ websockets==10.4
459
+ Werkzeug==1.0.1
460
+ wget==3.2
461
+ widgetsnbextension==3.5.1
462
+ Wikipedia-API==0.6.0
463
+ wordninja==2.0.0
464
+ wrapt==1.12.1
465
+ xmltodict==0.13.0
466
+ xxhash==2.0.0
467
+ yamllint==1.23.0
468
+ yarg==0.1.9
469
+ yarl==1.7.2
470
+ yaspin==2.1.0
471
+ youtokentome==1.0.6
472
+ youtube-dl==2021.6.6
473
+ zipp==3.6.0
474
+ ==============================
475
+ Git revision:
476
+ 082323a
477
+ ==============================
478
+ CUDA version:
479
+ 11.7
semi_wavlm_large_tunisian_ctc/1234/hyperparams.yaml ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated 2023-09-25 from:
2
+ # /home/salah/Tunisian_Automatic_Speech_Recognition/train_semi.yaml
3
+ # yamllint disable
4
+ # ################################
5
+ # Model: wav2vec2 + DNN + CTC
6
+ # Augmentation: SpecAugment
7
+ # Authors: Titouan Parcollet 2021
8
+ # ################################
9
+
10
+ # Seed needs to be set at top of yaml, before objects with parameters are made
11
+ seed: 1234
12
+ __set_seed: !!python/object/apply:torch.manual_seed [1234]
13
+ output_folder: semi_wavlm_large_tunisian_ctc/1234
14
+ wer_file: semi_wavlm_large_tunisian_ctc/1234/wer.txt
15
+ save_folder: semi_wavlm_large_tunisian_ctc/1234/save
16
+ train_log: semi_wavlm_large_tunisian_ctc/1234/train_log.txt
17
+
18
+ # URL for the biggest LeBenchmark wav2vec french.
19
+ wav2vec2_folder: semi_wavlm_large_tunisian_ctc/1234/save/wav2vec2_checkpoint
20
+
21
+ # Data files
22
+ data_folder: /path/to/data # e.g, /localscratch/cv-corpus-5.1-2020-06-22/fr
23
+ train_tsv_file: /path/to/data/train.tsv # Standard CommonVoice .tsv files
24
+ dev_tsv_file: /path/to/data/dev.tsv # Standard CommonVoice .tsv files
25
+ test_tsv_file: /path/to/data/test.tsv # Standard CommonVoice .tsv files
26
+ accented_letters: true
27
+ language: fr # use 'it' for Italian, 'rw' for Kinyarwanda, 'en' for english
28
+ test_csv:
29
+ - /path/to/test_data
30
+
31
+ skip_prep: true # Skip data preparation
32
+
33
+ use_language_modelling: true
34
+ ngram_lm_path: outdomain.arpa
35
+
36
+ # We remove utterance slonger than 10s in the train/dev/test sets as
37
+ # longer sentences certainly correspond to "open microphones".
38
+ avoid_if_longer_than: 10.0
39
+ avoid_if_shorter_than: 1.2
40
+
41
+
42
+ # Training parameters
43
+ number_of_epochs: 12
44
+ lr: 1.0
45
+ lr_wav2vec: 0.0001
46
+ sorting: ascending
47
+ auto_mix_prec: false
48
+ sample_rate: 16000
49
+ ckpt_interval_minutes: 30 # save checkpoint every N min
50
+
51
+ # With data_parallel batch_size is split into N jobs
52
+ # With DDP batch_size is multiplied by N jobs
53
+ # Must be 6 per GPU to fit 16GB of VRAM
54
+ batch_size: 10
55
+ test_batch_size: 4
56
+
57
+ dataloader_options:
58
+ batch_size: 10
59
+ num_workers: 6
60
+ test_dataloader_options:
61
+ batch_size: 4
62
+ num_workers: 6
63
+
64
+ # BPE parameters
65
+ token_type: char # ["unigram", "bpe", "char"]
66
+ character_coverage: 1.0
67
+
68
+ # Model parameters
69
+ # activation: !name:torch.nn.LeakyReLU
70
+ wav2vec_output_dim: 1024
71
+ dnn_neurons: 1024
72
+ freeze_wav2vec: false
73
+ freeze_feature_extractor: true
74
+ dropout: 0.15
75
+ warmup_steps: 500 # The wav2vec 2 model isn't updated for this amount of steps
76
+
77
+ # Outputs
78
+ output_neurons: 40 # BPE size, index(blank/eos/bos) = 0
79
+
80
+ # Decoding parameters
81
+ # Be sure that the bos and eos index match with the BPEs ones
82
+ blank_index: 0
83
+ unk_index: 1
84
+
85
+ #
86
+ # Functions and classes
87
+ #
88
+ epoch_counter: &id007 !new:speechbrain.utils.epoch_loop.EpochCounter
89
+
90
+ limit: 12
91
+
92
+ augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
93
+ sample_rate: 16000
94
+ speeds: [95, 100, 105]
95
+
96
+ enc: &id002 !new:speechbrain.nnet.containers.Sequential
97
+ input_shape: [null, null, 1024]
98
+ linear1: !name:speechbrain.nnet.linear.Linear
99
+ n_neurons: 1024
100
+ bias: true
101
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
102
+ activation: !new:torch.nn.LeakyReLU
103
+ drop: !new:torch.nn.Dropout
104
+ p: 0.15
105
+ linear2: !name:speechbrain.nnet.linear.Linear
106
+ n_neurons: 1024
107
+ bias: true
108
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
109
+ activation2: !new:torch.nn.LeakyReLU
110
+ drop2: !new:torch.nn.Dropout
111
+ p: 0.15
112
+ linear3: !name:speechbrain.nnet.linear.Linear
113
+ n_neurons: 1024
114
+ bias: true
115
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
116
+ activation3: !new:torch.nn.LeakyReLU
117
+
118
+ wav2vec2: &id001 !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
119
+ source: wavlm-large/
120
+ output_norm: false
121
+ freeze: false
122
+ freeze_feature_extractor: true
123
+ save_path: semi_wavlm_large_tunisian_ctc/1234/save/wav2vec2_checkpoint
124
+
125
+
126
+ ctc_lin: &id003 !new:speechbrain.nnet.linear.Linear
127
+
128
+ input_size: 1024
129
+ n_neurons: 40
130
+
131
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
132
+ apply_log: true
133
+
134
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
135
+ blank_index: 0
136
+
137
+ modules:
138
+ wav2vec2: *id001
139
+ enc: *id002
140
+ ctc_lin: *id003
141
+ model: &id004 !new:torch.nn.ModuleList
142
+ - [*id002, *id003]
143
+ model_opt_class: !name:torch.optim.Adadelta
144
+ lr: 1.0
145
+ rho: 0.95
146
+ eps: 1.e-8
147
+
148
+ wav2vec_opt_class: !name:torch.optim.Adam
149
+ lr: 0.0001
150
+
151
+ lr_annealing_model: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
152
+ initial_value: 1.0
153
+ improvement_threshold: 0.0025
154
+ annealing_factor: 0.8
155
+ patient: 0
156
+
157
+ lr_annealing_wav2vec: &id006 !new:speechbrain.nnet.schedulers.NewBobScheduler
158
+ initial_value: 0.0001
159
+ improvement_threshold: 0.0025
160
+ annealing_factor: 0.9
161
+ patient: 0
162
+
163
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
164
+ checkpoints_dir: semi_wavlm_large_tunisian_ctc/1234/save
165
+ recoverables:
166
+ wav2vec2: *id001
167
+ model: *id004
168
+ scheduler_model: *id005
169
+ scheduler_wav2vec: *id006
170
+ counter: *id007
171
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
172
+ save_file: semi_wavlm_large_tunisian_ctc/1234/train_log.txt
173
+
174
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
175
+
176
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
177
+ split_tokens: true
semi_wavlm_large_tunisian_ctc/1234/log.txt ADDED
@@ -0,0 +1,2270 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2023-09-25 11:52:05,502 - speechbrain.core - INFO - Beginning experiment!
2
+ 2023-09-25 11:52:05,502 - speechbrain.core - INFO - Experiment folder: semi_wavlm_large_tunisian_ctc/1234
3
+ 2023-09-25 11:52:06,047 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
4
+ absl-py==0.11.0
5
+ aiofiles==23.2.1
6
+ aiohttp==3.8.0
7
+ aiosignal==1.2.0
8
+ alabaster==0.7.12
9
+ alembic==1.7.4
10
+ altair==4.2.0
11
+ altgraph==0.17
12
+ antlr4-python3-runtime==4.9.3
13
+ anyio==3.6.2
14
+ appdirs==1.4.4
15
+ argcomplete==1.12.2
16
+ argon2-cffi==20.1.0
17
+ arrow==1.2.3
18
+ asgiref==3.6.0
19
+ asteroid-filterbanks==0.4.0
20
+ astunparse==1.6.3
21
+ async-generator==1.10
22
+ async-timeout==4.0.0
23
+ attrdict==2.0.1
24
+ attrs==20.3.0
25
+ audeer==1.16.0
26
+ audformat==0.11.5
27
+ audinterface==0.7.0
28
+ audiofile==1.0.0
29
+ audiomentations==0.25.0
30
+ audioread==2.1.9
31
+ audobject==0.4.14
32
+ audresample==0.1.6
33
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
34
+ autopage==0.4.0
35
+ Babel==2.9.0
36
+ backcall==0.2.0
37
+ backports.cached-property==1.0.2
38
+ beautifulsoup4==4.10.0
39
+ black==19.10b0
40
+ bleach==3.3.0
41
+ blessed==1.20.0
42
+ boto3==1.20.2
43
+ botocore==1.23.2
44
+ bpemb==0.3.4
45
+ braceexpand==0.1.7
46
+ cachetools==4.2.0
47
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
48
+ cffi==1.14.3
49
+ cfgv==3.2.0
50
+ chardet==3.0.4
51
+ charset-normalizer==2.0.7
52
+ click==7.1.2
53
+ cliff==3.9.0
54
+ clldutils==3.5.4
55
+ cloudpickle==2.2.1
56
+ cmaes==0.8.2
57
+ cmake==3.18.4.post1
58
+ cmd2==2.2.0
59
+ colorama==0.4.4
60
+ colorlog==4.6.2
61
+ configparser==5.1.0
62
+ conllu==4.5.3
63
+ croniter==1.3.15
64
+ cryptography==38.0.4
65
+ csrgraph==0.1.28
66
+ csvw==1.8.1
67
+ cycler==0.10.0
68
+ Cython==0.29.21
69
+ dataclasses==0.6
70
+ dateutils==0.6.12
71
+ decorator==4.4.2
72
+ deepdiff==6.3.0
73
+ deepspeech==0.9.1
74
+ defusedxml==0.7.1
75
+ Deprecated==1.2.14
76
+ dill==0.3.3
77
+ Distance==0.1.3
78
+ distlib==0.3.1
79
+ Django==3.2.16
80
+ django-auditlog==2.2.1
81
+ django-filter==22.1
82
+ django-js-asset==1.2.2
83
+ django-mptt==0.14.0
84
+ djangorestframework==3.14.0
85
+ docker-pycreds==0.4.0
86
+ docopt==0.6.2
87
+ docutils==0.16
88
+ drf-excel==2.2.0
89
+ drf-flex-fields==1.0.0
90
+ drf-renderer-xlsx==0.4.1
91
+ easyocr==1.2.1
92
+ editdistance==0.6.0
93
+ einops==0.3.2
94
+ emoji==2.2.0
95
+ entrypoints==0.3
96
+ et-xmlfile==1.1.0
97
+ exceptiongroup==1.1.0
98
+ farasapy==0.0.14
99
+ fastapi==0.98.0
100
+ fastjsonschema==2.17.1
101
+ fasttext==0.9.2
102
+ ffmpeg-python==0.2.0
103
+ ffmpy==0.3.0
104
+ filelock==3.0.12
105
+ flair==0.12.2
106
+ flake8==3.7.9
107
+ flatbuffers==1.12
108
+ frozendict==2.0.7
109
+ frozenlist==1.2.0
110
+ fsspec==2021.11.0
111
+ ftfy==6.1.1
112
+ future==0.18.2
113
+ g2p-en==2.1.0
114
+ gast==0.3.3
115
+ gdown==4.4.0
116
+ gdrive==0.1.5
117
+ gensim==4.0.1
118
+ gitdb==4.0.9
119
+ GitPython==3.1.24
120
+ google-api-core==2.11.1
121
+ google-api-python-client==2.43.0
122
+ google-auth==1.24.0
123
+ google-auth-httplib2==0.1.0
124
+ google-auth-oauthlib==0.5.3
125
+ google-pasta==0.2.0
126
+ googleapis-common-protos==1.59.1
127
+ gradio==3.44.4
128
+ gradio-client==0.5.1
129
+ greenlet==1.1.2
130
+ grpcio==1.32.0
131
+ h11==0.14.0
132
+ h5features==1.3.2
133
+ h5py==2.10.0
134
+ hierarchy==0.4.0
135
+ hmmlearn==0.2.8
136
+ htk-io==0.5
137
+ httpcore==0.16.3
138
+ httplib2==0.22.0
139
+ httpx==0.23.3
140
+ huggingface-hub==0.15.1
141
+ hydra-colorlog==0.1.4
142
+ hydra-core==1.3.2
143
+ hyperopt==0.2.7
144
+ HyperPyYAML==1.1.0
145
+ hypothesis==6.61.2
146
+ identify==1.5.10
147
+ idna==2.10
148
+ imageio==2.9.0
149
+ imagesize==1.2.0
150
+ importlib-metadata==4.8.1
151
+ importlib-resources==5.2.2
152
+ inflect==5.3.0
153
+ inquirer==3.1.3
154
+ ipadic==1.0.0
155
+ ipyevents==2.0.1
156
+ ipykernel==5.3.4
157
+ ipython==7.19.0
158
+ ipython-genutils==0.2.0
159
+ ipywebrtc==0.6.0
160
+ ipywidgets==7.6.3
161
+ iso-639==0.4.5
162
+ isodate==0.6.0
163
+ isort==4.3.21
164
+ itsdangerous==2.1.2
165
+ Janome==0.5.0
166
+ jedi==0.17.2
167
+ jeepney==0.8.0
168
+ jieba==0.42.1
169
+ Jinja2==3.0.3
170
+ jiwer==2.2.0
171
+ jmespath==0.10.0
172
+ joblib==0.17.0
173
+ jsonschema==3.2.0
174
+ julius==0.2.7
175
+ jupyter-client==6.1.7
176
+ jupyter-core==4.7.0
177
+ jupyterlab-pygments==0.1.2
178
+ jupyterlab-widgets==1.0.0
179
+ kaitaistruct==0.9
180
+ kaldi-io==0.9.4
181
+ kaldi-python-io==1.2.2
182
+ kaldiio==2.17.2
183
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
184
+ Keras-Preprocessing==1.1.2
185
+ kiwisolver==1.3.1
186
+ lang-trans==0.6.0
187
+ langdetect==1.0.9
188
+ latexcodec==2.0.1
189
+ ldap3==2.9.1
190
+ librosa==0.9.0
191
+ lightning-cloud==0.5.37
192
+ lightning-utilities==0.8.0
193
+ linkify-it-py==1.0.3
194
+ lit==16.0.6
195
+ llvmlite==0.35.0
196
+ lxml==4.9.0
197
+ Mako==1.1.5
198
+ Markdown==3.3.3
199
+ markdown-it-py==3.0.0
200
+ MarkupSafe==2.1.3
201
+ marshmallow==3.14.0
202
+ matplotlib==3.3.3
203
+ mccabe==0.6.1
204
+ mcd==0.4
205
+ mdit-py-plugins==0.3.3
206
+ mdurl==0.1.2
207
+ mecab-python3==1.0.3
208
+ megatron-lm==2.2.0
209
+ metrics==0.3.3
210
+ mido==1.2.10
211
+ mistune==0.8.4
212
+ more-itertools==8.6.0
213
+ mpld3==0.3
214
+ mpmath==1.2.1
215
+ multidict==5.2.0
216
+ multiprocess==0.70.11.1
217
+ nbclient==0.5.3
218
+ nbconvert==5.6.1
219
+ nbformat==5.9.0
220
+ NEMO==4.3.2
221
+ nemo-toolkit==1.4.0
222
+ nest-asyncio==1.5.1
223
+ networkx==2.8.8
224
+ nltk==3.2.4
225
+ nodeenv==1.5.0
226
+ normalize==2.0.2
227
+ notebook==6.3.0
228
+ numba==0.52.0
229
+ numpy==1.19.4
230
+ nvidia-cublas-cu11==11.10.3.66
231
+ nvidia-cuda-cupti-cu11==11.7.101
232
+ nvidia-cuda-nvrtc-cu11==11.7.99
233
+ nvidia-cuda-runtime-cu11==11.7.99
234
+ nvidia-cudnn-cu11==8.5.0.96
235
+ nvidia-cufft-cu11==10.9.0.58
236
+ nvidia-curand-cu11==10.2.10.91
237
+ nvidia-cusolver-cu11==11.4.0.1
238
+ nvidia-cusparse-cu11==11.7.4.91
239
+ nvidia-nccl-cu11==2.14.3
240
+ nvidia-nvtx-cu11==11.7.91
241
+ oauthlib==3.1.0
242
+ omegaconf==2.3.0
243
+ onnx==1.10.2
244
+ OpenCC==1.1.2
245
+ opencv-python==4.4.0.46
246
+ openpyxl==3.0.9
247
+ opensmile==2.2.0
248
+ opt-einsum==3.3.0
249
+ optuna==2.10.0
250
+ ordered-set==4.1.0
251
+ orjson==3.8.4
252
+ oyaml==1.0
253
+ packaging==22.0
254
+ pandas==1.2.5
255
+ pandocfilters==1.4.3
256
+ pangu==4.0.6.1
257
+ parameterized==0.8.1
258
+ parso==0.7.1
259
+ pathlib2==2.3.7.post1
260
+ pathspec==0.5.5
261
+ pathtools==0.1.2
262
+ pbr==5.6.0
263
+ pefile==2019.4.18
264
+ pescador==2.1.0
265
+ pesq==0.0.3
266
+ pexpect==4.8.0
267
+ phonemizer==2.2.1
268
+ pickleshare==0.7.5
269
+ Pillow==9.3.0
270
+ pip-api==0.0.23
271
+ pipreqs==0.4.11
272
+ pluggy==0.13.1
273
+ pooch==1.3.0
274
+ portalocker==2.3.2
275
+ pptree==3.1
276
+ pre-commit==2.9.0
277
+ preprocessing==0.1.13
278
+ pretty-midi==0.2.9
279
+ prettytable==2.2.1
280
+ primePy==1.3
281
+ progressbar2==3.53.1
282
+ prometheus-client==0.10.1
283
+ promise==2.3
284
+ prompt-toolkit==3.0.8
285
+ protobuf==3.20.3
286
+ psutil==5.6.6
287
+ ptyprocess==0.6.0
288
+ py==1.9.0
289
+ py-espeak-ng==0.1.8
290
+ py4j==0.10.9.7
291
+ pyannote.audio==2.1.1
292
+ pyannote.core==4.5
293
+ pyannote.database==4.1.3
294
+ pyannote.metrics==3.2.1
295
+ pyannote.pipeline==2.3
296
+ pyannotebook==0.1.0.dev0
297
+ PyArabic==0.6.15
298
+ pyarrow==3.0.0
299
+ pyasn1==0.4.8
300
+ pyasn1-modules==0.2.8
301
+ pybind11==2.8.1
302
+ pybtex==0.24.0
303
+ pybtex-docutils==1.0.1
304
+ pycodestyle==2.5.0
305
+ pycparser==2.20
306
+ pycryptodome==3.16.0
307
+ pyctcdecode==0.4.0
308
+ pydantic==1.10.4
309
+ pyDeprecate==0.3.1
310
+ pydub==0.25.1
311
+ pyflakes==2.1.1
312
+ Pygments==2.15.1
313
+ pygtrie==2.5.0
314
+ PyJWT==2.7.0
315
+ pymodbus==2.5.3
316
+ pyparsing==2.4.7
317
+ pyperclip==1.8.2
318
+ pypinyin==0.43.0
319
+ pyrsistent==0.17.3
320
+ pyserial==3.5
321
+ PySocks==1.7.1
322
+ pystoi==0.3.3
323
+ pytest==5.4.1
324
+ pytest-runner==5.3.1
325
+ python-bidi==0.4.2
326
+ python-crfsuite==0.9.7
327
+ python-dateutil==2.8.2
328
+ python-editor==1.0.4
329
+ python-Levenshtein==0.12.2
330
+ python-multipart==0.0.5
331
+ python-utils==2.4.0
332
+ pytorch-lightning==1.6.5
333
+ pytorch-metric-learning==1.7.3
334
+ pytorch-revgrad==0.2.0
335
+ pytube==11.0.1
336
+ pytz==2022.6
337
+ PyWavelets==1.1.1
338
+ PyYAML==6.0
339
+ pyzmq==20.0.0
340
+ rapidfuzz==1.8.2
341
+ readchar==4.0.5
342
+ regex==2020.11.13
343
+ requests==2.28.1
344
+ requests-oauthlib==1.3.0
345
+ resampy==0.2.2
346
+ rfc3986==1.4.0
347
+ rich==13.4.2
348
+ richenum==1.3.1
349
+ rsa==4.7
350
+ ruamel.yaml==0.17.21
351
+ ruamel.yaml.clib==0.2.7
352
+ s3m==1.1.0
353
+ s3transfer==0.5.0
354
+ sacrebleu==2.0.0
355
+ sacremoses==0.0.44
356
+ safetensors==0.3.1
357
+ scikit-image==0.18.1
358
+ scikit-learn==0.23.2
359
+ scipy==1.5.4
360
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
361
+ seaborn==0.11.1
362
+ SecretStorage==3.3.3
363
+ segments==2.1.3
364
+ segtok==1.5.11
365
+ semantic-version==2.10.0
366
+ semver==2.13.0
367
+ Send2Trash==1.5.0
368
+ sentencepiece==0.1.99
369
+ sentry-sdk==1.4.3
370
+ shellingham==1.4.0
371
+ shortuuid==1.0.7
372
+ SIDEKIT==1.3.8.5.2
373
+ simplejson==3.17.5
374
+ singledispatchmethod==1.0
375
+ six==1.15.0
376
+ smart-open==5.0.0
377
+ smmap==5.0.0
378
+ sniffio==1.3.0
379
+ snowballstemmer==2.0.0
380
+ sortedcollections==2.1.0
381
+ sortedcontainers==2.4.0
382
+ sounddevice==0.4.5
383
+ SoundFile==0.10.3.post1
384
+ soupsieve==2.3
385
+ sox==1.4.1
386
+ sparsemax==0.1.9
387
+ speechbrain==0.5.14
388
+ sphfile==1.0.3
389
+ Sphinx==3.3.1
390
+ sphinx-rtd-theme==0.2.4
391
+ sphinxcontrib-applehelp==1.0.2
392
+ sphinxcontrib-bibtex==2.4.1
393
+ sphinxcontrib-devhelp==1.0.2
394
+ sphinxcontrib-htmlhelp==1.0.3
395
+ sphinxcontrib-jsmath==1.0.1
396
+ sphinxcontrib-qthelp==1.0.3
397
+ sphinxcontrib-serializinghtml==1.1.4
398
+ SQLAlchemy==1.4.25
399
+ sqlitedict==2.1.0
400
+ sqlparse==0.4.2
401
+ stanza==1.4.2
402
+ starlette==0.27.0
403
+ starsessions==1.3.0
404
+ stevedore==3.4.0
405
+ subprocess32==3.5.4
406
+ sympy==1.9
407
+ tabulate==0.8.9
408
+ tensorboard==2.4.0
409
+ tensorboard-plugin-wit==1.7.0
410
+ tensorboardX==2.6.1
411
+ tensorflow==2.4.0
412
+ tensorflow-estimator==2.4.0
413
+ termcolor==1.1.0
414
+ terminado==0.9.4
415
+ testpath==0.4.4
416
+ threadpoolctl==2.1.0
417
+ tifffile==2020.12.8
418
+ tikzplotlib==0.9.8
419
+ tinycss2==1.2.1
420
+ tkseem==0.0.3
421
+ tokenizers==0.13.3
422
+ toml==0.10.2
423
+ toolz==0.12.0
424
+ torch==1.13.1
425
+ torch-audiomentations==0.11.0
426
+ torch-pitch-shift==1.2.4
427
+ torch-stft==0.1.4
428
+ torchaudio==0.13.1
429
+ torchmetrics==0.11.4
430
+ torchvision==0.14.1
431
+ tornado==6.1
432
+ tqdm==4.61.1
433
+ trackrip==1.2.1
434
+ traitlets==5.9.0
435
+ transformer-smaller-training-vocab==0.3.1
436
+ transformers==4.30.2
437
+ triton==2.0.0
438
+ typed-ast==1.4.1
439
+ typer==0.4.0
440
+ typing-extensions==4.4.0
441
+ uc-micro-py==1.0.1
442
+ Unidecode==1.3.2
443
+ uritemplate==3.0.1
444
+ urllib3==1.26.2
445
+ uvicorn==0.20.0
446
+ versioneer==0.28
447
+ virtualenv==20.2.1
448
+ wandb==0.12.6
449
+ wcwidth==0.2.5
450
+ webdataset==0.1.62
451
+ webencodings==0.5.1
452
+ websocket-client==1.6.1
453
+ websockets==10.4
454
+ Werkzeug==1.0.1
455
+ wget==3.2
456
+ widgetsnbextension==3.5.1
457
+ Wikipedia-API==0.6.0
458
+ wordninja==2.0.0
459
+ wrapt==1.12.1
460
+ xmltodict==0.13.0
461
+ xxhash==2.0.0
462
+ yamllint==1.23.0
463
+ yarg==0.1.9
464
+ yarl==1.7.2
465
+ yaspin==2.1.0
466
+ youtokentome==1.0.6
467
+ youtube-dl==2021.6.6
468
+ zipp==3.6.0
469
+
470
+
471
+ 2023-09-25 11:52:06,077 - speechbrain.utils.superpowers - DEBUG - 082323a
472
+
473
+
474
+ 2023-09-25 11:52:06,124 - speechbrain.core - ERROR - Exception:
475
+ Traceback (most recent call last):
476
+ File "app.py", line 365, in <module>
477
+ train_data, valid_data, test_datasets, label_encoder = dataio_prepare(hparams)
478
+ File "app.py", line 248, in dataio_prepare
479
+ csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
480
+ KeyError: 'train_csv'
481
+ 2023-09-25 11:53:48,838 - speechbrain.core - INFO - Beginning experiment!
482
+ 2023-09-25 11:53:48,838 - speechbrain.core - INFO - Experiment folder: semi_wavlm_large_tunisian_ctc/1234
483
+ 2023-09-25 11:53:49,330 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
484
+ absl-py==0.11.0
485
+ aiofiles==23.2.1
486
+ aiohttp==3.8.0
487
+ aiosignal==1.2.0
488
+ alabaster==0.7.12
489
+ alembic==1.7.4
490
+ altair==4.2.0
491
+ altgraph==0.17
492
+ antlr4-python3-runtime==4.9.3
493
+ anyio==3.6.2
494
+ appdirs==1.4.4
495
+ argcomplete==1.12.2
496
+ argon2-cffi==20.1.0
497
+ arrow==1.2.3
498
+ asgiref==3.6.0
499
+ asteroid-filterbanks==0.4.0
500
+ astunparse==1.6.3
501
+ async-generator==1.10
502
+ async-timeout==4.0.0
503
+ attrdict==2.0.1
504
+ attrs==20.3.0
505
+ audeer==1.16.0
506
+ audformat==0.11.5
507
+ audinterface==0.7.0
508
+ audiofile==1.0.0
509
+ audiomentations==0.25.0
510
+ audioread==2.1.9
511
+ audobject==0.4.14
512
+ audresample==0.1.6
513
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
514
+ autopage==0.4.0
515
+ Babel==2.9.0
516
+ backcall==0.2.0
517
+ backports.cached-property==1.0.2
518
+ beautifulsoup4==4.10.0
519
+ black==19.10b0
520
+ bleach==3.3.0
521
+ blessed==1.20.0
522
+ boto3==1.20.2
523
+ botocore==1.23.2
524
+ bpemb==0.3.4
525
+ braceexpand==0.1.7
526
+ cachetools==4.2.0
527
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
528
+ cffi==1.14.3
529
+ cfgv==3.2.0
530
+ chardet==3.0.4
531
+ charset-normalizer==2.0.7
532
+ click==7.1.2
533
+ cliff==3.9.0
534
+ clldutils==3.5.4
535
+ cloudpickle==2.2.1
536
+ cmaes==0.8.2
537
+ cmake==3.18.4.post1
538
+ cmd2==2.2.0
539
+ colorama==0.4.4
540
+ colorlog==4.6.2
541
+ configparser==5.1.0
542
+ conllu==4.5.3
543
+ croniter==1.3.15
544
+ cryptography==38.0.4
545
+ csrgraph==0.1.28
546
+ csvw==1.8.1
547
+ cycler==0.10.0
548
+ Cython==0.29.21
549
+ dataclasses==0.6
550
+ dateutils==0.6.12
551
+ decorator==4.4.2
552
+ deepdiff==6.3.0
553
+ deepspeech==0.9.1
554
+ defusedxml==0.7.1
555
+ Deprecated==1.2.14
556
+ dill==0.3.3
557
+ Distance==0.1.3
558
+ distlib==0.3.1
559
+ Django==3.2.16
560
+ django-auditlog==2.2.1
561
+ django-filter==22.1
562
+ django-js-asset==1.2.2
563
+ django-mptt==0.14.0
564
+ djangorestframework==3.14.0
565
+ docker-pycreds==0.4.0
566
+ docopt==0.6.2
567
+ docutils==0.16
568
+ drf-excel==2.2.0
569
+ drf-flex-fields==1.0.0
570
+ drf-renderer-xlsx==0.4.1
571
+ easyocr==1.2.1
572
+ editdistance==0.6.0
573
+ einops==0.3.2
574
+ emoji==2.2.0
575
+ entrypoints==0.3
576
+ et-xmlfile==1.1.0
577
+ exceptiongroup==1.1.0
578
+ farasapy==0.0.14
579
+ fastapi==0.98.0
580
+ fastjsonschema==2.17.1
581
+ fasttext==0.9.2
582
+ ffmpeg-python==0.2.0
583
+ ffmpy==0.3.0
584
+ filelock==3.0.12
585
+ flair==0.12.2
586
+ flake8==3.7.9
587
+ flatbuffers==1.12
588
+ frozendict==2.0.7
589
+ frozenlist==1.2.0
590
+ fsspec==2021.11.0
591
+ ftfy==6.1.1
592
+ future==0.18.2
593
+ g2p-en==2.1.0
594
+ gast==0.3.3
595
+ gdown==4.4.0
596
+ gdrive==0.1.5
597
+ gensim==4.0.1
598
+ gitdb==4.0.9
599
+ GitPython==3.1.24
600
+ google-api-core==2.11.1
601
+ google-api-python-client==2.43.0
602
+ google-auth==1.24.0
603
+ google-auth-httplib2==0.1.0
604
+ google-auth-oauthlib==0.5.3
605
+ google-pasta==0.2.0
606
+ googleapis-common-protos==1.59.1
607
+ gradio==3.44.4
608
+ gradio-client==0.5.1
609
+ greenlet==1.1.2
610
+ grpcio==1.32.0
611
+ h11==0.14.0
612
+ h5features==1.3.2
613
+ h5py==2.10.0
614
+ hierarchy==0.4.0
615
+ hmmlearn==0.2.8
616
+ htk-io==0.5
617
+ httpcore==0.16.3
618
+ httplib2==0.22.0
619
+ httpx==0.23.3
620
+ huggingface-hub==0.15.1
621
+ hydra-colorlog==0.1.4
622
+ hydra-core==1.3.2
623
+ hyperopt==0.2.7
624
+ HyperPyYAML==1.1.0
625
+ hypothesis==6.61.2
626
+ identify==1.5.10
627
+ idna==2.10
628
+ imageio==2.9.0
629
+ imagesize==1.2.0
630
+ importlib-metadata==4.8.1
631
+ importlib-resources==5.2.2
632
+ inflect==5.3.0
633
+ inquirer==3.1.3
634
+ ipadic==1.0.0
635
+ ipyevents==2.0.1
636
+ ipykernel==5.3.4
637
+ ipython==7.19.0
638
+ ipython-genutils==0.2.0
639
+ ipywebrtc==0.6.0
640
+ ipywidgets==7.6.3
641
+ iso-639==0.4.5
642
+ isodate==0.6.0
643
+ isort==4.3.21
644
+ itsdangerous==2.1.2
645
+ Janome==0.5.0
646
+ jedi==0.17.2
647
+ jeepney==0.8.0
648
+ jieba==0.42.1
649
+ Jinja2==3.0.3
650
+ jiwer==2.2.0
651
+ jmespath==0.10.0
652
+ joblib==0.17.0
653
+ jsonschema==3.2.0
654
+ julius==0.2.7
655
+ jupyter-client==6.1.7
656
+ jupyter-core==4.7.0
657
+ jupyterlab-pygments==0.1.2
658
+ jupyterlab-widgets==1.0.0
659
+ kaitaistruct==0.9
660
+ kaldi-io==0.9.4
661
+ kaldi-python-io==1.2.2
662
+ kaldiio==2.17.2
663
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
664
+ Keras-Preprocessing==1.1.2
665
+ kiwisolver==1.3.1
666
+ lang-trans==0.6.0
667
+ langdetect==1.0.9
668
+ latexcodec==2.0.1
669
+ ldap3==2.9.1
670
+ librosa==0.9.0
671
+ lightning-cloud==0.5.37
672
+ lightning-utilities==0.8.0
673
+ linkify-it-py==1.0.3
674
+ lit==16.0.6
675
+ llvmlite==0.35.0
676
+ lxml==4.9.0
677
+ Mako==1.1.5
678
+ Markdown==3.3.3
679
+ markdown-it-py==3.0.0
680
+ MarkupSafe==2.1.3
681
+ marshmallow==3.14.0
682
+ matplotlib==3.3.3
683
+ mccabe==0.6.1
684
+ mcd==0.4
685
+ mdit-py-plugins==0.3.3
686
+ mdurl==0.1.2
687
+ mecab-python3==1.0.3
688
+ megatron-lm==2.2.0
689
+ metrics==0.3.3
690
+ mido==1.2.10
691
+ mistune==0.8.4
692
+ more-itertools==8.6.0
693
+ mpld3==0.3
694
+ mpmath==1.2.1
695
+ multidict==5.2.0
696
+ multiprocess==0.70.11.1
697
+ nbclient==0.5.3
698
+ nbconvert==5.6.1
699
+ nbformat==5.9.0
700
+ NEMO==4.3.2
701
+ nemo-toolkit==1.4.0
702
+ nest-asyncio==1.5.1
703
+ networkx==2.8.8
704
+ nltk==3.2.4
705
+ nodeenv==1.5.0
706
+ normalize==2.0.2
707
+ notebook==6.3.0
708
+ numba==0.52.0
709
+ numpy==1.19.4
710
+ nvidia-cublas-cu11==11.10.3.66
711
+ nvidia-cuda-cupti-cu11==11.7.101
712
+ nvidia-cuda-nvrtc-cu11==11.7.99
713
+ nvidia-cuda-runtime-cu11==11.7.99
714
+ nvidia-cudnn-cu11==8.5.0.96
715
+ nvidia-cufft-cu11==10.9.0.58
716
+ nvidia-curand-cu11==10.2.10.91
717
+ nvidia-cusolver-cu11==11.4.0.1
718
+ nvidia-cusparse-cu11==11.7.4.91
719
+ nvidia-nccl-cu11==2.14.3
720
+ nvidia-nvtx-cu11==11.7.91
721
+ oauthlib==3.1.0
722
+ omegaconf==2.3.0
723
+ onnx==1.10.2
724
+ OpenCC==1.1.2
725
+ opencv-python==4.4.0.46
726
+ openpyxl==3.0.9
727
+ opensmile==2.2.0
728
+ opt-einsum==3.3.0
729
+ optuna==2.10.0
730
+ ordered-set==4.1.0
731
+ orjson==3.8.4
732
+ oyaml==1.0
733
+ packaging==22.0
734
+ pandas==1.2.5
735
+ pandocfilters==1.4.3
736
+ pangu==4.0.6.1
737
+ parameterized==0.8.1
738
+ parso==0.7.1
739
+ pathlib2==2.3.7.post1
740
+ pathspec==0.5.5
741
+ pathtools==0.1.2
742
+ pbr==5.6.0
743
+ pefile==2019.4.18
744
+ pescador==2.1.0
745
+ pesq==0.0.3
746
+ pexpect==4.8.0
747
+ phonemizer==2.2.1
748
+ pickleshare==0.7.5
749
+ Pillow==9.3.0
750
+ pip-api==0.0.23
751
+ pipreqs==0.4.11
752
+ pluggy==0.13.1
753
+ pooch==1.3.0
754
+ portalocker==2.3.2
755
+ pptree==3.1
756
+ pre-commit==2.9.0
757
+ preprocessing==0.1.13
758
+ pretty-midi==0.2.9
759
+ prettytable==2.2.1
760
+ primePy==1.3
761
+ progressbar2==3.53.1
762
+ prometheus-client==0.10.1
763
+ promise==2.3
764
+ prompt-toolkit==3.0.8
765
+ protobuf==3.20.3
766
+ psutil==5.6.6
767
+ ptyprocess==0.6.0
768
+ py==1.9.0
769
+ py-espeak-ng==0.1.8
770
+ py4j==0.10.9.7
771
+ pyannote.audio==2.1.1
772
+ pyannote.core==4.5
773
+ pyannote.database==4.1.3
774
+ pyannote.metrics==3.2.1
775
+ pyannote.pipeline==2.3
776
+ pyannotebook==0.1.0.dev0
777
+ PyArabic==0.6.15
778
+ pyarrow==3.0.0
779
+ pyasn1==0.4.8
780
+ pyasn1-modules==0.2.8
781
+ pybind11==2.8.1
782
+ pybtex==0.24.0
783
+ pybtex-docutils==1.0.1
784
+ pycodestyle==2.5.0
785
+ pycparser==2.20
786
+ pycryptodome==3.16.0
787
+ pyctcdecode==0.4.0
788
+ pydantic==1.10.4
789
+ pyDeprecate==0.3.1
790
+ pydub==0.25.1
791
+ pyflakes==2.1.1
792
+ Pygments==2.15.1
793
+ pygtrie==2.5.0
794
+ PyJWT==2.7.0
795
+ pymodbus==2.5.3
796
+ pyparsing==2.4.7
797
+ pyperclip==1.8.2
798
+ pypinyin==0.43.0
799
+ pyrsistent==0.17.3
800
+ pyserial==3.5
801
+ PySocks==1.7.1
802
+ pystoi==0.3.3
803
+ pytest==5.4.1
804
+ pytest-runner==5.3.1
805
+ python-bidi==0.4.2
806
+ python-crfsuite==0.9.7
807
+ python-dateutil==2.8.2
808
+ python-editor==1.0.4
809
+ python-Levenshtein==0.12.2
810
+ python-multipart==0.0.5
811
+ python-utils==2.4.0
812
+ pytorch-lightning==1.6.5
813
+ pytorch-metric-learning==1.7.3
814
+ pytorch-revgrad==0.2.0
815
+ pytube==11.0.1
816
+ pytz==2022.6
817
+ PyWavelets==1.1.1
818
+ PyYAML==6.0
819
+ pyzmq==20.0.0
820
+ rapidfuzz==1.8.2
821
+ readchar==4.0.5
822
+ regex==2020.11.13
823
+ requests==2.28.1
824
+ requests-oauthlib==1.3.0
825
+ resampy==0.2.2
826
+ rfc3986==1.4.0
827
+ rich==13.4.2
828
+ richenum==1.3.1
829
+ rsa==4.7
830
+ ruamel.yaml==0.17.21
831
+ ruamel.yaml.clib==0.2.7
832
+ s3m==1.1.0
833
+ s3transfer==0.5.0
834
+ sacrebleu==2.0.0
835
+ sacremoses==0.0.44
836
+ safetensors==0.3.1
837
+ scikit-image==0.18.1
838
+ scikit-learn==0.23.2
839
+ scipy==1.5.4
840
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
841
+ seaborn==0.11.1
842
+ SecretStorage==3.3.3
843
+ segments==2.1.3
844
+ segtok==1.5.11
845
+ semantic-version==2.10.0
846
+ semver==2.13.0
847
+ Send2Trash==1.5.0
848
+ sentencepiece==0.1.99
849
+ sentry-sdk==1.4.3
850
+ shellingham==1.4.0
851
+ shortuuid==1.0.7
852
+ SIDEKIT==1.3.8.5.2
853
+ simplejson==3.17.5
854
+ singledispatchmethod==1.0
855
+ six==1.15.0
856
+ smart-open==5.0.0
857
+ smmap==5.0.0
858
+ sniffio==1.3.0
859
+ snowballstemmer==2.0.0
860
+ sortedcollections==2.1.0
861
+ sortedcontainers==2.4.0
862
+ sounddevice==0.4.5
863
+ SoundFile==0.10.3.post1
864
+ soupsieve==2.3
865
+ sox==1.4.1
866
+ sparsemax==0.1.9
867
+ speechbrain==0.5.14
868
+ sphfile==1.0.3
869
+ Sphinx==3.3.1
870
+ sphinx-rtd-theme==0.2.4
871
+ sphinxcontrib-applehelp==1.0.2
872
+ sphinxcontrib-bibtex==2.4.1
873
+ sphinxcontrib-devhelp==1.0.2
874
+ sphinxcontrib-htmlhelp==1.0.3
875
+ sphinxcontrib-jsmath==1.0.1
876
+ sphinxcontrib-qthelp==1.0.3
877
+ sphinxcontrib-serializinghtml==1.1.4
878
+ SQLAlchemy==1.4.25
879
+ sqlitedict==2.1.0
880
+ sqlparse==0.4.2
881
+ stanza==1.4.2
882
+ starlette==0.27.0
883
+ starsessions==1.3.0
884
+ stevedore==3.4.0
885
+ subprocess32==3.5.4
886
+ sympy==1.9
887
+ tabulate==0.8.9
888
+ tensorboard==2.4.0
889
+ tensorboard-plugin-wit==1.7.0
890
+ tensorboardX==2.6.1
891
+ tensorflow==2.4.0
892
+ tensorflow-estimator==2.4.0
893
+ termcolor==1.1.0
894
+ terminado==0.9.4
895
+ testpath==0.4.4
896
+ threadpoolctl==2.1.0
897
+ tifffile==2020.12.8
898
+ tikzplotlib==0.9.8
899
+ tinycss2==1.2.1
900
+ tkseem==0.0.3
901
+ tokenizers==0.13.3
902
+ toml==0.10.2
903
+ toolz==0.12.0
904
+ torch==1.13.1
905
+ torch-audiomentations==0.11.0
906
+ torch-pitch-shift==1.2.4
907
+ torch-stft==0.1.4
908
+ torchaudio==0.13.1
909
+ torchmetrics==0.11.4
910
+ torchvision==0.14.1
911
+ tornado==6.1
912
+ tqdm==4.61.1
913
+ trackrip==1.2.1
914
+ traitlets==5.9.0
915
+ transformer-smaller-training-vocab==0.3.1
916
+ transformers==4.30.2
917
+ triton==2.0.0
918
+ typed-ast==1.4.1
919
+ typer==0.4.0
920
+ typing-extensions==4.4.0
921
+ uc-micro-py==1.0.1
922
+ Unidecode==1.3.2
923
+ uritemplate==3.0.1
924
+ urllib3==1.26.2
925
+ uvicorn==0.20.0
926
+ versioneer==0.28
927
+ virtualenv==20.2.1
928
+ wandb==0.12.6
929
+ wcwidth==0.2.5
930
+ webdataset==0.1.62
931
+ webencodings==0.5.1
932
+ websocket-client==1.6.1
933
+ websockets==10.4
934
+ Werkzeug==1.0.1
935
+ wget==3.2
936
+ widgetsnbextension==3.5.1
937
+ Wikipedia-API==0.6.0
938
+ wordninja==2.0.0
939
+ wrapt==1.12.1
940
+ xmltodict==0.13.0
941
+ xxhash==2.0.0
942
+ yamllint==1.23.0
943
+ yarg==0.1.9
944
+ yarl==1.7.2
945
+ yaspin==2.1.0
946
+ youtokentome==1.0.6
947
+ youtube-dl==2021.6.6
948
+ zipp==3.6.0
949
+
950
+
951
+ 2023-09-25 11:53:49,358 - speechbrain.utils.superpowers - DEBUG - 082323a
952
+
953
+
954
+ 2023-09-25 11:53:49,390 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
955
+ 2023-09-25 11:53:49,390 - speechbrain.dataio.encoder - INFO - Load called, but CTCTextEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
956
+ 2023-09-25 11:53:49,391 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
957
+ 2023-09-25 11:53:51,607 - pyctcdecode.decoder - INFO - Using arpa instead of binary LM file, decoder instantiation might be slow.
958
+ 2023-09-25 11:53:51,713 - pyctcdecode.alphabet - INFO - Alphabet determined to be of regular style.
959
+ 2023-09-25 11:53:51,774 - pyctcdecode.alphabet - WARNING - Unigrams and labels don't seem to agree.
960
+ 2023-09-25 11:53:52,870 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
961
+ 2023-09-25 11:53:52,871 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
962
+ 2023-09-25 11:53:53,969 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
963
+ 2023-09-25 11:53:53,970 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00
964
+ 2023-09-25 11:53:54,516 - speechbrain.core - ERROR - Exception:
965
+ Traceback (most recent call last):
966
+ File "app.py", line 422, in <module>
967
+ gr.Interface(
968
+ NameError: name 'gr' is not defined
969
+ 2023-09-25 11:54:18,422 - speechbrain.core - INFO - Beginning experiment!
970
+ 2023-09-25 11:54:18,422 - speechbrain.core - INFO - Experiment folder: semi_wavlm_large_tunisian_ctc/1234
971
+ 2023-09-25 11:54:18,903 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
972
+ absl-py==0.11.0
973
+ aiofiles==23.2.1
974
+ aiohttp==3.8.0
975
+ aiosignal==1.2.0
976
+ alabaster==0.7.12
977
+ alembic==1.7.4
978
+ altair==4.2.0
979
+ altgraph==0.17
980
+ antlr4-python3-runtime==4.9.3
981
+ anyio==3.6.2
982
+ appdirs==1.4.4
983
+ argcomplete==1.12.2
984
+ argon2-cffi==20.1.0
985
+ arrow==1.2.3
986
+ asgiref==3.6.0
987
+ asteroid-filterbanks==0.4.0
988
+ astunparse==1.6.3
989
+ async-generator==1.10
990
+ async-timeout==4.0.0
991
+ attrdict==2.0.1
992
+ attrs==20.3.0
993
+ audeer==1.16.0
994
+ audformat==0.11.5
995
+ audinterface==0.7.0
996
+ audiofile==1.0.0
997
+ audiomentations==0.25.0
998
+ audioread==2.1.9
999
+ audobject==0.4.14
1000
+ audresample==0.1.6
1001
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
1002
+ autopage==0.4.0
1003
+ Babel==2.9.0
1004
+ backcall==0.2.0
1005
+ backports.cached-property==1.0.2
1006
+ beautifulsoup4==4.10.0
1007
+ black==19.10b0
1008
+ bleach==3.3.0
1009
+ blessed==1.20.0
1010
+ boto3==1.20.2
1011
+ botocore==1.23.2
1012
+ bpemb==0.3.4
1013
+ braceexpand==0.1.7
1014
+ cachetools==4.2.0
1015
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
1016
+ cffi==1.14.3
1017
+ cfgv==3.2.0
1018
+ chardet==3.0.4
1019
+ charset-normalizer==2.0.7
1020
+ click==7.1.2
1021
+ cliff==3.9.0
1022
+ clldutils==3.5.4
1023
+ cloudpickle==2.2.1
1024
+ cmaes==0.8.2
1025
+ cmake==3.18.4.post1
1026
+ cmd2==2.2.0
1027
+ colorama==0.4.4
1028
+ colorlog==4.6.2
1029
+ configparser==5.1.0
1030
+ conllu==4.5.3
1031
+ croniter==1.3.15
1032
+ cryptography==38.0.4
1033
+ csrgraph==0.1.28
1034
+ csvw==1.8.1
1035
+ cycler==0.10.0
1036
+ Cython==0.29.21
1037
+ dataclasses==0.6
1038
+ dateutils==0.6.12
1039
+ decorator==4.4.2
1040
+ deepdiff==6.3.0
1041
+ deepspeech==0.9.1
1042
+ defusedxml==0.7.1
1043
+ Deprecated==1.2.14
1044
+ dill==0.3.3
1045
+ Distance==0.1.3
1046
+ distlib==0.3.1
1047
+ Django==3.2.16
1048
+ django-auditlog==2.2.1
1049
+ django-filter==22.1
1050
+ django-js-asset==1.2.2
1051
+ django-mptt==0.14.0
1052
+ djangorestframework==3.14.0
1053
+ docker-pycreds==0.4.0
1054
+ docopt==0.6.2
1055
+ docutils==0.16
1056
+ drf-excel==2.2.0
1057
+ drf-flex-fields==1.0.0
1058
+ drf-renderer-xlsx==0.4.1
1059
+ easyocr==1.2.1
1060
+ editdistance==0.6.0
1061
+ einops==0.3.2
1062
+ emoji==2.2.0
1063
+ entrypoints==0.3
1064
+ et-xmlfile==1.1.0
1065
+ exceptiongroup==1.1.0
1066
+ farasapy==0.0.14
1067
+ fastapi==0.98.0
1068
+ fastjsonschema==2.17.1
1069
+ fasttext==0.9.2
1070
+ ffmpeg-python==0.2.0
1071
+ ffmpy==0.3.0
1072
+ filelock==3.0.12
1073
+ flair==0.12.2
1074
+ flake8==3.7.9
1075
+ flatbuffers==1.12
1076
+ frozendict==2.0.7
1077
+ frozenlist==1.2.0
1078
+ fsspec==2021.11.0
1079
+ ftfy==6.1.1
1080
+ future==0.18.2
1081
+ g2p-en==2.1.0
1082
+ gast==0.3.3
1083
+ gdown==4.4.0
1084
+ gdrive==0.1.5
1085
+ gensim==4.0.1
1086
+ gitdb==4.0.9
1087
+ GitPython==3.1.24
1088
+ google-api-core==2.11.1
1089
+ google-api-python-client==2.43.0
1090
+ google-auth==1.24.0
1091
+ google-auth-httplib2==0.1.0
1092
+ google-auth-oauthlib==0.5.3
1093
+ google-pasta==0.2.0
1094
+ googleapis-common-protos==1.59.1
1095
+ gradio==3.44.4
1096
+ gradio-client==0.5.1
1097
+ greenlet==1.1.2
1098
+ grpcio==1.32.0
1099
+ h11==0.14.0
1100
+ h5features==1.3.2
1101
+ h5py==2.10.0
1102
+ hierarchy==0.4.0
1103
+ hmmlearn==0.2.8
1104
+ htk-io==0.5
1105
+ httpcore==0.16.3
1106
+ httplib2==0.22.0
1107
+ httpx==0.23.3
1108
+ huggingface-hub==0.15.1
1109
+ hydra-colorlog==0.1.4
1110
+ hydra-core==1.3.2
1111
+ hyperopt==0.2.7
1112
+ HyperPyYAML==1.1.0
1113
+ hypothesis==6.61.2
1114
+ identify==1.5.10
1115
+ idna==2.10
1116
+ imageio==2.9.0
1117
+ imagesize==1.2.0
1118
+ importlib-metadata==4.8.1
1119
+ importlib-resources==5.2.2
1120
+ inflect==5.3.0
1121
+ inquirer==3.1.3
1122
+ ipadic==1.0.0
1123
+ ipyevents==2.0.1
1124
+ ipykernel==5.3.4
1125
+ ipython==7.19.0
1126
+ ipython-genutils==0.2.0
1127
+ ipywebrtc==0.6.0
1128
+ ipywidgets==7.6.3
1129
+ iso-639==0.4.5
1130
+ isodate==0.6.0
1131
+ isort==4.3.21
1132
+ itsdangerous==2.1.2
1133
+ Janome==0.5.0
1134
+ jedi==0.17.2
1135
+ jeepney==0.8.0
1136
+ jieba==0.42.1
1137
+ Jinja2==3.0.3
1138
+ jiwer==2.2.0
1139
+ jmespath==0.10.0
1140
+ joblib==0.17.0
1141
+ jsonschema==3.2.0
1142
+ julius==0.2.7
1143
+ jupyter-client==6.1.7
1144
+ jupyter-core==4.7.0
1145
+ jupyterlab-pygments==0.1.2
1146
+ jupyterlab-widgets==1.0.0
1147
+ kaitaistruct==0.9
1148
+ kaldi-io==0.9.4
1149
+ kaldi-python-io==1.2.2
1150
+ kaldiio==2.17.2
1151
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
1152
+ Keras-Preprocessing==1.1.2
1153
+ kiwisolver==1.3.1
1154
+ lang-trans==0.6.0
1155
+ langdetect==1.0.9
1156
+ latexcodec==2.0.1
1157
+ ldap3==2.9.1
1158
+ librosa==0.9.0
1159
+ lightning-cloud==0.5.37
1160
+ lightning-utilities==0.8.0
1161
+ linkify-it-py==1.0.3
1162
+ lit==16.0.6
1163
+ llvmlite==0.35.0
1164
+ lxml==4.9.0
1165
+ Mako==1.1.5
1166
+ Markdown==3.3.3
1167
+ markdown-it-py==3.0.0
1168
+ MarkupSafe==2.1.3
1169
+ marshmallow==3.14.0
1170
+ matplotlib==3.3.3
1171
+ mccabe==0.6.1
1172
+ mcd==0.4
1173
+ mdit-py-plugins==0.3.3
1174
+ mdurl==0.1.2
1175
+ mecab-python3==1.0.3
1176
+ megatron-lm==2.2.0
1177
+ metrics==0.3.3
1178
+ mido==1.2.10
1179
+ mistune==0.8.4
1180
+ more-itertools==8.6.0
1181
+ mpld3==0.3
1182
+ mpmath==1.2.1
1183
+ multidict==5.2.0
1184
+ multiprocess==0.70.11.1
1185
+ nbclient==0.5.3
1186
+ nbconvert==5.6.1
1187
+ nbformat==5.9.0
1188
+ NEMO==4.3.2
1189
+ nemo-toolkit==1.4.0
1190
+ nest-asyncio==1.5.1
1191
+ networkx==2.8.8
1192
+ nltk==3.2.4
1193
+ nodeenv==1.5.0
1194
+ normalize==2.0.2
1195
+ notebook==6.3.0
1196
+ numba==0.52.0
1197
+ numpy==1.19.4
1198
+ nvidia-cublas-cu11==11.10.3.66
1199
+ nvidia-cuda-cupti-cu11==11.7.101
1200
+ nvidia-cuda-nvrtc-cu11==11.7.99
1201
+ nvidia-cuda-runtime-cu11==11.7.99
1202
+ nvidia-cudnn-cu11==8.5.0.96
1203
+ nvidia-cufft-cu11==10.9.0.58
1204
+ nvidia-curand-cu11==10.2.10.91
1205
+ nvidia-cusolver-cu11==11.4.0.1
1206
+ nvidia-cusparse-cu11==11.7.4.91
1207
+ nvidia-nccl-cu11==2.14.3
1208
+ nvidia-nvtx-cu11==11.7.91
1209
+ oauthlib==3.1.0
1210
+ omegaconf==2.3.0
1211
+ onnx==1.10.2
1212
+ OpenCC==1.1.2
1213
+ opencv-python==4.4.0.46
1214
+ openpyxl==3.0.9
1215
+ opensmile==2.2.0
1216
+ opt-einsum==3.3.0
1217
+ optuna==2.10.0
1218
+ ordered-set==4.1.0
1219
+ orjson==3.8.4
1220
+ oyaml==1.0
1221
+ packaging==22.0
1222
+ pandas==1.2.5
1223
+ pandocfilters==1.4.3
1224
+ pangu==4.0.6.1
1225
+ parameterized==0.8.1
1226
+ parso==0.7.1
1227
+ pathlib2==2.3.7.post1
1228
+ pathspec==0.5.5
1229
+ pathtools==0.1.2
1230
+ pbr==5.6.0
1231
+ pefile==2019.4.18
1232
+ pescador==2.1.0
1233
+ pesq==0.0.3
1234
+ pexpect==4.8.0
1235
+ phonemizer==2.2.1
1236
+ pickleshare==0.7.5
1237
+ Pillow==9.3.0
1238
+ pip-api==0.0.23
1239
+ pipreqs==0.4.11
1240
+ pluggy==0.13.1
1241
+ pooch==1.3.0
1242
+ portalocker==2.3.2
1243
+ pptree==3.1
1244
+ pre-commit==2.9.0
1245
+ preprocessing==0.1.13
1246
+ pretty-midi==0.2.9
1247
+ prettytable==2.2.1
1248
+ primePy==1.3
1249
+ progressbar2==3.53.1
1250
+ prometheus-client==0.10.1
1251
+ promise==2.3
1252
+ prompt-toolkit==3.0.8
1253
+ protobuf==3.20.3
1254
+ psutil==5.6.6
1255
+ ptyprocess==0.6.0
1256
+ py==1.9.0
1257
+ py-espeak-ng==0.1.8
1258
+ py4j==0.10.9.7
1259
+ pyannote.audio==2.1.1
1260
+ pyannote.core==4.5
1261
+ pyannote.database==4.1.3
1262
+ pyannote.metrics==3.2.1
1263
+ pyannote.pipeline==2.3
1264
+ pyannotebook==0.1.0.dev0
1265
+ PyArabic==0.6.15
1266
+ pyarrow==3.0.0
1267
+ pyasn1==0.4.8
1268
+ pyasn1-modules==0.2.8
1269
+ pybind11==2.8.1
1270
+ pybtex==0.24.0
1271
+ pybtex-docutils==1.0.1
1272
+ pycodestyle==2.5.0
1273
+ pycparser==2.20
1274
+ pycryptodome==3.16.0
1275
+ pyctcdecode==0.4.0
1276
+ pydantic==1.10.4
1277
+ pyDeprecate==0.3.1
1278
+ pydub==0.25.1
1279
+ pyflakes==2.1.1
1280
+ Pygments==2.15.1
1281
+ pygtrie==2.5.0
1282
+ PyJWT==2.7.0
1283
+ pymodbus==2.5.3
1284
+ pyparsing==2.4.7
1285
+ pyperclip==1.8.2
1286
+ pypinyin==0.43.0
1287
+ pyrsistent==0.17.3
1288
+ pyserial==3.5
1289
+ PySocks==1.7.1
1290
+ pystoi==0.3.3
1291
+ pytest==5.4.1
1292
+ pytest-runner==5.3.1
1293
+ python-bidi==0.4.2
1294
+ python-crfsuite==0.9.7
1295
+ python-dateutil==2.8.2
1296
+ python-editor==1.0.4
1297
+ python-Levenshtein==0.12.2
1298
+ python-multipart==0.0.5
1299
+ python-utils==2.4.0
1300
+ pytorch-lightning==1.6.5
1301
+ pytorch-metric-learning==1.7.3
1302
+ pytorch-revgrad==0.2.0
1303
+ pytube==11.0.1
1304
+ pytz==2022.6
1305
+ PyWavelets==1.1.1
1306
+ PyYAML==6.0
1307
+ pyzmq==20.0.0
1308
+ rapidfuzz==1.8.2
1309
+ readchar==4.0.5
1310
+ regex==2020.11.13
1311
+ requests==2.28.1
1312
+ requests-oauthlib==1.3.0
1313
+ resampy==0.2.2
1314
+ rfc3986==1.4.0
1315
+ rich==13.4.2
1316
+ richenum==1.3.1
1317
+ rsa==4.7
1318
+ ruamel.yaml==0.17.21
1319
+ ruamel.yaml.clib==0.2.7
1320
+ s3m==1.1.0
1321
+ s3transfer==0.5.0
1322
+ sacrebleu==2.0.0
1323
+ sacremoses==0.0.44
1324
+ safetensors==0.3.1
1325
+ scikit-image==0.18.1
1326
+ scikit-learn==0.23.2
1327
+ scipy==1.5.4
1328
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
1329
+ seaborn==0.11.1
1330
+ SecretStorage==3.3.3
1331
+ segments==2.1.3
1332
+ segtok==1.5.11
1333
+ semantic-version==2.10.0
1334
+ semver==2.13.0
1335
+ Send2Trash==1.5.0
1336
+ sentencepiece==0.1.99
1337
+ sentry-sdk==1.4.3
1338
+ shellingham==1.4.0
1339
+ shortuuid==1.0.7
1340
+ SIDEKIT==1.3.8.5.2
1341
+ simplejson==3.17.5
1342
+ singledispatchmethod==1.0
1343
+ six==1.15.0
1344
+ smart-open==5.0.0
1345
+ smmap==5.0.0
1346
+ sniffio==1.3.0
1347
+ snowballstemmer==2.0.0
1348
+ sortedcollections==2.1.0
1349
+ sortedcontainers==2.4.0
1350
+ sounddevice==0.4.5
1351
+ SoundFile==0.10.3.post1
1352
+ soupsieve==2.3
1353
+ sox==1.4.1
1354
+ sparsemax==0.1.9
1355
+ speechbrain==0.5.14
1356
+ sphfile==1.0.3
1357
+ Sphinx==3.3.1
1358
+ sphinx-rtd-theme==0.2.4
1359
+ sphinxcontrib-applehelp==1.0.2
1360
+ sphinxcontrib-bibtex==2.4.1
1361
+ sphinxcontrib-devhelp==1.0.2
1362
+ sphinxcontrib-htmlhelp==1.0.3
1363
+ sphinxcontrib-jsmath==1.0.1
1364
+ sphinxcontrib-qthelp==1.0.3
1365
+ sphinxcontrib-serializinghtml==1.1.4
1366
+ SQLAlchemy==1.4.25
1367
+ sqlitedict==2.1.0
1368
+ sqlparse==0.4.2
1369
+ stanza==1.4.2
1370
+ starlette==0.27.0
1371
+ starsessions==1.3.0
1372
+ stevedore==3.4.0
1373
+ subprocess32==3.5.4
1374
+ sympy==1.9
1375
+ tabulate==0.8.9
1376
+ tensorboard==2.4.0
1377
+ tensorboard-plugin-wit==1.7.0
1378
+ tensorboardX==2.6.1
1379
+ tensorflow==2.4.0
1380
+ tensorflow-estimator==2.4.0
1381
+ termcolor==1.1.0
1382
+ terminado==0.9.4
1383
+ testpath==0.4.4
1384
+ threadpoolctl==2.1.0
1385
+ tifffile==2020.12.8
1386
+ tikzplotlib==0.9.8
1387
+ tinycss2==1.2.1
1388
+ tkseem==0.0.3
1389
+ tokenizers==0.13.3
1390
+ toml==0.10.2
1391
+ toolz==0.12.0
1392
+ torch==1.13.1
1393
+ torch-audiomentations==0.11.0
1394
+ torch-pitch-shift==1.2.4
1395
+ torch-stft==0.1.4
1396
+ torchaudio==0.13.1
1397
+ torchmetrics==0.11.4
1398
+ torchvision==0.14.1
1399
+ tornado==6.1
1400
+ tqdm==4.61.1
1401
+ trackrip==1.2.1
1402
+ traitlets==5.9.0
1403
+ transformer-smaller-training-vocab==0.3.1
1404
+ transformers==4.30.2
1405
+ triton==2.0.0
1406
+ typed-ast==1.4.1
1407
+ typer==0.4.0
1408
+ typing-extensions==4.4.0
1409
+ uc-micro-py==1.0.1
1410
+ Unidecode==1.3.2
1411
+ uritemplate==3.0.1
1412
+ urllib3==1.26.2
1413
+ uvicorn==0.20.0
1414
+ versioneer==0.28
1415
+ virtualenv==20.2.1
1416
+ wandb==0.12.6
1417
+ wcwidth==0.2.5
1418
+ webdataset==0.1.62
1419
+ webencodings==0.5.1
1420
+ websocket-client==1.6.1
1421
+ websockets==10.4
1422
+ Werkzeug==1.0.1
1423
+ wget==3.2
1424
+ widgetsnbextension==3.5.1
1425
+ Wikipedia-API==0.6.0
1426
+ wordninja==2.0.0
1427
+ wrapt==1.12.1
1428
+ xmltodict==0.13.0
1429
+ xxhash==2.0.0
1430
+ yamllint==1.23.0
1431
+ yarg==0.1.9
1432
+ yarl==1.7.2
1433
+ yaspin==2.1.0
1434
+ youtokentome==1.0.6
1435
+ youtube-dl==2021.6.6
1436
+ zipp==3.6.0
1437
+
1438
+
1439
+ 2023-09-25 11:54:18,932 - speechbrain.utils.superpowers - DEBUG - 082323a
1440
+
1441
+
1442
+ 2023-09-25 11:54:18,967 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
1443
+ 2023-09-25 11:54:18,967 - speechbrain.dataio.encoder - INFO - Load called, but CTCTextEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
1444
+ 2023-09-25 11:54:18,967 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
1445
+ 2023-09-25 11:54:21,194 - pyctcdecode.decoder - INFO - Using arpa instead of binary LM file, decoder instantiation might be slow.
1446
+ 2023-09-25 11:54:21,296 - pyctcdecode.alphabet - INFO - Alphabet determined to be of regular style.
1447
+ 2023-09-25 11:54:21,358 - pyctcdecode.alphabet - WARNING - Unigrams and labels don't seem to agree.
1448
+ 2023-09-25 11:54:22,645 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
1449
+ 2023-09-25 11:54:22,645 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
1450
+ 2023-09-25 11:54:23,707 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
1451
+ 2023-09-25 11:54:23,708 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00
1452
+ 2023-09-25 11:54:24,309 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): checkip.amazonaws.com:443
1453
+ 2023-09-25 11:54:24,311 - asyncio - DEBUG - Using selector: EpollSelector
1454
+ 2023-09-25 11:54:24,312 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
1455
+ 2023-09-25 11:54:24,340 - asyncio - DEBUG - Using selector: EpollSelector
1456
+ 2023-09-25 11:54:24,347 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 127.0.0.1:7860
1457
+ 2023-09-25 11:54:24,349 - urllib3.connectionpool - DEBUG - http://127.0.0.1:7860 "GET /startup-events HTTP/1.1" 200 5
1458
+ 2023-09-25 11:54:24,376 - botocore.hooks - DEBUG - Changing event name from creating-client-class.iot-data to creating-client-class.iot-data-plane
1459
+ 2023-09-25 11:54:24,377 - botocore.hooks - DEBUG - Changing event name from before-call.apigateway to before-call.api-gateway
1460
+ 2023-09-25 11:54:24,377 - botocore.hooks - DEBUG - Changing event name from request-created.machinelearning.Predict to request-created.machine-learning.Predict
1461
+ 2023-09-25 11:54:24,378 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.autoscaling.CreateLaunchConfiguration to before-parameter-build.auto-scaling.CreateLaunchConfiguration
1462
+ 2023-09-25 11:54:24,378 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.route53 to before-parameter-build.route-53
1463
+ 2023-09-25 11:54:24,379 - botocore.hooks - DEBUG - Changing event name from request-created.cloudsearchdomain.Search to request-created.cloudsearch-domain.Search
1464
+ 2023-09-25 11:54:24,379 - botocore.hooks - DEBUG - Changing event name from docs.*.autoscaling.CreateLaunchConfiguration.complete-section to docs.*.auto-scaling.CreateLaunchConfiguration.complete-section
1465
+ 2023-09-25 11:54:24,381 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.logs.CreateExportTask to before-parameter-build.cloudwatch-logs.CreateExportTask
1466
+ 2023-09-25 11:54:24,381 - botocore.hooks - DEBUG - Changing event name from docs.*.logs.CreateExportTask.complete-section to docs.*.cloudwatch-logs.CreateExportTask.complete-section
1467
+ 2023-09-25 11:54:24,381 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.cloudsearchdomain.Search to before-parameter-build.cloudsearch-domain.Search
1468
+ 2023-09-25 11:54:24,381 - botocore.hooks - DEBUG - Changing event name from docs.*.cloudsearchdomain.Search.complete-section to docs.*.cloudsearch-domain.Search.complete-section
1469
+ 2023-09-25 11:54:24,382 - botocore.utils - DEBUG - IMDS ENDPOINT: http://169.254.169.254/
1470
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: env
1471
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: assume-role
1472
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: assume-role-with-web-identity
1473
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: sso
1474
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: shared-credentials-file
1475
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: custom-process
1476
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: config-file
1477
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: ec2-credentials-file
1478
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: boto-config
1479
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: container-role
1480
+ 2023-09-25 11:54:24,383 - botocore.credentials - DEBUG - Looking for credentials via: iam-role
1481
+ 2023-09-25 11:54:24,383 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 169.254.169.254:80
1482
+ 2023-09-25 11:54:24,733 - urllib3.connectionpool - DEBUG - https://checkip.amazonaws.com:443 "GET / HTTP/1.1" 200 14
1483
+ 2023-09-25 11:54:24,740 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
1484
+ 2023-09-25 11:54:25,385 - botocore.utils - DEBUG - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/api/token: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
1485
+ Traceback (most recent call last):
1486
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 169, in _new_conn
1487
+ conn = connection.create_connection(
1488
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 96, in create_connection
1489
+ raise err
1490
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 86, in create_connection
1491
+ sock.connect(sa)
1492
+ socket.timeout: timed out
1493
+
1494
+ During handling of the above exception, another exception occurred:
1495
+
1496
+ Traceback (most recent call last):
1497
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 385, in send
1498
+ urllib_response = conn.urlopen(
1499
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
1500
+ retries = retries.increment(
1501
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/retry.py", line 506, in increment
1502
+ raise six.reraise(type(error), error, _stacktrace)
1503
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/packages/six.py", line 735, in reraise
1504
+ raise value
1505
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
1506
+ httplib_response = self._make_request(
1507
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 394, in _make_request
1508
+ conn.request(method, url, **httplib_request_kw)
1509
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 234, in request
1510
+ super(HTTPConnection, self).request(method, url, body=body, headers=headers)
1511
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1255, in request
1512
+ self._send_request(method, url, body, headers, encode_chunked)
1513
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 92, in _send_request
1514
+ rval = super(AWSConnection, self)._send_request(
1515
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1301, in _send_request
1516
+ self.endheaders(body, encode_chunked=encode_chunked)
1517
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1250, in endheaders
1518
+ self._send_output(message_body, encode_chunked=encode_chunked)
1519
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 120, in _send_output
1520
+ self.send(msg)
1521
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 204, in send
1522
+ return super(AWSConnection, self).send(str)
1523
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 950, in send
1524
+ self.connect()
1525
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 200, in connect
1526
+ conn = self._new_conn()
1527
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 174, in _new_conn
1528
+ raise ConnectTimeoutError(
1529
+ urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fccca8c8a00>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')
1530
+
1531
+ During handling of the above exception, another exception occurred:
1532
+
1533
+ Traceback (most recent call last):
1534
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/utils.py", line 430, in _fetch_metadata_token
1535
+ response = self._session.send(request.prepare())
1536
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 418, in send
1537
+ raise ConnectTimeoutError(endpoint_url=request.url, error=e)
1538
+ botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
1539
+ 2023-09-25 11:54:25,391 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (2): 169.254.169.254:80
1540
+ 2023-09-25 11:54:25,437 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "GET /pkg-version HTTP/1.1" 200 21
1541
+ 2023-09-25 11:54:25,998 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "POST /gradio-initiated-analytics/ HTTP/1.1" 200 None
1542
+ 2023-09-25 11:54:26,392 - botocore.utils - DEBUG - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/meta-data/iam/security-credentials/: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
1543
+ Traceback (most recent call last):
1544
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 169, in _new_conn
1545
+ conn = connection.create_connection(
1546
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 96, in create_connection
1547
+ raise err
1548
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 86, in create_connection
1549
+ sock.connect(sa)
1550
+ socket.timeout: timed out
1551
+
1552
+ During handling of the above exception, another exception occurred:
1553
+
1554
+ Traceback (most recent call last):
1555
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 385, in send
1556
+ urllib_response = conn.urlopen(
1557
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
1558
+ retries = retries.increment(
1559
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/retry.py", line 506, in increment
1560
+ raise six.reraise(type(error), error, _stacktrace)
1561
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/packages/six.py", line 735, in reraise
1562
+ raise value
1563
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
1564
+ httplib_response = self._make_request(
1565
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 394, in _make_request
1566
+ conn.request(method, url, **httplib_request_kw)
1567
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 234, in request
1568
+ super(HTTPConnection, self).request(method, url, body=body, headers=headers)
1569
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1255, in request
1570
+ self._send_request(method, url, body, headers, encode_chunked)
1571
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 92, in _send_request
1572
+ rval = super(AWSConnection, self)._send_request(
1573
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1301, in _send_request
1574
+ self.endheaders(body, encode_chunked=encode_chunked)
1575
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1250, in endheaders
1576
+ self._send_output(message_body, encode_chunked=encode_chunked)
1577
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 120, in _send_output
1578
+ self.send(msg)
1579
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 204, in send
1580
+ return super(AWSConnection, self).send(str)
1581
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 950, in send
1582
+ self.connect()
1583
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 200, in connect
1584
+ conn = self._new_conn()
1585
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 174, in _new_conn
1586
+ raise ConnectTimeoutError(
1587
+ urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fccca8d92e0>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')
1588
+
1589
+ During handling of the above exception, another exception occurred:
1590
+
1591
+ Traceback (most recent call last):
1592
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/utils.py", line 478, in _get_request
1593
+ response = self._session.send(request.prepare())
1594
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 418, in send
1595
+ raise ConnectTimeoutError(endpoint_url=request.url, error=e)
1596
+ botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
1597
+ 2023-09-25 11:54:26,393 - botocore.utils - DEBUG - Max number of attempts exceeded (1) when attempting to retrieve data from metadata service.
1598
+ 2023-09-25 11:54:26,394 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/endpoints.json
1599
+ 2023-09-25 11:54:26,421 - botocore.hooks - DEBUG - Event choose-service-name: calling handler <function handle_service_name_alias at 0x7fcce336fa60>
1600
+ 2023-09-25 11:54:26,432 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/sts/2011-06-15/service-2.json
1601
+ 2023-09-25 11:54:26,433 - botocore.hooks - DEBUG - Event creating-client-class.sts: calling handler <function add_generate_presigned_url at 0x7fcce3280430>
1602
+ 2023-09-25 11:54:26,435 - botocore.endpoint - DEBUG - Setting sts timeout as (60, 60)
1603
+ 2023-09-25 11:54:26,436 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/_retry.json
1604
+ 2023-09-25 11:54:26,436 - botocore.client - DEBUG - Registering retry handlers for service: sts
1605
+ 2023-09-25 11:54:26,437 - botocore.hooks - DEBUG - Event before-parameter-build.sts.GetCallerIdentity: calling handler <function generate_idempotent_uuid at 0x7fcce3268dc0>
1606
+ 2023-09-25 11:54:26,437 - botocore.hooks - DEBUG - Event before-call.sts.GetCallerIdentity: calling handler <function inject_api_version_header_if_needed at 0x7fcce3271670>
1607
+ 2023-09-25 11:54:26,437 - botocore.endpoint - DEBUG - Making request for OperationModel(name=GetCallerIdentity) with params: {'url_path': '/', 'query_string': '', 'method': 'POST', 'headers': {'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', 'User-Agent': 'Boto3/1.20.2 Python/3.8.5 Linux/5.4.0-150-generic Botocore/1.23.2'}, 'body': {'Action': 'GetCallerIdentity', 'Version': '2011-06-15'}, 'url': 'https://sts.amazonaws.com/', 'context': {'client_region': 'aws-global', 'client_config': <botocore.config.Config object at 0x7fcce353d490>, 'has_streaming_input': False, 'auth_type': None}}
1608
+ 2023-09-25 11:54:26,437 - botocore.hooks - DEBUG - Event request-created.sts.GetCallerIdentity: calling handler <bound method RequestSigner.handler of <botocore.signers.RequestSigner object at 0x7fcce353d550>>
1609
+ 2023-09-25 11:54:26,437 - botocore.hooks - DEBUG - Event choose-signer.sts.GetCallerIdentity: calling handler <function set_operation_specific_signer at 0x7fcce3268ca0>
1610
+ 2023-09-25 11:54:26,439 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 127.0.0.1:7860
1611
+ 2023-09-25 11:54:26,448 - urllib3.connectionpool - DEBUG - http://127.0.0.1:7860 "HEAD / HTTP/1.1" 200 0
1612
+ 2023-09-25 11:54:26,449 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
1613
+ 2023-09-25 11:54:27,539 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "POST /gradio-launched-telemetry/ HTTP/1.1" 200 None
1614
+ 2023-09-25 11:54:36,120 - matplotlib.pyplot - DEBUG - Loaded backend agg version unknown.
1615
+ 2023-09-25 11:54:36,189 - pydub.converter - DEBUG - subprocess.call(['ffmpeg', '-y', '-i', '/tmp/gradio/a676790f6947ae75dde556fb6a864b2ddb801922/audio.wav', '-acodec', 'pcm_s32le', '-vn', '-f', 'wav', '-'])
1616
+ 2023-09-25 11:54:36,267 - matplotlib.pyplot - DEBUG - Loaded backend TkAgg version unknown.
1617
+ 2023-09-25 11:55:11,177 - speechbrain.core - INFO - Beginning experiment!
1618
+ 2023-09-25 11:55:11,177 - speechbrain.core - INFO - Experiment folder: semi_wavlm_large_tunisian_ctc/1234
1619
+ 2023-09-25 11:55:11,673 - speechbrain.utils.superpowers - DEBUG - abkhazia==1.0
1620
+ absl-py==0.11.0
1621
+ aiofiles==23.2.1
1622
+ aiohttp==3.8.0
1623
+ aiosignal==1.2.0
1624
+ alabaster==0.7.12
1625
+ alembic==1.7.4
1626
+ altair==4.2.0
1627
+ altgraph==0.17
1628
+ antlr4-python3-runtime==4.9.3
1629
+ anyio==3.6.2
1630
+ appdirs==1.4.4
1631
+ argcomplete==1.12.2
1632
+ argon2-cffi==20.1.0
1633
+ arrow==1.2.3
1634
+ asgiref==3.6.0
1635
+ asteroid-filterbanks==0.4.0
1636
+ astunparse==1.6.3
1637
+ async-generator==1.10
1638
+ async-timeout==4.0.0
1639
+ attrdict==2.0.1
1640
+ attrs==20.3.0
1641
+ audeer==1.16.0
1642
+ audformat==0.11.5
1643
+ audinterface==0.7.0
1644
+ audiofile==1.0.0
1645
+ audiomentations==0.25.0
1646
+ audioread==2.1.9
1647
+ audobject==0.4.14
1648
+ audresample==0.1.6
1649
+ -e git+https://github.com/facebookresearch/WavAugment.git@54afcdb00ccc852c2f030f239f8532c9562b550e#egg=augment
1650
+ autopage==0.4.0
1651
+ Babel==2.9.0
1652
+ backcall==0.2.0
1653
+ backports.cached-property==1.0.2
1654
+ beautifulsoup4==4.10.0
1655
+ black==19.10b0
1656
+ bleach==3.3.0
1657
+ blessed==1.20.0
1658
+ boto3==1.20.2
1659
+ botocore==1.23.2
1660
+ bpemb==0.3.4
1661
+ braceexpand==0.1.7
1662
+ cachetools==4.2.0
1663
+ certifi @ file:///croot/certifi_1671487769961/work/certifi
1664
+ cffi==1.14.3
1665
+ cfgv==3.2.0
1666
+ chardet==3.0.4
1667
+ charset-normalizer==2.0.7
1668
+ click==7.1.2
1669
+ cliff==3.9.0
1670
+ clldutils==3.5.4
1671
+ cloudpickle==2.2.1
1672
+ cmaes==0.8.2
1673
+ cmake==3.18.4.post1
1674
+ cmd2==2.2.0
1675
+ colorama==0.4.4
1676
+ colorlog==4.6.2
1677
+ configparser==5.1.0
1678
+ conllu==4.5.3
1679
+ croniter==1.3.15
1680
+ cryptography==38.0.4
1681
+ csrgraph==0.1.28
1682
+ csvw==1.8.1
1683
+ cycler==0.10.0
1684
+ Cython==0.29.21
1685
+ dataclasses==0.6
1686
+ dateutils==0.6.12
1687
+ decorator==4.4.2
1688
+ deepdiff==6.3.0
1689
+ deepspeech==0.9.1
1690
+ defusedxml==0.7.1
1691
+ Deprecated==1.2.14
1692
+ dill==0.3.3
1693
+ Distance==0.1.3
1694
+ distlib==0.3.1
1695
+ Django==3.2.16
1696
+ django-auditlog==2.2.1
1697
+ django-filter==22.1
1698
+ django-js-asset==1.2.2
1699
+ django-mptt==0.14.0
1700
+ djangorestframework==3.14.0
1701
+ docker-pycreds==0.4.0
1702
+ docopt==0.6.2
1703
+ docutils==0.16
1704
+ drf-excel==2.2.0
1705
+ drf-flex-fields==1.0.0
1706
+ drf-renderer-xlsx==0.4.1
1707
+ easyocr==1.2.1
1708
+ editdistance==0.6.0
1709
+ einops==0.3.2
1710
+ emoji==2.2.0
1711
+ entrypoints==0.3
1712
+ et-xmlfile==1.1.0
1713
+ exceptiongroup==1.1.0
1714
+ farasapy==0.0.14
1715
+ fastapi==0.98.0
1716
+ fastjsonschema==2.17.1
1717
+ fasttext==0.9.2
1718
+ ffmpeg-python==0.2.0
1719
+ ffmpy==0.3.0
1720
+ filelock==3.0.12
1721
+ flair==0.12.2
1722
+ flake8==3.7.9
1723
+ flatbuffers==1.12
1724
+ frozendict==2.0.7
1725
+ frozenlist==1.2.0
1726
+ fsspec==2021.11.0
1727
+ ftfy==6.1.1
1728
+ future==0.18.2
1729
+ g2p-en==2.1.0
1730
+ gast==0.3.3
1731
+ gdown==4.4.0
1732
+ gdrive==0.1.5
1733
+ gensim==4.0.1
1734
+ gitdb==4.0.9
1735
+ GitPython==3.1.24
1736
+ google-api-core==2.11.1
1737
+ google-api-python-client==2.43.0
1738
+ google-auth==1.24.0
1739
+ google-auth-httplib2==0.1.0
1740
+ google-auth-oauthlib==0.5.3
1741
+ google-pasta==0.2.0
1742
+ googleapis-common-protos==1.59.1
1743
+ gradio==3.44.4
1744
+ gradio-client==0.5.1
1745
+ greenlet==1.1.2
1746
+ grpcio==1.32.0
1747
+ h11==0.14.0
1748
+ h5features==1.3.2
1749
+ h5py==2.10.0
1750
+ hierarchy==0.4.0
1751
+ hmmlearn==0.2.8
1752
+ htk-io==0.5
1753
+ httpcore==0.16.3
1754
+ httplib2==0.22.0
1755
+ httpx==0.23.3
1756
+ huggingface-hub==0.15.1
1757
+ hydra-colorlog==0.1.4
1758
+ hydra-core==1.3.2
1759
+ hyperopt==0.2.7
1760
+ HyperPyYAML==1.1.0
1761
+ hypothesis==6.61.2
1762
+ identify==1.5.10
1763
+ idna==2.10
1764
+ imageio==2.9.0
1765
+ imagesize==1.2.0
1766
+ importlib-metadata==4.8.1
1767
+ importlib-resources==5.2.2
1768
+ inflect==5.3.0
1769
+ inquirer==3.1.3
1770
+ ipadic==1.0.0
1771
+ ipyevents==2.0.1
1772
+ ipykernel==5.3.4
1773
+ ipython==7.19.0
1774
+ ipython-genutils==0.2.0
1775
+ ipywebrtc==0.6.0
1776
+ ipywidgets==7.6.3
1777
+ iso-639==0.4.5
1778
+ isodate==0.6.0
1779
+ isort==4.3.21
1780
+ itsdangerous==2.1.2
1781
+ Janome==0.5.0
1782
+ jedi==0.17.2
1783
+ jeepney==0.8.0
1784
+ jieba==0.42.1
1785
+ Jinja2==3.0.3
1786
+ jiwer==2.2.0
1787
+ jmespath==0.10.0
1788
+ joblib==0.17.0
1789
+ jsonschema==3.2.0
1790
+ julius==0.2.7
1791
+ jupyter-client==6.1.7
1792
+ jupyter-core==4.7.0
1793
+ jupyterlab-pygments==0.1.2
1794
+ jupyterlab-widgets==1.0.0
1795
+ kaitaistruct==0.9
1796
+ kaldi-io==0.9.4
1797
+ kaldi-python-io==1.2.2
1798
+ kaldiio==2.17.2
1799
+ kenlm @ https://github.com/kpu/kenlm/archive/master.zip
1800
+ Keras-Preprocessing==1.1.2
1801
+ kiwisolver==1.3.1
1802
+ lang-trans==0.6.0
1803
+ langdetect==1.0.9
1804
+ latexcodec==2.0.1
1805
+ ldap3==2.9.1
1806
+ librosa==0.9.0
1807
+ lightning-cloud==0.5.37
1808
+ lightning-utilities==0.8.0
1809
+ linkify-it-py==1.0.3
1810
+ lit==16.0.6
1811
+ llvmlite==0.35.0
1812
+ lxml==4.9.0
1813
+ Mako==1.1.5
1814
+ Markdown==3.3.3
1815
+ markdown-it-py==3.0.0
1816
+ MarkupSafe==2.1.3
1817
+ marshmallow==3.14.0
1818
+ matplotlib==3.3.3
1819
+ mccabe==0.6.1
1820
+ mcd==0.4
1821
+ mdit-py-plugins==0.3.3
1822
+ mdurl==0.1.2
1823
+ mecab-python3==1.0.3
1824
+ megatron-lm==2.2.0
1825
+ metrics==0.3.3
1826
+ mido==1.2.10
1827
+ mistune==0.8.4
1828
+ more-itertools==8.6.0
1829
+ mpld3==0.3
1830
+ mpmath==1.2.1
1831
+ multidict==5.2.0
1832
+ multiprocess==0.70.11.1
1833
+ nbclient==0.5.3
1834
+ nbconvert==5.6.1
1835
+ nbformat==5.9.0
1836
+ NEMO==4.3.2
1837
+ nemo-toolkit==1.4.0
1838
+ nest-asyncio==1.5.1
1839
+ networkx==2.8.8
1840
+ nltk==3.2.4
1841
+ nodeenv==1.5.0
1842
+ normalize==2.0.2
1843
+ notebook==6.3.0
1844
+ numba==0.52.0
1845
+ numpy==1.19.4
1846
+ nvidia-cublas-cu11==11.10.3.66
1847
+ nvidia-cuda-cupti-cu11==11.7.101
1848
+ nvidia-cuda-nvrtc-cu11==11.7.99
1849
+ nvidia-cuda-runtime-cu11==11.7.99
1850
+ nvidia-cudnn-cu11==8.5.0.96
1851
+ nvidia-cufft-cu11==10.9.0.58
1852
+ nvidia-curand-cu11==10.2.10.91
1853
+ nvidia-cusolver-cu11==11.4.0.1
1854
+ nvidia-cusparse-cu11==11.7.4.91
1855
+ nvidia-nccl-cu11==2.14.3
1856
+ nvidia-nvtx-cu11==11.7.91
1857
+ oauthlib==3.1.0
1858
+ omegaconf==2.3.0
1859
+ onnx==1.10.2
1860
+ OpenCC==1.1.2
1861
+ opencv-python==4.4.0.46
1862
+ openpyxl==3.0.9
1863
+ opensmile==2.2.0
1864
+ opt-einsum==3.3.0
1865
+ optuna==2.10.0
1866
+ ordered-set==4.1.0
1867
+ orjson==3.8.4
1868
+ oyaml==1.0
1869
+ packaging==22.0
1870
+ pandas==1.2.5
1871
+ pandocfilters==1.4.3
1872
+ pangu==4.0.6.1
1873
+ parameterized==0.8.1
1874
+ parso==0.7.1
1875
+ pathlib2==2.3.7.post1
1876
+ pathspec==0.5.5
1877
+ pathtools==0.1.2
1878
+ pbr==5.6.0
1879
+ pefile==2019.4.18
1880
+ pescador==2.1.0
1881
+ pesq==0.0.3
1882
+ pexpect==4.8.0
1883
+ phonemizer==2.2.1
1884
+ pickleshare==0.7.5
1885
+ Pillow==9.3.0
1886
+ pip-api==0.0.23
1887
+ pipreqs==0.4.11
1888
+ pluggy==0.13.1
1889
+ pooch==1.3.0
1890
+ portalocker==2.3.2
1891
+ pptree==3.1
1892
+ pre-commit==2.9.0
1893
+ preprocessing==0.1.13
1894
+ pretty-midi==0.2.9
1895
+ prettytable==2.2.1
1896
+ primePy==1.3
1897
+ progressbar2==3.53.1
1898
+ prometheus-client==0.10.1
1899
+ promise==2.3
1900
+ prompt-toolkit==3.0.8
1901
+ protobuf==3.20.3
1902
+ psutil==5.6.6
1903
+ ptyprocess==0.6.0
1904
+ py==1.9.0
1905
+ py-espeak-ng==0.1.8
1906
+ py4j==0.10.9.7
1907
+ pyannote.audio==2.1.1
1908
+ pyannote.core==4.5
1909
+ pyannote.database==4.1.3
1910
+ pyannote.metrics==3.2.1
1911
+ pyannote.pipeline==2.3
1912
+ pyannotebook==0.1.0.dev0
1913
+ PyArabic==0.6.15
1914
+ pyarrow==3.0.0
1915
+ pyasn1==0.4.8
1916
+ pyasn1-modules==0.2.8
1917
+ pybind11==2.8.1
1918
+ pybtex==0.24.0
1919
+ pybtex-docutils==1.0.1
1920
+ pycodestyle==2.5.0
1921
+ pycparser==2.20
1922
+ pycryptodome==3.16.0
1923
+ pyctcdecode==0.4.0
1924
+ pydantic==1.10.4
1925
+ pyDeprecate==0.3.1
1926
+ pydub==0.25.1
1927
+ pyflakes==2.1.1
1928
+ Pygments==2.15.1
1929
+ pygtrie==2.5.0
1930
+ PyJWT==2.7.0
1931
+ pymodbus==2.5.3
1932
+ pyparsing==2.4.7
1933
+ pyperclip==1.8.2
1934
+ pypinyin==0.43.0
1935
+ pyrsistent==0.17.3
1936
+ pyserial==3.5
1937
+ PySocks==1.7.1
1938
+ pystoi==0.3.3
1939
+ pytest==5.4.1
1940
+ pytest-runner==5.3.1
1941
+ python-bidi==0.4.2
1942
+ python-crfsuite==0.9.7
1943
+ python-dateutil==2.8.2
1944
+ python-editor==1.0.4
1945
+ python-Levenshtein==0.12.2
1946
+ python-multipart==0.0.5
1947
+ python-utils==2.4.0
1948
+ pytorch-lightning==1.6.5
1949
+ pytorch-metric-learning==1.7.3
1950
+ pytorch-revgrad==0.2.0
1951
+ pytube==11.0.1
1952
+ pytz==2022.6
1953
+ PyWavelets==1.1.1
1954
+ PyYAML==6.0
1955
+ pyzmq==20.0.0
1956
+ rapidfuzz==1.8.2
1957
+ readchar==4.0.5
1958
+ regex==2020.11.13
1959
+ requests==2.28.1
1960
+ requests-oauthlib==1.3.0
1961
+ resampy==0.2.2
1962
+ rfc3986==1.4.0
1963
+ rich==13.4.2
1964
+ richenum==1.3.1
1965
+ rsa==4.7
1966
+ ruamel.yaml==0.17.21
1967
+ ruamel.yaml.clib==0.2.7
1968
+ s3m==1.1.0
1969
+ s3transfer==0.5.0
1970
+ sacrebleu==2.0.0
1971
+ sacremoses==0.0.44
1972
+ safetensors==0.3.1
1973
+ scikit-image==0.18.1
1974
+ scikit-learn==0.23.2
1975
+ scipy==1.5.4
1976
+ -e git+https://github.com/sanghack81/SDCIT@00d060dde733fde9345154a494f81e97fb395ca7#egg=SDCIT
1977
+ seaborn==0.11.1
1978
+ SecretStorage==3.3.3
1979
+ segments==2.1.3
1980
+ segtok==1.5.11
1981
+ semantic-version==2.10.0
1982
+ semver==2.13.0
1983
+ Send2Trash==1.5.0
1984
+ sentencepiece==0.1.99
1985
+ sentry-sdk==1.4.3
1986
+ shellingham==1.4.0
1987
+ shortuuid==1.0.7
1988
+ SIDEKIT==1.3.8.5.2
1989
+ simplejson==3.17.5
1990
+ singledispatchmethod==1.0
1991
+ six==1.15.0
1992
+ smart-open==5.0.0
1993
+ smmap==5.0.0
1994
+ sniffio==1.3.0
1995
+ snowballstemmer==2.0.0
1996
+ sortedcollections==2.1.0
1997
+ sortedcontainers==2.4.0
1998
+ sounddevice==0.4.5
1999
+ SoundFile==0.10.3.post1
2000
+ soupsieve==2.3
2001
+ sox==1.4.1
2002
+ sparsemax==0.1.9
2003
+ speechbrain==0.5.14
2004
+ sphfile==1.0.3
2005
+ Sphinx==3.3.1
2006
+ sphinx-rtd-theme==0.2.4
2007
+ sphinxcontrib-applehelp==1.0.2
2008
+ sphinxcontrib-bibtex==2.4.1
2009
+ sphinxcontrib-devhelp==1.0.2
2010
+ sphinxcontrib-htmlhelp==1.0.3
2011
+ sphinxcontrib-jsmath==1.0.1
2012
+ sphinxcontrib-qthelp==1.0.3
2013
+ sphinxcontrib-serializinghtml==1.1.4
2014
+ SQLAlchemy==1.4.25
2015
+ sqlitedict==2.1.0
2016
+ sqlparse==0.4.2
2017
+ stanza==1.4.2
2018
+ starlette==0.27.0
2019
+ starsessions==1.3.0
2020
+ stevedore==3.4.0
2021
+ subprocess32==3.5.4
2022
+ sympy==1.9
2023
+ tabulate==0.8.9
2024
+ tensorboard==2.4.0
2025
+ tensorboard-plugin-wit==1.7.0
2026
+ tensorboardX==2.6.1
2027
+ tensorflow==2.4.0
2028
+ tensorflow-estimator==2.4.0
2029
+ termcolor==1.1.0
2030
+ terminado==0.9.4
2031
+ testpath==0.4.4
2032
+ threadpoolctl==2.1.0
2033
+ tifffile==2020.12.8
2034
+ tikzplotlib==0.9.8
2035
+ tinycss2==1.2.1
2036
+ tkseem==0.0.3
2037
+ tokenizers==0.13.3
2038
+ toml==0.10.2
2039
+ toolz==0.12.0
2040
+ torch==1.13.1
2041
+ torch-audiomentations==0.11.0
2042
+ torch-pitch-shift==1.2.4
2043
+ torch-stft==0.1.4
2044
+ torchaudio==0.13.1
2045
+ torchmetrics==0.11.4
2046
+ torchvision==0.14.1
2047
+ tornado==6.1
2048
+ tqdm==4.61.1
2049
+ trackrip==1.2.1
2050
+ traitlets==5.9.0
2051
+ transformer-smaller-training-vocab==0.3.1
2052
+ transformers==4.30.2
2053
+ triton==2.0.0
2054
+ typed-ast==1.4.1
2055
+ typer==0.4.0
2056
+ typing-extensions==4.4.0
2057
+ uc-micro-py==1.0.1
2058
+ Unidecode==1.3.2
2059
+ uritemplate==3.0.1
2060
+ urllib3==1.26.2
2061
+ uvicorn==0.20.0
2062
+ versioneer==0.28
2063
+ virtualenv==20.2.1
2064
+ wandb==0.12.6
2065
+ wcwidth==0.2.5
2066
+ webdataset==0.1.62
2067
+ webencodings==0.5.1
2068
+ websocket-client==1.6.1
2069
+ websockets==10.4
2070
+ Werkzeug==1.0.1
2071
+ wget==3.2
2072
+ widgetsnbextension==3.5.1
2073
+ Wikipedia-API==0.6.0
2074
+ wordninja==2.0.0
2075
+ wrapt==1.12.1
2076
+ xmltodict==0.13.0
2077
+ xxhash==2.0.0
2078
+ yamllint==1.23.0
2079
+ yarg==0.1.9
2080
+ yarl==1.7.2
2081
+ yaspin==2.1.0
2082
+ youtokentome==1.0.6
2083
+ youtube-dl==2021.6.6
2084
+ zipp==3.6.0
2085
+
2086
+
2087
+ 2023-09-25 11:55:11,703 - speechbrain.utils.superpowers - DEBUG - 082323a
2088
+
2089
+
2090
+ 2023-09-25 11:55:11,733 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
2091
+ 2023-09-25 11:55:11,733 - speechbrain.dataio.encoder - INFO - Load called, but CTCTextEncoder is not empty. Loaded data will overwrite everything. This is normal if there is e.g. an unk label defined at init.
2092
+ 2023-09-25 11:55:11,734 - speechbrain.dataio.encoder - DEBUG - Loaded categorical encoding from semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt
2093
+ 2023-09-25 11:55:14,066 - pyctcdecode.decoder - INFO - Using arpa instead of binary LM file, decoder instantiation might be slow.
2094
+ 2023-09-25 11:55:14,171 - pyctcdecode.alphabet - INFO - Alphabet determined to be of regular style.
2095
+ 2023-09-25 11:55:14,236 - pyctcdecode.alphabet - WARNING - Unigrams and labels don't seem to agree.
2096
+ 2023-09-25 11:55:15,539 - speechbrain.core - INFO - Info: auto_mix_prec arg from hparam file is used
2097
+ 2023-09-25 11:55:15,539 - speechbrain.core - INFO - Info: ckpt_interval_minutes arg from hparam file is used
2098
+ 2023-09-25 11:55:15,543 - speechbrain.core - INFO - 314.4M trainable parameters in ASR
2099
+ 2023-09-25 11:55:15,543 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00
2100
+ 2023-09-25 11:55:16,121 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
2101
+ 2023-09-25 11:55:16,121 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): checkip.amazonaws.com:443
2102
+ 2023-09-25 11:55:16,122 - asyncio - DEBUG - Using selector: EpollSelector
2103
+ 2023-09-25 11:55:16,149 - asyncio - DEBUG - Using selector: EpollSelector
2104
+ 2023-09-25 11:55:16,157 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 127.0.0.1:7860
2105
+ 2023-09-25 11:55:16,158 - urllib3.connectionpool - DEBUG - http://127.0.0.1:7860 "GET /startup-events HTTP/1.1" 200 5
2106
+ 2023-09-25 11:55:16,185 - botocore.hooks - DEBUG - Changing event name from creating-client-class.iot-data to creating-client-class.iot-data-plane
2107
+ 2023-09-25 11:55:16,186 - botocore.hooks - DEBUG - Changing event name from before-call.apigateway to before-call.api-gateway
2108
+ 2023-09-25 11:55:16,186 - botocore.hooks - DEBUG - Changing event name from request-created.machinelearning.Predict to request-created.machine-learning.Predict
2109
+ 2023-09-25 11:55:16,187 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.autoscaling.CreateLaunchConfiguration to before-parameter-build.auto-scaling.CreateLaunchConfiguration
2110
+ 2023-09-25 11:55:16,187 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.route53 to before-parameter-build.route-53
2111
+ 2023-09-25 11:55:16,187 - botocore.hooks - DEBUG - Changing event name from request-created.cloudsearchdomain.Search to request-created.cloudsearch-domain.Search
2112
+ 2023-09-25 11:55:16,188 - botocore.hooks - DEBUG - Changing event name from docs.*.autoscaling.CreateLaunchConfiguration.complete-section to docs.*.auto-scaling.CreateLaunchConfiguration.complete-section
2113
+ 2023-09-25 11:55:16,189 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.logs.CreateExportTask to before-parameter-build.cloudwatch-logs.CreateExportTask
2114
+ 2023-09-25 11:55:16,189 - botocore.hooks - DEBUG - Changing event name from docs.*.logs.CreateExportTask.complete-section to docs.*.cloudwatch-logs.CreateExportTask.complete-section
2115
+ 2023-09-25 11:55:16,189 - botocore.hooks - DEBUG - Changing event name from before-parameter-build.cloudsearchdomain.Search to before-parameter-build.cloudsearch-domain.Search
2116
+ 2023-09-25 11:55:16,189 - botocore.hooks - DEBUG - Changing event name from docs.*.cloudsearchdomain.Search.complete-section to docs.*.cloudsearch-domain.Search.complete-section
2117
+ 2023-09-25 11:55:16,190 - botocore.utils - DEBUG - IMDS ENDPOINT: http://169.254.169.254/
2118
+ 2023-09-25 11:55:16,191 - botocore.credentials - DEBUG - Looking for credentials via: env
2119
+ 2023-09-25 11:55:16,191 - botocore.credentials - DEBUG - Looking for credentials via: assume-role
2120
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: assume-role-with-web-identity
2121
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: sso
2122
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: shared-credentials-file
2123
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: custom-process
2124
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: config-file
2125
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: ec2-credentials-file
2126
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: boto-config
2127
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: container-role
2128
+ 2023-09-25 11:55:16,192 - botocore.credentials - DEBUG - Looking for credentials via: iam-role
2129
+ 2023-09-25 11:55:16,192 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 169.254.169.254:80
2130
+ 2023-09-25 11:55:16,411 - urllib3.connectionpool - DEBUG - https://checkip.amazonaws.com:443 "GET / HTTP/1.1" 200 14
2131
+ 2023-09-25 11:55:16,418 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
2132
+ 2023-09-25 11:55:17,100 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "GET /pkg-version HTTP/1.1" 200 21
2133
+ 2023-09-25 11:55:17,194 - botocore.utils - DEBUG - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/api/token: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
2134
+ Traceback (most recent call last):
2135
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 169, in _new_conn
2136
+ conn = connection.create_connection(
2137
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 96, in create_connection
2138
+ raise err
2139
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 86, in create_connection
2140
+ sock.connect(sa)
2141
+ socket.timeout: timed out
2142
+
2143
+ During handling of the above exception, another exception occurred:
2144
+
2145
+ Traceback (most recent call last):
2146
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 385, in send
2147
+ urllib_response = conn.urlopen(
2148
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
2149
+ retries = retries.increment(
2150
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/retry.py", line 506, in increment
2151
+ raise six.reraise(type(error), error, _stacktrace)
2152
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/packages/six.py", line 735, in reraise
2153
+ raise value
2154
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
2155
+ httplib_response = self._make_request(
2156
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 394, in _make_request
2157
+ conn.request(method, url, **httplib_request_kw)
2158
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 234, in request
2159
+ super(HTTPConnection, self).request(method, url, body=body, headers=headers)
2160
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1255, in request
2161
+ self._send_request(method, url, body, headers, encode_chunked)
2162
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 92, in _send_request
2163
+ rval = super(AWSConnection, self)._send_request(
2164
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1301, in _send_request
2165
+ self.endheaders(body, encode_chunked=encode_chunked)
2166
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1250, in endheaders
2167
+ self._send_output(message_body, encode_chunked=encode_chunked)
2168
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 120, in _send_output
2169
+ self.send(msg)
2170
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 204, in send
2171
+ return super(AWSConnection, self).send(str)
2172
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 950, in send
2173
+ self.connect()
2174
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 200, in connect
2175
+ conn = self._new_conn()
2176
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 174, in _new_conn
2177
+ raise ConnectTimeoutError(
2178
+ urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fecf86e99a0>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')
2179
+
2180
+ During handling of the above exception, another exception occurred:
2181
+
2182
+ Traceback (most recent call last):
2183
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/utils.py", line 430, in _fetch_metadata_token
2184
+ response = self._session.send(request.prepare())
2185
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 418, in send
2186
+ raise ConnectTimeoutError(endpoint_url=request.url, error=e)
2187
+ botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/api/token"
2188
+ 2023-09-25 11:55:17,200 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (2): 169.254.169.254:80
2189
+ 2023-09-25 11:55:17,412 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "POST /gradio-initiated-analytics/ HTTP/1.1" 200 None
2190
+ 2023-09-25 11:55:18,202 - botocore.utils - DEBUG - Caught retryable HTTP exception while making metadata service request to http://169.254.169.254/latest/meta-data/iam/security-credentials/: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
2191
+ Traceback (most recent call last):
2192
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 169, in _new_conn
2193
+ conn = connection.create_connection(
2194
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 96, in create_connection
2195
+ raise err
2196
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/connection.py", line 86, in create_connection
2197
+ sock.connect(sa)
2198
+ socket.timeout: timed out
2199
+
2200
+ During handling of the above exception, another exception occurred:
2201
+
2202
+ Traceback (most recent call last):
2203
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 385, in send
2204
+ urllib_response = conn.urlopen(
2205
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 755, in urlopen
2206
+ retries = retries.increment(
2207
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/util/retry.py", line 506, in increment
2208
+ raise six.reraise(type(error), error, _stacktrace)
2209
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/packages/six.py", line 735, in reraise
2210
+ raise value
2211
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 699, in urlopen
2212
+ httplib_response = self._make_request(
2213
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connectionpool.py", line 394, in _make_request
2214
+ conn.request(method, url, **httplib_request_kw)
2215
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 234, in request
2216
+ super(HTTPConnection, self).request(method, url, body=body, headers=headers)
2217
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1255, in request
2218
+ self._send_request(method, url, body, headers, encode_chunked)
2219
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 92, in _send_request
2220
+ rval = super(AWSConnection, self)._send_request(
2221
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1301, in _send_request
2222
+ self.endheaders(body, encode_chunked=encode_chunked)
2223
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 1250, in endheaders
2224
+ self._send_output(message_body, encode_chunked=encode_chunked)
2225
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 120, in _send_output
2226
+ self.send(msg)
2227
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/awsrequest.py", line 204, in send
2228
+ return super(AWSConnection, self).send(str)
2229
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/http/client.py", line 950, in send
2230
+ self.connect()
2231
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 200, in connect
2232
+ conn = self._new_conn()
2233
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/urllib3/connection.py", line 174, in _new_conn
2234
+ raise ConnectTimeoutError(
2235
+ urllib3.exceptions.ConnectTimeoutError: (<botocore.awsrequest.AWSHTTPConnection object at 0x7fecf8a9e790>, 'Connection to 169.254.169.254 timed out. (connect timeout=1)')
2236
+
2237
+ During handling of the above exception, another exception occurred:
2238
+
2239
+ Traceback (most recent call last):
2240
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/utils.py", line 478, in _get_request
2241
+ response = self._session.send(request.prepare())
2242
+ File "/home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/httpsession.py", line 418, in send
2243
+ raise ConnectTimeoutError(endpoint_url=request.url, error=e)
2244
+ botocore.exceptions.ConnectTimeoutError: Connect timeout on endpoint URL: "http://169.254.169.254/latest/meta-data/iam/security-credentials/"
2245
+ 2023-09-25 11:55:18,203 - botocore.utils - DEBUG - Max number of attempts exceeded (1) when attempting to retrieve data from metadata service.
2246
+ 2023-09-25 11:55:18,203 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/endpoints.json
2247
+ 2023-09-25 11:55:18,212 - botocore.hooks - DEBUG - Event choose-service-name: calling handler <function handle_service_name_alias at 0x7fecf895f820>
2248
+ 2023-09-25 11:55:18,216 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/sts/2011-06-15/service-2.json
2249
+ 2023-09-25 11:55:18,217 - botocore.hooks - DEBUG - Event creating-client-class.sts: calling handler <function add_generate_presigned_url at 0x7fecf87df430>
2250
+ 2023-09-25 11:55:18,218 - botocore.endpoint - DEBUG - Setting sts timeout as (60, 60)
2251
+ 2023-09-25 11:55:18,218 - botocore.loaders - DEBUG - Loading JSON file: /home/salah/anaconda3/envs/salah/lib/python3.8/site-packages/botocore/data/_retry.json
2252
+ 2023-09-25 11:55:18,218 - botocore.client - DEBUG - Registering retry handlers for service: sts
2253
+ 2023-09-25 11:55:18,218 - botocore.hooks - DEBUG - Event before-parameter-build.sts.GetCallerIdentity: calling handler <function generate_idempotent_uuid at 0x7fecf87c7e50>
2254
+ 2023-09-25 11:55:18,219 - botocore.hooks - DEBUG - Event before-call.sts.GetCallerIdentity: calling handler <function inject_api_version_header_if_needed at 0x7fecf87d0700>
2255
+ 2023-09-25 11:55:18,219 - botocore.endpoint - DEBUG - Making request for OperationModel(name=GetCallerIdentity) with params: {'url_path': '/', 'query_string': '', 'method': 'POST', 'headers': {'Content-Type': 'application/x-www-form-urlencoded; charset=utf-8', 'User-Agent': 'Boto3/1.20.2 Python/3.8.5 Linux/5.4.0-150-generic Botocore/1.23.2'}, 'body': {'Action': 'GetCallerIdentity', 'Version': '2011-06-15'}, 'url': 'https://sts.amazonaws.com/', 'context': {'client_region': 'aws-global', 'client_config': <botocore.config.Config object at 0x7fecf8b0fc40>, 'has_streaming_input': False, 'auth_type': None}}
2256
+ 2023-09-25 11:55:18,219 - botocore.hooks - DEBUG - Event request-created.sts.GetCallerIdentity: calling handler <bound method RequestSigner.handler of <botocore.signers.RequestSigner object at 0x7fecf8b0fe50>>
2257
+ 2023-09-25 11:55:18,219 - botocore.hooks - DEBUG - Event choose-signer.sts.GetCallerIdentity: calling handler <function set_operation_specific_signer at 0x7fecf87c7d30>
2258
+ 2023-09-25 11:55:18,220 - urllib3.connectionpool - DEBUG - Starting new HTTP connection (1): 127.0.0.1:7860
2259
+ 2023-09-25 11:55:18,226 - urllib3.connectionpool - DEBUG - http://127.0.0.1:7860 "HEAD / HTTP/1.1" 200 0
2260
+ 2023-09-25 11:55:18,228 - urllib3.connectionpool - DEBUG - Starting new HTTPS connection (1): api.gradio.app:443
2261
+ 2023-09-25 11:55:19,252 - urllib3.connectionpool - DEBUG - https://api.gradio.app:443 "POST /gradio-launched-telemetry/ HTTP/1.1" 200 None
2262
+ 2023-09-25 11:55:26,648 - matplotlib.pyplot - DEBUG - Loaded backend agg version unknown.
2263
+ 2023-09-25 11:55:26,688 - pydub.converter - DEBUG - subprocess.call(['ffmpeg', '-y', '-i', '/tmp/gradio/8c62f8e9d42a7333da0ae55f015d665ae74f7913/audio.wav', '-acodec', 'pcm_s32le', '-vn', '-f', 'wav', '-'])
2264
+ 2023-09-25 11:55:26,941 - matplotlib.pyplot - DEBUG - Loaded backend TkAgg version unknown.
2265
+ 2023-09-25 11:55:34,439 - matplotlib.pyplot - DEBUG - Loaded backend agg version unknown.
2266
+ 2023-09-25 11:55:34,479 - pydub.converter - DEBUG - subprocess.call(['ffmpeg', '-y', '-i', '/tmp/gradio/a0134345072496f4b50c5c2ed175ba6f070ef6f0/audio.wav', '-acodec', 'pcm_s32le', '-vn', '-f', 'wav', '-'])
2267
+ 2023-09-25 11:55:34,955 - matplotlib.pyplot - DEBUG - Loaded backend TkAgg version unknown.
2268
+ 2023-09-25 11:55:41,408 - matplotlib.pyplot - DEBUG - Loaded backend agg version unknown.
2269
+ 2023-09-25 11:55:41,456 - pydub.converter - DEBUG - subprocess.call(['ffmpeg', '-y', '-i', '/tmp/gradio/31eca7d10f9506b4380f815f4bc19a04caed0f0f/audio.wav', '-acodec', 'pcm_s32le', '-vn', '-f', 'wav', '-'])
2270
+ 2023-09-25 11:55:41,780 - matplotlib.pyplot - DEBUG - Loaded backend TkAgg version unknown.
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # yamllint disable
2
+ WER: 27.83210816487267
3
+ end-of-epoch: true
4
+ unixtime: 1693868963.5220973
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3947a24e8dff5a14299b9cf2fe66ffb4d738cb88717de7f0cf7e8547a76e9776
3
+ size 51
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b51d431df5d7f141cbececcf79edf3dd861c3b4069f0b11661a3eefacbba918
3
+ size 2
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b363886c229e536bd3c84e0c3e89312d70e00422578e076a62df1b45c9390793
3
+ size 5
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc1dbeca1e1f1340b08d8ebea6e492f474708dddbbe8cabbcdde5ee9660704f2
3
+ size 12814446
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/modelopt.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3af1791eb9a5bfbfc087d2c10b94634df24cad3ac503ce9ba280a3ecc4737781
3
+ size 25575663
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/scheduler_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c275ab9245b440d1586f72058d9edaac1a2fb3e7a52712aa9a9ad022b99a1c0d
3
+ size 639
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/scheduler_wav2vec.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a88187f7882dc3e10c108f1b7abfbd819285b34bded4e88e91c4ff699c1bb5d2
3
+ size 643
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/wav2vec2.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:788267bd25ef37623715fa21a975090e5e316fff05971375cd3f62e5160f0743
3
+ size 1262005979
semi_wavlm_large_tunisian_ctc/1234/save/CKPT+2023-09-05+01-09-23+00/wav2vec_opt.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efa967fdd8067be7d88c18cd197980c9c91f344a3dff2b2518b8381c49f28b1e
3
+ size 2490361859
semi_wavlm_large_tunisian_ctc/1234/save/label_encoder.txt ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'ب' => 38
2
+ 'ا' => 1
3
+ 'ه' => 2
4
+ 'ي' => 3
5
+ 'و' => 4
6
+ 'ن' => 5
7
+ 'أ' => 6
8
+ ' ' => 7
9
+ 'م' => 8
10
+ 'ش' => 9
11
+ 'ل' => 10
12
+ 'س' => 11
13
+ 'ت' => 12
14
+ 'د' => 13
15
+ 'ر' => 14
16
+ 'ى' => 15
17
+ 'ح' => 16
18
+ 'ط' => 17
19
+ 'ع' => 18
20
+ 'ك' => 19
21
+ 'ف' => 20
22
+ 'ق' => 21
23
+ 'آ' => 22
24
+ 'ة' => 23
25
+ 'ج' => 24
26
+ 'ض' => 25
27
+ 'ز' => 26
28
+ 'ص' => 27
29
+ 'إ' => 28
30
+ 'ث' => 29
31
+ 'خ' => 30
32
+ 'ڨ' => 31
33
+ 'ذ' => 32
34
+ 'ظ' => 33
35
+ 'ء' => 34
36
+ 'غ' => 35
37
+ 'ئ' => 36
38
+ 'ؤ' => 37
39
+ '<blank>' => 0
40
+ 1 => 39
41
+ ================
42
+ 'starting_index' => 0
43
+ 'unk_label' => 1
44
+ 'blank_label' => '<blank>'
taric_test.csv ADDED
@@ -0,0 +1,381 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Unnamed: 0,ID,wav,wrd,duration
2
+ 270681,audio4_1h_transcrit_360.719_361.907,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_360.719_361.907.wav,ڨابس وقتاش,1.188
3
+ 275905,audio7_test_transcrit_3968.362_3971.002,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3968.362_3971.002.wav,الساعتين مع الستة الستة و درجين,2.64
4
+ 270058,audio7_test_transcrit_3694.359_3697.671,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3694.359_3697.671.wav,أيوه يوصل العشرة العشرة غير ربع العشرة غير درجين,3.312
5
+ 271296,audio5_Transcrit_2485.137_2487.215,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_2485.137_2487.215.wav,الثلاثة غير درج موش الأربعة و نصف,2.0780000000000003
6
+ 274681,audio1_2hTranscrit_2666.861_2667.791,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2666.861_2667.791.wav,بالله,0.93
7
+ 273082,audio1_2hTranscrit_3182.755_3184.453,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3182.755_3184.453.wav,باهي أعطيني تونس,1.6980000000000002
8
+ 267991,audio7_test_transcrit_4503.851_4505.820,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4503.851_4505.820.wav,السلام عليكم ڨابس كنفور,1.969
9
+ 262671,audio10_1752.729_1755.454,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1752.729_1755.454.wav,وقتاه هو التران يخرج,2.725
10
+ 272639,audio8-1h30.tst_1371.833_1373.792,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1371.833_1373.792.wav,أي آهي البطاقة متاعي,1.959
11
+ 270677,audio12_2881.038_2882.838,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2881.038_2882.838.wav,ساعتين و نصف إنتي تحب,1.8
12
+ 276090,audio1_2hTranscrit_95.063_96.160,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_95.063_96.160.wav,سلام عليكم,1.097
13
+ 278161,audio12_3594.075_3595.275,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_3594.075_3595.275.wav,تونس,1.2
14
+ 269161,audio17_1h_1591.741_1593.705,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1591.741_1593.705.wav,وين ماشية تكلم هنا هنا,1.964
15
+ 277093,audio3_transcrit_1067.879_1069.395,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1067.879_1069.395.wav,و إلا نصف الليل و درجين,1.516
16
+ 268969,audio8-1h30.tst_5033.163_5036.389,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_5033.163_5036.389.wav,ثلاثة ألاي سامبل و إلا ثمة لألاي رتور,3.2260000000000004
17
+ 271109,audio11_Transcrit_2899.443_2901.130,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2899.443_2901.130.wav,آكسبراس الأربعة و نصف,1.6869999999999998
18
+ 10415,audio5_Transcrit_423.305_430.317,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_423.305_430.317.wav,و الله ما نعرفش توا كي نقرب إنق توا إذا آني غادي توا نقلك,7.0120000000000005
19
+ 276418,audio14-1h_1087.372_1089.260,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_1087.372_1089.260.wav,الساعتين و ربع أي تفضل مدام,1.8880000000000001
20
+ 275484,audio8-1h30.tst_4388.209_4391.520,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_4388.209_4391.520.wav,حاشتي بثلاثة بلايص لبير بورڨبة,3.3110000000000004
21
+ 269977,audio12_3352.756_3357.172,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_3352.756_3357.172.wav,أعطيني ألفين و خمسة مية أعطيني نحب ألفين و خمسة مية,4.416
22
+ 273952,audio1_2hTranscrit_5535.218_5536.281,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5535.218_5536.281.wav,ثلاثة غير درج,1.063
23
+ 266742,audio5_Transcrit_1174.151_1174.891,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1174.151_1174.891.wav,توا,0.74
24
+ 15939,audio11_Transcrit_1892.293_1897.957,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1892.293_1897.957.wav,بالله القلعة يعيشك بروميار قول هو كنفور بكلو,5.664
25
+ 271436,audio17_1h_376.916_377.956,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_376.916_377.956.wav,سرس,1.04
26
+ 267117,audio10_1234.611_1237.386,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1234.611_1237.386.wav,باهي أختي بقداه التكاي,2.775
27
+ 266559,audio14-1h_2584.370_2589.563,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2584.370_2589.563.wav,خمسة آلاف و مية و خمسين أعطيني مية و خمسين فرنك عندكشي خمسة مية,5.193
28
+ 273290,audio1_2hTranscrit_882.335_883.710,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_882.335_883.710.wav,نابل بير بورڨبة,1.375
29
+ 266645,audio1_2hTranscrit_5063.235_5065.076,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5063.235_5065.076.wav,سلام باللاهي تونس,1.841
30
+ 272054,audio3_transcrit_714.997_715.765,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_714.997_715.765.wav,أي ب,0.768
31
+ 270920,audio12_2740.304_2741.264,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2740.304_2741.264.wav,تفضل,0.96
32
+ 270263,audio5_Transcrit_1830.538_1832.065,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1830.538_1832.065.wav,أعطيني ألفين و مية,1.527
33
+ 269434,audio12_842.457_847.441,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_842.457_847.441.wav,إليوم باريود روج تتسمى خاطر ويكاند دانك بسبعطاش دينار,4.984
34
+ 262602,audio4_1h_transcrit_3315.769_3316.914,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3315.769_3316.914.wav,ثلاثة غير درج,1.145
35
+ 276898,audio12_1499.442_1503.066,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_1499.442_1503.066.wav,عندكشي مية و خمسين عيش ولدي,3.6239999999999997
36
+ 261117,audio1_2hTranscrit_5260.824_5262.199,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5260.824_5262.199.wav,ثمة تران لتونس,1.375
37
+ 266521,audio10_3322.288_3324.667,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_3322.288_3324.667.wav,عسلامة باللاهي تكاي للمهدية,2.379
38
+ 274371,audio3_transcrit_2182.048_2183.364,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2182.048_2183.364.wav,كارت جون ما عندكش,1.3159999999999998
39
+ 270951,audio1_2hTranscrit_4864.609_4865.997,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_4864.609_4865.997.wav,تران وقتاش يخرج,1.3880000000000001
40
+ 265283,audio1_2hTranscrit_489.928_491.030,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_489.928_491.030.wav,غدوة الصباح,1.102
41
+ 268083,audio7_test_transcrit_3774.600_3775.177,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3774.600_3775.177.wav,ياقف,0.5770000000000001
42
+ 261488,audio6.tst_495.711_496.789,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_495.711_496.789.wav,ألاي و رتور,1.078
43
+ 275219,audio6.tst_451.267_452.974,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_451.267_452.974.wav,إسمع التران أماهو إلي نركب فيه,1.7069999999999999
44
+ 261387,audio1_2hTranscrit_1558.619_1561.075,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1558.619_1561.075.wav,ماضي ثلاثة أربعة سوايع معناها,2.456
45
+ 263997,audio1_2hTranscrit_3990.908_3991.843,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3990.908_3991.843.wav,يا ولدي,0.935
46
+ 268313,audio17_1h_719.875_720.580,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_719.875_720.580.wav,سلام,0.705
47
+ 264510,audio1_2hTranscrit_5682.582_5683.551,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5682.582_5683.551.wav,و الساعتين,0.9690000000000001
48
+ 274380,audio1_2hTranscrit_2849.132_2850.866,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2849.132_2850.866.wav,ثلاثة غير درج آكسبراس,1.734
49
+ 263438,audio1_2hTranscrit_752.713_754.220,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_752.713_754.220.wav,خرج الحداش و درجين,1.507
50
+ 276901,audio11_Transcrit_995.823_996.761,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_995.823_996.761.wav,لا لا,0.938
51
+ 274005,audio5_Transcrit_3757.520_3759.929,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3757.520_3759.929.wav,بالله وريقة متاع الأوقات متاع التران,2.4090000000000003
52
+ 271846,audio6.tst_3293.175_3295.577,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_3293.175_3295.577.wav,سوسة الأربعة و أربعة آه الأربعة غير أربعة,2.4019999999999997
53
+ 271819,audio6.tst_3404.361_3407.239,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_3404.361_3407.239.wav,عشرة و ثلاثة مية دوزيام كلاس أما هاذا بروميار كلاس,2.878
54
+ 267595,audio8-1h30.tst_2409.161_2411.080,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_2409.161_2411.080.wav,كل نهار هاذوم موجودين,1.919
55
+ 15792,audio1_2hTranscrit_1844.951_1845.619,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1844.951_1845.619.wav,نعم,0.6679999999999999
56
+ 267084,audio14-1h_2532.547_2533.797,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2532.547_2533.797.wav,أعطيني ألفين و خمسة مية,1.25
57
+ 267000,audio7_test_transcrit_779.182_780.854,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_779.182_780.854.wav,حداش ألف,1.672
58
+ 261576,audio7_test_transcrit_150.267_151.533,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_150.267_151.533.wav,سوسة بروميار,1.266
59
+ 272817,audio1_2hTranscrit_3170.793_3172.465,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3170.793_3172.465.wav,مازلنا نستحقوها,1.672
60
+ 265865,audio14-1h_84.796_86.662,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_84.796_86.662.wav,لازمك تزيد أربعة آلاف و خمسة مية,1.8659999999999999
61
+ 263750,audio11_Transcrit_2013.200_2014.638,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2013.200_2014.638.wav,بقداش يا خويا,1.4380000000000002
62
+ 262824,audio4_1h_transcrit_3512.147_3515.152,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3512.147_3515.152.wav,قلي آني كنت باش نسافر إليوم,3.005
63
+ 273921,audio4_1h_transcrit_3642.557_3644.227,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3642.557_3644.227.wav,متاع الثلاثة و درج هاذا,1.67
64
+ 270512,audio10_1654.348_1655.621,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1654.348_1655.621.wav,عسلامة,1.273
65
+ 273531,audio5_Transcrit_408.042_409.358,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_408.042_409.358.wav,صباح الخير,1.3159999999999998
66
+ 276096,audio5_Transcrit_3027.938_3028.988,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3027.938_3028.988.wav,وحدة تونس,1.05
67
+ 271977,audio6.tst_147.396_148.771,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_147.396_148.771.wav,هذي التكاي و هاذا الدينار,1.375
68
+ 266323,audio11_Transcrit_2596.029_2596.873,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2596.029_2596.873.wav,ثلاثة,0.8440000000000001
69
+ 276428,audio14-1h_577.224_583.781,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_577.224_583.781.wav,آه أيا باهي و اللاهي فجعتني نستخايل الأربعة قلت قداش باش نقعد نستنى توا في اللاڨار أيا باهي بارك الله فيك يرحم والديك,6.557
70
+ 263205,audio4_1h_transcrit_2136.598_2138.442,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2136.598_2138.442.wav,نحساب فيه أكثر من رحلة,1.844
71
+ 261969,audio6.tst_197.733_198.380,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_197.733_198.380.wav,صباح الخير,0.647
72
+ 266581,audio8-1h30.tst_4040.022_4042.205,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_4040.022_4042.205.wav,آكسبراس و إلا دراكت إنتي شتحب,2.1830000000000003
73
+ 270532,audio3_transcrit_3285.837_3286.815,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3285.837_3286.815.wav,آكسبراس,0.978
74
+ 260617,audio5_Transcrit_1663.042_1664.545,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1663.042_1664.545.wav,توا ماضي ساعة و نصف,1.5030000000000001
75
+ 270653,audio18_30_524.952_528.157,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_524.952_528.157.wav,أعطيني ألفين و خمسة مية يعيشك,3.205
76
+ 261778,audio7_test_transcrit_3921.025_3922.275,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3921.025_3922.275.wav,آ ڨابس,1.25
77
+ 264599,audio7_test_transcrit_4489.914_4490.777,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4489.914_4490.777.wav,ميتين,0.863
78
+ 272815,audio7_test_transcrit_3183.614_3185.455,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3183.614_3185.455.wav,معناتها حق الكارت خلص,1.841
79
+ 261970,audio5_Transcrit_1741.696_1743.696,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1741.696_1743.696.wav,وقتاش يقصوا,2.0
80
+ 268946,audio5_Transcrit_1513.905_1517.889,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1513.905_1517.889.wav,بربي كيفاش نعمل آني باش روح للجم,3.984
81
+ 263151,audio7_test_transcrit_5114.512_5117.355,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5114.512_5117.355.wav,آ بالله ستة تران تونس,2.843
82
+ 269245,audio10_1555.684_1558.644,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1555.684_1558.644.wav,باهي أوكاي يعيشك أعطيني التكاي,2.96
83
+ 270610,audio5_Transcrit_3350.606_3351.699,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3350.606_3351.699.wav,الستة و أربعة,1.093
84
+ 267644,audio5_Transcrit_713.453_715.600,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_713.453_715.600.wav,نجم نقص الجم تونس,2.147
85
+ 277094,audio17_1h_1294.057_1295.965,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1294.057_1295.965.wav,باللاهي شنوة أول تران لصفاقس,1.9080000000000001
86
+ 261658,audio4_1h_transcrit_1693.084_1694.210,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_1693.084_1694.210.wav,زوز تونس,1.126
87
+ 10506,audio1_2hTranscrit_6401.597_6402.718,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6401.597_6402.718.wav,الخمسة و خمسة,1.121
88
+ 262447,audio14-1h_2563.909_2564.914,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2563.909_2564.914.wav,أعطيني صرف أمان,1.005
89
+ 271333,audio3_transcrit_3615.353_3616.571,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3615.353_3616.571.wav,ما مشاش بيها,1.218
90
+ 268231,audio7_test_transcrit_239.503_240.266,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_239.503_240.266.wav,نعم,0.763
91
+ 261534,audio11_Transcrit_1546.871_1548.278,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1546.871_1548.278.wav,بلاصة و نصف تونس,1.4069999999999998
92
+ 273816,audio3_transcrit_3287.798_3288.985,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3287.798_3288.985.wav,و فمة في لآكسبراس,1.187
93
+ 269668,audio18_30_1283.188_1285.143,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1283.188_1285.143.wav,أي تكاي وحدة أي,1.955
94
+ 15820,audio1_2hTranscrit_2486.442_2487.895,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2486.442_2487.895.wav,الستة متاع الصباح,1.453
95
+ 16015,audio6.tst_45.661_46.510,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_45.661_46.510.wav,تفضل يا مدام,0.8490000000000001
96
+ 267908,audio3_transcrit_3022.185_3022.977,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3022.185_3022.977.wav,ثلاثين,0.792
97
+ 277986,audio1_2hTranscrit_2394.213_2395.072,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2394.213_2395.072.wav,شنوة,0.8590000000000001
98
+ 266729,audio4_1h_transcrit_2117.490_2119.286,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2117.490_2119.286.wav,يا ذهاب و عودة,1.796
99
+ 264898,audio1_2hTranscrit_3633.724_3636.849,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3633.724_3636.849.wav,و متاع الستة غير درج متاع ماطر موجود,3.125
100
+ 267899,audio11_Transcrit_2115.211_2117.993,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2115.211_2117.993.wav,الأربعة و نصف و بعد السبعة و نصف,2.782
101
+ 266459,audio4_1h_transcrit_2958.173_2961.079,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2958.173_2961.079.wav,بربي تران الصباح أول تران,2.906
102
+ 261835,audio4_1h_transcrit_699.018_700.143,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_699.018_700.143.wav,بارك الله فيك,1.125
103
+ 266103,audio1_2hTranscrit_1977.211_1979.076,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1977.211_1979.076.wav,و أعطيني وحدة ألاي رتور,1.865
104
+ 277806,audio1_2hTranscrit_2067.017_2070.451,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2067.017_2070.451.wav,تران بالله تونس الماضي ساعة و ربع متاع الليل,3.4339999999999997
105
+ 267121,audio12_2189.848_2193.592,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2189.848_2193.592.wav,ماه أقل من عشرة سنين يخلص نصف صحيح و إلا لا,3.7439999999999998
106
+ 277523,audio10_1121.954_1125.472,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1121.954_1125.472.wav,دو تكاي بلاصة كاملة ألاي و رتور,3.5180000000000002
107
+ 265383,audio8-1h30.tst_662.141_663.125,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_662.141_663.125.wav,أي باهي,0.9840000000000001
108
+ 269709,audio7_test_transcrit_2919.721_2921.467,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_2919.721_2921.467.wav,هات أي هات,1.746
109
+ 270049,audio12_324.787_328.267,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_324.787_328.267.wav,حداش و تسعة مية شوفلي ألفين,3.48
110
+ 278087,audio1_2hTranscrit_429.786_431.341,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_429.786_431.341.wav,أربعة و ثلاثين دينار و ميتين,1.555
111
+ 274394,audio7_test_transcrit_1671.513_1673.201,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1671.513_1673.201.wav,ستة و أربعة,1.6880000000000002
112
+ 273948,audio10_3460.409_3462.097,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_3460.409_3462.097.wav,ألاي رتور آه,1.6880000000000002
113
+ 260666,audio18_30_1535.886_1537.195,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1535.886_1537.195.wav,و الكنفور,1.3090000000000002
114
+ 263924,audio5_Transcrit_788.685_789.651,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_788.685_789.651.wav,صباح الخير,0.966
115
+ 270402,audio5_Transcrit_3418.982_3422.045,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3418.982_3422.045.wav,سانكنت سانك بروميار سوسة,3.063
116
+ 272296,audio8-1h30.tst_203.630_205.678,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_203.630_205.678.wav,معناتها بعد الثالث جور,2.048
117
+ 262699,audio1_2hTranscrit_5274.256_5278.148,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5274.256_5278.148.wav,غدوة الحي الصباح فمة تران لتونس مع الخمسة هكا,3.892
118
+ 262641,audio10_1797.784_1800.744,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1797.784_1800.744.wav,أنا عندي كارت دو فدالتاي نجم نقص بيها,2.96
119
+ 273705,audio3_transcrit_1823.346_1824.627,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1823.346_1824.627.wav,نعطيوك بروميار,1.281
120
+ 276782,audio18_30_979.813_980.937,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_979.813_980.937.wav,أي,1.124
121
+ 276670,audio12_2193.592_2195.392,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2193.592_2195.392.wav,أي باهي أي تفضل أي,1.8
122
+ 276849,audio1_2hTranscrit_778.288_779.015,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_778.288_779.015.wav,أيه,0.727
123
+ 269385,audio7_test_transcrit_127.799_129.139,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_127.799_129.139.wav,تفضل أي,1.34
124
+ 277418,audio1_2hTranscrit_5281.062_5282.283,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5281.062_5282.283.wav,يخرج من هنا,1.2209999999999999
125
+ 275328,audio8-1h30.tst_3560.657_3562.766,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_3560.657_3562.766.wav,أي تفضل هذي التكاي متاعك أي,2.109
126
+ 265993,audio5_Transcrit_61.587_62.853,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_61.587_62.853.wav,كيف كيف آكسبراس,1.266
127
+ 274176,audio7_test_transcrit_520.620_523.979,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_520.620_523.979.wav,فمة توا الماضي ساعة و نصف,3.359
128
+ 263194,audio7_test_transcrit_1884.408_1885.534,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1884.408_1885.534.wav,بلاصة,1.126
129
+ 268452,audio7_test_transcrit_3594.026_3595.885,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3594.026_3595.885.wav,إنتي ماشية لتونس,1.859
130
+ 266674,audio18_30_1608.682_1610.099,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1608.682_1610.099.wav,تفضل يا بنتي,1.4169999999999998
131
+ 267528,audio7_test_transcrit_4701.219_4702.583,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4701.219_4702.583.wav,لا ما فماش,1.364
132
+ 265248,audio10_1256.284_1257.972,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1256.284_1257.972.wav,عسلامة أختي,1.6880000000000002
133
+ 275581,audio3_transcrit_2789.373_2790.202,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2789.373_2790.202.wav,وقتاش,0.8290000000000001
134
+ 264044,audio1_2hTranscrit_919.723_921.254,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_919.723_921.254.wav,باللاهي صفاقس تونس,1.531
135
+ 271795,audio11_Transcrit_3774.621_3776.074,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3774.621_3776.074.wav,ماضي ساعة و نصف,1.453
136
+ 265193,audio11_Transcrit_647.450_649.106,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_647.450_649.106.wav,فمة ماضي ساعة,1.656
137
+ 265175,audio8-1h30.tst_2983.300_2984.321,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_2983.300_2984.321.wav,آ,1.021
138
+ 261423,audio17_1h_2633.243_2633.904,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_2633.243_2633.904.wav,لحظة,0.6609999999999999
139
+ 277832,audio11_Transcrit_3142.920_3144.600,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3142.920_3144.600.wav,قداش ألفين و مية,1.68
140
+ 272719,audio1_2hTranscrit_3397.359_3399.935,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3397.359_3399.935.wav,و أربعة أعطيني بروميار كلاس,2.576
141
+ 272799,audio17_1h_1226.287_1228.681,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1226.287_1228.681.wav,مشي و جي أيه مشي و جي,2.394
142
+ 269438,audio5_Transcrit_1711.912_1714.084,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1711.912_1714.084.wav,ماضي ساعة هو,2.1719999999999997
143
+ 270789,audio4_1h_transcrit_3338.252_3339.155,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3338.252_3339.155.wav,عادي,0.903
144
+ 276257,audio8-1h30.tst_5176.098_5179.674,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_5176.098_5179.674.wav,فمة تران نصف نهار و خمسة,3.576
145
+ 264308,audio5_Transcrit_1128.412_1130.086,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1128.412_1130.086.wav,فمة تران تونس توا,1.6740000000000002
146
+ 262418,audio4_1h_transcrit_3474.612_3475.690,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3474.612_3475.690.wav,سلام عليكم,1.078
147
+ 261258,audio3_transcrit_3261.976_3262.787,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3261.976_3262.787.wav,ثناش,0.8109999999999999
148
+ 276712,audio11_Transcrit_1465.894_1467.358,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1465.894_1467.358.wav,تخلص البنية,1.464
149
+ 266969,audio18_30_2032.507_2035.578,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_2032.507_2035.578.wav,و و ثلاثة بالكارت جون و زوز بلاش,3.071
150
+ 269967,audio11_Transcrit_1011.482_1013.794,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1011.482_1013.794.wav,أيه ألفين و ستة مية أي,2.312
151
+ 270368,audio7_test_transcrit_4679.559_4680.518,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4679.559_4680.518.wav,تونس,0.9590000000000001
152
+ 262552,audio14-1h_1962.602_1968.272,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_1962.602_1968.272.wav,أيا باهي هاتهم أي بارك الله فيك وقتاش يخرج بالظبط التران,5.67
153
+ 267089,audio1_2hTranscrit_6440.628_6441.331,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6440.628_6441.331.wav,تونس,0.703
154
+ 268039,audio3_transcrit_2340.341_2342.511,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2340.341_2342.511.wav,السبعة غير درجين الصباح آكسبراس,2.17
155
+ 271467,audio8-1h30.tst_3912.400_3914.830,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_3912.400_3914.830.wav,أعطيني تسعطاش ألف و ثلاثة مية,2.43
156
+ 270777,audio4_1h_transcrit_554.402_555.823,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_554.402_555.823.wav,وقتاش تخرج,1.421
157
+ 266326,audio5_Transcrit_3455.400_3457.077,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3455.400_3457.077.wav,لا ثنين درجة أولى,1.6769999999999998
158
+ 274156,audio12_2696.115_2698.635,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2696.115_2698.635.wav,توزر قداش توزر,2.52
159
+ 275220,audio11_Transcrit_2110.315_2111.563,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2110.315_2111.563.wav,الأربعة و نصف,1.248
160
+ 268048,audio11_Transcrit_1721.042_1721.618,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1721.042_1721.618.wav,آه,0.5760000000000001
161
+ 268080,audio8-1h30.tst_1432.735_1434.248,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1432.735_1434.248.wav,ما فهمتكش,1.5130000000000001
162
+ 270118,audio3_transcrit_794.348_795.497,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_794.348_795.497.wav,ڨابس وقتاش,1.149
163
+ 266241,audio18_30_697.857_700.185,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_697.857_700.185.wav,بالله ألاي رتور تونس,2.3280000000000003
164
+ 264005,audio5_Transcrit_1647.592_1650.268,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1647.592_1650.268.wav,بالله تونس وقتاش خويا,2.676
165
+ 262583,audio7_test_transcrit_5845.331_5847.315,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5845.331_5847.315.wav,ألاي رتور تونس,1.984
166
+ 10333,audio3_transcrit_658.518_659.234,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_658.518_659.234.wav,وين,0.716
167
+ 272669,audio8-1h30.tst_416.916_418.561,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_416.916_418.561.wav,واحد عمرو عشرة سنين,1.645
168
+ 269999,audio10_1352.457_1355.044,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1352.457_1355.044.wav,باهي تفضل آهوم الفلوس,2.5869999999999997
169
+ 264676,audio1_2hTranscrit_6289.956_6291.201,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6289.956_6291.201.wav,معناتها,1.245
170
+ 262052,audio4_1h_transcrit_583.769_584.556,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_583.769_584.556.wav,لا,0.787
171
+ 10195,audio8-1h30.tst_654.714_656.449,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_654.714_656.449.wav,باهي أهم أربعطاش ألف,1.735
172
+ 274425,audio1_2hTranscrit_1109.892_1111.638,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1109.892_1111.638.wav,ثلاثة و ثلاثين و ثمنية مية,1.746
173
+ 263783,audio7_test_transcrit_3062.611_3065.001,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3062.611_3065.001.wav,ألاي رتور و إلا ألاي برك,2.39
174
+ 277560,audio11_Transcrit_3935.162_3936.002,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3935.162_3936.002.wav,كيفاش,0.84
175
+ 270758,audio18_30_1206.902_1209.960,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1206.902_1209.960.wav,سلام عليكم باللاهي وحدة لمتلوي,3.0580000000000003
176
+ 271291,audio14-1h_108.104_114.925,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_108.104_114.925.wav,باهي مالا أعطيني تكاي في التران إلي بعد مع الأربعة و أربعة بقداش هي التكاي أي تفضل آهم الفلوس,6.821000000000001
177
+ 277950,audio1_2hTranscrit_5813.629_5815.316,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5813.629_5815.316.wav,أول تران ماضي ساعة و أربعة,1.6869999999999998
178
+ 260700,audio8-1h30.tst_65.031_65.991,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_65.031_65.991.wav,أي نقصلك و إلا لا,0.96
179
+ 260523,audio18_30_2055.504_2057.200,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_2055.504_2057.200.wav,بروميار كلاس تحب,1.696
180
+ 275514,audio18_30_1447.038_1449.239,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1447.038_1449.239.wav,باهي تحب بروميار كلاس,2.201
181
+ 274103,audio4_1h_transcrit_3493.503_3496.083,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3493.503_3496.083.wav,تونس فمة توا السريع ثلاثة غير درج,2.58
182
+ 261578,audio1_2hTranscrit_2563.159_2564.050,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2563.159_2564.050.wav,تفضل,0.8909999999999999
183
+ 261877,audio3_transcrit_3488.734_3492.483,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3488.734_3492.483.wav,عسلامة بربي سوسة بالله الأربعة و نصف لآكسبراس فمة,3.7489999999999997
184
+ 277820,audio1_2hTranscrit_6456.997_6461.214,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6456.997_6461.214.wav,إيه أنا قلت لك سومها الحقاني في وسط الجمعة يرخصوا فيها,4.217
185
+ 276100,audio11_Transcrit_672.272_673.016,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_672.272_673.016.wav,أيه,0.7440000000000001
186
+ 273868,audio1_2hTranscrit_5966.482_5969.587,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5966.482_5969.587.wav,كهو يا العشرة متاع الصباح يا الساعتين و ربع متاع الليل,3.105
187
+ 268135,audio8-1h30.tst_4329.116_4330.780,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_4329.116_4330.780.wav,أي باهي بقداش البليصة,1.6640000000000001
188
+ 264432,audio17_1h_936.617_937.867,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_936.617_937.867.wav,أمم,1.25
189
+ 266764,audio11_Transcrit_1290.678_1293.072,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1290.678_1293.072.wav,ألفين ماضي ساعة و نصف,2.394
190
+ 269390,audio1_2hTranscrit_2533.552_2538.159,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2533.552_2538.159.wav,و ثمنية مية تولي ثمنية و عشرين دينار فمة تقريب سانك دينار,4.607
191
+ 270107,audio14-1h_1824.744_1825.985,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_1824.744_1825.985.wav,قداش قلت لي سامحني,1.2409999999999999
192
+ 265311,audio7_test_transcrit_1734.718_1735.911,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1734.718_1735.911.wav,ثمنطاش,1.193
193
+ 272622,audio4_1h_transcrit_2840.216_2841.675,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2840.216_2841.675.wav,ماضي ساعة و نصف,1.459
194
+ 265931,audio3_transcrit_3682.575_3684.392,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3682.575_3684.392.wav,ستة و أربعة يخرج من هنا,1.817
195
+ 269885,audio4_1h_transcrit_658.838_659.900,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_658.838_659.900.wav,أوكي البنوتة,1.062
196
+ 278136,audio11_Transcrit_386.294_388.325,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_386.294_388.325.wav,و و ثمة تران بعد,2.031
197
+ 273526,audio1_2hTranscrit_4799.695_4800.633,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_4799.695_4800.633.wav,عشرة أيام,0.938
198
+ 266007,audio7_test_transcrit_196.373_198.967,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_196.373_198.967.wav,بروميار دينار و خمسين يعيش إختي,2.594
199
+ 260330,audio1_2hTranscrit_3724.770_3725.880,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3724.770_3725.880.wav,بروميار بروميار,1.11
200
+ 274129,audio17_1h_53.699_53.967,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_53.699_53.967.wav,آ,0.268
201
+ 277101,audio11_Transcrit_3102.009_3103.305,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3102.009_3103.305.wav,هاكي خليها عندك,1.296
202
+ 265488,audio3_transcrit_3928.512_3929.606,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3928.512_3929.606.wav,لاي زورار,1.094
203
+ 268974,audio18_30_1745.633_1749.758,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1745.633_1749.758.wav,ستطاش و تسعة مية عندكشي ألفين غادي,4.125
204
+ 277997,audio5_Transcrit_1480.513_1484.404,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1480.513_1484.404.wav,تران إلي مروح لتوز لتوزر وقتاش يخلط,3.891
205
+ 10386,audio18_30_1364.881_1375.204,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1364.881_1375.204.wav,بالله عندي بياي هذي آ جا بيها ولدي و الرجوع موش باش يرجع بيها حتى شد في خدمة بعثتلو شركة,10.323
206
+ 271656,audio14-1h_2247.508_2249.061,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2247.508_2249.061.wav,ثلاثة آلاف و خمسة مية عندك,1.5530000000000002
207
+ 265906,audio1_2hTranscrit_5353.019_5354.129,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5353.019_5354.129.wav,نحب نعمل,1.11
208
+ 275584,audio5_Transcrit_1633.263_1634.091,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1633.263_1634.091.wav,بقداه,0.828
209
+ 265821,audio14-1h_1777.493_1779.421,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_1777.493_1779.421.wav,عندكشي دينار و مية و إلا دينار و خمسة مية,1.9280000000000002
210
+ 263826,audio1_2hTranscrit_1601.475_1604.006,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1601.475_1604.006.wav,تي طالع في التران هاذيا آني,2.531
211
+ 267542,audio1_2hTranscrit_4075.012_4077.144,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_4075.012_4077.144.wav,وقتاش التران غدوة الحي الصباح,2.1319999999999997
212
+ 271853,audio18_30_426.864_428.767,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_426.864_428.767.wav,ثناش ألف و متين و خمسين,1.903
213
+ 266601,audio1_2hTranscrit_139.222_140.729,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_139.222_140.729.wav,عندك لاي زورار خويا,1.507
214
+ 10221,audio4_1h_transcrit_3289.633_3290.977,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3289.633_3290.977.wav,ثلاثة غير درج,1.344
215
+ 260143,audio4_1h_transcrit_3310.179_3311.018,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3310.179_3311.018.wav,سلام,0.8390000000000001
216
+ 276691,audio14-1h_455.150_456.391,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_455.150_456.391.wav,آكسبراس راهو آ,1.2409999999999999
217
+ 266039,audio17_1h_2920.520_2921.791,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_2920.520_2921.791.wav,لا موش عليك إنتي,1.271
218
+ 260208,audio10_350.185_352.140,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_350.185_352.140.wav,باهي ملا أعطيني تكاي,1.955
219
+ 276788,audio12_2186.248_2187.184,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2186.248_2187.184.wav,سلام,0.9359999999999999
220
+ 276798,audio18_30_1213.953_1214.670,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1213.953_1214.670.wav,أيه,0.7170000000000001
221
+ 271406,audio4_1h_transcrit_469.362_470.638,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_469.362_470.638.wav,الحداش و ربع,1.276
222
+ 267052,audio1_2hTranscrit_6024.415_6029.118,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6024.415_6029.118.wav,ڨابس عنا يا الصباح العشرة متاع الصباح يا الساعتين و أربعة متاع الليل,4.703
223
+ 271628,audio7_test_transcrit_4602.135_4603.428,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4602.135_4603.428.wav,لا لا,1.2930000000000001
224
+ 274505,audio5_Transcrit_270.526_272.009,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_270.526_272.009.wav,ثناش دينار,1.483
225
+ 264057,audio7_test_transcrit_4951.108_4952.405,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4951.108_4952.405.wav,السلام,1.297
226
+ 261199,audio18_30_1008.406_1010.421,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1008.406_1010.421.wav,بياي تسكرة لتونس,2.015
227
+ 264957,audio1_2hTranscrit_2680.594_2681.469,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2680.594_2681.469.wav,تفضل,0.875
228
+ 15823,audio3_transcrit_452.619_453.578,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_452.619_453.578.wav,غدوة أيه,0.9590000000000001
229
+ 271878,audio6.tst_2983.904_2985.792,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_2983.904_2985.792.wav,دهماني ألاي و رتور ستطاش و ستة مية,1.8880000000000001
230
+ 272147,audio1_2hTranscrit_5317.898_5319.191,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5317.898_5319.191.wav,باللاهي تران الفجر,1.2930000000000001
231
+ 272744,audio11_Transcrit_136.254_138.723,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_136.254_138.723.wav,سوسة ثلاثة آلاف و ميتين و خمسين,2.469
232
+ 263580,audio7_test_transcrit_1902.440_1905.128,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1902.440_1905.128.wav,فمة خويا تران ماشي لڨابس توا,2.688
233
+ 263274,audio8-1h30.tst_2826.379_2829.136,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_2826.379_2829.136.wav,تران إلي ماشي تو لصفاقس ياقف في الحنشة,2.7569999999999997
234
+ 273392,audio14-1h_3386.498_3393.431,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_3386.498_3393.431.wav,باهي مدام فمة قصلي تكاي تعبى و إلا مازال تجم تشوفلي,6.933
235
+ 275359,audio5_Transcrit_2675.636_2677.191,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_2675.636_2677.191.wav,ما فمشي ساعتين و درجين,1.555
236
+ 269379,audio3_transcrit_1788.077_1789.451,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1788.077_1789.451.wav,ديمة يمشي التران,1.374
237
+ 275423,audio4_1h_transcrit_253.374_254.452,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_253.374_254.452.wav,آه فهمتك,1.078
238
+ 274564,audio17_1h_2647.038_2649.239,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_2647.038_2649.239.wav,لا لا ماضي ساعتين بعشرة آلاف و ثلاثة مية,2.201
239
+ 276797,audio1_2hTranscrit_3264.944_3265.803,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3264.944_3265.803.wav,الخمسة,0.8590000000000001
240
+ 262526,audio4_1h_transcrit_3519.707_3520.446,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3519.707_3520.446.wav,أيه,0.7390000000000001
241
+ 267207,audio5_Transcrit_2307.038_2311.068,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_2307.038_2311.068.wav,باهي نعمل نجم نعمل كارت جون توا,4.03
242
+ 263844,audio11_Transcrit_3821.778_3823.997,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3821.778_3823.997.wav,ثلاثة و عشرين و ثمنية مية الزوز,2.219
243
+ 270236,audio5_Transcrit_1654.877_1656.050,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1654.877_1656.050.wav,تفضل أي,1.173
244
+ 269475,audio17_1h_266.321_267.857,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_266.321_267.857.wav,معاش تخلط عليه تو,1.536
245
+ 270182,audio8-1h30.tst_3906.619_3908.069,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_3906.619_3908.069.wav,دوزيام كلاس,1.45
246
+ 273181,audio5_Transcrit_501.615_502.812,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_501.615_502.812.wav,أوفات,1.197
247
+ 266933,audio11_Transcrit_4704.435_4705.825,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_4704.435_4705.825.wav,الله يرحم والديك,1.39
248
+ 271906,audio1_2hTranscrit_2789.980_2790.743,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2789.980_2790.743.wav,تو,0.763
249
+ 268075,audio5_Transcrit_542.212_543.171,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_542.212_543.171.wav,أيوه,0.9590000000000001
250
+ 263713,audio12_1818.663_1823.919,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_1818.663_1823.919.wav,باللاهي تران تونس ماضي ساعة متاع الليل نحب بروميار آه,5.256
251
+ 266219,audio12_1359.678_1361.838,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_1359.678_1361.838.wav,عيش ولدي راني ما عنديش صروف,2.16
252
+ 264321,audio5_Transcrit_364.591_366.997,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_364.591_366.997.wav,ستة آلاف و ستة مية و خمسين,2.406
253
+ 10274,audio11_Transcrit_3336.707_3338.435,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3336.707_3338.435.wav,آكسبراس بالله وقتاش لتونس,1.7280000000000002
254
+ 276993,audio3_transcrit_96.053_97.538,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_96.053_97.538.wav,باهي بحول الله,1.485
255
+ 269844,audio1_2hTranscrit_6267.480_6268.081,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6267.480_6268.081.wav,سلام,0.601
256
+ 265757,audio5_Transcrit_3117.800_3118.806,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3117.800_3118.806.wav,و العادي,1.006
257
+ 261212,audio7_test_transcrit_5929.296_5931.020,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5929.296_5931.020.wav,أيا خويا أي,1.724
258
+ 15738,audio11_Transcrit_5027.236_5028.004,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_5027.236_5028.004.wav,الحداش,0.768
259
+ 10007,audio3_transcrit_2291.171_2292.124,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2291.171_2292.124.wav,مارسي,0.953
260
+ 262514,audio17_1h_1507.474_1508.903,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1507.474_1508.903.wav,أي تفضل أي,1.429
261
+ 269635,audio7_test_transcrit_3733.809_3738.276,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3733.809_3738.276.wav,غدوة الصباح الخمسة و خمسة و عشرين دقيقة,4.467
262
+ 262507,audio18_30_1496.319_1497.909,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1496.319_1497.909.wav,متلوي الحداش,1.59
263
+ 276334,audio1_2hTranscrit_5106.666_5107.557,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5106.666_5107.557.wav,قداش,0.8909999999999999
264
+ 261156,audio7_test_transcrit_5412.974_5414.143,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5412.974_5414.143.wav,أيه,1.169
265
+ 266598,audio12_942.207_945.116,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_942.207_945.116.wav,تونس وقتاه,2.909
266
+ 262634,audio5_Transcrit_1029.867_1033.195,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1029.867_1033.195.wav,لآكسبراس العشية آني وقت يخرج,3.3280000000000003
267
+ 276318,audio8-1h30.tst_1968.306_1969.204,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1968.306_1969.204.wav,تفضل,0.898
268
+ 260501,audio5_Transcrit_3188.954_3192.011,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3188.954_3192.011.wav,كالعادة أوك الأوقات متاع العادة,3.057
269
+ 271033,audio1_2hTranscrit_6495.551_6497.129,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6495.551_6497.129.wav,السبعة غير درجين آكسبراس,1.578
270
+ 273504,audio3_transcrit_1743.128_1743.867,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1743.128_1743.867.wav,تونس,0.7390000000000001
271
+ 277874,audio7_test_transcrit_1519.128_1520.333,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1519.128_1520.333.wav,الستة و أربعة,1.205
272
+ 276374,audio4_1h_transcrit_2534.849_2536.459,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2534.849_2536.459.wav,أوقات المشيان لتونس,1.61
273
+ 269593,audio7_test_transcrit_1325.396_1326.595,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1325.396_1326.595.wav,وقتاه خويا التران,1.199
274
+ 274663,audio12_3403.484_3417.404,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_3403.484_3417.404.wav,لا راهو ثلاثة غير درج في رمضان أما تويكة في الإفطار يولي يولي الساعتين غير درج أعطيني ستطاش ألف ميتين و خمسين و أعطيني الكارت جون أعطيني نومرو متاعها,13.92
275
+ 270417,audio8-1h30.tst_3472.231_3474.008,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_3472.231_3474.008.wav,ثمة ترأنات,1.777
276
+ 271566,audio1_2hTranscrit_4845.372_4846.688,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_4845.372_4846.688.wav,قلعة صغرى,1.3159999999999998
277
+ 278077,audio7_test_transcrit_900.132_901.687,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_900.132_901.687.wav,أربعة سوايع يقعد,1.555
278
+ 268090,audio5_Transcrit_777.604_778.463,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_777.604_778.463.wav,زوز نابل,0.8590000000000001
279
+ 277004,audio4_1h_transcrit_2182.144_2183.079,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2182.144_2183.079.wav,زوز من ناس,0.935
280
+ 265691,audio3_transcrit_2701.965_2703.658,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2701.965_2703.658.wav,دوزيام قلت لي,1.693
281
+ 274626,audio17_1h_1002.457_1003.163,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1002.457_1003.163.wav,ما يقلقنيش,0.706
282
+ 269983,audio1_2hTranscrit_1207.347_1209.175,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1207.347_1209.175.wav,توا الماضي ساعة و ربع,1.828
283
+ 266105,audio3_transcrit_3205.183_3206.183,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3205.183_3206.183.wav,تونس,1.0
284
+ 273774,audio1_2hTranscrit_2445.416_2446.041,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2445.416_2446.041.wav,آه,0.625
285
+ 264815,audio7_test_transcrit_5699.819_5700.928,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5699.819_5700.928.wav,أيه,1.109
286
+ 274959,audio7_test_transcrit_2764.021_2768.147,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_2764.021_2768.147.wav,تران لتونس فمة توا الأربعة و نصف و فمة الستة و الأربعة أدراج,4.126
287
+ 267827,audio1_2hTranscrit_6019.722_6020.785,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6019.722_6020.785.wav,تفضل أي,1.063
288
+ 274447,audio3_transcrit_525.416_526.056,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_525.416_526.056.wav,أيه,0.64
289
+ 268407,audio4_1h_transcrit_2432.428_2433.124,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2432.428_2433.124.wav,سلام,0.696
290
+ 266124,audio11_Transcrit_3362.002_3363.564,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3362.002_3363.564.wav,تسكرة بروميار,1.5619999999999998
291
+ 276676,audio3_transcrit_1354.877_1357.861,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1354.877_1357.861.wav,تران متاع ماضي ساعة و نصف ياقف في حمام الأنف,2.984
292
+ 271181,audio6.tst_2726.323_2730.577,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_2726.323_2730.577.wav,أهلا و سهلا باللاهي عيشك فمة تران لسوسة توا آكسبراس ولا نورمال,4.254
293
+ 269695,audio3_transcrit_3095.700_3097.443,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3095.700_3097.443.wav,أيه ييه توا,1.743
294
+ 265729,audio1_2hTranscrit_6478.999_6479.882,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_6478.999_6479.882.wav,عسلامة,0.883
295
+ 260520,audio11_Transcrit_2232.785_2234.661,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2232.785_2234.661.wav,ألاي و رتور تونس,1.876
296
+ 272736,audio3_transcrit_1465.640_1466.694,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1465.640_1466.694.wav,حمام الأنف,1.054
297
+ 266451,audio14-1h_2450.561_2452.321,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2450.561_2452.321.wav,نابل مع أما وقت يا بابا,1.76
298
+ 268854,audio7_test_transcrit_2730.348_2734.833,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_2730.348_2734.833.wav,توصل السبعة و نصف فهمت الفازة,4.485
299
+ 277372,audio1_2hTranscrit_1474.951_1475.623,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1474.951_1475.623.wav,وين,0.672
300
+ 268383,audio7_test_transcrit_3819.817_3820.299,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_3819.817_3820.299.wav,آ,0.48200000000000004
301
+ 9990,audio12_1928.252_1929.487,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_1928.252_1929.487.wav,إنشاء الله أي,1.235
302
+ 276300,audio1_2hTranscrit_3343.543_3344.184,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3343.543_3344.184.wav,آه,0.6409999999999999
303
+ 264611,audio3_transcrit_2954.143_2955.080,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2954.143_2955.080.wav,ثناش دينار,0.937
304
+ 268241,audio1_2hTranscrit_1618.121_1619.199,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_1618.121_1619.199.wav,مازالو درجين,1.078
305
+ 263329,audio14-1h_3290.693_3298.943,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_3290.693_3298.943.wav,خمسين فرنك توا نشوفلك في بإلي عندي مية ما عنديش خمسينات أي توا نشوفلك أي باهي تفضل آهو لقيتلك خمسين,8.25
306
+ 275013,audio8-1h30.tst_495.522_497.706,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_495.522_497.706.wav,أعطينا ثلاثة و ثلاثين دينار و,2.184
307
+ 271636,audio11_Transcrit_3386.284_3388.847,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3386.284_3388.847.wav,عندكشي مية و خمسين شوية صرف صغير,2.563
308
+ 271077,audio18_30_366.225_367.488,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_366.225_367.488.wav,لا ما عنديش,1.263
309
+ 268101,audio14-1h_2410.073_2410.667,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2410.073_2410.667.wav,أهلا,0.594
310
+ 266854,audio11_Transcrit_687.765_689.952,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_687.765_689.952.wav,لا لا ثلاثة غير درج,2.187
311
+ 277414,audio17_1h_1203.933_1205.500,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1203.933_1205.500.wav,ألاي و رتور في الدوزيام,1.567
312
+ 276158,audio1_2hTranscrit_230.681_231.945,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_230.681_231.945.wav,الخمسة و خمسة,1.264
313
+ 275436,audio10_1390.549_1393.420,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_1390.549_1393.420.wav,فمة كان في الليل مع التسعة و نصف,2.8710000000000004
314
+ 277486,audio5_Transcrit_3481.043_3482.049,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3481.043_3482.049.wav,تونس,1.006
315
+ 261999,audio6.tst_2633.243_2633.904,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_2633.243_2633.904.wav,لحظة,0.6609999999999999
316
+ 260207,audio4_1h_transcrit_2276.239_2277.388,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2276.239_2277.388.wav,تونس,1.149
317
+ 271284,audio17_1h_1580.155_1582.098,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1580.155_1582.098.wav,وقتاش يخرج هو,1.943
318
+ 276316,audio11_Transcrit_2956.430_2958.258,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2956.430_2958.258.wav,و عليكم السلام و رحمة الله,1.828
319
+ 9983,audio5_Transcrit_3105.026_3105.551,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3105.026_3105.551.wav,أيه,0.525
320
+ 268072,audio12_3202.436_3210.428,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_3202.436_3210.428.wav,و إلي بعدو آ آ فمة الأربعة غير ربع و فمة في الليل الكلهم القلعة الصغرى راهو موش سنتر آه,7.992000000000001
321
+ 274969,audio8-1h30.tst_1529.745_1531.751,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1529.745_1531.751.wav,أي باهي أي آكسبراس راهو,2.006
322
+ 273084,audio17_1h_2787.469_2791.447,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_2787.469_2791.447.wav,أيه أيه سوسة راهو موش صفاقس أيا تفضل آهم الفلوس أي,3.978
323
+ 268372,audio1_2hTranscrit_5001.155_5002.202,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5001.155_5002.202.wav,ستطاش و تسعة مية,1.047
324
+ 269976,audio4_1h_transcrit_3307.283_3308.314,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_3307.283_3308.314.wav,آكسبراس أيه,1.031
325
+ 269098,audio17_1h_3209.685_3216.039,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_3209.685_3216.039.wav,عسلامة باللاهي عيش أختي مازالوا فمة ترأنات المهدية وقتاش كان فمة وقتاش يخرج هو التران بالظبط,6.354
326
+ 273193,audio7_test_transcrit_4132.130_4134.658,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_4132.130_4134.658.wav,ميتين و خمسة و عشرين ثلاثة تسعات,2.528
327
+ 272699,audio5_Transcrit_2760.797_2761.859,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_2760.797_2761.859.wav,أوكاي مارسي,1.062
328
+ 267294,audio12_3389.300_3393.140,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_3389.300_3393.140.wav,خمسطاش ألف و ميتين و خمسين أعطيني ميتين و خمسين,3.84
329
+ 264347,audio12_1282.511_1284.214,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_1282.511_1284.214.wav,قفلهم و إلا مازال,1.703
330
+ 272462,audio5_Transcrit_1867.794_1871.106,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1867.794_1871.106.wav,القلعة الصغيرة محطة بعد سوسة بثلاثة كيلو,3.312
331
+ 269180,audio4_1h_transcrit_2210.432_2211.558,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_2210.432_2211.558.wav,ستين ألف,1.126
332
+ 272203,audio1_2hTranscrit_5167.562_5168.664,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5167.562_5168.664.wav,زوز تونس إيه,1.102
333
+ 260980,audio1_2hTranscrit_5838.906_5841.249,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_5838.906_5841.249.wav,البرومسيون مازالت تبطى واشي,2.343
334
+ 270861,audio11_Transcrit_2140.479_2141.448,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_2140.479_2141.448.wav,السريع,0.9690000000000001
335
+ 272005,audio7_test_transcrit_888.890_889.796,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_888.890_889.796.wav,أيه,0.9059999999999999
336
+ 269510,audio18_30_8.133_12.113,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_8.133_12.113.wav,آه دو تكاي الجم باهي أعطيني سبعطاش ألف و خمسة مية,3.98
337
+ 265896,audio12_2962.210_2964.394,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2962.210_2964.394.wav,بلاصة و نصف لتونس,2.184
338
+ 270403,audio5_Transcrit_465.707_466.761,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_465.707_466.761.wav,دوزيام,1.054
339
+ 272435,audio7_test_transcrit_2895.068_2896.766,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_2895.068_2896.766.wav,كلهم بروميار كلاس,1.6980000000000002
340
+ 278025,audio6.tst_10.268_11.607,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_10.268_11.607.wav,دراكت و إلا آكسبراس,1.339
341
+ 276201,audio11_Transcrit_1513.094_1513.934,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_1513.094_1513.934.wav,لا متاع,0.84
342
+ 262825,audio11_Transcrit_3507.485_3508.204,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3507.485_3508.204.wav,دوزيام,0.7190000000000001
343
+ 267472,audio7_test_transcrit_5681.454_5682.365,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_5681.454_5682.365.wav,أوه,0.9109999999999999
344
+ 261872,audio18_30_1982.346_1984.579,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_1982.346_1984.579.wav,أيا تفضل خويا,2.233
345
+ 261978,audio14-1h_3617.414_3619.053,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_3617.414_3619.053.wav,وريني الكارت,1.639
346
+ 277008,audio3_transcrit_2529.953_2530.999,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2529.953_2530.999.wav,ماضي ساعة و نصف,1.046
347
+ 261954,audio1_2hTranscrit_2160.048_2160.845,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_2160.048_2160.845.wav,أيه ييه,0.797
348
+ 271357,audio7_test_transcrit_1299.893_1302.044,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_1299.893_1302.044.wav,الأربعة و نصف إنشاء الله,2.151
349
+ 270227,audio14-1h_2936.169_2937.656,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_2936.169_2937.656.wav,بقداش الزوز يطلعولي,1.4869999999999999
350
+ 260883,audio8-1h30.tst_1981.030_1983.458,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1981.030_1983.458.wav,آ القلعة الكبرى و إلا القلعة الصغرى,2.428
351
+ 266418,audio8-1h30.tst_1055.615_1056.490,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1055.615_1056.490.wav,أهلا,0.875
352
+ 261452,audio7_test_transcrit_974.326_975.233,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio7_test_transcrit_974.326_975.233.wav,سوسة,0.907
353
+ 264155,audio4_1h_transcrit_1151.247_1152.622,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_1151.247_1152.622.wav,أعطيني بروميار مالا,1.375
354
+ 277815,audio5_Transcrit_3352.252_3355.042,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_3352.252_3355.042.wav,فمة تران تخرج لڨابس,2.79
355
+ 261656,audio14-1h_891.336_895.109,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_891.336_895.109.wav,لا لا أعطيني نورمال و برة لين آكسبراس ما نحبهاش آكسبراس أنا,3.773
356
+ 274992,audio1_2hTranscrit_3933.068_3933.665,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_3933.068_3933.665.wav,نعم,0.597
357
+ 10294,audio3_transcrit_2874.441_2875.270,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_2874.441_2875.270.wav,أي ولدي أي,0.8290000000000001
358
+ 269322,audio8-1h30.tst_901.156_902.516,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_901.156_902.516.wav,التسعة و سبعة يا مدام,1.36
359
+ 274220,audio18_30_898.599_899.907,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio18_30_898.599_899.907.wav,الله يبارك,1.308
360
+ 275271,audio10_2632.204_2633.892,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio10_2632.204_2633.892.wav,بقداه التكاي,1.6880000000000002
361
+ 10051,audio11_Transcrit_3921.994_3924.150,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_3921.994_3924.150.wav,وحيدة آكسبراس تونس,2.156
362
+ 263861,audio3_transcrit_3168.504_3170.597,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3168.504_3170.597.wav,لا وحدة برك زوز بير بورڨبة,2.093
363
+ 265224,audio4_1h_transcrit_776.833_777.614,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio4_1h_transcrit_776.833_777.614.wav,مرحبا,0.7809999999999999
364
+ 260463,audio12_2664.386_2665.730,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_2664.386_2665.730.wav,السلام,1.344
365
+ 264094,audio8-1h30.tst_3925.655_3926.618,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_3925.655_3926.618.wav,آ,0.963
366
+ 262188,audio8-1h30.tst_4523.169_4524.719,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_4523.169_4524.719.wav,أي أكهو كان إنتي,1.55
367
+ 265237,audio5_Transcrit_2711.174_2712.799,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_2711.174_2712.799.wav,الخمسة و خمسة أول تران,1.625
368
+ 266458,audio17_1h_1021.866_1023.713,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio17_1h_1021.866_1023.713.wav,بحول الله,1.847
369
+ 260336,audio3_transcrit_1679.016_1680.610,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1679.016_1680.610.wav,تونس بروميار قلت لي,1.594
370
+ 10299,audio8-1h30.tst_4792.160_4793.181,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_4792.160_4793.181.wav,داكوردو,1.021
371
+ 269875,audio3_transcrit_1134.090_1134.965,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_1134.090_1134.965.wav,وقتاش,0.875
372
+ 275840,audio12_629.114_632.042,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_629.114_632.042.wav,نورمال عندكشي ألفين,2.928
373
+ 267377,audio14-1h_857.516_858.581,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio14-1h_857.516_858.581.wav,بلاصة و نصف,1.065
374
+ 266325,audio12_889.550_894.590,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio12_889.550_894.590.wav,بالله تسكرة لتونس ماضي ساعة ماضي ساعة و قداش هو,5.04
375
+ 266617,audio11_Transcrit_4437.900_4438.932,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio11_Transcrit_4437.900_4438.932.wav,العشرة,1.032
376
+ 275039,audio1_2hTranscrit_4258.629_4259.421,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio1_2hTranscrit_4258.629_4259.421.wav,الجم,0.792
377
+ 263210,audio6.tst_375.099_376.515,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_375.099_376.515.wav,قداش من بلاصة حاشتك,1.416
378
+ 263778,audio8-1h30.tst_1155.278_1155.903,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio8-1h30.tst_1155.278_1155.903.wav,أكهو,0.625
379
+ 270184,audio6.tst_161.784_162.454,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio6.tst_161.784_162.454.wav,بالسلامة,0.67
380
+ 276421,audio5_Transcrit_1408.025_1409.289,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio5_Transcrit_1408.025_1409.289.wav,كان مشي برك,1.264
381
+ 275401,audio3_transcrit_3025.983_3026.623,/gpfsscratch/rech/nou/uzn19yk/TARIC_2/audio3_transcrit_3025.983_3026.623.wav,باهي,0.64
train_semi.yaml ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ################################
2
+ # Model: wav2vec2 + DNN + CTC
3
+ # Augmentation: SpecAugment
4
+ # Authors: Titouan Parcollet 2021
5
+ # ################################
6
+
7
+ # Seed needs to be set at top of yaml, before objects with parameters are made
8
+ seed: 1234
9
+ __set_seed: !!python/object/apply:torch.manual_seed [!ref <seed>]
10
+ output_folder: !ref semi_wavlm_large_tunisian_ctc/<seed>
11
+ wer_file: !ref <output_folder>/wer.txt
12
+ save_folder: !ref <output_folder>/save
13
+ train_log: !ref <output_folder>/train_log.txt
14
+
15
+ # Folder where the pretrained wav2vec2/WavLM checkpoint is stored.
16
+ wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
17
+
18
+ # Data files
19
+ data_folder: /path/to/data # e.g, /localscratch/cv-corpus-5.1-2020-06-22/fr
20
+ train_tsv_file: !ref <data_folder>/train.tsv # Standard CommonVoice .tsv files
21
+ dev_tsv_file: !ref <data_folder>/dev.tsv # Standard CommonVoice .tsv files
22
+ test_tsv_file: !ref <data_folder>/test.tsv # Standard CommonVoice .tsv files
23
+ accented_letters: True
24
+ language: fr # use 'it' for Italian, 'rw' for Kinyarwanda, 'en' for english
25
+ test_csv:
26
+ - /path/to/test_data
27
+
28
+ skip_prep: True # Skip data preparation
29
+
30
+ use_language_modelling: True
31
+ ngram_lm_path: outdomain.arpa
32
+
33
+ # We remove utterances longer than 10s in the train/dev/test sets as
34
+ # longer sentences certainly correspond to "open microphones".
35
+ avoid_if_longer_than: 10.0
36
+ avoid_if_shorter_than: 1.2
37
+
38
+
39
+ # Training parameters
40
+ number_of_epochs: 12
41
+ lr: 1.0
42
+ lr_wav2vec: 0.0001
43
+ sorting: ascending
44
+ auto_mix_prec: False
45
+ sample_rate: 16000
46
+ ckpt_interval_minutes: 30 # save checkpoint every N min
47
+
48
+ # With data_parallel batch_size is split into N jobs
49
+ # With DDP batch_size is multiplied by N jobs
50
+ # Must be 6 per GPU to fit 16GB of VRAM
51
+ batch_size: 10
52
+ test_batch_size: 4
53
+
54
+ dataloader_options:
55
+ batch_size: !ref <batch_size>
56
+ num_workers: 6
57
+ test_dataloader_options:
58
+ batch_size: !ref <test_batch_size>
59
+ num_workers: 6
60
+
61
+ # BPE parameters
62
+ token_type: char # ["unigram", "bpe", "char"]
63
+ character_coverage: 1.0
64
+
65
+ # Model parameters
66
+ # activation: !name:torch.nn.LeakyReLU
67
+ wav2vec_output_dim: 1024
68
+ dnn_neurons: 1024
69
+ freeze_wav2vec: False
70
+ freeze_feature_extractor: True
71
+ dropout: 0.15
72
+ warmup_steps: 500 # The wav2vec 2 model isn't updated for this amount of steps
73
+
74
+ # Outputs
75
+ output_neurons: 40 # BPE size, index(blank/eos/bos) = 0
76
+
77
+ # Decoding parameters
78
+ # Be sure that the bos and eos index match with the BPEs ones
79
+ blank_index: 0
80
+ unk_index: 1
81
+
82
+ #
83
+ # Functions and classes
84
+ #
85
+ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
86
+ limit: !ref <number_of_epochs>
87
+
88
+ augmentation: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
89
+ sample_rate: !ref <sample_rate>
90
+ speeds: [95, 100, 105]
91
+
92
+ enc: !new:speechbrain.nnet.containers.Sequential
93
+ input_shape: [null, null, !ref <wav2vec_output_dim>]
94
+ linear1: !name:speechbrain.nnet.linear.Linear
95
+ n_neurons: !ref <dnn_neurons>
96
+ bias: True
97
+ bn1: !name:speechbrain.nnet.normalization.BatchNorm1d
98
+ activation: !new:torch.nn.LeakyReLU
99
+ drop: !new:torch.nn.Dropout
100
+ p: !ref <dropout>
101
+ linear2: !name:speechbrain.nnet.linear.Linear
102
+ n_neurons: !ref <dnn_neurons>
103
+ bias: True
104
+ bn2: !name:speechbrain.nnet.normalization.BatchNorm1d
105
+ activation2: !new:torch.nn.LeakyReLU
106
+ drop2: !new:torch.nn.Dropout
107
+ p: !ref <dropout>
108
+ linear3: !name:speechbrain.nnet.linear.Linear
109
+ n_neurons: !ref <dnn_neurons>
110
+ bias: True
111
+ bn3: !name:speechbrain.nnet.normalization.BatchNorm1d
112
+ activation3: !new:torch.nn.LeakyReLU
113
+
114
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
115
+ source: wavlm-large/
116
+ output_norm: False
117
+ freeze: !ref <freeze_wav2vec>
118
+ freeze_feature_extractor: !ref <freeze_feature_extractor>
119
+ save_path: !ref <wav2vec2_folder>
120
+
121
+
122
+ ctc_lin: !new:speechbrain.nnet.linear.Linear
123
+ input_size: !ref <dnn_neurons>
124
+ n_neurons: !ref <output_neurons>
125
+
126
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
127
+ apply_log: True
128
+
129
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
130
+ blank_index: !ref <blank_index>
131
+
132
+ modules:
133
+ wav2vec2: !ref <wav2vec2>
134
+ enc: !ref <enc>
135
+ ctc_lin: !ref <ctc_lin>
136
+
137
+ model: !new:torch.nn.ModuleList
138
+ - [!ref <enc>, !ref <ctc_lin>]
139
+
140
+ model_opt_class: !name:torch.optim.Adadelta
141
+ lr: !ref <lr>
142
+ rho: 0.95
143
+ eps: 1.e-8
144
+
145
+ wav2vec_opt_class: !name:torch.optim.Adam
146
+ lr: !ref <lr_wav2vec>
147
+
148
+ lr_annealing_model: !new:speechbrain.nnet.schedulers.NewBobScheduler
149
+ initial_value: !ref <lr>
150
+ improvement_threshold: 0.0025
151
+ annealing_factor: 0.8
152
+ patient: 0
153
+
154
+ lr_annealing_wav2vec: !new:speechbrain.nnet.schedulers.NewBobScheduler
155
+ initial_value: !ref <lr_wav2vec>
156
+ improvement_threshold: 0.0025
157
+ annealing_factor: 0.9
158
+ patient: 0
159
+
160
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
161
+ checkpoints_dir: !ref <save_folder>
162
+ recoverables:
163
+ wav2vec2: !ref <wav2vec2>
164
+ model: !ref <model>
165
+ scheduler_model: !ref <lr_annealing_model>
166
+ scheduler_wav2vec: !ref <lr_annealing_wav2vec>
167
+ counter: !ref <epoch_counter>
168
+
169
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
170
+ save_file: !ref <train_log>
171
+
172
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
173
+
174
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
175
+ split_tokens: True
train_with_wavlm.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import sys
3
+ import torch
4
+ import logging
5
+ import speechbrain as sb
6
+ from pathlib import Path
7
+ import os
8
+ import torchaudio
9
+ from hyperpyyaml import load_hyperpyyaml
10
+ from speechbrain.tokenizers.SentencePiece import SentencePiece
11
+ from speechbrain.utils.data_utils import undo_padding
12
+ from speechbrain.utils.distributed import run_on_main
13
+
14
+ """Recipe for training a sequence-to-sequence ASR system with CommonVoice.
15
+ The system employs a wav2vec2 encoder and a CTC decoder.
16
+ Decoding is performed with greedy decoding (will be extended to beam search).
17
+
18
+ To run this recipe, do the following:
19
+ > python train_with_wavlm.py train_semi.yaml
20
+
21
+ With the default hyperparameters, the system employs a pretrained wav2vec2 encoder.
22
+ The wav2vec2 model is pretrained following the model given in the hparams file.
23
+ It may be dependent on the language.
24
+
25
+ The neural network is trained with CTC on sub-word units estimated with
26
+ Byte Pairwise Encoding (BPE).
27
+
28
+ The experiment file is flexible enough to support a large variety of
29
+ different systems. By properly changing the parameter files, you can try
30
+ different encoders, decoders, tokens (e.g, characters instead of BPE),
31
+ training languages (all CommonVoice languages), and many
32
+ other possible variations.
33
+
34
+ Authors
35
+ * Titouan Parcollet 2021
36
+ """
37
+
38
+ logger = logging.getLogger(__name__)
39
+
40
+
41
+ # Define training procedure
42
+ class ASR(sb.core.Brain):
43
+ def compute_forward(self, batch, stage):
44
+ """Forward computations from the waveform batches to the output probabilities."""
45
+
46
+ batch = batch.to(self.device)
47
+ wavs, wav_lens = batch.sig
48
+ wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
49
+ if stage == sb.Stage.TRAIN:
50
+ if hasattr(self.hparams, "augmentation"):
51
+ wavs = self.hparams.augmentation(wavs, wav_lens)
52
+
53
+ # Forward pass
54
+ feats = self.modules.wav2vec2(wavs, wav_lens)
55
+ x = self.modules.enc(feats)
56
+ logits = self.modules.ctc_lin(x)
57
+ p_ctc = self.hparams.log_softmax(logits)
58
+
59
+ return p_ctc, wav_lens
60
+
61
+ def compute_objectives(self, predictions, batch, stage):
62
+ """Computes the loss (CTC) given predictions and targets."""
63
+
64
+ p_ctc, wav_lens = predictions
65
+
66
+ ids = batch.id
67
+ tokens, tokens_lens = batch.tokens
68
+
69
+ loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
70
+
71
+ if stage != sb.Stage.TRAIN:
72
+ predicted_tokens = sb.decoders.ctc_greedy_decode(
73
+ p_ctc, wav_lens, blank_id=self.hparams.blank_index
74
+ )
75
+ # Decode token terms to words
76
+ if self.hparams.use_language_modelling:
77
+ predicted_words = []
78
+ for logs in p_ctc:
79
+ text = decoder.decode(logs.detach().cpu().numpy())
80
+ predicted_words.append(text.split(" "))
81
+ else:
82
+ predicted_words = [
83
+ "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
84
+ for utt_seq in predicted_tokens
85
+ ]
86
+ # Convert indices to words
87
+ target_words = [wrd.split(" ") for wrd in batch.wrd]
88
+
89
+ self.wer_metric.append(ids, predicted_words, target_words)
90
+ self.cer_metric.append(ids, predicted_words, target_words)
91
+
92
+ return loss
93
+
94
    def fit_batch(self, batch):
        """Train the parameters given a single batch in input.

        Runs forward + CTC loss, scales the loss by the gradient-accumulation
        factor, backpropagates, and steps the two optimizers (wav2vec2 and
        downstream model) every ``grad_accumulation_factor`` batches. The
        wav2vec2 optimizer is only stepped after ``warmup_steps`` updates and
        only when the encoder is not frozen. Returns the detached loss on CPU.
        """
        # Step the optimizers only on accumulation boundaries.
        should_step = self.step % self.grad_accumulation_factor == 0
        # Managing automatic mixed precision
        # TOFIX: CTC fine-tuning currently is unstable
        # This is certainly due to CTC being done in fp16 instead of fp32
        if self.auto_mix_prec:
            with torch.cuda.amp.autocast():
                # no_sync() avoids DDP gradient sync on the forward pass
                # (HF models misbehave under DDP here -- see else branch).
                with self.no_sync():
                    outputs = self.compute_forward(batch, sb.Stage.TRAIN)
                loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
            # Skip DDP gradient sync on intermediate accumulation steps.
            with self.no_sync(not should_step):
                self.scaler.scale(
                    loss / self.grad_accumulation_factor
                ).backward()
            if should_step:

                # Unscale before gradient clipping/checking.
                if not self.hparams.wav2vec2.freeze:
                    self.scaler.unscale_(self.wav2vec_optimizer)
                self.scaler.unscale_(self.model_optimizer)
                # check_gradients() rejects non-finite/exploding gradients.
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        # wav2vec2 is kept frozen for the first warmup_steps
                        # optimizer updates.
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.scaler.step(self.wav2vec_optimizer)
                    self.scaler.step(self.model_optimizer)
                    self.scaler.update()
                self.zero_grad()
                self.optimizer_step += 1
        else:
            # This is mandatory because HF models have a weird behavior with DDP
            # on the forward pass
            with self.no_sync():
                outputs = self.compute_forward(batch, sb.Stage.TRAIN)

            loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)

            # Skip DDP gradient sync on intermediate accumulation steps.
            with self.no_sync(not should_step):
                (loss / self.grad_accumulation_factor).backward()
            if should_step:
                # Only update weights when the gradients are finite.
                if self.check_gradients(loss):
                    if not self.hparams.wav2vec2.freeze:
                        # Honor the wav2vec2 warmup period before stepping it.
                        if self.optimizer_step >= self.hparams.warmup_steps:
                            self.wav2vec_optimizer.step()
                    self.model_optimizer.step()
                self.zero_grad()
                self.optimizer_step += 1

        self.on_fit_batch_end(batch, outputs, loss, should_step)
        return loss.detach().cpu()
143
+
144
+ def evaluate_batch(self, batch, stage):
145
+ """Computations needed for validation/test batches"""
146
+ predictions = self.compute_forward(batch, stage=stage)
147
+ with torch.no_grad():
148
+ loss = self.compute_objectives(predictions, batch, stage=stage)
149
+ return loss.detach()
150
+
151
+ def on_stage_start(self, stage, epoch):
152
+ """Gets called at the beginning of each epoch"""
153
+ if stage != sb.Stage.TRAIN:
154
+ self.cer_metric = self.hparams.cer_computer()
155
+ self.wer_metric = self.hparams.error_rate_computer()
156
+
157
+ def on_stage_end(self, stage, stage_loss, epoch):
158
+ """Gets called at the end of an epoch."""
159
+ # Compute/store important stats
160
+ stage_stats = {"loss": stage_loss}
161
+ if stage == sb.Stage.TRAIN:
162
+ self.train_stats = stage_stats
163
+ else:
164
+ stage_stats["CER"] = self.cer_metric.summarize("error_rate")
165
+ stage_stats["WER"] = self.wer_metric.summarize("error_rate")
166
+
167
+ # Perform end-of-iteration things, like annealing, logging, etc.
168
+ if stage == sb.Stage.VALID:
169
+ old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
170
+ stage_stats["loss"]
171
+ )
172
+ old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
173
+ stage_stats["loss"]
174
+ )
175
+ sb.nnet.schedulers.update_learning_rate(
176
+ self.model_optimizer, new_lr_model
177
+ )
178
+ if not self.hparams.wav2vec2.freeze:
179
+ sb.nnet.schedulers.update_learning_rate(
180
+ self.wav2vec_optimizer, new_lr_wav2vec
181
+ )
182
+ self.hparams.train_logger.log_stats(
183
+ stats_meta={
184
+ "epoch": epoch,
185
+ "lr_model": old_lr_model,
186
+ "lr_wav2vec": old_lr_wav2vec,
187
+ },
188
+ train_stats=self.train_stats,
189
+ valid_stats=stage_stats,
190
+ )
191
+ self.checkpointer.save_and_keep_only(
192
+ meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
193
+ )
194
+ elif stage == sb.Stage.TEST:
195
+ self.hparams.train_logger.log_stats(
196
+ stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
197
+ test_stats=stage_stats,
198
+ )
199
+ with open(self.hparams.wer_file, "w") as w:
200
+ self.wer_metric.write_stats(w)
201
+
202
+ def init_optimizers(self):
203
+ "Initializes the wav2vec2 optimizer and model optimizer"
204
+
205
+ # If the wav2vec encoder is unfrozen, we create the optimizer
206
+ if not self.hparams.wav2vec2.freeze:
207
+ self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
208
+ self.modules.wav2vec2.parameters()
209
+ )
210
+ if self.checkpointer is not None:
211
+ self.checkpointer.add_recoverable(
212
+ "wav2vec_opt", self.wav2vec_optimizer
213
+ )
214
+
215
+ self.model_optimizer = self.hparams.model_opt_class(
216
+ self.hparams.model.parameters()
217
+ )
218
+
219
+ if self.checkpointer is not None:
220
+ self.checkpointer.add_recoverable("modelopt", self.model_optimizer)
221
+
222
+ def zero_grad(self, set_to_none=False):
223
+ if not self.hparams.wav2vec2.freeze:
224
+ self.wav2vec_optimizer.zero_grad(set_to_none)
225
+ self.model_optimizer.zero_grad(set_to_none)
226
+
227
+
228
+ # Define custom data procedure
229
def dataio_prepare(hparams):
    """This function prepares the datasets to be used in the brain class.
    It also defines the data processing pipeline through user-defined functions.

    Arguments
    ---------
    hparams : dict
        Loaded hyperparameters; must contain the data_folder, csv paths,
        sorting mode, sample_rate, save_folder and blank/unk indices.

    Returns
    -------
    train_data, valid_data : DynamicItemDataset
    test_datasets : dict of name -> DynamicItemDataset
    label_encoder : CTCTextEncoder
    """

    # 1. Define datasets
    data_folder = hparams["data_folder"]

    train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
        csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
    )

    if hparams["sorting"] == "ascending":
        # we sort training data to speed up training and get better results.
        # Utterances longer than avoid_if_longer_than are dropped entirely.
        train_data = train_data.filtered_sorted(
            sort_key="duration",
            key_max_value={"duration": hparams["avoid_if_longer_than"]},
        )
        # when sorting do not shuffle in dataloader ! otherwise is pointless
        hparams["dataloader_options"]["shuffle"] = False

    elif hparams["sorting"] == "descending":
        train_data = train_data.filtered_sorted(
            sort_key="duration",
            reverse=True,
            key_max_value={"duration": hparams["avoid_if_longer_than"]},
        )
        # when sorting do not shuffle in dataloader ! otherwise is pointless
        hparams["dataloader_options"]["shuffle"] = False

    elif hparams["sorting"] == "random":
        pass

    else:
        raise NotImplementedError(
            "sorting must be random, ascending or descending"
        )

    valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
        csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
    )
    # We also sort the validation data so it is faster to validate
    valid_data = valid_data.filtered_sorted(sort_key="duration")
    # One dataset per test CSV, keyed by the file's stem.
    test_datasets = {}
    for csv_file in hparams["test_csv"]:
        name = Path(csv_file).stem
        test_datasets[name] = sb.dataio.dataset.DynamicItemDataset.from_csv(
            csv_path=csv_file, replacements={"data_root": data_folder}
        )
        test_datasets[name] = test_datasets[name].filtered_sorted(
            sort_key="duration"
        )

    # All datasets share the same dynamic-item pipelines below.
    datasets = [train_data, valid_data] + [i for k, i in test_datasets.items()]


    # 2. Define audio pipeline:
    @sb.utils.data_pipeline.takes("wav")
    @sb.utils.data_pipeline.provides("sig")
    def audio_pipeline(wav):
        # Read the waveform and resample it to the model's sample rate.
        # NOTE(review): a Resample transform is built per utterance; caching
        # per source rate would be cheaper — confirm before changing.
        info = torchaudio.info(wav)
        sig = sb.dataio.dataio.read_audio(wav)
        resampled = torchaudio.transforms.Resample(
            info.sample_rate, hparams["sample_rate"],
        )(sig)
        return resampled

    sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
    label_encoder = sb.dataio.encoder.CTCTextEncoder()

    # 3. Define text pipeline:
    # The yield order must match the `provides` order above it.
    @sb.utils.data_pipeline.takes("wrd")
    @sb.utils.data_pipeline.provides(
        "wrd", "char_list", "tokens_list", "tokens"
    )
    def text_pipeline(wrd):
        yield wrd
        # Character-level modelling: one token per character.
        char_list = list(wrd)
        yield char_list
        tokens_list = label_encoder.encode_sequence(char_list)
        yield tokens_list
        tokens = torch.LongTensor(tokens_list)
        yield tokens

    sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
    # Persist the label encoder so train/eval runs share the same mapping.
    lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
    special_labels = {
        "blank_label": hparams["blank_index"],
        "unk_label": hparams["unk_index"]
    }
    label_encoder.load_or_create(
        path=lab_enc_file,
        from_didatasets=[train_data],
        output_key="char_list",
        special_labels=special_labels,
        sequence_input=True,
    )

    # 4. Set output:
    sb.dataio.dataset.set_output_keys(
        datasets, ["id", "sig", "wrd", "char_list", "tokens"],
    )
    return train_data, valid_data,test_datasets, label_encoder
331
+
332
+
333
if __name__ == "__main__":

    # Load hyperparameters file with command-line overrides
    hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
    with open(hparams_file) as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    # If --distributed_launch then
    # create ddp_group with the right communication protocol
    sb.utils.distributed.ddp_init_group(run_opts)

    # Create experiment directory
    sb.create_experiment_directory(
        experiment_directory=hparams["output_folder"],
        hyperparams_to_save=hparams_file,
        overrides=overrides,
    )

    # Due to DDP, we do the preparation ONLY on the main python process
    # Create the datasets objects as well as tokenization and encoding
    train_data, valid_data, test_datasets, label_encoder = dataio_prepare(hparams)

    # Optionally build a CTC beam-search decoder backed by a KenLM n-gram LM.
    # NOTE: `decoder` must remain at module scope because
    # ASR.compute_objectives reads it as a global during evaluation.
    if hparams["use_language_modelling"]:
        # Fixed misspelled log message ("using langauge_modeeling").
        print("Using language modelling")
        from pyctcdecode import build_ctcdecoder

        ind2lab = label_encoder.ind2lab
        print(ind2lab)
        labels = [ind2lab[x] for x in range(len(ind2lab))]
        # Replace the <blank> token with a blank character, needed for PyCTCdecode.
        # The last label is remapped to "1" — presumably a stand-in for the
        # <unk> token; TODO confirm against label_encoder.txt.
        labels = [""] + labels[1:-1] + ["1"]
        print(labels)
        decoder = build_ctcdecoder(
            labels,
            kenlm_model_path=hparams["ngram_lm_path"],  # .arpa or .bin
            alpha=0.5,  # Default by KenLM
            beta=1.0,  # Default by KenLM
        )

    # Trainer initialization
    asr_brain = ASR(
        modules=hparams["modules"],
        hparams=hparams,
        run_opts=run_opts,
        checkpointer=hparams["checkpointer"],
    )

    # Adding objects to trainer.
    asr_brain.tokenizer = label_encoder

    # Training
    asr_brain.fit(
        asr_brain.hparams.epoch_counter,
        train_data,
        valid_data,
        train_loader_kwargs=hparams["dataloader_options"],
        valid_loader_kwargs=hparams["test_dataloader_options"],
    )

    # Test
    for k in test_datasets.keys():  # keys are test_clean, test_other etc
        # One WER report file per test set.
        asr_brain.hparams.wer_file = os.path.join(
            hparams["output_folder"], "wer_{}.txt".format(k)
        )
        asr_brain.evaluate(
            test_datasets[k], test_loader_kwargs=hparams["test_dataloader_options"]
        )
399
+
wavlm-large/README.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ tags:
5
+ - speech
6
+ inference: false
7
+ ---
8
+
9
+ # WavLM-Large
10
+
11
+ [Microsoft's WavLM](https://github.com/microsoft/unilm/tree/master/wavlm)
12
+
13
+ The large model pretrained on 16kHz sampled speech audio. When using the model, make sure that your speech input is also sampled at 16kHz.
14
+
15
+ **Note**: This model does not have a tokenizer as it was pretrained on audio alone. In order to use this model **for speech recognition**, a tokenizer should be created and the model should be fine-tuned on labeled text data. Check out [this blog](https://huggingface.co/blog/fine-tune-wav2vec2-english) for a more in-detail explanation of how to fine-tune the model.
16
+
17
+ The model was pre-trained on:
18
+
19
+ - 60,000 hours of [Libri-Light](https://arxiv.org/abs/1912.07875)
20
+ - 10,000 hours of [GigaSpeech](https://arxiv.org/abs/2106.06909)
21
+ - 24,000 hours of [VoxPopuli](https://arxiv.org/abs/2101.00390)
22
+
23
+ [Paper: WavLM: Large-Scale Self-Supervised Pre-Training for Full Stack Speech Processing](https://arxiv.org/abs/2110.13900)
24
+
25
+ Authors: Sanyuan Chen, Chengyi Wang, Zhengyang Chen, Yu Wu, Shujie Liu, Zhuo Chen, Jinyu Li, Naoyuki Kanda, Takuya Yoshioka, Xiong Xiao, Jian Wu, Long Zhou, Shuo Ren, Yanmin Qian, Yao Qian, Jian Wu, Michael Zeng, Furu Wei
26
+
27
+ **Abstract**
28
+ *Self-supervised learning (SSL) achieves great success in speech recognition, while limited exploration has been attempted for other speech processing tasks. As speech signal contains multi-faceted information including speaker identity, paralinguistics, spoken content, etc., learning universal representations for all speech tasks is challenging. In this paper, we propose a new pre-trained model, WavLM, to solve full-stack downstream speech tasks. WavLM is built based on the HuBERT framework, with an emphasis on both spoken content modeling and speaker identity preservation. We first equip the Transformer structure with gated relative position bias to improve its capability on recognition tasks. For better speaker discrimination, we propose an utterance mixing training strategy, where additional overlapped utterances are created unsupervisely and incorporated during model training. Lastly, we scale up the training dataset from 60k hours to 94k hours. WavLM Large achieves state-of-the-art performance on the SUPERB benchmark, and brings significant improvements for various speech processing tasks on their representative benchmarks.*
29
+
30
+ The original model can be found under https://github.com/microsoft/unilm/tree/master/wavlm.
31
+
32
+ # Usage
33
+
34
+ This is an English pre-trained speech model that has to be fine-tuned on a downstream task like speech recognition or audio classification before it can be
35
+ used in inference. The model was pre-trained in English and should therefore perform well only in English. The model has been shown to work well on the [SUPERB benchmark](https://superbbenchmark.org/).
36
+
37
+ **Note**: The model was pre-trained on phonemes rather than characters. This means that one should make sure that the input text is converted to a sequence
38
+ of phonemes before fine-tuning.
39
+
40
+ ## Speech Recognition
41
+
42
+ To fine-tune the model for speech recognition, see [the official speech recognition example](https://github.com/huggingface/transformers/tree/master/examples/pytorch/speech-recognition).
43
+
44
+ ## Speech Classification
45
+
46
+ To fine-tune the model for speech classification, see [the official audio classification example](https://github.com/huggingface/transformers/tree/master/examples/pytorch/audio-classification).
47
+
48
+ ## Speaker Verification
49
+
50
+ TODO
51
+
52
+ ## Speaker Diarization
53
+
54
+ TODO
55
+
56
+ # Contribution
57
+
58
+ The model was contributed by [cywang](https://huggingface.co/cywang) and [patrickvonplaten](https://huggingface.co/patrickvonplaten).
59
+
60
+ # License
61
+
62
+ The official license can be found [here](https://github.com/microsoft/UniSpeech/blob/main/LICENSE)
63
+
64
+ ![design](https://raw.githubusercontent.com/patrickvonplaten/scientific_images/master/wavlm.png)
wavlm-large/config.json ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./wavlm-large/",
3
+ "activation_dropout": 0.0,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "WavLMModel"
10
+ ],
11
+ "attention_dropout": 0.1,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": false,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "sum",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.1,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.1,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.1,
63
+ "mask_channel_length": 10,
64
+ "mask_channel_min_space": 1,
65
+ "mask_channel_other": 0.0,
66
+ "mask_channel_prob": 0.0,
67
+ "mask_channel_selection": "static",
68
+ "mask_feature_length": 10,
69
+ "mask_feature_min_masks": 0,
70
+ "mask_feature_prob": 0.0,
71
+ "mask_time_length": 10,
72
+ "mask_time_min_masks": 2,
73
+ "mask_time_min_space": 1,
74
+ "mask_time_other": 0.0,
75
+ "mask_time_prob": 0.075,
76
+ "mask_time_selection": "static",
77
+ "max_bucket_distance": 800,
78
+ "model_type": "wavlm",
79
+ "num_adapter_layers": 3,
80
+ "num_attention_heads": 16,
81
+ "num_buckets": 320,
82
+ "num_codevector_groups": 2,
83
+ "num_codevectors_per_group": 320,
84
+ "num_conv_pos_embedding_groups": 16,
85
+ "num_conv_pos_embeddings": 128,
86
+ "num_ctc_classes": 80,
87
+ "num_feat_extract_layers": 7,
88
+ "num_hidden_layers": 24,
89
+ "num_negatives": 100,
90
+ "output_hidden_size": 1024,
91
+ "pad_token_id": 0,
92
+ "proj_codevector_dim": 768,
93
+ "replace_prob": 0.5,
94
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
95
+ "torch_dtype": "float32",
96
+ "transformers_version": "4.15.0.dev0",
97
+ "use_weighted_layer_sum": false,
98
+ "vocab_size": 32
99
+ }
wavlm-large/preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "return_attention_mask": true,
8
+ "sampling_rate": 16000
9
+ }
wavlm-large/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdee460e529396ddb2f8c8e8ce0ad74cfb747b726bc6f612e666c7c1e1963c9d
3
+ size 1261990257