SalahZa committed on
Commit
8a51838
1 Parent(s): 0d1350d

added mixer model

results/non_semi_final_stac/ctc_lin.py ADDED
@@ -0,0 +1,756 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ import os
4
+ import sys
5
+ import torch
6
+ import logging
7
+ import speechbrain as sb
8
+ from speechbrain.utils.distributed import run_on_main
9
+ from hyperpyyaml import load_hyperpyyaml
10
+ from pathlib import Path
11
+ import torchaudio.transforms as T
12
+ from cv_train import ASRCV
13
+ import torchaudio
14
+ import numpy as np
15
+ import kenlm
16
+ from pyctcdecode import build_ctcdecoder
17
+ import re
18
+
19
+ # Commented out IPython magic to ensure Python compatibility.
20
+ # %cd /content/drive/MyDrive/tunisian_corpora/tunisian_without_wavlm
21
+ #hparams_file, run_opts, overrides = sb.parse_arguments(["/gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/hparams/train_semi.yaml"])
22
+ hparams_file, run_opts, overrides = sb.parse_arguments(["semi_supervised_test_tunisian.yaml"])
23
+
24
+ # If distributed_launch=True then
25
+ # create ddp_group with the right communication protocol
26
+ sb.utils.distributed.ddp_init_group(run_opts)
27
+
28
+ with open(hparams_file) as fin:
29
+ hparams = load_hyperpyyaml(fin, overrides)
30
+
31
+ # Create experiment directory
32
+ sb.create_experiment_directory(
33
+ experiment_directory=hparams["output_folder"],
34
+ hyperparams_to_save=hparams_file,
35
+ overrides=overrides,
36
+ )
37
+ # Dataset prep (parsing the CSV data manifests)
38
+
39
+ def dataio_prepare(hparams):
40
+ """This function prepares the datasets to be used in the brain class.
41
+ It also defines the data processing pipeline through user-defined functions."""
42
+
43
+ # 1. Define datasets
44
+ data_folder = hparams["data_folder"]
45
+
46
+ train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
47
+ csv_path=hparams["train_csv"], replacements={"data_root": data_folder},
48
+ )
49
+
50
+ if hparams["sorting"] == "ascending":
51
+ # we sort training data to speed up training and get better results.
52
+ train_data = train_data.filtered_sorted(
53
+ sort_key="duration",
54
+ key_max_value={"duration": hparams["avoid_if_longer_than"]},
55
+ )
56
+ # when sorting, do not shuffle in the dataloader, otherwise sorting is pointless
57
+ hparams["dataloader_options"]["shuffle"] = False
58
+
59
+ elif hparams["sorting"] == "descending":
60
+ train_data = train_data.filtered_sorted(
61
+ sort_key="duration",
62
+ reverse=True,
63
+ key_max_value={"duration": hparams["avoid_if_longer_than"]},
64
+ )
65
+ # when sorting, do not shuffle in the dataloader, otherwise sorting is pointless
66
+ hparams["dataloader_options"]["shuffle"] = False
67
+
68
+ elif hparams["sorting"] == "random":
69
+ pass
70
+
71
+ else:
72
+ raise NotImplementedError(
73
+ "sorting must be random, ascending or descending"
74
+ )
75
+
76
+ valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
77
+ csv_path=hparams["valid_csv"], replacements={"data_root": data_folder},
78
+ )
79
+ # We also sort the validation data so it is faster to validate
80
+ valid_data = valid_data.filtered_sorted(sort_key="duration")
81
+ test_datasets = {}
82
+ for csv_file in hparams["test_csv"]:
83
+ name = Path(csv_file).stem
84
+ test_datasets[name] = sb.dataio.dataset.DynamicItemDataset.from_csv(
85
+ csv_path=csv_file, replacements={"data_root": data_folder}
86
+ )
87
+ test_datasets[name] = test_datasets[name].filtered_sorted(
88
+ sort_key="duration"
89
+ )
90
+
91
+ datasets = [train_data, valid_data] + [i for k, i in test_datasets.items()]
92
+
93
+
94
+ # 2. Define audio pipeline:
95
+ @sb.utils.data_pipeline.takes("wav")
96
+ @sb.utils.data_pipeline.provides("sig")
97
+ def audio_pipeline(wav):
98
+ info = torchaudio.info(wav)
99
+ sig = sb.dataio.dataio.read_audio(wav)
100
+ if len(sig.shape)>1 :
101
+ sig = torch.mean(sig, dim=1)
102
+ resampled = torchaudio.transforms.Resample(
103
+ info.sample_rate, hparams["sample_rate"],
104
+ )(sig)
105
+ return resampled
106
+
107
+ sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
108
+ label_encoder = sb.dataio.encoder.CTCTextEncoder()
109
+
110
+ # 3. Define text pipeline:
111
+ @sb.utils.data_pipeline.takes("wrd")
112
+ @sb.utils.data_pipeline.provides(
113
+ "wrd", "char_list", "tokens_list", "tokens"
114
+ )
115
+ def text_pipeline(wrd):
116
+ yield wrd
117
+ char_list = list(wrd)
118
+ yield char_list
119
+ tokens_list = label_encoder.encode_sequence(char_list)
120
+ yield tokens_list
121
+ tokens = torch.LongTensor(tokens_list)
122
+ yield tokens
123
+
124
+ sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
125
+ lab_enc_file = os.path.join(hparams["save_folder"], "label_encoder.txt")
126
+ special_labels = {
127
+ "blank_label": hparams["blank_index"],
128
+ "unk_label": hparams["unk_index"]
129
+ }
130
+ label_encoder.load_or_create(
131
+ path=lab_enc_file,
132
+ from_didatasets=[train_data],
133
+ output_key="char_list",
134
+ special_labels=special_labels,
135
+ sequence_input=True,
136
+ )
137
+
138
+ # 4. Set output:
139
+ sb.dataio.dataset.set_output_keys(
140
+ datasets, ["id", "sig", "wrd", "char_list", "tokens"],
141
+ )
142
+ return train_data, valid_data,test_datasets, label_encoder
143
+
144
+ class ASR(sb.core.Brain):
145
+ def compute_forward(self, batch, stage):
146
+ """Forward computations from the waveform batches to the output probabilities."""
147
+
148
+ batch = batch.to(self.device)
149
+ wavs, wav_lens = batch.sig
150
+ wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
151
+
152
+ if stage == sb.Stage.TRAIN:
153
+ if hasattr(self.hparams, "augmentation"):
154
+ wavs = self.hparams.augmentation(wavs, wav_lens)
155
+
156
+ # Forward pass
157
+ feats = self.modules.wav2vec2(wavs, wav_lens)
158
+ x = self.modules.enc(feats)
159
+ logits = self.modules.ctc_lin(x)
160
+ p_ctc = self.hparams.log_softmax(logits)
161
+
162
+ return p_ctc, wav_lens
163
+
164
+ def custom_encode(self,wavs,wav_lens) :
165
+ wavs = wavs.to(self.device)
166
+ if wav_lens is not None: wav_lens = wav_lens.to(self.device)
167
+
168
+ feats = self.modules.wav2vec2(wavs, wav_lens)
169
+ x = self.modules.enc(feats)
170
+ logits = self.modules.ctc_lin(x)
171
+ p_ctc = self.hparams.log_softmax(logits)
172
+
173
+ return feats,p_ctc
174
+
175
+
176
+
177
+ def compute_objectives(self, predictions, batch, stage):
178
+ """Computes the loss (CTC) given predictions and targets."""
179
+
180
+ p_ctc, wav_lens = predictions
181
+
182
+ ids = batch.id
183
+ tokens, tokens_lens = batch.tokens
184
+
185
+ loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
186
+
187
+ if stage != sb.Stage.TRAIN:
188
+ predicted_tokens = sb.decoders.ctc_greedy_decode(
189
+ p_ctc, wav_lens, blank_id=self.hparams.blank_index
190
+ )
191
+ # Decode token terms to words
192
+ if self.hparams.use_language_modelling:
193
+ predicted_words = []
194
+ for logs in p_ctc:
195
+ text = decoder.decode(logs.detach().cpu().numpy())
196
+ predicted_words.append(text.split(" "))
197
+ else:
198
+ predicted_words = [
199
+ "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
200
+ for utt_seq in predicted_tokens
201
+ ]
202
+ # Convert indices to words
203
+ target_words = [wrd.split(" ") for wrd in batch.wrd]
204
+
205
+ self.wer_metric.append(ids, predicted_words, target_words)
206
+ self.cer_metric.append(ids, predicted_words, target_words)
207
+
208
+ return loss
209
+
210
+ def fit_batch(self, batch):
211
+ """Train the parameters given a single batch in input"""
212
+ should_step = self.step % self.grad_accumulation_factor == 0
213
+ # Managing automatic mixed precision
214
+ # TOFIX: CTC fine-tuning currently is unstable
215
+ # This is certainly due to CTC being done in fp16 instead of fp32
216
+ if self.auto_mix_prec:
217
+ with torch.cuda.amp.autocast():
218
+ with self.no_sync():
219
+ outputs = self.compute_forward(batch, sb.Stage.TRAIN)
220
+ loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
221
+ with self.no_sync(not should_step):
222
+ self.scaler.scale(
223
+ loss / self.grad_accumulation_factor
224
+ ).backward()
225
+ if should_step:
226
+
227
+ if not self.hparams.wav2vec2.freeze:
228
+ self.scaler.unscale_(self.wav2vec_optimizer)
229
+ self.scaler.unscale_(self.model_optimizer)
230
+ if self.check_gradients(loss):
231
+ if not self.hparams.wav2vec2.freeze:
232
+ if self.optimizer_step >= self.hparams.warmup_steps:
233
+ self.scaler.step(self.wav2vec_optimizer)
234
+ self.scaler.step(self.model_optimizer)
235
+ self.scaler.update()
236
+ self.zero_grad()
237
+ self.optimizer_step += 1
238
+ else:
239
+ # This is mandatory because HF models have a weird behavior with DDP
240
+ # on the forward pass
241
+ with self.no_sync():
242
+ outputs = self.compute_forward(batch, sb.Stage.TRAIN)
243
+
244
+ loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
245
+
246
+ with self.no_sync(not should_step):
247
+ (loss / self.grad_accumulation_factor).backward()
248
+ if should_step:
249
+ if self.check_gradients(loss):
250
+ if not self.hparams.wav2vec2.freeze:
251
+ if self.optimizer_step >= self.hparams.warmup_steps:
252
+ self.wav2vec_optimizer.step()
253
+ self.model_optimizer.step()
254
+ self.zero_grad()
255
+ self.optimizer_step += 1
256
+
257
+ self.on_fit_batch_end(batch, outputs, loss, should_step)
258
+ return loss.detach().cpu()
259
+
260
+ def evaluate_batch(self, batch, stage):
261
+ """Computations needed for validation/test batches"""
262
+ predictions = self.compute_forward(batch, stage=stage)
263
+ with torch.no_grad():
264
+ loss = self.compute_objectives(predictions, batch, stage=stage)
265
+ return loss.detach()
266
+
267
+ def on_stage_start(self, stage, epoch):
268
+ """Gets called at the beginning of each epoch"""
269
+ if stage != sb.Stage.TRAIN:
270
+ self.cer_metric = self.hparams.cer_computer()
271
+ self.wer_metric = self.hparams.error_rate_computer()
272
+
273
+ def on_stage_end(self, stage, stage_loss, epoch):
274
+ """Gets called at the end of an epoch."""
275
+ # Compute/store important stats
276
+ stage_stats = {"loss": stage_loss}
277
+ if stage == sb.Stage.TRAIN:
278
+ self.train_stats = stage_stats
279
+ else:
280
+ stage_stats["CER"] = self.cer_metric.summarize("error_rate")
281
+ stage_stats["WER"] = self.wer_metric.summarize("error_rate")
282
+
283
+ # Perform end-of-iteration things, like annealing, logging, etc.
284
+ if stage == sb.Stage.VALID:
285
+ old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
286
+ stage_stats["loss"]
287
+ )
288
+ old_lr_wav2vec, new_lr_wav2vec = self.hparams.lr_annealing_wav2vec(
289
+ stage_stats["loss"]
290
+ )
291
+ sb.nnet.schedulers.update_learning_rate(
292
+ self.model_optimizer, new_lr_model
293
+ )
294
+ if not self.hparams.wav2vec2.freeze:
295
+ sb.nnet.schedulers.update_learning_rate(
296
+ self.wav2vec_optimizer, new_lr_wav2vec
297
+ )
298
+ self.hparams.train_logger.log_stats(
299
+ stats_meta={
300
+ "epoch": epoch,
301
+ "lr_model": old_lr_model,
302
+ "lr_wav2vec": old_lr_wav2vec,
303
+ },
304
+ train_stats=self.train_stats,
305
+ valid_stats=stage_stats,
306
+ )
307
+ self.checkpointer.save_and_keep_only(
308
+ meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
309
+ )
310
+ elif stage == sb.Stage.TEST:
311
+ self.hparams.train_logger.log_stats(
312
+ stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
313
+ test_stats=stage_stats,
314
+ )
315
+ with open(self.hparams.wer_file, "w") as w:
316
+ self.wer_metric.write_stats(w)
317
+
318
+ def init_optimizers(self):
319
+ "Initializes the wav2vec2 optimizer and model optimizer"
320
+
321
+ # If the wav2vec encoder is unfrozen, we create the optimizer
322
+ if not self.hparams.wav2vec2.freeze:
323
+ self.wav2vec_optimizer = self.hparams.wav2vec_opt_class(
324
+ self.modules.wav2vec2.parameters()
325
+ )
326
+ if self.checkpointer is not None:
327
+ self.checkpointer.add_recoverable(
328
+ "wav2vec_opt", self.wav2vec_optimizer
329
+ )
330
+
331
+ self.model_optimizer = self.hparams.model_opt_class(
332
+ self.hparams.model.parameters()
333
+ )
334
+
335
+ if self.checkpointer is not None:
336
+ self.checkpointer.add_recoverable("modelopt", self.model_optimizer)
337
+
338
+ def zero_grad(self, set_to_none=False):
339
+ if not self.hparams.wav2vec2.freeze:
340
+ self.wav2vec_optimizer.zero_grad(set_to_none)
341
+ self.model_optimizer.zero_grad(set_to_none)
342
+
343
+
344
+ """
345
+ label_encoder = sb.dataio.encoder.CTCTextEncoder()
346
+
347
+ train_data, valid_data, test_datasets, label_encoder = dataio_prepare(
348
+ hparams
349
+ )
350
+
351
+
352
+ # We dynamically add the tokenizer to our brain class.
353
+ # NB: This tokenizer corresponds to the one used for the LM!!
354
+ """
355
+ from speechbrain.pretrained import EncoderASR,EncoderDecoderASR
356
+ french_asr_model = EncoderASR.from_hparams(source="speechbrain/asr-wav2vec2-commonvoice-fr", savedir="pretrained_models/asr-wav2vec2-commonvoice-fr").cuda()
357
+ #french_asr_model = "r"
358
+
359
+ cvhparams_file, cvrun_opts, cvoverrides = sb.parse_arguments(["en_cv.yaml"])
360
+ with open(cvhparams_file) as cvfin:
361
+ cvhparams = load_hyperpyyaml(cvfin, cvoverrides)
362
+ english_asr_model = ASRCV(
363
+ modules=cvhparams["modules"],
364
+ hparams=cvhparams,
365
+ run_opts=cvrun_opts,
366
+ checkpointer=cvhparams["checkpointer"],
367
+ )
368
+ english_asr_model.checkpointer.recover_if_possible()
369
+ asr_brain = ASR(
370
+ modules=hparams["modules"],
371
+ hparams=hparams,
372
+ run_opts=run_opts,
373
+ checkpointer=hparams["checkpointer"],
374
+ )
375
+ asr_brain.checkpointer.recover_if_possible()
376
+ asr_brain.modules.eval()
377
+ english_asr_model.modules.eval()
378
+ french_asr_model.mods.eval()
379
+ """
380
+ asr_brain.tokenizer = label_encoder
381
+
382
+ # Testing
383
+ real = True
384
+ if real :
385
+ for k in test_datasets.keys(): # keys are the test CSV stems, e.g. cs_test, stac_test
386
+ asr_brain.hparams.wer_file = os.path.join(
387
+ hparams["output_folder"], "wer_{}.txt".format(k)
388
+ )
389
+ asr_brain.evaluate(
390
+ test_datasets[k], test_loader_kwargs=hparams["dataloader_options"]
391
+ )
392
+ """
393
+
394
+ """
395
+ from torch.nn.utils.rnn import pad_sequence
396
+ def load_paths(wavs_path):
397
+ waveforms = []
398
+ for path in wavs_path :
399
+ waveform, _ = torchaudio.load(path)
400
+ waveforms.append(waveform.squeeze(0))
401
+ # pad every waveform with zeros to the length of the longest one
402
+ padded_arrays = pad_sequence(waveforms, batch_first=True)
403
+ return torch.tensor(padded_arrays)
404
+
405
+ waveform = load_paths(["/content/drive/MyDrive/tunisian_corpora/tunisian_without_wavlm/samples/Salah10.wav","/content/drive/MyDrive/tunisian_corpora/tunisian_without_wavlm/samples/Salah10.wav"])
406
+ embeddings, posteriogram = asr_brain.custom_encode(waveform,None)
407
+ print(embeddings.shape)
408
+ print(posteriogram.shape)
409
+ """
410
+
411
+ from speechbrain.pretrained import EncoderASR,EncoderDecoderASR
412
+ import torchaudio
413
+ import speechbrain as sb
414
+ import torch
415
+ from torch.nn.utils.rnn import pad_sequence
416
+ import torch
417
+ import speechbrain as sb
418
+ import numpy as np
419
+ import torch.optim as optim
420
+ import torch.nn as nn
421
+
422
+ # Commented out IPython magic to ensure Python compatibility.
423
+ # %ls
424
+
425
+ # UTILS FUNCTIONS
426
+ def get_size_dimensions(arr):
427
+ size_dimensions = []
428
+ while isinstance(arr, list):
429
+ size_dimensions.append(len(arr))
430
+ arr = arr[0]
431
+ return size_dimensions
432
+
433
+ def scale_array(batch,n):
434
+ scaled_batch = []
435
+
436
+ for array in batch:
437
+ if(n < len(array)): raise ValueError("Cannot scale Array down")
438
+
439
+ repeat = round(n/len(array))+1
440
+ scaled_length_array= []
441
+
442
+ for i in array:
443
+ for j in range(repeat) :
444
+ if(len(scaled_length_array) == n): break
445
+ scaled_length_array.append(i)
446
+
447
+ scaled_batch.append(scaled_length_array)
448
+
449
+ return torch.tensor(scaled_batch)
450
+
451
+
452
+ def load_paths(wavs_path):
453
+ waveforms = []
454
+ for path in wavs_path :
455
+ waveform, _ = torchaudio.load(path)
456
+ waveforms.append(waveform.squeeze(0))
457
+ # pad every waveform with zeros to the length of the longest one
458
+ padded_arrays = pad_sequence(waveforms, batch_first=True)
459
+ return torch.tensor(padded_arrays)
460
+
461
+
462
+
463
+ def word_to_vec(input_string):
464
+ mapping= {'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26, 'ا': 27, 'ب': 28, 'ت': 29, 'ث': 30, 'ج': 31, 'ح': 32, 'خ': 33, 'د': 34, 'ذ': 35, 'ر': 36, 'ز': 37, 'س': 38, 'ش': 39, 'ص': 40, 'ض': 41, 'ط': 42, 'ظ': 43, 'ع': 44, 'غ': 45, 'ف': 46, 'ق': 47, 'ك': 48, 'ل': 49, 'م': 50, 'ن': 51, 'ه': 52, 'و': 53, 'ي': 54,' ':55}
465
+
466
+ numbers = [mapping[word] for word in input_string if word in mapping]
467
+ return numbers
468
+
469
+ device = 'cuda'
470
+ verbose = 0
471
+ # FLOW-LEVEL FUNCTIONS
472
+ def merge_strategy(embeddings1, embeddings2, embeddings3,post1, post2,post3):
473
+
474
+
475
+ post1 = post1.to(device)
476
+ post2 = post2.to(device)
477
+ post3 = post3.to(device)
478
+ embeddings1 = embeddings1.to(device)
479
+ embeddings2 = embeddings2.to(device)
480
+ embeddings3 = embeddings3.to(device)
481
+
482
+ posteriograms_merged = torch.cat((post1,post2,post3),dim=2)
483
+ embeddings_merged = torch.cat((embeddings1,embeddings2,embeddings3),dim=2)
484
+
485
+ if(verbose !=0):
486
+ print('MERGED POST ',posteriograms_merged.shape)
487
+ print('MERGED emb ',embeddings_merged.shape)
488
+
489
+ return torch.cat((posteriograms_merged,embeddings_merged),dim=2).to(device)
490
+
491
+ def decode(model,wavs,wav_lens):
492
+
493
+ with torch.no_grad():
494
+ wav_lens = wav_lens.to(model.device)
495
+ encoder_out = model.encode_batch(wavs, wav_lens)
496
+ predictions = model.decoding_function(encoder_out, wav_lens)
497
+ return predictions
498
+
499
+ def middle_layer(batch, lens):
500
+
501
+ tn_embeddings, tn_posteriogram = asr_brain.custom_encode(batch,None)
502
+
503
+ fr_embeddings = french_asr_model.mods.encoder.wav2vec2(batch)
504
+ fr_posteriogram =french_asr_model.encode_batch(batch,lens)
505
+ en_embeddings = english_asr_model.modules.wav2vec2(batch, lens)
506
+ x = english_asr_model.modules.enc(en_embeddings)
507
+ en_posteriogram = english_asr_model.modules.ctc_lin(x)
508
+ #scores, en_posteriogram = english_asr_model.mods.decoder(en_embeddings ,lens)
509
+ if(verbose !=0):
510
+ print('[EMBEDDINGS] FR:',fr_embeddings.shape, "EN:",en_embeddings.shape, "TN:", tn_embeddings.shape)
511
+ print('[POSTERIOGRAM] FR:',fr_posteriogram.shape, "EN:",en_posteriogram.shape,"TN:",tn_posteriogram.shape)
512
+
513
+
514
+ bilangual_sample = merge_strategy(fr_embeddings,en_embeddings,tn_embeddings,fr_posteriogram,en_posteriogram,tn_posteriogram)
515
+ return bilangual_sample
516
+
517
+ class Mixer(sb.core.Brain):
518
+
519
+ def compute_forward(self, batch, stage):
520
+ """Forward computations from the waveform batches to the output probabilities."""
521
+ wavs, wav_lens = batch.sig
522
+ wavs, wav_lens = wavs.to(self.device), wav_lens.to(self.device)
523
+
524
+ if stage == sb.Stage.TRAIN:
525
+ if hasattr(self.hparams, "augmentation"):
526
+ wavs = self.hparams.augmentation(wavs, wav_lens)
527
+
528
+ multi_langual_feats = middle_layer(wavs, wav_lens)
529
+ multi_langual_feats= multi_langual_feats.to(device)
530
+ feats, _ = self.modules.enc(multi_langual_feats)
531
+ logits = self.modules.ctc_lin(feats)
532
+ p_ctc = self.hparams.log_softmax(logits)
533
+
534
+ if stage!= sb.Stage.TRAIN:
535
+ p_tokens = sb.decoders.ctc_greedy_decode(
536
+ p_ctc, wav_lens, blank_id=self.hparams.blank_index
537
+ )
538
+ else :
539
+ p_tokens = None
540
+ return p_ctc, wav_lens, p_tokens
541
+
542
+ def compute_objectives(self, predictions, batch, stage):
543
+ """Computes the loss (CTC) given predictions and targets."""
544
+
545
+ p_ctc, wav_lens , predicted_tokens= predictions
546
+
547
+ ids = batch.id
548
+ tokens, tokens_lens = batch.tokens
549
+
550
+ loss = self.hparams.ctc_cost(p_ctc, tokens, wav_lens, tokens_lens)
551
+
552
+
553
+ if stage == sb.Stage.VALID:
554
+ predicted_words = [
555
+ "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
556
+ for utt_seq in predicted_tokens
557
+ ]
558
+ target_words = [wrd.split(" ") for wrd in batch.wrd]
559
+ self.wer_metric.append(ids, predicted_words, target_words)
560
+ self.cer_metric.append(ids, predicted_words, target_words)
561
+ if stage ==sb.Stage.TEST :
562
+ if self.hparams.language_modelling:
563
+ predicted_words = []
564
+ for logs in p_ctc:
565
+ text = decoder.decode(logs.detach().cpu().numpy())
566
+ predicted_words.append(text.split(" "))
567
+ else :
568
+ predicted_words = [
569
+ "".join(self.tokenizer.decode_ndim(utt_seq)).split(" ")
570
+ for utt_seq in predicted_tokens
571
+ ]
572
+
573
+ target_words = [wrd.split(" ") for wrd in batch.wrd]
574
+ self.wer_metric.append(ids, predicted_words, target_words)
575
+ self.cer_metric.append(ids, predicted_words, target_words)
576
+
577
+ return loss
578
+
579
+ def fit_batch(self, batch):
580
+ """Train the parameters given a single batch in input"""
581
+ should_step = self.step % self.grad_accumulation_factor == 0
582
+ # Managing automatic mixed precision
583
+ # TOFIX: CTC fine-tuning currently is unstable
584
+ # This is certainly due to CTC being done in fp16 instead of fp32
585
+ if self.auto_mix_prec:
586
+ with torch.cuda.amp.autocast():
587
+ with self.no_sync():
588
+ outputs = self.compute_forward(batch, sb.Stage.TRAIN)
589
+ loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
590
+ with self.no_sync(not should_step):
591
+ self.scaler.scale(
592
+ loss / self.grad_accumulation_factor
593
+ ).backward()
594
+ if should_step:
595
+
596
+
597
+ self.scaler.unscale_(self.model_optimizer)
598
+ if self.check_gradients(loss):
599
+ self.scaler.step(self.model_optimizer)
600
+ self.scaler.update()
601
+ self.zero_grad()
602
+ self.optimizer_step += 1
603
+ else:
604
+ # This is mandatory because HF models have a weird behavior with DDP
605
+ # on the forward pass
606
+ with self.no_sync():
607
+ outputs = self.compute_forward(batch, sb.Stage.TRAIN)
608
+
609
+ loss = self.compute_objectives(outputs, batch, sb.Stage.TRAIN)
610
+
611
+ with self.no_sync(not should_step):
612
+ (loss / self.grad_accumulation_factor).backward()
613
+ if should_step:
614
+ if self.check_gradients(loss):
615
+ self.model_optimizer.step()
616
+ self.zero_grad()
617
+ self.optimizer_step += 1
618
+
619
+ self.on_fit_batch_end(batch, outputs, loss, should_step)
620
+ return loss.detach().cpu()
621
+
622
+ def evaluate_batch(self, batch, stage):
623
+ """Computations needed for validation/test batches"""
624
+ predictions = self.compute_forward(batch, stage=stage)
625
+ with torch.no_grad():
626
+ loss = self.compute_objectives(predictions, batch, stage=stage)
627
+ return loss.detach()
628
+
629
+ def on_stage_start(self, stage, epoch):
630
+ """Gets called at the beginning of each epoch"""
631
+ if stage != sb.Stage.TRAIN:
632
+ self.cer_metric = self.hparams.cer_computer()
633
+ self.wer_metric = self.hparams.error_rate_computer()
634
+
635
+ def on_stage_end(self, stage, stage_loss, epoch):
636
+ """Gets called at the end of an epoch."""
637
+ # Compute/store important stats
638
+ stage_stats = {"loss": stage_loss}
639
+ if stage == sb.Stage.TRAIN:
640
+ self.train_stats = stage_stats
641
+ else:
642
+ stage_stats["CER"] = self.cer_metric.summarize("error_rate")
643
+ stage_stats["WER"] = self.wer_metric.summarize("error_rate")
644
+
645
+ # Perform end-of-iteration things, like annealing, logging, etc.
646
+ if stage == sb.Stage.VALID:
647
+ old_lr_model, new_lr_model = self.hparams.lr_annealing_model(
648
+ stage_stats["loss"]
649
+ )
650
+ sb.nnet.schedulers.update_learning_rate(
651
+ self.model_optimizer, new_lr_model
652
+ )
653
+ self.hparams.train_logger.log_stats(
654
+ stats_meta={
655
+ "epoch": epoch,
656
+ "lr_model": old_lr_model,
657
+ },
658
+ train_stats=self.train_stats,
659
+ valid_stats=stage_stats,
660
+ )
661
+ self.checkpointer.save_and_keep_only(
662
+ meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
663
+ )
664
+ elif stage == sb.Stage.TEST:
665
+ self.hparams.train_logger.log_stats(
666
+ stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
667
+ test_stats=stage_stats,
668
+ )
669
+ with open(self.hparams.wer_file, "w") as w:
670
+ self.wer_metric.write_stats(w)
671
+
672
+ def init_optimizers(self):
673
+
674
+ self.model_optimizer = self.hparams.model_opt_class(
675
+ self.hparams.model.parameters()
676
+ )
677
+
678
+ if self.checkpointer is not None:
679
+ self.checkpointer.add_recoverable("modelopt", self.model_optimizer)
680
+
681
+ def zero_grad(self, set_to_none=False):
682
+
683
+ self.model_optimizer.zero_grad(set_to_none)
684
+
685
+
686
+ hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
687
+
688
+ # If distributed_launch=True then
689
+ # create ddp_group with the right communication protocol
690
+ sb.utils.distributed.ddp_init_group(run_opts)
691
+
692
+ with open(hparams_file) as fin:
693
+ hparams = load_hyperpyyaml(fin, overrides)
694
+
695
+ # Create experiment directory
696
+ sb.create_experiment_directory(
697
+ experiment_directory=hparams["output_folder"],
698
+ hyperparams_to_save=hparams_file,
699
+ overrides=overrides,
700
+ )
701
+ def read_labels_file(labels_file):
702
+ with open(labels_file, "r",encoding="utf-8") as lf:
703
+ lines = lf.read().splitlines()
704
+ division = "==="
705
+ numbers = {}
706
+ for line in lines :
707
+ if division in line :
708
+ break
709
+ string, number = line.split("=>")
710
+ number = int(number)
711
+ string = string[1:-2]
712
+ numbers[number] = string
713
+ return [numbers[x] for x in range(len(numbers))]
714
+ train_data, valid_data, test_datasets, label_encoder = dataio_prepare(
715
+ hparams
716
+ )
717
+
718
+
719
+ labels = read_labels_file(os.path.join(hparams["save_folder"], "label_encoder.txt"))
720
+ labels = [""] + labels[1:-1] + ["1"]
721
+ if hparams["language_modelling"]:
722
+ decoder = build_ctcdecoder(
723
+ labels,
724
+ kenlm_model_path=hparams["ngram_lm_path"], # either .arpa or .bin file
725
+ alpha=0.5, # tuned on a val set
726
+ beta=1, # tuned on a val set
727
+ )
728
+
729
+
730
+
731
+
732
+ mixer = Mixer(
733
+ modules=hparams["modules"],
734
+ hparams=hparams,
735
+ run_opts=run_opts,
736
+ checkpointer=hparams["checkpointer"],
737
+ )
738
+ mixer.tokenizer = label_encoder
739
+
740
+
741
+ mixer.fit(
742
+ mixer.hparams.epoch_counter,
743
+ train_data,
744
+ valid_data,
745
+ train_loader_kwargs=hparams["dataloader_options"],
746
+ valid_loader_kwargs=hparams["test_dataloader_options"],
747
+ )
748
+ print(test_datasets.keys())
749
+ for k in test_datasets.keys(): # keys are the test CSV stems, e.g. cs_test, stac_test
750
+ mixer.hparams.wer_file = os.path.join(
751
+ hparams["output_folder"], "wer_{}.txt".format(k)
752
+ )
753
+ mixer.evaluate(
754
+ test_datasets[k], test_loader_kwargs=hparams["test_dataloader_options"]
755
+ )
756
+
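A minimal usage sketch (not part of the commit) for the embedding/posterior extraction path above. It assumes the script has already built asr_brain, and the wav paths are placeholders; it mirrors the commented-out load_paths / custom_encode block in the file.

import torch
import torchaudio
from torch.nn.utils.rnn import pad_sequence

def load_wavs(wav_paths):
    # load each file, drop the channel dimension, then zero-pad to the longest waveform
    waveforms = [torchaudio.load(p)[0].squeeze(0) for p in wav_paths]
    return pad_sequence(waveforms, batch_first=True)

batch = load_wavs(["samples/a.wav", "samples/b.wav"])  # hypothetical paths
embeddings, posteriors = asr_brain.custom_encode(batch, None)
print(embeddings.shape, posteriors.shape)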
results/non_semi_final_stac/hyperparams.yaml ADDED
@@ -0,0 +1,144 @@
1
+ # Generated 2023-09-08 from:
2
+ # /gpfsssd/scratch/rech/nou/uzn19yk/switched_data/stac.yaml
3
+ # yamllint disable
4
+ # Generated 2023-08-03 from:
5
+ # /home/salah/new_tunisian_model/hparams/train_tunisian_withwavlm.yaml
6
+ # yamllint disable
7
+ # ################################
8
+ # Model: wav2vec2 + DNN + CTC
9
+ # Augmentation: SpecAugment
10
+ # Authors: Titouan Parcollet 2021
11
+ # ################################
12
+
13
+ seed: 1994
14
+ __set_seed: !!python/object/apply:torch.manual_seed [1234]
15
+ output_folder: results/non_semi_final_stac
16
+ wer_file: results/non_semi_final_stac/wer.txt
17
+ save_folder: results/non_semi_final_stac/save
18
+ train_log: results/non_semi_final_stac/train_log.txt
19
+
20
+
21
+
22
+ # Data files
23
+ data_folder: junk # e.g, /localscratch/cv-corpus-5.1-2020-06-22/fr
24
+ train_tsv_file: junk/train.tsv # Standard CommonVoice .tsv files
25
+ dev_tsv_file: junk/dev.tsv # Standard CommonVoice .tsv files
26
+ test_tsv_file: junk/test.tsv # Standard CommonVoice .tsv files
27
+ accented_letters: true
28
+
29
+ csv_folder: /gpfsscratch/rech/nou/uzn19yk/switched_data/extended_clean/
30
+ train_csv: /gpfsscratch/rech/nou/uzn19yk/switched_data/extended_clean//train.csv
31
+ valid_csv: /gpfsscratch/rech/nou/uzn19yk/switched_data/extended_clean//dev.csv
32
+ test_csv:
33
+ - all_tests/cs_test.csv
34
+ - all_tests/stac_test.csv
35
+
36
+ # We remove utterance slonger than 10s in the train/dev/test sets as
37
+ # longer sentences certainly correspond to "open microphones".
38
+ avoid_if_longer_than: 13.0
39
+ avoid_if_shorter_than: 0.5
40
+
41
+ # Training parameters
42
+ number_of_epochs: 20
43
+ lr: 0.0002
44
+ lr_weights: 0.01
45
+ sorting: ascending
46
+ auto_mix_prec: false
47
+ sample_rate: 16000
48
+ language_modelling: true
49
+ ngram_lm_path:
50
+ /gpfsstore/rech/nou/uzn19yk/switched_code_tunisian/train/tunisian_asr/arpas/pluslanguages_everything.arpa
51
+
52
+ # With data_parallel batch_size is split into N jobs
53
+ # With DDP batch_size is multiplied by N jobs
54
+ # Must be 3 per GPU to fit 32GB of VRAM
55
+ batch_size: 3
56
+ test_batch_size: 4
57
+
58
+ # Dataloader options
59
+ dataloader_options:
60
+ batch_size: 3
61
+ num_workers: 6
62
+
63
+ test_dataloader_options:
64
+ batch_size: 4
65
+ num_workers: 6
66
+
67
+ # Model parameters
68
+ activation: !name:torch.nn.Sigmoid
69
+ dnn_layers: 1
70
+ dnn_neurons: 768
71
+ freeze_encoder: true
72
+
73
+ # Outputs
74
+ output_neurons: 76 # BPE size, index(blank/eos/bos) = 0
75
+
76
+ # Functions and classes
77
+ #
78
+ epoch_counter: &id006 !new:speechbrain.utils.epoch_loop.EpochCounter
79
+ limit: 20
80
+
81
+ encoder_dim: 3217
82
+ enc: &id001 !new:speechbrain.nnet.RNN.LSTM
83
+ input_shape: [null, null, 3217]
84
+ num_layers: 2
85
+ bidirectional: true
86
+ dropout: 0.2
87
+ hidden_size: 1024
88
+
89
+ ctc_lin: &id002 !new:speechbrain.nnet.linear.Linear
90
+
91
+ input_size: 2048
92
+ n_neurons: 76
93
+
94
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
95
+ apply_log: true
96
+
97
+ ctc_cost: !name:speechbrain.nnet.losses.ctc_loss
98
+ blank_index: 0
99
+
100
+ modules:
101
+ enc: *id001
102
+ ctc_lin: *id002
103
+ model: &id003 !new:torch.nn.ModuleList
104
+ - [*id001, *id002]
105
+ model_opt_class: !name:torch.optim.Adam
106
+ lr: 0.0002
107
+
108
+ weights_opt_class: !name:torch.optim.Adam
109
+ lr: 0.01
110
+
111
+ lr_annealing_model: &id004 !new:speechbrain.nnet.schedulers.NewBobScheduler
112
+ initial_value: 0.0002
113
+ improvement_threshold: 0.0025
114
+ annealing_factor: 0.8
115
+ patient: 0
116
+
117
+ lr_annealing_weights: &id005 !new:speechbrain.nnet.schedulers.NewBobScheduler
118
+ initial_value: 0.01
119
+ improvement_threshold: 0.0025
120
+ annealing_factor: 0.9
121
+ patient: 0
122
+
123
+ label_encoder: &id007 !new:speechbrain.dataio.encoder.CTCTextEncoder
124
+
125
+
126
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
127
+ checkpoints_dir: results/non_semi_final_stac/save
128
+ recoverables:
129
+ model: *id003
130
+ scheduler_model: *id004
131
+ scheduler_encoder: *id005
132
+ counter: *id006
133
+ tokenizer: *id007
134
+ blank_index: 0
135
+ unk_index: 1
136
+
137
+
138
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
139
+ save_file: results/non_semi_final_stac/train_log.txt
140
+
141
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
142
+
143
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
144
+ split_tokens: true
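A small shape-check sketch (an assumption, not part of the commit) showing how the sizes in this YAML fit together: the mixer's BiLSTM takes the 3217-dimensional merged features, its bidirectional hidden size of 1024 yields 2048 features per frame, and ctc_lin projects those to the 76 output neurons used for CTC.

import torch
from hyperpyyaml import load_hyperpyyaml

with open("results/non_semi_final_stac/hyperparams.yaml") as f:
    hp = load_hyperpyyaml(f)

x = torch.rand(2, 50, hp["encoder_dim"])   # dummy batch: (batch=2, frames=50, 3217)
feats, _ = hp["enc"](x)                    # BiLSTM output: (2, 50, 2048)
logits = hp["ctc_lin"](feats)              # CTC logits: (2, 50, 76)
log_probs = hp["log_softmax"](logits)
print(log_probs.shape)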
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
1
+ # yamllint disable
2
+ WER: 51.292116454039906
3
+ end-of-epoch: true
4
+ unixtime: 1694130018.9642384
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/brain.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5c026fe6fa51700406bd476e131950c797b0b3bacb3daae0854e85689bb4cf9
3
+ size 50
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/counter.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ca38f748a1d6eaf726b8a42fb575c3c71f1864a8143301782de13da2d9202b
3
+ size 2
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e1edcac43af8cea1439d222314af06354ae31da6a3d90b8cc6bcebc5c8e397
3
+ size 4
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/model.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da683a8efa5709a06af9b258452c243da841780a0a7942c196c472a3e21e5010
3
+ size 240389017
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/modelopt.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:416feb314443cf839f4425fc382e555dec90e3dea26fa52b75e4ac1b702c5078
3
+ size 480787579
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/scheduler_encoder.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e2efd50f0cf28a080e2625fdd8a1852c669841537cdc0a57fce60bc6c1eec11
3
+ size 515
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/scheduler_model.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cec54cc9236fa7aa965b397675d24299b973675cc0c6345de038fc70e51629ab
3
+ size 703
results/non_semi_final_stac/save/CKPT+2023-09-08+01-40-18+00/tokenizer.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21080a140faeb4f39fad188aaf081914ec782be9c4320d6415e8822709e18017
3
+ size 39
results/non_semi_final_stac/save/label_encoder.txt ADDED
@@ -0,0 +1,80 @@
1
+ 'و' => 74
2
+ 'ي' => 1
3
+ 'ن' => 2
4
+ ' ' => 3
5
+ 'م' => 4
6
+ 'ش' => 5
7
+ 'ل' => 6
8
+ 'س' => 7
9
+ 'ت' => 8
10
+ 'ا' => 9
11
+ 'د' => 10
12
+ 'ر' => 11
13
+ 'ى' => 12
14
+ 'ب' => 13
15
+ 'ح' => 14
16
+ 'ط' => 15
17
+ 'ع' => 16
18
+ 'ك' => 17
19
+ 'ف' => 18
20
+ 'ق' => 19
21
+ 'ذ' => 20
22
+ 'ث' => 21
23
+ 'ج' => 22
24
+ 'ة' => 23
25
+ 'غ' => 24
26
+ 'o' => 25
27
+ 'k' => 26
28
+ 'b' => 27
29
+ 'n' => 28
30
+ 'خ' => 29
31
+ 'ه' => 30
32
+ 'v' => 31
33
+ 'i' => 32
34
+ 'l' => 33
35
+ 'à' => 34
36
+ 'ص' => 35
37
+ 'ض' => 36
38
+ 'a' => 37
39
+ 'u' => 38
40
+ 't' => 39
41
+ 'm' => 40
42
+ 'q' => 41
43
+ 'e' => 42
44
+ 'd' => 43
45
+ 'c' => 44
46
+ 'p' => 45
47
+ 'r' => 46
48
+ 'أ' => 47
49
+ 'إ' => 48
50
+ 's' => 49
51
+ 'j' => 50
52
+ 'ز' => 51
53
+ 'ء' => 52
54
+ 'h' => 53
55
+ 'f' => 54
56
+ 'آ' => 55
57
+ 'ئ' => 56
58
+ 'ؤ' => 57
59
+ 'ظ' => 58
60
+ 'y' => 59
61
+ 'é' => 60
62
+ "'" => 61
63
+ 'z' => 62
64
+ 'x' => 63
65
+ 'w' => 64
66
+ 'g' => 65
67
+ 'è' => 66
68
+ 'û' => 67
69
+ 'ç' => 68
70
+ 'ê' => 69
71
+ 'ô' => 70
72
+ 'ù' => 71
73
+ 'î' => 72
74
+ 'â' => 73
75
+ '<blank>' => 0
76
+ 1 => 75
77
+ ================
78
+ 'starting_index' => 0
79
+ 'unk_label' => 1
80
+ 'blank_label' => '<blank>'
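For reference, a short sketch (an assumption about intended use, mirroring read_labels_file in ctc_lin.py above) of how this file becomes the decoder vocabulary for pyctcdecode: each "'char' => index" line is read until the "===" separator, the characters are ordered by index, and the blank/unk entries are remapped before calling build_ctcdecoder.

labels = read_labels_file("results/non_semi_final_stac/save/label_encoder.txt")
# index 0 is the CTC blank and the final index is the unk label; the script swaps in ""
# for the blank (what pyctcdecode expects) and keeps "1" as the final placeholder:
labels = [""] + labels[1:-1] + ["1"]
decoder = build_ctcdecoder(labels, kenlm_model_path=hparams["ngram_lm_path"], alpha=0.5, beta=1)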