jcmc committed on
Commit
9b333dd
1 Parent(s): 2a1c45f

Upload speechbrain IC model

Files changed (48)
  1. .gitattributes +9 -0
  2. fluent-speech-commands/README.md +58 -0
  3. fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml +32 -0
  4. fluent-speech-commands/Tokenizer/prepare.py +1 -0
  5. fluent-speech-commands/Tokenizer/train.py +53 -0
  6. fluent-speech-commands/direct/__pycache__/prepare.cpython-37.pyc +0 -0
  7. fluent-speech-commands/direct/hparams/train.yaml +204 -0
  8. fluent-speech-commands/direct/prepare.py +1 -0
  9. fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/asr.ckpt +1 -0
  10. fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/hyperparams.yaml +1 -0
  11. fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/lm.ckpt +1 -0
  12. fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/normalizer.ckpt +1 -0
  13. fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/tokenizer.ckpt +1 -0
  14. fluent-speech-commands/direct/results/BPE51/112011/env.log +434 -0
  15. fluent-speech-commands/direct/results/BPE51/112011/hyperparams.yaml +200 -0
  16. fluent-speech-commands/direct/results/BPE51/112011/log.txt +454 -0
  17. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/CKPT.yaml +4 -0
  18. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/brain.ckpt +2 -0
  19. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/counter.ckpt +1 -0
  20. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/dataloader-TRAIN.ckpt +1 -0
  21. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/model.ckpt +3 -0
  22. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/optimizer.ckpt +3 -0
  23. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/scheduler.ckpt +0 -0
  24. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/CKPT.yaml +4 -0
  25. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/brain.ckpt +2 -0
  26. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/counter.ckpt +1 -0
  27. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/dataloader-TRAIN.ckpt +1 -0
  28. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/model.ckpt +3 -0
  29. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/optimizer.ckpt +3 -0
  30. fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/scheduler.ckpt +0 -0
  31. fluent-speech-commands/direct/results/BPE51/112011/save/FSC_tokenizer/tokenizer.ckpt +0 -0
  32. fluent-speech-commands/direct/results/BPE51/112011/test.csv +0 -0
  33. fluent-speech-commands/direct/results/BPE51/112011/train.csv +0 -0
  34. fluent-speech-commands/direct/results/BPE51/112011/train.py +347 -0
  35. fluent-speech-commands/direct/results/BPE51/112011/train_log.txt +5 -0
  36. fluent-speech-commands/direct/results/BPE51/112011/valid.csv +0 -0
  37. fluent-speech-commands/direct/results/BPE51/112011/wer_test.txt +0 -0
  38. fluent-speech-commands/direct/train.py +347 -0
  39. fluent-speech-commands/extra_requirements.txt +1 -0
  40. fluent-speech-commands/prepare.py +103 -0
  41. pretrained_models/EncoderDecoderASR--5348169877143464308/asr.ckpt +1 -0
  42. pretrained_models/EncoderDecoderASR--5348169877143464308/hyperparams.yaml +1 -0
  43. pretrained_models/EncoderDecoderASR--5348169877143464308/lm.ckpt +1 -0
  44. pretrained_models/EncoderDecoderASR--5348169877143464308/normalizer.ckpt +1 -0
  45. pretrained_models/EncoderDecoderASR--5348169877143464308/tokenizer.ckpt +1 -0
  46. pretrained_models/EndToEndSLU-7990244956535603082/hyperparams.yaml +1 -0
  47. pretrained_models/EndToEndSLU-7990244956535603082/model.ckpt +1 -0
  48. pretrained_models/EndToEndSLU-7990244956535603082/tokenizer.ckpt +1 -0
.gitattributes CHANGED
@@ -25,3 +25,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
25
  *.zip filter=lfs diff=lfs merge=lfs -text
26
  *.zstandard filter=lfs diff=lfs merge=lfs -text
27
  *tfevents* filter=lfs diff=lfs merge=lfs -text
28
+ fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/asr.ckpt filter=lfs diff=lfs merge=lfs -text
29
+ fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/lm.ckpt filter=lfs diff=lfs merge=lfs -text
30
+ fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/model.ckpt filter=lfs diff=lfs merge=lfs -text
31
+ fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/optimizer.ckpt filter=lfs diff=lfs merge=lfs -text
32
+ fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/model.ckpt filter=lfs diff=lfs merge=lfs -text
33
+ fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/optimizer.ckpt filter=lfs diff=lfs merge=lfs -text
34
+ pretrained_models/EncoderDecoderASR--5348169877143464308/asr.ckpt filter=lfs diff=lfs merge=lfs -text
35
+ pretrained_models/EncoderDecoderASR--5348169877143464308/lm.ckpt filter=lfs diff=lfs merge=lfs -text
36
+ pretrained_models/EndToEndSLU-7990244956535603082/model.ckpt filter=lfs diff=lfs merge=lfs -text
fluent-speech-commands/README.md ADDED
@@ -0,0 +1,58 @@
1
+ # SLU recipes for Fluent Speech Commands
2
+ This folder contains recipes for spoken language understanding (SLU) with [Fluent Speech Commands](https://fluent.ai/research/fluent-speech-commands/).
3
+
4
+ ### Tokenizer recipe
5
+ (You don't need to run this, because the other recipes download a tokenizer automatically; run it only if you want to train a new tokenizer for Fluent Speech Commands.)
6
+
7
+ Run this to train the tokenizer:
8
+
9
+ ```
10
+ cd Tokenizer
11
+ python train.py hparams/tokenizer_bpe51.yaml
12
+ ```
13
+
14
+ ### Direct recipe
15
+ The "direct" recipe maps the input speech directly to semantics using a seq2seq model.
16
+ The encoder is pre-trained using the LibriSpeech seq2seq recipe.
17
+
18
+ ```
19
+ cd direct
20
+ python train.py hparams/train.yaml
21
+ ```
22
+
23
+ # Results
24
+
25
+ | Release | hyperparams file | Test Acc | Model link | GPUs |
26
+ |:-------------:|:---------------------------:| -----:| -----:| --------:|
27
+ | 21-06-03 | train.yaml | 99.60% | https://drive.google.com/drive/folders/13t2PYdedrPQoNYo_QSf6s04WXu2_vAb-?usp=sharing | 1xV100 32GB |
28
+
29
+
30
+ # PreTrained Model + Easy-Inference
31
+ You can find the pre-trained model with an easy-inference function on [HuggingFace](https://huggingface.co/speechbrain/slu-direct-fluent-speech-commands-librispeech-asr).
32
+
33
+
34
+ # Training Time
35
+ About 15 minutes per epoch on a Tesla V100.
36
+
37
+
38
+ # **About SpeechBrain**
39
+ - Website: https://speechbrain.github.io/
40
+ - Code: https://github.com/speechbrain/speechbrain/
41
+ - HuggingFace: https://huggingface.co/speechbrain/
42
+
43
+
44
+ # **Citing SpeechBrain**
45
+ Please cite SpeechBrain if you use it for your research or business.
46
+
47
+ ```bibtex
48
+ @misc{speechbrain,
49
+ title={{SpeechBrain}: A General-Purpose Speech Toolkit},
50
+ author={Mirco Ravanelli and Titouan Parcollet and Peter Plantinga and Aku Rouhe and Samuele Cornell and Loren Lugosch and Cem Subakan and Nauman Dawalatabad and Abdelwahab Heba and Jianyuan Zhong and Ju-Chieh Chou and Sung-Lin Yeh and Szu-Wei Fu and Chien-Feng Liao and Elena Rastorgueva and François Grondin and William Aris and Hwidong Na and Yan Gao and Renato De Mori and Yoshua Bengio},
51
+ year={2021},
52
+ eprint={2106.04624},
53
+ archivePrefix={arXiv},
54
+ primaryClass={eess.AS},
55
+ note={arXiv:2106.04624}
56
+ }
57
+ ```
58
+
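For reference, the easy-inference path mentioned in the README above goes through SpeechBrain's `EndToEndSLU` pretrained interface (the same class behind the `pretrained_models/EndToEndSLU-*` entries in this commit). A minimal sketch, assuming speechbrain 0.5.x; the `savedir` and the audio path are placeholders, not files from this commit:

```python
from speechbrain.pretrained import EndToEndSLU

# Fetch the pretrained direct-SLU model from the HuggingFace Hub.
slu = EndToEndSLU.from_hparams(
    source="speechbrain/slu-direct-fluent-speech-commands-librispeech-asr",
    savedir="pretrained_models/slu-direct-fsc",  # local cache dir (placeholder)
)

# Decode one Fluent Speech Commands utterance into its semantics string.
semantics = slu.decode_file("example.wav")  # placeholder audio path
print(semantics)  # e.g. a string like "{'action': ..., 'object': ..., 'location': ...}"
```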
fluent-speech-commands/Tokenizer/hparams/tokenizer_bpe51.yaml ADDED
@@ -0,0 +1,32 @@
1
+ # ############################################################################
2
+ # Tokenizer: subword BPE with unigram 51
3
+ # Training: Fluent Speech Commands
4
+ # Authors: Abdel Heba 2021
5
+ # ############################################################################
6
+
7
+ output_folder: !ref results/tokenizer_bpe51/
8
+ train_log: !ref <output_folder>/train_log.txt
9
+
10
+ # Data files
11
+ data_folder: !PLACEHOLDER # e.g., /localscratch/fluent_speech_commands_dataset
12
+ train_csv: !ref <output_folder>/train.csv
13
+ valid_csv: !ref <output_folder>/valid.csv
14
+ skip_prep: False
15
+
16
+ # Training parameters
17
+ token_type: unigram # ["unigram", "bpe", "char"]
18
+ token_output: 51 # index(blank/eos/bos/unk) = 0
19
+ character_coverage: 1.0
20
+ num_sequences: 10000
21
+ csv_read: semantics
22
+
23
+
24
+ tokenizer: !name:speechbrain.tokenizers.SentencePiece.SentencePiece
25
+ model_dir: !ref <output_folder>
26
+ vocab_size: !ref <token_output>
27
+ annotation_train: !ref <train_csv>
28
+ annotation_read: !ref <csv_read>
29
+ model_type: !ref <token_type> # ["unigram", "bpe", "char"]
30
+ character_coverage: !ref <character_coverage>
31
+ num_sequences: !ref <num_sequences>
32
+ annotation_list_to_check: [!ref <train_csv>, !ref <valid_csv>]
fluent-speech-commands/Tokenizer/prepare.py ADDED
@@ -0,0 +1 @@
1
+ ../prepare.py
fluent-speech-commands/Tokenizer/train.py ADDED
@@ -0,0 +1,53 @@
1
+ #!/usr/bin/env python3
2
+ """Recipe for training a BPE tokenizer for Fluent Speech Commands.
3
+ The tokenizer converts semantics into sub-word units that can
4
+ be used to train a language model (LM) or an acoustic model (AM).
5
+
6
+ To run this recipe, do the following:
7
+ > python train.py hparams/tokenizer_bpe51.yaml
8
+
9
+
10
+ Authors
11
+ * Abdel Heba 2021
12
+ * Mirco Ravanelli 2021
13
+ * Loren Lugosch 2021
14
+ """
15
+
16
+ import sys
17
+ import speechbrain as sb
18
+ from hyperpyyaml import load_hyperpyyaml
19
+ from speechbrain.utils.distributed import run_on_main
20
+
21
+ if __name__ == "__main__":
22
+
23
+ # CLI:
24
+ hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
25
+ with open(hparams_file) as fin:
26
+ hparams = load_hyperpyyaml(fin, overrides)
27
+
28
+ # If distributed_launch=True then
29
+ # create ddp_group with the right communication protocol
30
+ sb.utils.distributed.ddp_init_group(run_opts)
31
+
32
+ # Create experiment directory
33
+ sb.create_experiment_directory(
34
+ experiment_directory=hparams["output_folder"],
35
+ hyperparams_to_save=hparams_file,
36
+ overrides=overrides,
37
+ )
38
+
39
+ # 1. Dataset prep
40
+ from prepare import prepare_FSC # noqa
41
+
42
+ # Multi-GPU (DDP): run data preparation on the main process only
43
+ run_on_main(
44
+ prepare_FSC,
45
+ kwargs={
46
+ "data_folder": hparams["data_folder"],
47
+ "save_folder": hparams["output_folder"],
48
+ "skip_prep": hparams["skip_prep"],
49
+ },
50
+ )
51
+
52
+ # Train tokenizer
53
+ hparams["tokenizer"]()
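In the script above, `hparams["tokenizer"]` is bound via `!name:` to `speechbrain.tokenizers.SentencePiece.SentencePiece`, so the final call trains the tokenizer as a side effect of constructing the wrapper and saves the model under `model_dir`. A minimal equivalent sketch with the values from `tokenizer_bpe51.yaml` inlined (paths assume the default `output_folder`):

```python
from speechbrain.tokenizers.SentencePiece import SentencePiece

# Constructing the wrapper trains SentencePiece on the "semantics" column
# of train.csv; it should save the model as 51_unigram.model in model_dir.
tokenizer = SentencePiece(
    model_dir="results/tokenizer_bpe51",
    vocab_size=51,
    annotation_train="results/tokenizer_bpe51/train.csv",
    annotation_read="semantics",
    model_type="unigram",
    character_coverage=1.0,
    num_sequences=10000,
    annotation_list_to_check=[
        "results/tokenizer_bpe51/train.csv",
        "results/tokenizer_bpe51/valid.csv",
    ],
)
```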
fluent-speech-commands/direct/__pycache__/prepare.cpython-37.pyc ADDED
Binary file (2.05 kB).
fluent-speech-commands/direct/hparams/train.yaml ADDED
@@ -0,0 +1,204 @@
1
+ # ############################################################################
2
+ # Model: Direct SLU
3
+ # Encoder: Pre-trained ASR encoder -> LSTM
4
+ # Decoder: GRU + beamsearch
5
+ # Tokens: BPE with unigram
6
+ # losses: NLL
7
+ # Training: Fluent Speech Commands
8
+ # Authors: Loren Lugosch, Mirco Ravanelli 2020
9
+ # ############################################################################
10
+
11
+ # Seed needs to be set at top of yaml, before objects with parameters are made
12
+ seed: 112011
13
+ __set_seed: !apply:torch.manual_seed [!ref <seed>]
14
+ output_folder: !ref results/BPE51/<seed>
15
+ save_folder: !ref <output_folder>/save
16
+ train_log: !ref <output_folder>/train_log.txt
17
+
18
+ # Data files
19
+ data_folder: !PLACEHOLDER # e.g., /localscratch/fluent_speech_commands_dataset
20
+ rir_folder: !ref <data_folder> # Change it if needed
21
+ csv_train: !ref <output_folder>/train.csv
22
+ csv_valid: !ref <output_folder>/valid.csv
23
+ csv_test: !ref <output_folder>/test.csv
24
+ tokenizer_file: https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1
25
+ skip_prep: False
26
+ # Training parameters
27
+ number_of_epochs: 4
28
+ batch_size: 8
29
+ lr: 0.0003
30
+ token_type: unigram # ["unigram", "bpe", "char"]
31
+ sorting: random
32
+
33
+ # Model parameters
34
+ sample_rate: 16000
35
+ emb_size: 128
36
+ dec_neurons: 512
37
+ output_neurons: 51 # index(eos/bos) = 0
38
+ ASR_encoder_dim: 512
39
+ encoder_dim: 256
40
+
41
+ # Decoding parameters
42
+ bos_index: 0
43
+ eos_index: 0
44
+ min_decode_ratio: 0.0
45
+ max_decode_ratio: 10.0
46
+ slu_beam_size: 80
47
+ eos_threshold: 1.5
48
+ temperature: 1.25
49
+
50
+ dataloader_opts:
51
+ batch_size: !ref <batch_size>
52
+ shuffle: True
53
+
54
+ epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
55
+ limit: !ref <number_of_epochs>
56
+
57
+ # Models
58
+ asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
59
+ source: speechbrain/asr-crdnn-rnnlm-librispeech
60
+ run_opts: {"device":"cuda:0"}
61
+
62
+ slu_enc: !new:speechbrain.nnet.containers.Sequential
63
+ input_shape: [null, null, !ref <ASR_encoder_dim>]
64
+ lstm: !new:speechbrain.nnet.RNN.LSTM
65
+ input_size: !ref <ASR_encoder_dim>
66
+ bidirectional: True
67
+ hidden_size: !ref <encoder_dim>
68
+ num_layers: 2
69
+ linear: !new:speechbrain.nnet.linear.Linear
70
+ input_size: !ref <encoder_dim> * 2
71
+ n_neurons: !ref <encoder_dim>
72
+
73
+ output_emb: !new:speechbrain.nnet.embedding.Embedding
74
+ num_embeddings: !ref <output_neurons>
75
+ embedding_dim: !ref <emb_size>
76
+
77
+ dec: !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
78
+ enc_dim: !ref <encoder_dim>
79
+ input_size: !ref <emb_size>
80
+ rnn_type: gru
81
+ attn_type: keyvalue
82
+ hidden_size: !ref <dec_neurons>
83
+ attn_dim: 512
84
+ num_layers: 3
85
+ scaling: 1.0
86
+ dropout: 0.0
87
+
88
+ seq_lin: !new:speechbrain.nnet.linear.Linear
89
+ input_size: !ref <dec_neurons>
90
+ n_neurons: !ref <output_neurons>
91
+
92
+ augment_wavedrop: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
93
+ sample_rate: !ref <sample_rate>
94
+ speeds: [100]
95
+
96
+ augment_speed: !new:speechbrain.lobes.augment.TimeDomainSpecAugment
97
+ sample_rate: !ref <sample_rate>
98
+ speeds: [95, 100, 105]
99
+
100
+ add_rev: !new:speechbrain.lobes.augment.EnvCorrupt
101
+ openrir_folder: !ref <rir_folder>
102
+ openrir_max_noise_len: 3.0 # seconds
103
+ reverb_prob: 1.0
104
+ noise_prob: 0.0
105
+ noise_snr_low: 0
106
+ noise_snr_high: 15
107
+ rir_scale_factor: 1.0
108
+
109
+ add_noise: !new:speechbrain.lobes.augment.EnvCorrupt
110
+ openrir_folder: !ref <rir_folder>
111
+ openrir_max_noise_len: 3.0 # seconds
112
+ reverb_prob: 0.0
113
+ noise_prob: 1.0
114
+ noise_snr_low: 0
115
+ noise_snr_high: 15
116
+ rir_scale_factor: 1.0
117
+
118
+ add_rev_noise: !new:speechbrain.lobes.augment.EnvCorrupt
119
+ openrir_folder: !ref <rir_folder>
120
+ openrir_max_noise_len: 3.0 # seconds
121
+ reverb_prob: 1.0
122
+ noise_prob: 1.0
123
+ noise_snr_low: 0
124
+ noise_snr_high: 15
125
+ rir_scale_factor: 1.0
126
+
127
+
128
+ augment_pipeline: [
129
+ !ref <augment_wavedrop>,
130
+ !ref <augment_speed>,
131
+ !ref <add_rev>,
132
+ !ref <add_noise>,
133
+ !ref <add_rev_noise>
134
+ ]
135
+
136
+
137
+ modules:
138
+ augment_wavedrop: !ref <augment_wavedrop>
139
+ augment_speed: !ref <augment_speed>
140
+ add_rev: !ref <add_rev>
141
+ add_noise: !ref <add_noise>
142
+ add_rev_noise: !ref <add_rev_noise>
143
+ slu_enc: !ref <slu_enc>
144
+ output_emb: !ref <output_emb>
145
+ dec: !ref <dec>
146
+ seq_lin: !ref <seq_lin>
147
+
148
+ model: !new:torch.nn.ModuleList
149
+ - [!ref <slu_enc>, !ref <output_emb>,
150
+ !ref <dec>, !ref <seq_lin>]
151
+
152
+ tokenizer: !new:sentencepiece.SentencePieceProcessor
153
+
154
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
155
+ collect_in: !ref <save_folder>/FSC_tokenizer
156
+ loadables:
157
+ tokenizer: !ref <tokenizer>
158
+ paths:
159
+ tokenizer: !ref <tokenizer_file>
160
+
161
+ beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
162
+ embedding: !ref <output_emb>
163
+ decoder: !ref <dec>
164
+ linear: !ref <seq_lin>
165
+ bos_index: !ref <bos_index>
166
+ eos_index: !ref <eos_index>
167
+ min_decode_ratio: !ref <min_decode_ratio>
168
+ max_decode_ratio: !ref <max_decode_ratio>
169
+ beam_size: !ref <slu_beam_size>
170
+ eos_threshold: !ref <eos_threshold>
171
+ temperature: !ref <temperature>
172
+ using_max_attn_shift: False
173
+ max_attn_shift: 30
174
+ coverage_penalty: 0.
175
+
176
+ opt_class: !name:torch.optim.Adam
177
+ lr: !ref <lr>
178
+
179
+ lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
180
+ initial_value: !ref <lr>
181
+ improvement_threshold: 0.0025
182
+ annealing_factor: 0.8
183
+ patient: 0
184
+
185
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
186
+ checkpoints_dir: !ref <save_folder>
187
+ recoverables:
188
+ model: !ref <model>
189
+ scheduler: !ref <lr_annealing>
190
+ counter: !ref <epoch_counter>
191
+
192
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
193
+ apply_log: True
194
+
195
+ seq_cost: !name:speechbrain.nnet.losses.nll_loss
196
+ label_smoothing: 0.1
197
+
198
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
199
+ save_file: !ref <train_log>
200
+
201
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
202
+
203
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
204
+ split_tokens: True
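Because `data_folder` is declared `!PLACEHOLDER`, this file cannot be loaded without an override; `train.py` collects one from the command line (e.g. `python train.py hparams/train.yaml --data_folder=/localscratch/fluent_speech_commands_dataset`). A minimal sketch of the same resolution done directly with HyperPyYAML (the dataset path is a placeholder):

```python
from hyperpyyaml import load_hyperpyyaml

overrides = {"data_folder": "/localscratch/fluent_speech_commands_dataset"}
with open("hparams/train.yaml") as fin:
    hparams = load_hyperpyyaml(fin, overrides)

# !ref expressions are resolved at load time:
print(hparams["output_folder"])  # results/BPE51/112011
```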
fluent-speech-commands/direct/prepare.py ADDED
@@ -0,0 +1 @@
1
+ ../prepare.py
fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/asr.ckpt ADDED
@@ -0,0 +1 @@
1
+ /root/.cache/huggingface/hub/83e944252a91fe1d0883daa1e87077df4d64c35fffb45e22fff924faace4a59c.7fdf4aabd8400c69a6228ccc17c83b7a8ebf34c5d76f23497b7cf0d7a1baaea3
fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/hyperparams.yaml ADDED
@@ -0,0 +1 @@
1
+ /root/.cache/huggingface/hub/7aac72d39109ee19b4004d94239c2924caf33de6d85b0aff9296d844982210cb.d14310ea63844fb38520a592ea3a92e4f131b5f4683f8fa08e27b1e403c92293
fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/lm.ckpt ADDED
@@ -0,0 +1 @@
1
+ /root/.cache/huggingface/hub/651df066b5d0b2efef7208f51df93d3a0a65bedc3a3a2500cd7b8faf064e631e.b438b9af3f549a23c4458bb066c11cd51dc1cfe9bfef30d3eb66b472e93b1e8c
fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/normalizer.ckpt ADDED
@@ -0,0 +1 @@
1
+ /root/.cache/huggingface/hub/e733854cce680bcb58ce4b86bacb3cab5222880933b7b85ab17758aa5b10e9da.587fb748e80e719ed5721d5e0098c5feb2a901017135271ce2b2c6baea7e9f6e
fluent-speech-commands/direct/pretrained_models/EncoderDecoderASR-6406358104753086746/tokenizer.ckpt ADDED
@@ -0,0 +1 @@
1
+ /root/.cache/huggingface/hub/f39208eba495042a59a8404b5703ca08a39a85e4d2bf707e197b90a3323f92ab.cd7af7ea8cfcfbf0f6dd61514c361972eb82b3b76f12b0e9ee0b371f36fdc078
fluent-speech-commands/direct/results/BPE51/112011/env.log ADDED
@@ -0,0 +1,434 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpeechBrain system description
2
+ ==============================
3
+ Python version:
4
+ 3.7.12 (default, Jan 15 2022, 18:48:18)
5
+ [GCC 7.5.0]
6
+ ==============================
7
+ Installed Python packages:
8
+ absl-py==1.0.0
9
+ aiohttp==3.8.1
10
+ aiosignal==1.2.0
11
+ alabaster==0.7.12
12
+ albumentations==0.1.12
13
+ altair==4.2.0
14
+ appdirs==1.4.4
15
+ argon2-cffi==21.3.0
16
+ argon2-cffi-bindings==21.2.0
17
+ arviz==0.11.4
18
+ astor==0.8.1
19
+ astropy==4.3.1
20
+ astunparse==1.6.3
21
+ async-timeout==4.0.2
22
+ asynctest==0.13.0
23
+ atari-py==0.2.9
24
+ atomicwrites==1.4.0
25
+ attrs==21.4.0
26
+ audioread==2.1.9
27
+ autograd==1.3
28
+ Babel==2.9.1
29
+ backcall==0.2.0
30
+ beautifulsoup4==4.6.3
31
+ black==19.10b0
32
+ bleach==4.1.0
33
+ blis==0.4.1
34
+ bokeh==2.3.3
35
+ Bottleneck==1.3.2
36
+ branca==0.4.2
37
+ bs4==0.0.1
38
+ CacheControl==0.12.10
39
+ cached-property==1.5.2
40
+ cachetools==4.2.4
41
+ catalogue==1.0.0
42
+ certifi==2021.10.8
43
+ cffi==1.15.0
44
+ cfgv==3.3.1
45
+ cftime==1.5.2
46
+ chardet==3.0.4
47
+ charset-normalizer==2.0.11
48
+ click==7.1.2
49
+ cloudpickle==1.3.0
50
+ cmake==3.12.0
51
+ cmdstanpy==0.9.5
52
+ colorcet==3.0.0
53
+ colorlover==0.3.0
54
+ community==1.0.0b1
55
+ contextlib2==0.5.5
56
+ convertdate==2.4.0
57
+ coverage==3.7.1
58
+ coveralls==0.5
59
+ crcmod==1.7
60
+ cufflinks==0.17.3
61
+ cupy-cuda111==9.4.0
62
+ cvxopt==1.2.7
63
+ cvxpy==1.0.31
64
+ cycler==0.11.0
65
+ cymem==2.0.6
66
+ Cython==0.29.27
67
+ daft==0.0.4
68
+ dask==2.12.0
69
+ datascience==0.10.6
70
+ datasets==1.18.3
71
+ debugpy==1.0.0
72
+ decorator==4.4.2
73
+ defusedxml==0.7.1
74
+ descartes==1.1.0
75
+ dill==0.3.4
76
+ distlib==0.3.4
77
+ distributed==1.25.3
78
+ dlib @ file:///dlib-19.18.0-cp37-cp37m-linux_x86_64.whl
79
+ dm-tree==0.1.6
80
+ docopt==0.6.2
81
+ docutils==0.17.1
82
+ dopamine-rl==1.0.5
83
+ earthengine-api==0.1.297
84
+ easydict==1.9
85
+ ecos==2.0.10
86
+ editdistance==0.5.3
87
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz
88
+ entrypoints==0.3
89
+ ephem==4.1.3
90
+ et-xmlfile==1.1.0
91
+ fa2==0.3.5
92
+ fastai==1.0.61
93
+ fastdtw==0.3.4
94
+ fastprogress==1.0.0
95
+ fastrlock==0.8
96
+ fbprophet==0.7.1
97
+ feather-format==0.4.1
98
+ filelock==3.4.2
99
+ firebase-admin==4.4.0
100
+ fix-yahoo-finance==0.0.22
101
+ flake8==3.7.9
102
+ Flask==1.1.4
103
+ flatbuffers==2.0
104
+ folium==0.8.3
105
+ frozenlist==1.3.0
106
+ fsspec==2022.1.0
107
+ future==0.16.0
108
+ gast==0.4.0
109
+ GDAL==2.2.2
110
+ gdown==4.2.1
111
+ gensim==3.6.0
112
+ geographiclib==1.52
113
+ geopy==1.17.0
114
+ gin-config==0.5.0
115
+ glob2==0.7
116
+ google==2.0.3
117
+ google-api-core==1.26.3
118
+ google-api-python-client==1.12.10
119
+ google-auth==1.35.0
120
+ google-auth-httplib2==0.0.4
121
+ google-auth-oauthlib==0.4.6
122
+ google-cloud-bigquery==1.21.0
123
+ google-cloud-bigquery-storage==1.1.0
124
+ google-cloud-core==1.0.3
125
+ google-cloud-datastore==1.8.0
126
+ google-cloud-firestore==1.7.0
127
+ google-cloud-language==1.2.0
128
+ google-cloud-storage==1.18.1
129
+ google-cloud-translate==1.5.0
130
+ google-colab @ file:///colabtools/dist/google-colab-1.0.0.tar.gz
131
+ google-pasta==0.2.0
132
+ google-resumable-media==0.4.1
133
+ googleapis-common-protos==1.54.0
134
+ googledrivedownloader==0.4
135
+ graphviz==0.10.1
136
+ greenlet==1.1.2
137
+ grpcio==1.43.0
138
+ gspread==3.4.2
139
+ gspread-dataframe==3.0.8
140
+ gym==0.17.3
141
+ h5py==3.1.0
142
+ HeapDict==1.0.1
143
+ hijri-converter==2.2.2
144
+ holidays==0.10.5.2
145
+ holoviews==1.14.7
146
+ html5lib==1.0.1
147
+ httpimport==0.5.18
148
+ httplib2==0.17.4
149
+ httplib2shim==0.0.3
150
+ huggingface-hub==0.4.0
151
+ humanize==0.5.1
152
+ hyperopt==0.1.2
153
+ HyperPyYAML==1.0.0
154
+ ideep4py==2.0.0.post3
155
+ identify==2.4.10
156
+ idna==2.10
157
+ imageio==2.4.1
158
+ imagesize==1.3.0
159
+ imbalanced-learn==0.8.1
160
+ imblearn==0.0
161
+ imgaug==0.2.9
162
+ importlib-metadata==4.10.1
163
+ importlib-resources==5.4.0
164
+ imutils==0.5.4
165
+ inflect==2.1.0
166
+ iniconfig==1.1.1
167
+ intel-openmp==2022.0.2
168
+ intervaltree==2.1.0
169
+ ipykernel==4.10.1
170
+ ipython==5.5.0
171
+ ipython-genutils==0.2.0
172
+ ipython-sql==0.3.9
173
+ ipywidgets==7.6.5
174
+ itsdangerous==1.1.0
175
+ jax==0.2.25
176
+ jaxlib @ https://storage.googleapis.com/jax-releases/cuda111/jaxlib-0.1.71+cuda111-cp37-none-manylinux2010_x86_64.whl
177
+ jedi==0.18.1
178
+ jieba==0.42.1
179
+ Jinja2==2.11.3
180
+ joblib==1.1.0
181
+ jpeg4py==0.1.4
182
+ jsonschema==4.3.3
183
+ jupyter==1.0.0
184
+ jupyter-client==5.3.5
185
+ jupyter-console==5.2.0
186
+ jupyter-core==4.9.1
187
+ jupyterlab-pygments==0.1.2
188
+ jupyterlab-widgets==1.0.2
189
+ kaggle==1.5.12
190
+ kapre==0.3.7
191
+ keras==2.7.0
192
+ Keras-Preprocessing==1.1.2
193
+ keras-vis==0.4.1
194
+ kiwisolver==1.3.2
195
+ korean-lunar-calendar==0.2.1
196
+ libclang==13.0.0
197
+ librosa==0.9.0
198
+ lightgbm==2.2.3
199
+ llvmlite==0.34.0
200
+ lmdb==0.99
201
+ LunarCalendar==0.0.9
202
+ lxml==4.2.6
203
+ Markdown==3.3.6
204
+ MarkupSafe==2.0.1
205
+ matplotlib==3.2.2
206
+ matplotlib-inline==0.1.3
207
+ matplotlib-venn==0.11.6
208
+ mccabe==0.6.1
209
+ missingno==0.5.0
210
+ mistune==0.8.4
211
+ mizani==0.6.0
212
+ mkl==2019.0
213
+ mlxtend==0.14.0
214
+ more-itertools==8.12.0
215
+ moviepy==0.2.3.5
216
+ mpmath==1.2.1
217
+ msgpack==1.0.3
218
+ multidict==6.0.2
219
+ multiprocess==0.70.12.2
220
+ multitasking==0.0.10
221
+ murmurhash==1.0.6
222
+ music21==5.5.0
223
+ natsort==5.5.0
224
+ nbclient==0.5.10
225
+ nbconvert==5.6.1
226
+ nbformat==5.1.3
227
+ nest-asyncio==1.5.4
228
+ netCDF4==1.5.8
229
+ networkx==2.6.3
230
+ nibabel==3.0.2
231
+ nltk==3.2.5
232
+ nodeenv==1.6.0
233
+ notebook==5.3.1
234
+ numba==0.51.2
235
+ numexpr==2.8.1
236
+ numpy==1.19.5
237
+ nvidia-ml-py3==7.352.0
238
+ oauth2client==4.1.3
239
+ oauthlib==3.2.0
240
+ okgrade==0.4.3
241
+ opencv-contrib-python==4.1.2.30
242
+ opencv-python==4.1.2.30
243
+ openpyxl==3.0.9
244
+ opt-einsum==3.3.0
245
+ osqp==0.6.2.post0
246
+ packaging==21.3
247
+ palettable==3.3.0
248
+ pandas==1.3.5
249
+ pandas-datareader==0.9.0
250
+ pandas-gbq==0.13.3
251
+ pandas-profiling==1.4.1
252
+ pandocfilters==1.5.0
253
+ panel==0.12.1
254
+ param==1.12.0
255
+ parso==0.8.3
256
+ pathlib==1.0.1
257
+ pathspec==0.9.0
258
+ patsy==0.5.2
259
+ pep517==0.12.0
260
+ pexpect==4.8.0
261
+ pickleshare==0.7.5
262
+ Pillow==7.1.2
263
+ pip-tools==6.2.0
264
+ plac==1.1.3
265
+ platformdirs==2.5.0
266
+ plotly==5.5.0
267
+ plotnine==0.6.0
268
+ pluggy==0.13.1
269
+ pooch==1.6.0
270
+ portpicker==1.3.9
271
+ pre-commit==2.17.0
272
+ prefetch-generator==1.0.1
273
+ preshed==3.0.6
274
+ prettytable==3.0.0
275
+ progressbar2==3.38.0
276
+ prometheus-client==0.13.1
277
+ promise==2.3
278
+ prompt-toolkit==1.0.18
279
+ protobuf==3.17.3
280
+ psutil==5.4.8
281
+ psycopg2==2.7.6.1
282
+ ptyprocess==0.7.0
283
+ py==1.11.0
284
+ pyarrow==6.0.1
285
+ pyasn1==0.4.8
286
+ pyasn1-modules==0.2.8
287
+ pycocotools==2.0.4
288
+ pycodestyle==2.5.0
289
+ pycparser==2.21
290
+ pyct==0.4.8
291
+ pydata-google-auth==1.3.0
292
+ pydot==1.3.0
293
+ pydot-ng==2.0.0
294
+ pydotplus==2.0.2
295
+ PyDrive==1.3.1
296
+ pyemd==0.5.1
297
+ pyerfa==2.0.0.1
298
+ pyflakes==2.1.1
299
+ pyglet==1.5.0
300
+ Pygments==2.6.1
301
+ pygobject==3.26.1
302
+ pymc3==3.11.4
303
+ PyMeeus==0.5.11
304
+ pymongo==4.0.1
305
+ pymystem3==0.2.0
306
+ PyOpenGL==3.1.5
307
+ pyparsing==3.0.7
308
+ pyrsistent==0.18.1
309
+ pysndfile==1.3.8
310
+ PySocks==1.7.1
311
+ pystan==2.19.1.1
312
+ pytest==5.4.1
313
+ python-apt==0.0.0
314
+ python-chess==0.23.11
315
+ python-dateutil==2.8.2
316
+ python-louvain==0.16
317
+ python-slugify==5.0.2
318
+ python-utils==3.1.0
319
+ pytz==2018.9
320
+ pyviz-comms==2.1.0
321
+ PyWavelets==1.2.0
322
+ PyYAML==6.0
323
+ pyzmq==22.3.0
324
+ qdldl==0.1.5.post0
325
+ qtconsole==5.2.2
326
+ QtPy==2.0.1
327
+ regex==2019.12.20
328
+ requests==2.23.0
329
+ requests-oauthlib==1.3.1
330
+ resampy==0.2.2
331
+ rpy2==3.4.5
332
+ rsa==4.8
333
+ ruamel.yaml==0.17.21
334
+ ruamel.yaml.clib==0.2.6
335
+ sacremoses==0.0.47
336
+ scikit-image==0.18.3
337
+ scikit-learn==1.0.2
338
+ scipy==1.4.1
339
+ screen-resolution-extra==0.0.0
340
+ scs==3.1.0
341
+ seaborn==0.11.2
342
+ semver==2.13.0
343
+ Send2Trash==1.8.0
344
+ sentencepiece==0.1.96
345
+ setuptools-git==1.2
346
+ Shapely==1.8.0
347
+ simplegeneric==0.8.1
348
+ six==1.15.0
349
+ sklearn==0.0
350
+ sklearn-pandas==1.8.0
351
+ smart-open==5.2.1
352
+ snowballstemmer==2.2.0
353
+ sortedcontainers==2.4.0
354
+ SoundFile==0.10.3.post1
355
+ spacy==2.2.4
356
+ speechbrain==0.5.11
357
+ Sphinx==1.8.6
358
+ sphinxcontrib-serializinghtml==1.1.5
359
+ sphinxcontrib-websupport==1.2.4
360
+ SQLAlchemy==1.4.31
361
+ sqlparse==0.4.2
362
+ srsly==1.0.5
363
+ statsmodels==0.10.2
364
+ sympy==1.7.1
365
+ tables==3.7.0
366
+ tabulate==0.8.9
367
+ tblib==1.7.0
368
+ tenacity==8.0.1
369
+ tensorboard==2.7.0
370
+ tensorboard-data-server==0.6.1
371
+ tensorboard-plugin-wit==1.8.1
372
+ tensorflow @ file:///tensorflow-2.7.0-cp37-cp37m-linux_x86_64.whl
373
+ tensorflow-datasets==4.0.1
374
+ tensorflow-estimator==2.7.0
375
+ tensorflow-gcs-config==2.7.0
376
+ tensorflow-hub==0.12.0
377
+ tensorflow-io-gcs-filesystem==0.24.0
378
+ tensorflow-metadata==1.6.0
379
+ tensorflow-probability==0.15.0
380
+ termcolor==1.1.0
381
+ terminado==0.13.1
382
+ testpath==0.5.0
383
+ text-unidecode==1.3
384
+ textblob==0.15.3
385
+ Theano-PyMC==1.1.2
386
+ thinc==7.4.0
387
+ threadpoolctl==3.1.0
388
+ tifffile==2021.11.2
389
+ tokenizers==0.11.4
390
+ toml==0.10.2
391
+ tomli==2.0.0
392
+ toolz==0.11.2
393
+ torch @ https://download.pytorch.org/whl/cu111/torch-1.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl
394
+ torchaudio @ https://download.pytorch.org/whl/cu111/torchaudio-0.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl
395
+ torchsummary==1.5.1
396
+ torchtext==0.11.0
397
+ torchvision @ https://download.pytorch.org/whl/cu111/torchvision-0.11.1%2Bcu111-cp37-cp37m-linux_x86_64.whl
398
+ tornado==5.1.1
399
+ tqdm==4.62.3
400
+ traitlets==5.1.1
401
+ transformers==4.16.2
402
+ tweepy==3.10.0
403
+ typed-ast==1.5.2
404
+ typeguard==2.7.1
405
+ typing-extensions==3.10.0.2
406
+ tzlocal==1.5.1
407
+ uritemplate==3.0.1
408
+ urllib3==1.24.3
409
+ vega-datasets==0.9.0
410
+ virtualenv==20.13.1
411
+ wasabi==0.9.0
412
+ wcwidth==0.2.5
413
+ webencodings==0.5.1
414
+ Werkzeug==1.0.1
415
+ widgetsnbextension==3.5.2
416
+ wordcloud==1.5.0
417
+ wrapt==1.13.3
418
+ xarray==0.18.2
419
+ xgboost==0.90
420
+ xkit==0.0.0
421
+ xlrd==1.1.0
422
+ xlwt==1.3.0
423
+ xxhash==2.0.2
424
+ yamllint==1.23.0
425
+ yarl==1.7.2
426
+ yellowbrick==1.3.post1
427
+ zict==2.0.0
428
+ zipp==3.7.0
429
+ ==============================
430
+ Git revision:
431
+ 9d56d508
432
+ ==============================
433
+ Cuda version:
434
+ 11.1
fluent-speech-commands/direct/results/BPE51/112011/hyperparams.yaml ADDED
@@ -0,0 +1,200 @@
1
+ # Generated 2022-02-15 from:
2
+ # /content/speechbrain/recipes/fluent-speech-commands/direct/hparams/train.yaml
3
+ # yamllint disable
4
+ # ############################################################################
5
+ # Model: Direct SLU
6
+ # Encoder: Pre-trained ASR encoder -> LSTM
7
+ # Decoder: GRU + beamsearch
8
+ # Tokens: BPE with unigram
9
+ # losses: NLL
10
+ # Training: Fluent Speech Commands
11
+ # Authors: Loren Lugosch, Mirco Ravanelli 2020
12
+ # ############################################################################
13
+
14
+ # Seed needs to be set at top of yaml, before objects with parameters are made
15
+ seed: 112011
16
+ __set_seed: !apply:torch.manual_seed [112011]
17
+ output_folder: results/BPE51/112011
18
+ save_folder: results/BPE51/112011/save
19
+ train_log: results/BPE51/112011/train_log.txt
20
+
21
+ # Data files
22
+ data_folder: /content/fluent_speech_commands_dataset
23
+ # e.g, /localscratch/fluent_speech_commands_dataset
24
+ rir_folder: /content/fluent_speech_commands_dataset # Change it if needed
25
+ csv_train: results/BPE51/112011/train.csv
26
+ csv_valid: results/BPE51/112011/valid.csv
27
+ csv_test: results/BPE51/112011/test.csv
28
+ tokenizer_file: https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1
29
+ skip_prep: false
30
+ # Training parameters
31
+ number_of_epochs: 4
32
+ batch_size: 8
33
+ lr: 0.0003
34
+ token_type: unigram # ["unigram", "bpe", "char"]
35
+ sorting: random
36
+
37
+ # Model parameters
38
+ sample_rate: 16000
39
+ emb_size: 128
40
+ dec_neurons: 512
41
+ output_neurons: 51 # index(eos/bos) = 0
42
+ ASR_encoder_dim: 512
43
+ encoder_dim: 256
44
+
45
+ # Decoding parameters
46
+ bos_index: 0
47
+ eos_index: 0
48
+ min_decode_ratio: 0.0
49
+ max_decode_ratio: 10.0
50
+ slu_beam_size: 80
51
+ eos_threshold: 1.5
52
+ temperature: 1.25
53
+
54
+ dataloader_opts:
55
+ batch_size: 8
56
+ shuffle: true
57
+
58
+ epoch_counter: &id013 !new:speechbrain.utils.epoch_loop.EpochCounter
59
+
60
+ limit: 4
61
+
62
+ # Models
63
+ asr_model: !apply:speechbrain.pretrained.EncoderDecoderASR.from_hparams
64
+ source: speechbrain/asr-crdnn-rnnlm-librispeech
65
+ run_opts: {device: cuda:0}
66
+
67
+ slu_enc: &id006 !new:speechbrain.nnet.containers.Sequential
68
+ input_shape: [null, null, 512]
69
+ lstm: !new:speechbrain.nnet.RNN.LSTM
70
+ input_size: 512
71
+ bidirectional: true
72
+ hidden_size: 256
73
+ num_layers: 2
74
+ linear: !new:speechbrain.nnet.linear.Linear
75
+ input_size: 512
76
+ n_neurons: 256
77
+
78
+ output_emb: &id007 !new:speechbrain.nnet.embedding.Embedding
79
+ num_embeddings: 51
80
+ embedding_dim: 128
81
+
82
+ dec: &id008 !new:speechbrain.nnet.RNN.AttentionalRNNDecoder
83
+ enc_dim: 256
84
+ input_size: 128
85
+ rnn_type: gru
86
+ attn_type: keyvalue
87
+ hidden_size: 512
88
+ attn_dim: 512
89
+ num_layers: 3
90
+ scaling: 1.0
91
+ dropout: 0.0
92
+
93
+ seq_lin: &id009 !new:speechbrain.nnet.linear.Linear
94
+
95
+ input_size: 512
96
+ n_neurons: 51
97
+
98
+ augment_wavedrop: &id001 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
99
+ sample_rate: 16000
100
+ speeds: [100]
101
+
102
+ augment_speed: &id002 !new:speechbrain.lobes.augment.TimeDomainSpecAugment
103
+ sample_rate: 16000
104
+ speeds: [95, 100, 105]
105
+
106
+ add_rev: &id003 !new:speechbrain.lobes.augment.EnvCorrupt
107
+ openrir_folder: /content/fluent_speech_commands_dataset
108
+ openrir_max_noise_len: 3.0 # seconds
109
+ reverb_prob: 1.0
110
+ noise_prob: 0.0
111
+ noise_snr_low: 0
112
+ noise_snr_high: 15
113
+ rir_scale_factor: 1.0
114
+
115
+ add_noise: &id004 !new:speechbrain.lobes.augment.EnvCorrupt
116
+ openrir_folder: /content/fluent_speech_commands_dataset
117
+ openrir_max_noise_len: 3.0 # seconds
118
+ reverb_prob: 0.0
119
+ noise_prob: 1.0
120
+ noise_snr_low: 0
121
+ noise_snr_high: 15
122
+ rir_scale_factor: 1.0
123
+
124
+ add_rev_noise: &id005 !new:speechbrain.lobes.augment.EnvCorrupt
125
+ openrir_folder: /content/fluent_speech_commands_dataset
126
+ openrir_max_noise_len: 3.0 # seconds
127
+ reverb_prob: 1.0
128
+ noise_prob: 1.0
129
+ noise_snr_low: 0
130
+ noise_snr_high: 15
131
+ rir_scale_factor: 1.0
132
+
133
+
134
+ augment_pipeline: [*id001, *id002, *id003, *id004, *id005]
135
+
136
+
137
+ modules:
138
+ augment_wavedrop: *id001
139
+ augment_speed: *id002
140
+ add_rev: *id003
141
+ add_noise: *id004
142
+ add_rev_noise: *id005
143
+ slu_enc: *id006
144
+ output_emb: *id007
145
+ dec: *id008
146
+ seq_lin: *id009
147
+ model: &id011 !new:torch.nn.ModuleList
148
+ - [*id006, *id007, *id008, *id009]
149
+ tokenizer: &id010 !new:sentencepiece.SentencePieceProcessor
150
+
151
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
152
+ collect_in: results/BPE51/112011/save/FSC_tokenizer
153
+ loadables:
154
+ tokenizer: *id010
155
+ paths:
156
+ tokenizer: https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1
157
+
158
+ beam_searcher: !new:speechbrain.decoders.S2SRNNBeamSearcher
159
+ embedding: *id007
160
+ decoder: *id008
161
+ linear: *id009
162
+ bos_index: 0
163
+ eos_index: 0
164
+ min_decode_ratio: 0.0
165
+ max_decode_ratio: 10.0
166
+ beam_size: 80
167
+ eos_threshold: 1.5
168
+ temperature: 1.25
169
+ using_max_attn_shift: false
170
+ max_attn_shift: 30
171
+ coverage_penalty: 0.
172
+
173
+ opt_class: !name:torch.optim.Adam
174
+ lr: 0.0003
175
+
176
+ lr_annealing: &id012 !new:speechbrain.nnet.schedulers.NewBobScheduler
177
+ initial_value: 0.0003
178
+ improvement_threshold: 0.0025
179
+ annealing_factor: 0.8
180
+ patient: 0
181
+
182
+ checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
183
+ checkpoints_dir: results/BPE51/112011/save
184
+ recoverables:
185
+ model: *id011
186
+ scheduler: *id012
187
+ counter: *id013
188
+ log_softmax: !new:speechbrain.nnet.activations.Softmax
189
+ apply_log: true
190
+
191
+ seq_cost: !name:speechbrain.nnet.losses.nll_loss
192
+ label_smoothing: 0.1
193
+
194
+ train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
195
+ save_file: results/BPE51/112011/train_log.txt
196
+
197
+ error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
198
+
199
+ cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
200
+ split_tokens: true
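Note how HyperPyYAML resolved the `!ref` expressions when this file was generated: `input_size: !ref <encoder_dim> * 2` in `train.yaml` became the literal `input_size: 512` here, since the bidirectional LSTM doubles the 256-dim encoder output. A minimal sketch of that arithmetic resolution (assuming `load_hyperpyyaml` also accepts a YAML string, as in the HyperPyYAML docs):

```python
from hyperpyyaml import load_hyperpyyaml

yaml_string = """
encoder_dim: 256
linear_input: !ref <encoder_dim> * 2
"""
# The !ref arithmetic is evaluated at load time.
print(load_hyperpyyaml(yaml_string)["linear_input"])  # 512
```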
fluent-speech-commands/direct/results/BPE51/112011/log.txt ADDED
@@ -0,0 +1,454 @@
1
+ 2022-02-15 02:15:13,142 - speechbrain.core - INFO - Beginning experiment!
2
+ 2022-02-15 02:15:13,142 - speechbrain.core - INFO - Experiment folder: results/BPE51/112011
3
+ 2022-02-15 02:15:14,668 - speechbrain.utils.superpowers - DEBUG - absl-py==1.0.0
4
+ aiohttp==3.8.1
5
+ aiosignal==1.2.0
6
+ alabaster==0.7.12
7
+ albumentations==0.1.12
8
+ altair==4.2.0
9
+ appdirs==1.4.4
10
+ argon2-cffi==21.3.0
11
+ argon2-cffi-bindings==21.2.0
12
+ arviz==0.11.4
13
+ astor==0.8.1
14
+ astropy==4.3.1
15
+ astunparse==1.6.3
16
+ async-timeout==4.0.2
17
+ asynctest==0.13.0
18
+ atari-py==0.2.9
19
+ atomicwrites==1.4.0
20
+ attrs==21.4.0
21
+ audioread==2.1.9
22
+ autograd==1.3
23
+ Babel==2.9.1
24
+ backcall==0.2.0
25
+ beautifulsoup4==4.6.3
26
+ black==19.10b0
27
+ bleach==4.1.0
28
+ blis==0.4.1
29
+ bokeh==2.3.3
30
+ Bottleneck==1.3.2
31
+ branca==0.4.2
32
+ bs4==0.0.1
33
+ CacheControl==0.12.10
34
+ cached-property==1.5.2
35
+ cachetools==4.2.4
36
+ catalogue==1.0.0
37
+ certifi==2021.10.8
38
+ cffi==1.15.0
39
+ cfgv==3.3.1
40
+ cftime==1.5.2
41
+ chardet==3.0.4
42
+ charset-normalizer==2.0.11
43
+ click==7.1.2
44
+ cloudpickle==1.3.0
45
+ cmake==3.12.0
46
+ cmdstanpy==0.9.5
47
+ colorcet==3.0.0
48
+ colorlover==0.3.0
49
+ community==1.0.0b1
50
+ contextlib2==0.5.5
51
+ convertdate==2.4.0
52
+ coverage==3.7.1
53
+ coveralls==0.5
54
+ crcmod==1.7
55
+ cufflinks==0.17.3
56
+ cupy-cuda111==9.4.0
57
+ cvxopt==1.2.7
58
+ cvxpy==1.0.31
59
+ cycler==0.11.0
60
+ cymem==2.0.6
61
+ Cython==0.29.27
62
+ daft==0.0.4
63
+ dask==2.12.0
64
+ datascience==0.10.6
65
+ datasets==1.18.3
66
+ debugpy==1.0.0
67
+ decorator==4.4.2
68
+ defusedxml==0.7.1
69
+ descartes==1.1.0
70
+ dill==0.3.4
71
+ distlib==0.3.4
72
+ distributed==1.25.3
73
+ dlib @ file:///dlib-19.18.0-cp37-cp37m-linux_x86_64.whl
74
+ dm-tree==0.1.6
75
+ docopt==0.6.2
76
+ docutils==0.17.1
77
+ dopamine-rl==1.0.5
78
+ earthengine-api==0.1.297
79
+ easydict==1.9
80
+ ecos==2.0.10
81
+ editdistance==0.5.3
82
+ en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.2.5/en_core_web_sm-2.2.5.tar.gz
83
+ entrypoints==0.3
84
+ ephem==4.1.3
85
+ et-xmlfile==1.1.0
86
+ fa2==0.3.5
87
+ fastai==1.0.61
88
+ fastdtw==0.3.4
89
+ fastprogress==1.0.0
90
+ fastrlock==0.8
91
+ fbprophet==0.7.1
92
+ feather-format==0.4.1
93
+ filelock==3.4.2
94
+ firebase-admin==4.4.0
95
+ fix-yahoo-finance==0.0.22
96
+ flake8==3.7.9
97
+ Flask==1.1.4
98
+ flatbuffers==2.0
99
+ folium==0.8.3
100
+ frozenlist==1.3.0
101
+ fsspec==2022.1.0
102
+ future==0.16.0
103
+ gast==0.4.0
104
+ GDAL==2.2.2
105
+ gdown==4.2.1
106
+ gensim==3.6.0
107
+ geographiclib==1.52
108
+ geopy==1.17.0
109
+ gin-config==0.5.0
110
+ glob2==0.7
111
+ google==2.0.3
112
+ google-api-core==1.26.3
113
+ google-api-python-client==1.12.10
114
+ google-auth==1.35.0
115
+ google-auth-httplib2==0.0.4
116
+ google-auth-oauthlib==0.4.6
117
+ google-cloud-bigquery==1.21.0
118
+ google-cloud-bigquery-storage==1.1.0
119
+ google-cloud-core==1.0.3
120
+ google-cloud-datastore==1.8.0
121
+ google-cloud-firestore==1.7.0
122
+ google-cloud-language==1.2.0
123
+ google-cloud-storage==1.18.1
124
+ google-cloud-translate==1.5.0
125
+ google-colab @ file:///colabtools/dist/google-colab-1.0.0.tar.gz
126
+ google-pasta==0.2.0
127
+ google-resumable-media==0.4.1
128
+ googleapis-common-protos==1.54.0
129
+ googledrivedownloader==0.4
130
+ graphviz==0.10.1
131
+ greenlet==1.1.2
132
+ grpcio==1.43.0
133
+ gspread==3.4.2
134
+ gspread-dataframe==3.0.8
135
+ gym==0.17.3
136
+ h5py==3.1.0
137
+ HeapDict==1.0.1
138
+ hijri-converter==2.2.2
139
+ holidays==0.10.5.2
140
+ holoviews==1.14.7
141
+ html5lib==1.0.1
142
+ httpimport==0.5.18
143
+ httplib2==0.17.4
144
+ httplib2shim==0.0.3
145
+ huggingface-hub==0.4.0
146
+ humanize==0.5.1
147
+ hyperopt==0.1.2
148
+ HyperPyYAML==1.0.0
149
+ ideep4py==2.0.0.post3
150
+ identify==2.4.10
151
+ idna==2.10
152
+ imageio==2.4.1
153
+ imagesize==1.3.0
154
+ imbalanced-learn==0.8.1
155
+ imblearn==0.0
156
+ imgaug==0.2.9
157
+ importlib-metadata==4.10.1
158
+ importlib-resources==5.4.0
159
+ imutils==0.5.4
160
+ inflect==2.1.0
161
+ iniconfig==1.1.1
162
+ intel-openmp==2022.0.2
163
+ intervaltree==2.1.0
164
+ ipykernel==4.10.1
165
+ ipython==5.5.0
166
+ ipython-genutils==0.2.0
167
+ ipython-sql==0.3.9
168
+ ipywidgets==7.6.5
169
+ itsdangerous==1.1.0
170
+ jax==0.2.25
171
+ jaxlib @ https://storage.googleapis.com/jax-releases/cuda111/jaxlib-0.1.71+cuda111-cp37-none-manylinux2010_x86_64.whl
172
+ jedi==0.18.1
173
+ jieba==0.42.1
174
+ Jinja2==2.11.3
175
+ joblib==1.1.0
176
+ jpeg4py==0.1.4
177
+ jsonschema==4.3.3
178
+ jupyter==1.0.0
179
+ jupyter-client==5.3.5
180
+ jupyter-console==5.2.0
181
+ jupyter-core==4.9.1
182
+ jupyterlab-pygments==0.1.2
183
+ jupyterlab-widgets==1.0.2
184
+ kaggle==1.5.12
185
+ kapre==0.3.7
186
+ keras==2.7.0
187
+ Keras-Preprocessing==1.1.2
188
+ keras-vis==0.4.1
189
+ kiwisolver==1.3.2
190
+ korean-lunar-calendar==0.2.1
191
+ libclang==13.0.0
192
+ librosa==0.9.0
193
+ lightgbm==2.2.3
194
+ llvmlite==0.34.0
195
+ lmdb==0.99
196
+ LunarCalendar==0.0.9
197
+ lxml==4.2.6
198
+ Markdown==3.3.6
199
+ MarkupSafe==2.0.1
200
+ matplotlib==3.2.2
201
+ matplotlib-inline==0.1.3
202
+ matplotlib-venn==0.11.6
203
+ mccabe==0.6.1
204
+ missingno==0.5.0
205
+ mistune==0.8.4
206
+ mizani==0.6.0
207
+ mkl==2019.0
208
+ mlxtend==0.14.0
209
+ more-itertools==8.12.0
210
+ moviepy==0.2.3.5
211
+ mpmath==1.2.1
212
+ msgpack==1.0.3
213
+ multidict==6.0.2
214
+ multiprocess==0.70.12.2
215
+ multitasking==0.0.10
216
+ murmurhash==1.0.6
217
+ music21==5.5.0
218
+ natsort==5.5.0
219
+ nbclient==0.5.10
220
+ nbconvert==5.6.1
221
+ nbformat==5.1.3
222
+ nest-asyncio==1.5.4
223
+ netCDF4==1.5.8
224
+ networkx==2.6.3
225
+ nibabel==3.0.2
226
+ nltk==3.2.5
227
+ nodeenv==1.6.0
228
+ notebook==5.3.1
229
+ numba==0.51.2
230
+ numexpr==2.8.1
231
+ numpy==1.19.5
232
+ nvidia-ml-py3==7.352.0
233
+ oauth2client==4.1.3
234
+ oauthlib==3.2.0
235
+ okgrade==0.4.3
236
+ opencv-contrib-python==4.1.2.30
237
+ opencv-python==4.1.2.30
238
+ openpyxl==3.0.9
239
+ opt-einsum==3.3.0
240
+ osqp==0.6.2.post0
241
+ packaging==21.3
242
+ palettable==3.3.0
243
+ pandas==1.3.5
244
+ pandas-datareader==0.9.0
245
+ pandas-gbq==0.13.3
246
+ pandas-profiling==1.4.1
247
+ pandocfilters==1.5.0
248
+ panel==0.12.1
249
+ param==1.12.0
250
+ parso==0.8.3
251
+ pathlib==1.0.1
252
+ pathspec==0.9.0
253
+ patsy==0.5.2
254
+ pep517==0.12.0
255
+ pexpect==4.8.0
256
+ pickleshare==0.7.5
257
+ Pillow==7.1.2
258
+ pip-tools==6.2.0
259
+ plac==1.1.3
260
+ platformdirs==2.5.0
261
+ plotly==5.5.0
262
+ plotnine==0.6.0
263
+ pluggy==0.13.1
264
+ pooch==1.6.0
265
+ portpicker==1.3.9
266
+ pre-commit==2.17.0
267
+ prefetch-generator==1.0.1
268
+ preshed==3.0.6
269
+ prettytable==3.0.0
270
+ progressbar2==3.38.0
271
+ prometheus-client==0.13.1
272
+ promise==2.3
273
+ prompt-toolkit==1.0.18
274
+ protobuf==3.17.3
275
+ psutil==5.4.8
276
+ psycopg2==2.7.6.1
277
+ ptyprocess==0.7.0
278
+ py==1.11.0
279
+ pyarrow==6.0.1
280
+ pyasn1==0.4.8
281
+ pyasn1-modules==0.2.8
282
+ pycocotools==2.0.4
283
+ pycodestyle==2.5.0
284
+ pycparser==2.21
285
+ pyct==0.4.8
286
+ pydata-google-auth==1.3.0
287
+ pydot==1.3.0
288
+ pydot-ng==2.0.0
289
+ pydotplus==2.0.2
290
+ PyDrive==1.3.1
291
+ pyemd==0.5.1
292
+ pyerfa==2.0.0.1
293
+ pyflakes==2.1.1
294
+ pyglet==1.5.0
295
+ Pygments==2.6.1
296
+ pygobject==3.26.1
297
+ pymc3==3.11.4
298
+ PyMeeus==0.5.11
299
+ pymongo==4.0.1
300
+ pymystem3==0.2.0
301
+ PyOpenGL==3.1.5
302
+ pyparsing==3.0.7
303
+ pyrsistent==0.18.1
304
+ pysndfile==1.3.8
305
+ PySocks==1.7.1
306
+ pystan==2.19.1.1
307
+ pytest==5.4.1
308
+ python-apt==0.0.0
309
+ python-chess==0.23.11
310
+ python-dateutil==2.8.2
311
+ python-louvain==0.16
312
+ python-slugify==5.0.2
313
+ python-utils==3.1.0
314
+ pytz==2018.9
315
+ pyviz-comms==2.1.0
316
+ PyWavelets==1.2.0
317
+ PyYAML==6.0
318
+ pyzmq==22.3.0
319
+ qdldl==0.1.5.post0
320
+ qtconsole==5.2.2
321
+ QtPy==2.0.1
322
+ regex==2019.12.20
323
+ requests==2.23.0
324
+ requests-oauthlib==1.3.1
325
+ resampy==0.2.2
326
+ rpy2==3.4.5
327
+ rsa==4.8
328
+ ruamel.yaml==0.17.21
329
+ ruamel.yaml.clib==0.2.6
330
+ sacremoses==0.0.47
331
+ scikit-image==0.18.3
332
+ scikit-learn==1.0.2
333
+ scipy==1.4.1
334
+ screen-resolution-extra==0.0.0
335
+ scs==3.1.0
336
+ seaborn==0.11.2
337
+ semver==2.13.0
338
+ Send2Trash==1.8.0
339
+ sentencepiece==0.1.96
340
+ setuptools-git==1.2
341
+ Shapely==1.8.0
342
+ simplegeneric==0.8.1
343
+ six==1.15.0
344
+ sklearn==0.0
345
+ sklearn-pandas==1.8.0
346
+ smart-open==5.2.1
347
+ snowballstemmer==2.2.0
348
+ sortedcontainers==2.4.0
349
+ SoundFile==0.10.3.post1
350
+ spacy==2.2.4
351
+ speechbrain==0.5.11
352
+ Sphinx==1.8.6
353
+ sphinxcontrib-serializinghtml==1.1.5
354
+ sphinxcontrib-websupport==1.2.4
355
+ SQLAlchemy==1.4.31
356
+ sqlparse==0.4.2
357
+ srsly==1.0.5
358
+ statsmodels==0.10.2
359
+ sympy==1.7.1
360
+ tables==3.7.0
361
+ tabulate==0.8.9
362
+ tblib==1.7.0
363
+ tenacity==8.0.1
364
+ tensorboard==2.7.0
365
+ tensorboard-data-server==0.6.1
366
+ tensorboard-plugin-wit==1.8.1
367
+ tensorflow @ file:///tensorflow-2.7.0-cp37-cp37m-linux_x86_64.whl
368
+ tensorflow-datasets==4.0.1
369
+ tensorflow-estimator==2.7.0
370
+ tensorflow-gcs-config==2.7.0
371
+ tensorflow-hub==0.12.0
372
+ tensorflow-io-gcs-filesystem==0.24.0
373
+ tensorflow-metadata==1.6.0
374
+ tensorflow-probability==0.15.0
375
+ termcolor==1.1.0
376
+ terminado==0.13.1
377
+ testpath==0.5.0
378
+ text-unidecode==1.3
379
+ textblob==0.15.3
380
+ Theano-PyMC==1.1.2
381
+ thinc==7.4.0
382
+ threadpoolctl==3.1.0
383
+ tifffile==2021.11.2
384
+ tokenizers==0.11.4
385
+ toml==0.10.2
386
+ tomli==2.0.0
387
+ toolz==0.11.2
388
+ torch @ https://download.pytorch.org/whl/cu111/torch-1.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl
389
+ torchaudio @ https://download.pytorch.org/whl/cu111/torchaudio-0.10.0%2Bcu111-cp37-cp37m-linux_x86_64.whl
390
+ torchsummary==1.5.1
391
+ torchtext==0.11.0
392
+ torchvision @ https://download.pytorch.org/whl/cu111/torchvision-0.11.1%2Bcu111-cp37-cp37m-linux_x86_64.whl
393
+ tornado==5.1.1
394
+ tqdm==4.62.3
395
+ traitlets==5.1.1
396
+ transformers==4.16.2
397
+ tweepy==3.10.0
398
+ typed-ast==1.5.2
399
+ typeguard==2.7.1
400
+ typing-extensions==3.10.0.2
401
+ tzlocal==1.5.1
402
+ uritemplate==3.0.1
403
+ urllib3==1.24.3
404
+ vega-datasets==0.9.0
405
+ virtualenv==20.13.1
406
+ wasabi==0.9.0
407
+ wcwidth==0.2.5
408
+ webencodings==0.5.1
409
+ Werkzeug==1.0.1
410
+ widgetsnbextension==3.5.2
411
+ wordcloud==1.5.0
412
+ wrapt==1.13.3
413
+ xarray==0.18.2
414
+ xgboost==0.90
415
+ xkit==0.0.0
416
+ xlrd==1.1.0
417
+ xlwt==1.3.0
418
+ xxhash==2.0.2
419
+ yamllint==1.23.0
420
+ yarl==1.7.2
421
+ yellowbrick==1.3.post1
422
+ zict==2.0.0
423
+ zipp==3.7.0
424
+
425
+
426
+ 2022-02-15 02:15:14,791 - speechbrain.utils.superpowers - DEBUG - 9d56d508
427
+
428
+
429
+ 2022-02-15 02:15:14,794 - prepare - INFO - Preparing results/BPE51/112011/train.csv...
430
+ 2022-02-15 02:15:51,789 - prepare - INFO - Preparing results/BPE51/112011/valid.csv...
431
+ 2022-02-15 02:15:54,371 - prepare - INFO - Preparing results/BPE51/112011/test.csv...
432
+ 2022-02-15 02:15:57,696 - speechbrain.utils.parameter_transfer - DEBUG - Collecting files (or symlinks) for pretraining in results/BPE51/112011/save/FSC_tokenizer.
433
+ 2022-02-15 02:15:57,696 - speechbrain.pretrained.fetching - INFO - Fetch 51_unigram.model?dl=1: Downloading from normal URL https://www.dropbox.com/s/hvf2huofnq0sjbn/51_unigram.model?dl=1.
434
+ 2022-02-15 02:15:58,662 - speechbrain.utils.parameter_transfer - INFO - Loading pretrained files for: tokenizer
435
+ 2022-02-15 02:15:58,689 - speechbrain.core - INFO - 9.3M trainable parameters in SLU
436
+ 2022-02-15 02:15:58,690 - speechbrain.utils.checkpoints - INFO - Would load a checkpoint here, but none found yet.
437
+ 2022-02-15 02:15:58,690 - speechbrain.utils.epoch_loop - INFO - Going into epoch 1
438
+ 2022-02-15 02:55:37,944 - speechbrain.utils.train_logger - INFO - epoch: 1, lr: 3.00e-04 - train loss: 7.70e-01 - valid loss: 7.10e-01, valid CER: 3.82e-01, valid WER: 1.01
439
+ 2022-02-15 02:55:38,203 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+02-55-37+00
440
+ 2022-02-15 02:55:38,208 - speechbrain.utils.epoch_loop - INFO - Going into epoch 2
441
+ 2022-02-15 03:34:48,102 - speechbrain.utils.train_logger - INFO - epoch: 2, lr: 3.00e-04 - train loss: 7.10e-01 - valid loss: 7.08e-01, valid CER: 3.45e-01, valid WER: 8.11e-01
442
+ 2022-02-15 03:34:48,368 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+03-34-48+00
443
+ 2022-02-15 03:34:48,400 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+02-55-37+00
444
+ 2022-02-15 03:34:48,401 - speechbrain.utils.epoch_loop - INFO - Going into epoch 3
445
+ 2022-02-15 04:14:03,140 - speechbrain.utils.train_logger - INFO - epoch: 3, lr: 3.00e-04 - train loss: 7.08e-01 - valid loss: 7.07e-01, valid CER: 2.30e-01, valid WER: 5.79e-01
446
+ 2022-02-15 04:14:03,373 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00
447
+ 2022-02-15 04:14:03,429 - speechbrain.utils.checkpoints - INFO - Deleted checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+03-34-48+00
448
+ 2022-02-15 04:14:03,430 - speechbrain.utils.epoch_loop - INFO - Going into epoch 4
449
+ 2022-02-15 04:53:03,184 - speechbrain.nnet.schedulers - INFO - Changing lr from 0.0003 to 0.00024
450
+ 2022-02-15 04:53:03,184 - speechbrain.utils.train_logger - INFO - epoch: 4, lr: 3.00e-04 - train loss: 7.08e-01 - valid loss: 7.07e-01, valid CER: 3.17e-01, valid WER: 7.26e-01
451
+ 2022-02-15 04:53:03,440 - speechbrain.utils.checkpoints - INFO - Saved an end-of-epoch checkpoint in results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00
452
+ 2022-02-15 04:53:03,473 - speechbrain.utils.checkpoints - INFO - Loading a checkpoint from results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00
453
+ 2022-02-15 04:53:03,520 - root - DEBUG - SaveableDataLoader was requested to load a checkpoint, but the DataLoader has already been iterated. The DataLoader file will be ignored. This is normal in evaluation, when a checkpoint is loaded just to retrieve the best model.
454
+ 2022-02-15 04:56:32,698 - speechbrain.utils.train_logger - INFO - Epoch loaded: 4 - test loss: 7.05e-01, test CER: 2.37e-02, test WER: 6.91e-02
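The learning-rate change logged at epoch 4 (`Changing lr from 0.0003 to 0.00024`) is the `NewBobScheduler` configured in `train.yaml` firing: the validation error stopped improving (valid CER rose from 2.30e-01 to 3.17e-01), so the rate was scaled by `annealing_factor`. A one-line check of the arithmetic:

```python
lr, annealing_factor = 3e-4, 0.8
print(lr * annealing_factor)  # 0.00024, matching the scheduler log line above
```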
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
1
+ # yamllint disable
2
+ WER: 0.5789473684210527
3
+ end-of-epoch: true
4
+ unixtime: 1644898443.140418
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/brain.ckpt ADDED
@@ -0,0 +1,2 @@
1
+ avg_train_loss: 0.0
2
+ step: 0
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/counter.ckpt ADDED
@@ -0,0 +1 @@
1
+ 3
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1 @@
1
+ 2892
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/model.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66f0875b693545510c3de685a7b12825be269f311f47dff8e4dc09167aae8943
3
+ size 37181975
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6a1d3d05b4a82113bfece0e7dfb5a3fe11e38cd02fcc01c9e2a367cd1b0f444
3
+ size 74367087
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-14-03+00/scheduler.ckpt ADDED
Binary file (495 Bytes).
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/CKPT.yaml ADDED
@@ -0,0 +1,4 @@
1
+ # yamllint disable
2
+ WER: 0.7263157894736842
3
+ end-of-epoch: true
4
+ unixtime: 1644900783.1849935
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/brain.ckpt ADDED
@@ -0,0 +1,2 @@
1
+ avg_train_loss: 0.0
2
+ step: 0
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/counter.ckpt ADDED
@@ -0,0 +1 @@
1
+ 4
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/dataloader-TRAIN.ckpt ADDED
@@ -0,0 +1 @@
1
+ 2892
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/model.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:91b0d3ed61e45b003d62e451327473a9cfc56f64c15368ea8caafd09f3f1b948
3
+ size 37181975
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/optimizer.ckpt ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce37a00b1a444e7e8b1076ec10f430e63b8c12c77406cb3aa88353b95ab1a17f
3
+ size 74367087
fluent-speech-commands/direct/results/BPE51/112011/save/CKPT+2022-02-15+04-53-03+00/scheduler.ckpt ADDED
Binary file (495 Bytes).
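As saved by save_and_keep_only(meta={"WER": ...}, min_keys=["WER"]) in train.py below, each CKPT+<timestamp> folder carries a small CKPT.yaml recording the validation WER it was saved at (0.579 for the first checkpoint above, 0.726 for this one). SpeechBrain's Checkpointer uses this metadata to recover the best model; a minimal sketch of the same lookup done by hand (the best_checkpoint helper is hypothetical):

import os
import yaml

def best_checkpoint(save_dir):
    """Return (name, WER) of the CKPT+* folder whose CKPT.yaml has the lowest WER."""
    best = None
    for name in os.listdir(save_dir):
        meta_path = os.path.join(save_dir, name, "CKPT.yaml")
        if name.startswith("CKPT+") and os.path.exists(meta_path):
            with open(meta_path) as f:
                meta = yaml.safe_load(f)
            if best is None or meta["WER"] < best[1]:
                best = (name, meta["WER"])
    return best

print(best_checkpoint("results/BPE51/112011/save"))
# -> ('CKPT+2022-02-15+04-14-03+00', 0.5789473684210527)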
fluent-speech-commands/direct/results/BPE51/112011/save/FSC_tokenizer/tokenizer.ckpt ADDED
Binary file (238 kB).
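train.py below calls tokenizer.encode_as_ids and tokenizer.decode_ids, which are SentencePiece methods, so this tokenizer.ckpt is the 51-unit BPE SentencePiece model produced by the Tokenizer recipe. A minimal sketch of loading it directly, assuming a standard SentencePiece model file:

import sentencepiece as spm

sp = spm.SentencePieceProcessor()
sp.load("results/BPE51/112011/save/FSC_tokenizer/tokenizer.ckpt")
# The semantics string follows the format built in prepare.py (shown below).
ids = sp.encode_as_ids('{"action:" "activate"| "object": "lights"| "location": "none"}')
print(sp.decode_ids(ids))  # should round-trip the semantics string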
fluent-speech-commands/direct/results/BPE51/112011/test.csv ADDED
The diff for this file is too large to render.
fluent-speech-commands/direct/results/BPE51/112011/train.csv ADDED
The diff for this file is too large to render.
fluent-speech-commands/direct/results/BPE51/112011/train.py ADDED
@@ -0,0 +1,347 @@
+ #!/usr/bin/env python3
+ """
+ Recipe for "direct" (speech -> semantics) SLU with ASR-based transfer learning.
+
+ We encode input waveforms into features using a model trained on LibriSpeech,
+ then feed the features into a seq2seq model to map them to semantics.
+
+ (Adapted from the LibriSpeech seq2seq ASR recipe written by Ju-Chieh Chou, Mirco Ravanelli, Abdel Heba, and Peter Plantinga.)
+
+ Run using:
+ > python train.py hparams/train.yaml
+
+ Authors
+  * Loren Lugosch 2020
+  * Mirco Ravanelli 2020
+ """
+
+ import sys
+ import torch
+ import speechbrain as sb
+ import logging
+ from hyperpyyaml import load_hyperpyyaml
+ from speechbrain.utils.distributed import run_on_main
+
+ logger = logging.getLogger(__name__)
+
+ # Define training procedure
+
+
+ class SLU(sb.Brain):
+     def compute_forward(self, batch, stage):
+         """Forward computations from the waveform batches to the output probabilities."""
+         batch = batch.to(self.device)
+         wavs, wav_lens = batch.sig
+         tokens_bos, tokens_bos_lens = batch.tokens_bos
+
+         # Add augmentation if specified
+         if stage == sb.Stage.TRAIN:
+             # Applying the augmentation pipeline
+             wavs_aug_tot = []
+             wavs_aug_tot.append(wavs)
+             for count, augment in enumerate(self.hparams.augment_pipeline):
+
+                 # Apply augment
+                 wavs_aug = augment(wavs, wav_lens)
+
+                 # Managing speed change
+                 if wavs_aug.shape[1] > wavs.shape[1]:
+                     wavs_aug = wavs_aug[:, 0 : wavs.shape[1]]
+                 else:
+                     zero_sig = torch.zeros_like(wavs)
+                     zero_sig[:, 0 : wavs_aug.shape[1]] = wavs_aug
+                     wavs_aug = zero_sig
+
+                 wavs_aug_tot.append(wavs_aug)
+
+             wavs = torch.cat(wavs_aug_tot, dim=0)
+             self.n_augment = len(wavs_aug_tot)
+             wav_lens = torch.cat([wav_lens] * self.n_augment)
+             tokens_bos = torch.cat([tokens_bos] * self.n_augment)
+
+         # ASR encoder forward pass
+         with torch.no_grad():
+             ASR_encoder_out = self.hparams.asr_model.encode_batch(
+                 wavs.detach(), wav_lens
+             )
+
+         # SLU forward pass
+         encoder_out = self.hparams.slu_enc(ASR_encoder_out)
+         e_in = self.hparams.output_emb(tokens_bos)
+         h, _ = self.hparams.dec(e_in, encoder_out, wav_lens)
+
+         # Output layer for seq2seq log-probabilities
+         logits = self.hparams.seq_lin(h)
+         p_seq = self.hparams.log_softmax(logits)
+
+         # Compute outputs
+         if (
+             stage == sb.Stage.TRAIN
+             and self.batch_count % show_results_every != 0
+         ):
+             return p_seq, wav_lens
+         else:
+             p_tokens, scores = self.hparams.beam_searcher(encoder_out, wav_lens)
+             return p_seq, wav_lens, p_tokens
+
+     def compute_objectives(self, predictions, batch, stage):
+         """Computes the loss (NLL) given predictions and targets."""
+
+         if (
+             stage == sb.Stage.TRAIN
+             and self.batch_count % show_results_every != 0
+         ):
+             p_seq, wav_lens = predictions
+         else:
+             p_seq, wav_lens, predicted_tokens = predictions
+
+         ids = batch.id
+         tokens_eos, tokens_eos_lens = batch.tokens_eos
+         tokens, tokens_lens = batch.tokens
+
+         if hasattr(self.hparams, "env_corrupt") and stage == sb.Stage.TRAIN:
+             tokens_eos = torch.cat([tokens_eos, tokens_eos], dim=0)
+             tokens_eos_lens = torch.cat(
+                 [tokens_eos_lens, tokens_eos_lens], dim=0
+             )
+
+         if stage == sb.Stage.TRAIN:
+             tokens_eos = torch.cat([tokens_eos] * self.n_augment, dim=0)
+             tokens_eos_lens = torch.cat(
+                 [tokens_eos_lens] * self.n_augment, dim=0
+             )
+
+         loss_seq = self.hparams.seq_cost(
+             p_seq, tokens_eos, length=tokens_eos_lens
+         )
+
+         # (No ctc loss)
+         loss = loss_seq
+
+         if (stage != sb.Stage.TRAIN) or (
+             self.batch_count % show_results_every == 0
+         ):
+             # Decode token terms to words
+             predicted_semantics = [
+                 tokenizer.decode_ids(utt_seq).split(" ")
+                 for utt_seq in predicted_tokens
+             ]
+
+             target_semantics = [wrd.split(" ") for wrd in batch.semantics]
+
+             for i in range(len(target_semantics)):
+                 print(" ".join(predicted_semantics[i]).replace("|", ","))
+                 print(" ".join(target_semantics[i]).replace("|", ","))
+                 print("")
+
+             if stage != sb.Stage.TRAIN:
+                 self.wer_metric.append(
+                     ids, predicted_semantics, target_semantics
+                 )
+                 self.cer_metric.append(
+                     ids, predicted_semantics, target_semantics
+                 )
+
+         return loss
+
+     def fit_batch(self, batch):
+         """Train the parameters given a single batch in input"""
+         predictions = self.compute_forward(batch, sb.Stage.TRAIN)
+         loss = self.compute_objectives(predictions, batch, sb.Stage.TRAIN)
+         loss.backward()
+         if self.check_gradients(loss):
+             self.optimizer.step()
+         self.optimizer.zero_grad()
+         self.batch_count += 1
+         return loss.detach()
+
+     def evaluate_batch(self, batch, stage):
+         """Computations needed for validation/test batches"""
+         predictions = self.compute_forward(batch, stage=stage)
+         loss = self.compute_objectives(predictions, batch, stage=stage)
+         return loss.detach()
+
+     def on_stage_start(self, stage, epoch):
+         """Gets called at the beginning of each epoch"""
+         self.batch_count = 0
+
+         if stage != sb.Stage.TRAIN:
+
+             self.cer_metric = self.hparams.cer_computer()
+             self.wer_metric = self.hparams.error_rate_computer()
+
+     def on_stage_end(self, stage, stage_loss, epoch):
+         """Gets called at the end of an epoch."""
+         # Compute/store important stats
+         stage_stats = {"loss": stage_loss}
+         if stage == sb.Stage.TRAIN:
+             self.train_stats = stage_stats
+         else:
+             stage_stats["CER"] = self.cer_metric.summarize("error_rate")
+             stage_stats["WER"] = self.wer_metric.summarize("error_rate")
+
+         # Perform end-of-iteration things, like annealing, logging, etc.
+         if stage == sb.Stage.VALID:
+             old_lr, new_lr = self.hparams.lr_annealing(stage_stats["WER"])
+             sb.nnet.schedulers.update_learning_rate(self.optimizer, new_lr)
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"epoch": epoch, "lr": old_lr},
+                 train_stats=self.train_stats,
+                 valid_stats=stage_stats,
+             )
+             self.checkpointer.save_and_keep_only(
+                 meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
+             )
+         elif stage == sb.Stage.TEST:
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
+                 test_stats=stage_stats,
+             )
+             with open(self.hparams.wer_file, "w") as w:
+                 self.wer_metric.write_stats(w)
+
+
+ def dataio_prepare(hparams):
+     """This function prepares the datasets to be used in the brain class.
+     It also defines the data processing pipeline through user-defined functions."""
+
+     data_folder = hparams["data_folder"]
+
+     train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_train"], replacements={"data_root": data_folder},
+     )
+
+     if hparams["sorting"] == "ascending":
+         # we sort training data to speed up training and get better results.
+         train_data = train_data.filtered_sorted(sort_key="duration")
+         # when sorting do not shuffle in dataloader ! otherwise is pointless
+         hparams["dataloader_opts"]["shuffle"] = False
+
+     elif hparams["sorting"] == "descending":
+         train_data = train_data.filtered_sorted(
+             sort_key="duration", reverse=True
+         )
+         # when sorting do not shuffle in dataloader ! otherwise is pointless
+         hparams["dataloader_opts"]["shuffle"] = False
+
+     elif hparams["sorting"] == "random":
+         pass
+
+     else:
+         raise NotImplementedError(
+             "sorting must be random, ascending or descending"
+         )
+
+     valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_valid"], replacements={"data_root": data_folder},
+     )
+     valid_data = valid_data.filtered_sorted(sort_key="duration")
+
+     test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_test"], replacements={"data_root": data_folder},
+     )
+     test_data = test_data.filtered_sorted(sort_key="duration")
+
+     datasets = [train_data, valid_data, test_data]
+
+     tokenizer = hparams["tokenizer"]
+
+     # 2. Define audio pipeline:
+     @sb.utils.data_pipeline.takes("wav")
+     @sb.utils.data_pipeline.provides("sig")
+     def audio_pipeline(wav):
+         sig = sb.dataio.dataio.read_audio(wav)
+         return sig
+
+     sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
+
+     # 3. Define text pipeline:
+     @sb.utils.data_pipeline.takes("semantics")
+     @sb.utils.data_pipeline.provides(
+         "semantics", "token_list", "tokens_bos", "tokens_eos", "tokens"
+     )
+     def text_pipeline(semantics):
+         yield semantics
+         tokens_list = tokenizer.encode_as_ids(semantics)
+         yield tokens_list
+         tokens_bos = torch.LongTensor([hparams["bos_index"]] + (tokens_list))
+         yield tokens_bos
+         tokens_eos = torch.LongTensor(tokens_list + [hparams["eos_index"]])
+         yield tokens_eos
+         tokens = torch.LongTensor(tokens_list)
+         yield tokens
+
+     sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
+
+     # 4. Set output:
+     sb.dataio.dataset.set_output_keys(
+         datasets,
+         ["id", "sig", "semantics", "tokens_bos", "tokens_eos", "tokens"],
+     )
+     return train_data, valid_data, test_data, tokenizer
+
+
+ if __name__ == "__main__":
+
+     # Load hyperparameters file with command-line overrides
+     hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
+     with open(hparams_file) as fin:
+         hparams = load_hyperpyyaml(fin, overrides)
+
+     show_results_every = 100  # plots results every N iterations
+
+     # If distributed_launch=True then
+     # create ddp_group with the right communication protocol
+     sb.utils.distributed.ddp_init_group(run_opts)
+
+     # Create experiment directory
+     sb.create_experiment_directory(
+         experiment_directory=hparams["output_folder"],
+         hyperparams_to_save=hparams_file,
+         overrides=overrides,
+     )
+
+     # Dataset prep
+     from prepare import prepare_FSC  # noqa
+
+     # multi-gpu (ddp) save data preparation
+     run_on_main(
+         prepare_FSC,
+         kwargs={
+             "data_folder": hparams["data_folder"],
+             "save_folder": hparams["output_folder"],
+             "skip_prep": hparams["skip_prep"],
+         },
+     )
+
+     # here we create the datasets objects as well as tokenization and encoding
+     (train_set, valid_set, test_set, tokenizer,) = dataio_prepare(hparams)
+
+     # We download and pretrain the tokenizer
+     run_on_main(hparams["pretrainer"].collect_files)
+     hparams["pretrainer"].load_collected(device=run_opts["device"])
+
+     # Brain class initialization
+     slu_brain = SLU(
+         modules=hparams["modules"],
+         opt_class=hparams["opt_class"],
+         hparams=hparams,
+         run_opts=run_opts,
+         checkpointer=hparams["checkpointer"],
+     )
+
+     # adding objects to trainer:
+     slu_brain.tokenizer = tokenizer
+
+     # Training
+     slu_brain.fit(
+         slu_brain.hparams.epoch_counter,
+         train_set,
+         valid_set,
+         train_loader_kwargs=hparams["dataloader_opts"],
+         valid_loader_kwargs=hparams["dataloader_opts"],
+     )
+
+     # Test
+     slu_brain.hparams.wer_file = hparams["output_folder"] + "/wer_test.txt"
+     slu_brain.evaluate(test_set, test_loader_kwargs=hparams["dataloader_opts"])
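One detail of compute_forward above worth spelling out: each augmenter's output is concatenated to the clean batch along the batch dimension, so the effective batch grows by a factor of n_augment, and wav_lens and the labels must be tiled to match. A minimal sketch with made-up shapes:

import torch

wavs = torch.randn(4, 16000)      # 4 one-second utterances
wavs_aug = torch.randn(4, 16000)  # output of one augmenter
wavs_all = torch.cat([wavs, wavs_aug], dim=0)  # batch of 8
n_augment = 2
labels = torch.arange(4)
labels_all = torch.cat([labels] * n_augment)   # tiled: tensor([0, 1, 2, 3, 0, 1, 2, 3])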
fluent-speech-commands/direct/results/BPE51/112011/train_log.txt ADDED
@@ -0,0 +1,5 @@
+ epoch: 1, lr: 3.00e-04 - train loss: 7.70e-01 - valid loss: 7.10e-01, valid CER: 3.82e-01, valid WER: 1.01
+ epoch: 2, lr: 3.00e-04 - train loss: 7.10e-01 - valid loss: 7.08e-01, valid CER: 3.45e-01, valid WER: 8.11e-01
+ epoch: 3, lr: 3.00e-04 - train loss: 7.08e-01 - valid loss: 7.07e-01, valid CER: 2.30e-01, valid WER: 5.79e-01
+ epoch: 4, lr: 3.00e-04 - train loss: 7.08e-01 - valid loss: 7.07e-01, valid CER: 3.17e-01, valid WER: 7.26e-01
+ Epoch loaded: 4 - test loss: 7.05e-01, test CER: 2.37e-02, test WER: 6.91e-02
fluent-speech-commands/direct/results/BPE51/112011/valid.csv ADDED
The diff for this file is too large to render.
fluent-speech-commands/direct/results/BPE51/112011/wer_test.txt ADDED
The diff for this file is too large to render.
fluent-speech-commands/direct/train.py ADDED
@@ -0,0 +1,347 @@
+ #!/usr/bin/env python3
+ """
+ Recipe for "direct" (speech -> semantics) SLU with ASR-based transfer learning.
+
+ We encode input waveforms into features using a model trained on LibriSpeech,
+ then feed the features into a seq2seq model to map them to semantics.
+
+ (Adapted from the LibriSpeech seq2seq ASR recipe written by Ju-Chieh Chou, Mirco Ravanelli, Abdel Heba, and Peter Plantinga.)
+
+ Run using:
+ > python train.py hparams/train.yaml
+
+ Authors
+  * Loren Lugosch 2020
+  * Mirco Ravanelli 2020
+ """
+
+ import sys
+ import torch
+ import speechbrain as sb
+ import logging
+ from hyperpyyaml import load_hyperpyyaml
+ from speechbrain.utils.distributed import run_on_main
+
+ logger = logging.getLogger(__name__)
+
+ # Define training procedure
+
+
+ class SLU(sb.Brain):
+     def compute_forward(self, batch, stage):
+         """Forward computations from the waveform batches to the output probabilities."""
+         batch = batch.to(self.device)
+         wavs, wav_lens = batch.sig
+         tokens_bos, tokens_bos_lens = batch.tokens_bos
+
+         # Add augmentation if specified
+         if stage == sb.Stage.TRAIN:
+             # Applying the augmentation pipeline
+             wavs_aug_tot = []
+             wavs_aug_tot.append(wavs)
+             for count, augment in enumerate(self.hparams.augment_pipeline):
+
+                 # Apply augment
+                 wavs_aug = augment(wavs, wav_lens)
+
+                 # Managing speed change
+                 if wavs_aug.shape[1] > wavs.shape[1]:
+                     wavs_aug = wavs_aug[:, 0 : wavs.shape[1]]
+                 else:
+                     zero_sig = torch.zeros_like(wavs)
+                     zero_sig[:, 0 : wavs_aug.shape[1]] = wavs_aug
+                     wavs_aug = zero_sig
+
+                 wavs_aug_tot.append(wavs_aug)
+
+             wavs = torch.cat(wavs_aug_tot, dim=0)
+             self.n_augment = len(wavs_aug_tot)
+             wav_lens = torch.cat([wav_lens] * self.n_augment)
+             tokens_bos = torch.cat([tokens_bos] * self.n_augment)
+
+         # ASR encoder forward pass
+         with torch.no_grad():
+             ASR_encoder_out = self.hparams.asr_model.encode_batch(
+                 wavs.detach(), wav_lens
+             )
+
+         # SLU forward pass
+         encoder_out = self.hparams.slu_enc(ASR_encoder_out)
+         e_in = self.hparams.output_emb(tokens_bos)
+         h, _ = self.hparams.dec(e_in, encoder_out, wav_lens)
+
+         # Output layer for seq2seq log-probabilities
+         logits = self.hparams.seq_lin(h)
+         p_seq = self.hparams.log_softmax(logits)
+
+         # Compute outputs
+         if (
+             stage == sb.Stage.TRAIN
+             and self.batch_count % show_results_every != 0
+         ):
+             return p_seq, wav_lens
+         else:
+             p_tokens, scores = self.hparams.beam_searcher(encoder_out, wav_lens)
+             return p_seq, wav_lens, p_tokens
+
+     def compute_objectives(self, predictions, batch, stage):
+         """Computes the loss (NLL) given predictions and targets."""
+
+         if (
+             stage == sb.Stage.TRAIN
+             and self.batch_count % show_results_every != 0
+         ):
+             p_seq, wav_lens = predictions
+         else:
+             p_seq, wav_lens, predicted_tokens = predictions
+
+         ids = batch.id
+         tokens_eos, tokens_eos_lens = batch.tokens_eos
+         tokens, tokens_lens = batch.tokens
+
+         if hasattr(self.hparams, "env_corrupt") and stage == sb.Stage.TRAIN:
+             tokens_eos = torch.cat([tokens_eos, tokens_eos], dim=0)
+             tokens_eos_lens = torch.cat(
+                 [tokens_eos_lens, tokens_eos_lens], dim=0
+             )
+
+         if stage == sb.Stage.TRAIN:
+             tokens_eos = torch.cat([tokens_eos] * self.n_augment, dim=0)
+             tokens_eos_lens = torch.cat(
+                 [tokens_eos_lens] * self.n_augment, dim=0
+             )
+
+         loss_seq = self.hparams.seq_cost(
+             p_seq, tokens_eos, length=tokens_eos_lens
+         )
+
+         # (No ctc loss)
+         loss = loss_seq
+
+         if (stage != sb.Stage.TRAIN) or (
+             self.batch_count % show_results_every == 0
+         ):
+             # Decode token terms to words
+             predicted_semantics = [
+                 tokenizer.decode_ids(utt_seq).split(" ")
+                 for utt_seq in predicted_tokens
+             ]
+
+             target_semantics = [wrd.split(" ") for wrd in batch.semantics]
+
+             for i in range(len(target_semantics)):
+                 print(" ".join(predicted_semantics[i]).replace("|", ","))
+                 print(" ".join(target_semantics[i]).replace("|", ","))
+                 print("")
+
+             if stage != sb.Stage.TRAIN:
+                 self.wer_metric.append(
+                     ids, predicted_semantics, target_semantics
+                 )
+                 self.cer_metric.append(
+                     ids, predicted_semantics, target_semantics
+                 )
+
+         return loss
+
+     def fit_batch(self, batch):
+         """Train the parameters given a single batch in input"""
+         predictions = self.compute_forward(batch, sb.Stage.TRAIN)
+         loss = self.compute_objectives(predictions, batch, sb.Stage.TRAIN)
+         loss.backward()
+         if self.check_gradients(loss):
+             self.optimizer.step()
+         self.optimizer.zero_grad()
+         self.batch_count += 1
+         return loss.detach()
+
+     def evaluate_batch(self, batch, stage):
+         """Computations needed for validation/test batches"""
+         predictions = self.compute_forward(batch, stage=stage)
+         loss = self.compute_objectives(predictions, batch, stage=stage)
+         return loss.detach()
+
+     def on_stage_start(self, stage, epoch):
+         """Gets called at the beginning of each epoch"""
+         self.batch_count = 0
+
+         if stage != sb.Stage.TRAIN:
+
+             self.cer_metric = self.hparams.cer_computer()
+             self.wer_metric = self.hparams.error_rate_computer()
+
+     def on_stage_end(self, stage, stage_loss, epoch):
+         """Gets called at the end of an epoch."""
+         # Compute/store important stats
+         stage_stats = {"loss": stage_loss}
+         if stage == sb.Stage.TRAIN:
+             self.train_stats = stage_stats
+         else:
+             stage_stats["CER"] = self.cer_metric.summarize("error_rate")
+             stage_stats["WER"] = self.wer_metric.summarize("error_rate")
+
+         # Perform end-of-iteration things, like annealing, logging, etc.
+         if stage == sb.Stage.VALID:
+             old_lr, new_lr = self.hparams.lr_annealing(stage_stats["WER"])
+             sb.nnet.schedulers.update_learning_rate(self.optimizer, new_lr)
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"epoch": epoch, "lr": old_lr},
+                 train_stats=self.train_stats,
+                 valid_stats=stage_stats,
+             )
+             self.checkpointer.save_and_keep_only(
+                 meta={"WER": stage_stats["WER"]}, min_keys=["WER"],
+             )
+         elif stage == sb.Stage.TEST:
+             self.hparams.train_logger.log_stats(
+                 stats_meta={"Epoch loaded": self.hparams.epoch_counter.current},
+                 test_stats=stage_stats,
+             )
+             with open(self.hparams.wer_file, "w") as w:
+                 self.wer_metric.write_stats(w)
+
+
+ def dataio_prepare(hparams):
+     """This function prepares the datasets to be used in the brain class.
+     It also defines the data processing pipeline through user-defined functions."""
+
+     data_folder = hparams["data_folder"]
+
+     train_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_train"], replacements={"data_root": data_folder},
+     )
+
+     if hparams["sorting"] == "ascending":
+         # we sort training data to speed up training and get better results.
+         train_data = train_data.filtered_sorted(sort_key="duration")
+         # when sorting do not shuffle in dataloader ! otherwise is pointless
+         hparams["dataloader_opts"]["shuffle"] = False
+
+     elif hparams["sorting"] == "descending":
+         train_data = train_data.filtered_sorted(
+             sort_key="duration", reverse=True
+         )
+         # when sorting do not shuffle in dataloader ! otherwise is pointless
+         hparams["dataloader_opts"]["shuffle"] = False
+
+     elif hparams["sorting"] == "random":
+         pass
+
+     else:
+         raise NotImplementedError(
+             "sorting must be random, ascending or descending"
+         )
+
+     valid_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_valid"], replacements={"data_root": data_folder},
+     )
+     valid_data = valid_data.filtered_sorted(sort_key="duration")
+
+     test_data = sb.dataio.dataset.DynamicItemDataset.from_csv(
+         csv_path=hparams["csv_test"], replacements={"data_root": data_folder},
+     )
+     test_data = test_data.filtered_sorted(sort_key="duration")
+
+     datasets = [train_data, valid_data, test_data]
+
+     tokenizer = hparams["tokenizer"]
+
+     # 2. Define audio pipeline:
+     @sb.utils.data_pipeline.takes("wav")
+     @sb.utils.data_pipeline.provides("sig")
+     def audio_pipeline(wav):
+         sig = sb.dataio.dataio.read_audio(wav)
+         return sig
+
+     sb.dataio.dataset.add_dynamic_item(datasets, audio_pipeline)
+
+     # 3. Define text pipeline:
+     @sb.utils.data_pipeline.takes("semantics")
+     @sb.utils.data_pipeline.provides(
+         "semantics", "token_list", "tokens_bos", "tokens_eos", "tokens"
+     )
+     def text_pipeline(semantics):
+         yield semantics
+         tokens_list = tokenizer.encode_as_ids(semantics)
+         yield tokens_list
+         tokens_bos = torch.LongTensor([hparams["bos_index"]] + (tokens_list))
+         yield tokens_bos
+         tokens_eos = torch.LongTensor(tokens_list + [hparams["eos_index"]])
+         yield tokens_eos
+         tokens = torch.LongTensor(tokens_list)
+         yield tokens
+
+     sb.dataio.dataset.add_dynamic_item(datasets, text_pipeline)
+
+     # 4. Set output:
+     sb.dataio.dataset.set_output_keys(
+         datasets,
+         ["id", "sig", "semantics", "tokens_bos", "tokens_eos", "tokens"],
+     )
+     return train_data, valid_data, test_data, tokenizer
+
+
+ if __name__ == "__main__":
+
+     # Load hyperparameters file with command-line overrides
+     hparams_file, run_opts, overrides = sb.parse_arguments(sys.argv[1:])
+     with open(hparams_file) as fin:
+         hparams = load_hyperpyyaml(fin, overrides)
+
+     show_results_every = 100  # plots results every N iterations
+
+     # If distributed_launch=True then
+     # create ddp_group with the right communication protocol
+     sb.utils.distributed.ddp_init_group(run_opts)
+
+     # Create experiment directory
+     sb.create_experiment_directory(
+         experiment_directory=hparams["output_folder"],
+         hyperparams_to_save=hparams_file,
+         overrides=overrides,
+     )
+
+     # Dataset prep
+     from prepare import prepare_FSC  # noqa
+
+     # multi-gpu (ddp) save data preparation
+     run_on_main(
+         prepare_FSC,
+         kwargs={
+             "data_folder": hparams["data_folder"],
+             "save_folder": hparams["output_folder"],
+             "skip_prep": hparams["skip_prep"],
+         },
+     )
+
+     # here we create the datasets objects as well as tokenization and encoding
+     (train_set, valid_set, test_set, tokenizer,) = dataio_prepare(hparams)
+
+     # We download and pretrain the tokenizer
+     run_on_main(hparams["pretrainer"].collect_files)
+     hparams["pretrainer"].load_collected(device=run_opts["device"])
+
+     # Brain class initialization
+     slu_brain = SLU(
+         modules=hparams["modules"],
+         opt_class=hparams["opt_class"],
+         hparams=hparams,
+         run_opts=run_opts,
+         checkpointer=hparams["checkpointer"],
+     )
+
+     # adding objects to trainer:
+     slu_brain.tokenizer = tokenizer
+
+     # Training
+     slu_brain.fit(
+         slu_brain.hparams.epoch_counter,
+         train_set,
+         valid_set,
+         train_loader_kwargs=hparams["dataloader_opts"],
+         valid_loader_kwargs=hparams["dataloader_opts"],
+     )
+
+     # Test
+     slu_brain.hparams.wer_file = hparams["output_folder"] + "/wer_test.txt"
+     slu_brain.evaluate(test_set, test_loader_kwargs=hparams["dataloader_opts"])
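For inference with the uploaded model, SpeechBrain's pretrained EndToEndSLU interface wraps the same ASR encoder plus SLU decoder pipeline. A minimal sketch; the source id and wav path below are illustrative:

from speechbrain.pretrained import EndToEndSLU

slu = EndToEndSLU.from_hparams(
    source="speechbrain/slu-direct-fluent-speech-commands-librispeech-asr",  # illustrative id
    savedir="pretrained_models/slu-direct-fsc",
)
print(slu.decode_file("example_fsc_utterance.wav"))
# e.g. {"action:" "activate"| "object": "lights"| "location": "none"}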
fluent-speech-commands/extra_requirements.txt ADDED
@@ -0,0 +1 @@
+ pandas
fluent-speech-commands/prepare.py ADDED
@@ -0,0 +1,103 @@
+ import os
+ import logging
+ from speechbrain.dataio.dataio import read_audio
+
+ try:
+     import pandas as pd
+ except ImportError:
+     err_msg = (
+         "The optional dependency pandas must be installed to run this recipe.\n"
+     )
+     err_msg += "Install using `pip install pandas`.\n"
+     raise ImportError(err_msg)
+
+ logger = logging.getLogger(__name__)
+
+
+ def prepare_FSC(data_folder, save_folder, skip_prep=False):
+     """
+     This function prepares the Fluent Speech Commands dataset.
+
+     data_folder : path to dataset.
+     save_folder: folder where the manifest files will be stored.
+     skip_prep: If True, skip data preparation
+
+     """
+     if skip_prep:
+         return
+
+     splits = [
+         "train",
+         "valid",
+         "test",
+     ]
+     ID_start = 0  # needed to have a unique ID for each audio
+     for split in splits:
+         new_filename = os.path.join(save_folder, split) + ".csv"
+         if os.path.exists(new_filename):
+             continue
+         logger.info("Preparing %s..." % new_filename)
+
+         ID = []
+         duration = []
+
+         wav = []
+         wav_format = []
+         wav_opts = []
+
+         spk_id = []
+         spk_id_format = []
+         spk_id_opts = []
+
+         semantics = []
+         semantics_format = []
+         semantics_opts = []
+
+         transcript = []
+         transcript_format = []
+         transcript_opts = []
+
+         df = pd.read_csv(os.path.join(data_folder, "data", split) + "_data.csv")
+         for i in range(len(df)):
+             ID.append(ID_start + i)
+             signal = read_audio(os.path.join(data_folder, df.path[i]))
+             duration.append(signal.shape[0] / 16000)
+
+             wav.append(os.path.join(data_folder, df.path[i]))
+             wav_format.append("wav")
+             wav_opts.append(None)
+
+             spk_id.append(df.speakerId[i])
+             spk_id_format.append("string")
+             spk_id_opts.append(None)
+
+             transcript_ = df.transcription[i]
+             transcript.append(transcript_)
+             transcript_format.append("string")
+             transcript_opts.append(None)
+
+             semantics_ = (
+                 '{"action:" "'
+                 + df.action[i]
+                 + '"| "object": "'
+                 + df.object[i]
+                 + '"| "location": "'
+                 + df.location[i]
+                 + '"}'
+             )
+             semantics.append(semantics_)
+             semantics_format.append("string")
+             semantics_opts.append(None)
+
+         new_df = pd.DataFrame(
+             {
+                 "ID": ID,
+                 "duration": duration,
+                 "wav": wav,
+                 "spk_id": spk_id,
+                 "semantics": semantics,
+                 "transcript": transcript,
+             }
+         )
+         new_df.to_csv(new_filename, index=False)
+         ID_start += len(df)
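prepare_FSC only needs the dataset root and an output folder; each manifest row then carries the flattened semantics string built above (train.py swaps the "|" separators for commas when printing). A minimal usage sketch with illustrative paths:

from prepare import prepare_FSC

prepare_FSC(
    data_folder="/data/fluent_speech_commands_dataset",  # illustrative path
    save_folder="results/BPE51/112011",
)
# For action="activate", object="lights", location="none", the semantics
# column holds: {"action:" "activate"| "object": "lights"| "location": "none"}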
pretrained_models/EncoderDecoderASR--5348169877143464308/asr.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/83e944252a91fe1d0883daa1e87077df4d64c35fffb45e22fff924faace4a59c.7fdf4aabd8400c69a6228ccc17c83b7a8ebf34c5d76f23497b7cf0d7a1baaea3
pretrained_models/EncoderDecoderASR--5348169877143464308/hyperparams.yaml ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/7aac72d39109ee19b4004d94239c2924caf33de6d85b0aff9296d844982210cb.d14310ea63844fb38520a592ea3a92e4f131b5f4683f8fa08e27b1e403c92293
pretrained_models/EncoderDecoderASR--5348169877143464308/lm.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/651df066b5d0b2efef7208f51df93d3a0a65bedc3a3a2500cd7b8faf064e631e.b438b9af3f549a23c4458bb066c11cd51dc1cfe9bfef30d3eb66b472e93b1e8c
pretrained_models/EncoderDecoderASR--5348169877143464308/normalizer.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/e733854cce680bcb58ce4b86bacb3cab5222880933b7b85ab17758aa5b10e9da.587fb748e80e719ed5721d5e0098c5feb2a901017135271ce2b2c6baea7e9f6e
pretrained_models/EncoderDecoderASR--5348169877143464308/tokenizer.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/f39208eba495042a59a8404b5703ca08a39a85e4d2bf707e197b90a3323f92ab.cd7af7ea8cfcfbf0f6dd61514c361972eb82b3b76f12b0e9ee0b371f36fdc078
pretrained_models/EndToEndSLU-7990244956535603082/hyperparams.yaml ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/a095f802a6283ecd636ffd0c0ec2d2dc335dcccfb395f5bc8d48fdb0ed34ca62.ca16cf2255d592246550b1dcfb9ac24800ec38cb8589cfd07e9db7558562037f
pretrained_models/EndToEndSLU-7990244956535603082/model.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/f01892eb014043257a527de1a0ebf610a17895a2b4c13d7e7e719c37231d08e5.d625fbcb8a2387e5d81fe6ff0d868125c7dcbc1b2245206ea152cccfb98a44fe
pretrained_models/EndToEndSLU-7990244956535603082/tokenizer.ckpt ADDED
@@ -0,0 +1 @@
+ /root/.cache/huggingface/hub/2e5567fd31be3518b2a174a53d89d98df57247924ea50e69bbdb39cc4f8a76e5.8d38059f23fb577abadc9e131f1b67dd9662567eb032fdb8837e33a90feb47d4