chaanks committed on
Commit
2a6795c
1 Parent(s): a8dfc04

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +51 -7
README.md CHANGED
@@ -34,13 +34,57 @@ Please notice that we encourage you to read our tutorials and learn more about
34
  ### Using the Vocoder
35
 
36
  ```python
37
- import torch
38
- from speechbrain.pretrained import UnitHIFIGAN
39
-
40
- hifi_gan_unit = UnitHIFIGAN.from_hparams(source="chaanks/hifigan-unit-hubert-l7-k128-ljspeech-libritts", savedir="tmpdir_vocoder")
41
- codes = torch.randint(0, 99, (100,))
42
- waveform = hifi_gan_unit.decode_unit(codes)
43
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  ```
45
 
46
 
 
34
  ### Using the Vocoder
35
 
36
  ```python
37
+ import torchaudio
38
+ from speechbrain.inference.encoders import MelSpectrogramEncoder
39
+ from speechbrain.inference.vocoders import UnitHIFIGAN
40
+ from speechbrain.lobes.models.huggingface_transformers.discrete_hubert import (
41
+ DiscreteHuBERT,
42
+ )
43
+
44
+ speaker_encoder_source = "speechbrain/spkrec-ecapa-voxceleb-mel-spec"
45
+ speech_encoder_source = "facebook/hubert-base-ls960"
46
+ kmeans_folder = "speechbrain/SSL_Quantization"
47
+ kmeans_filename = "LJSpeech_hubert_k128_L7.pt"
48
+ layer = 7
49
+ vocoder_source = "chaanks/hifigan-unit-hubert-l7-k128-ljspeech-libritts"
50
+ save_path = "tmpdir"
51
+ device = "cuda"
52
+ sample_rate = 16000
53
+
54
+ wav = "chaanks/hifigan-unit-hubert-l7-k128-ljspeech-libritts/test.wav"
55
+
56
+ speaker_encoder = MelSpectrogramEncoder.from_hparams(
57
+ source=speaker_encoder_source,
58
+ run_opts={"device": str(device)},
59
+ savedir=save_path + "/spk_encoder",
60
+ )
61
+
62
+ speech_encoder = DiscreteHuBERT(
63
+ source=speech_encoder_source,
64
+ save_path=save_path + "/speech_encoder",
65
+ kmeans_filename=kmeans_filename,
66
+ kmeans_cache_dir=save_path + "/kmeans",
67
+ kmeans_repo_id=kmeans_folder,
68
+ output_norm=False,
69
+ freeze=True,
70
+ freeze_feature_extractor=True,
71
+ apply_spec_augment=False,
72
+ output_all_hiddens=True,
73
+ ssl_layer_num=layer,
74
+ ).to(device)
75
+
76
+ vocoder = UnitHIFIGAN.from_hparams(
77
+ source=vocoder_source,
78
+ run_opts={"device": str(device)},
79
+ savedir=save_path + "/vocoder",
80
+ )
81
+
82
+ audio = speaker_encoder.load_audio(wav)
83
+ audio = audio.to(device)
84
+ spk = speaker_encoder.encode_waveform(audio)
85
+ _, codes = speech_encoder(audio.unsqueeze(0))
86
+ waveform = vocoder.decode_unit(codes.squeeze(0), spk=spk.reshape(-1))
87
+ torchaudio.save("test.wav", waveform.cpu(), sample_rate=sample_rate)
88
  ```
89
 
90