Voice-Cloning-with-Voice-Fixer

Paused

Kevin676 committed on Apr 5, 2023

Commit

6d1d18c

1 Parent(s): f197956

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ os.system('git clone https://github.com/Edresson/Coqui-TTS -b multilingual-torch
 os.system('pip install -q -e TTS/')
 os.system('pip install -q torchaudio==0.9.0')
 import sys
 TTS_PATH = "TTS/"
@@ -25,6 +29,14 @@ from IPython.display import Audio
 import torch
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
@@ -154,7 +166,18 @@ def greet(Text,Voicetoclone,VoiceMicrophone):
       out_path = os.path.join(OUT_PATH, file_name)
       print(" > Saving output to {}".format(out_path))
       ap.save_wav(wav, out_path)
-      return out_path
 demo = gr.Interface(
     fn=greet,

 os.system('pip install -q -e TTS/')
 os.system('pip install -q torchaudio==0.9.0')
+os.system('pip install voicefixer --upgrade')
+from voicefixer import VoiceFixer
+voicefixer = VoiceFixer()
 import sys
 TTS_PATH = "TTS/"
 import torch
+import torchaudio
+from speechbrain.pretrained import SpectralMaskEnhancement
+enhance_model = SpectralMaskEnhancement.from_hparams(
+source="speechbrain/metricgan-plus-voicebank",
+savedir="pretrained_models/metricgan-plus-voicebank",
+run_opts={"device":"cuda"},
+)
 from TTS.tts.utils.synthesis import synthesis
 from TTS.tts.utils.text.symbols import make_symbols, phonemes, symbols
       out_path = os.path.join(OUT_PATH, file_name)
       print(" > Saving output to {}".format(out_path))
       ap.save_wav(wav, out_path)
+      voicefixer.restore(input=out_path, # input wav file path
+                      output="audio1.wav", # output wav file path
+                      cuda=True, # whether to use gpu acceleration
+                      mode = 0) # You can try out mode 0, 1, or 2 to find out the best result
+      noisy = enhance_model.load_audio(
+      "audio1.wav"
+      ).unsqueeze(0)
+      enhanced = enhance_model.enhance_batch(noisy, lengths=torch.tensor([1.]))
+      torchaudio.save("enhanced.wav", enhanced.cpu(), 16000)
+      return "enhanced.wav"
 demo = gr.Interface(
     fn=greet,