Raven-with-Voice-Cloning

Runtime error

Kevin676 committed on Apr 9, 2023

Commit

8afab49

•

1 Parent(s): 957670c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -20,7 +20,7 @@ pipeline = PIPELINE(model, "20B_tokenizer.json")
 from TTS.api import TTS
 tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
 import whisper
-model = whisper.load_model("small")
 os.system('pip install voicefixer --upgrade')
 from voicefixer import VoiceFixer
@@ -66,16 +66,16 @@ def evaluate(
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
-    # make log-Mel spectrogram and move to the same device as the model
-    mel = whisper.log_mel_spectrogram(audio).to(model.device)
     # detect the spoken language
-    _, probs = model.detect_language(mel)
     print(f"Detected language: {max(probs, key=probs.get)}")
     # decode the audio
     options = whisper.DecodingOptions()
-    result = whisper.decode(model, mel, options)
     res = []
@@ -122,9 +122,9 @@ def evaluate(
     res.append(out_str.strip())
-    res1 = ''.join(str(x) for x in res)
-    tts.tts_to_file(res1, speaker_wav = upload, language="en", file_path="output.wav")
     voicefixer.restore(input="output.wav", # input wav file path
                     output="audio1.wav", # output wav file path

 from TTS.api import TTS
 tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
 import whisper
+model1 = whisper.load_model("small")
 os.system('pip install voicefixer --upgrade')
 from voicefixer import VoiceFixer
     audio = whisper.load_audio(audio)
     audio = whisper.pad_or_trim(audio)
+    # make log-Mel spectrogram and move to the same device as the model1
+    mel = whisper.log_mel_spectrogram(audio).to(model1.device)
     # detect the spoken language
+    _, probs = model1.detect_language(mel)
     print(f"Detected language: {max(probs, key=probs.get)}")
     # decode the audio
     options = whisper.DecodingOptions()
+    result = whisper.decode(model1, mel, options)
     res = []
     res.append(out_str.strip())
+#    res1 = ''.join(str(x) for x in res)
+    tts.tts_to_file(res, speaker_wav = upload, language="en", file_path="output.wav")
     voicefixer.restore(input="output.wav", # input wav file path
                     output="audio1.wav", # output wav file path