Podcastify

Running on Zero

mrfakename committed on Feb 26, 2024

Commit

406e977

1 Parent(s): 7a62809

Remove tempfile dependency

Files changed (2) hide show

app.py CHANGED Viewed

@@ -8,9 +8,9 @@ device = 'cuda' if torch.cuda.is_available() else 'cpu'
 model = TTS(language='EN', device=device)
 speaker_ids = model.hps.data.spk2id
 def synthesize(speaker, text, speed=1.0, progress=gr.Progress()):
-    with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as f:
-        model.tts_to_file(text, speaker_ids[speaker], f.name, speed=speed, pbar=progress.tqdm)
-        return f.name
 with gr.Blocks() as demo:
     gr.Markdown('# MeloTTS\n\nAn unofficial demo of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from MyShell AI. MeloTTS is a permissively licensed (MIT) SOTA multi-speaker TTS model.\n\nI am not affiliated with MyShell AI in any way.\n\nThis demo currently only supports English, but the model itself supports other languages.')
     with gr.Group():

 model = TTS(language='EN', device=device)
 speaker_ids = model.hps.data.spk2id
 def synthesize(speaker, text, speed=1.0, progress=gr.Progress()):
+    bio = io.BytesIO()
+    model.tts_to_file(text, speaker_ids[speaker], bio, speed=speed, pbar=progress.tqdm, format='wav')
+    return bio.getvalue()
 with gr.Blocks() as demo:
     gr.Markdown('# MeloTTS\n\nAn unofficial demo of [MeloTTS](https://github.com/myshell-ai/MeloTTS) from MyShell AI. MeloTTS is a permissively licensed (MIT) SOTA multi-speaker TTS model.\n\nI am not affiliated with MyShell AI in any way.\n\nThis demo currently only supports English, but the model itself supports other languages.')
     with gr.Group():

melo/api.py CHANGED Viewed

@@ -70,7 +70,7 @@ class TTS(nn.Module):
         # print(" > ===========================")
         return texts
-    def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, pbar=None):
         language = self.language
         texts = self.split_sentences_into_pieces(text, language)
         audio_list = []
@@ -113,4 +113,4 @@ class TTS(nn.Module):
         if output_path is None:
             return audio
         else:
-            soundfile.write(output_path, audio, self.hps.data.sampling_rate)

         # print(" > ===========================")
         return texts
+    def tts_to_file(self, text, speaker_id, output_path=None, sdp_ratio=0.2, noise_scale=0.6, noise_scale_w=0.8, speed=1.0, pbar=None, format=None):
         language = self.language
         texts = self.split_sentences_into_pieces(text, language)
         audio_list = []
         if output_path is None:
             return audio
         else:
+            soundfile.write(output_path, audio, self.hps.data.sampling_rate, format)