Spaces:

tahirturk
/

VoiceCloner

Running on Zero

App Files Community

tahirturk committed on 27 days ago

Commit

f12ab45

verified ·

1 Parent(s): 87119c0

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -33

app.py CHANGED Viewed

@@ -1,23 +1,24 @@
 import spaces
 import gradio as gr
 import torch
-from f5_tts.api import F5TTS
 import os
 from pydub import AudioSegment
 import re
 os.makedirs("audio", exist_ok=True)
 # Auto-detect device
 device = "cuda" if torch.cuda.is_available() else "cpu"
-# Load ultra-realistic model (F5-TTS)
-tts = F5TTS(device=device)
 # Function for long text voice cloning
 @spaces.GPU(enable_queue=True)
 def clone(text, audio):
-    # Split text into smaller chunks (sentences or short phrases)
     sentences = re.split(r'(?<=[.!?]) +', text)
     final_audio = AudioSegment.silent(duration=0)
@@ -25,28 +26,23 @@ def clone(text, audio):
         if not chunk.strip():
             continue
         temp_path = f"chunk_{i}.wav"
-        # Generate speech from cloned voice using F5-TTS
-        tts.infer(
-            ref_audio_path=audio,
             text=chunk,
-            output_path=temp_path,
-            speaker_id=0,
             language="en",
-            emotion="neutral"
         )
         final_audio += AudioSegment.from_wav(temp_path)
-    # Merge chunks into one final audio file
     output_path = "./output.wav"
     final_audio.export(output_path, format="wav")
     return output_path
-# ==================== UI SECTION ====================
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", neutral_hue="slate")) as demo:
     gr.HTML("""
     <style>
         body {
@@ -106,33 +102,33 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", ne
         with gr.Column(scale=1):
             gr.Markdown(
                 """
-                # 🎙️ Ultra Realistic Voice Clone Studio (F5-TTS)
-                Clone **any voice** with high emotional realism using **F5-TTS**.
-                Upload a short reference audio and type what you want it to say.
-                **Supports English and Multilingual Texts.**
                 """
             )
             text_input = gr.Textbox(
                 label="Enter your text",
-                placeholder="Type what you'd like the cloned voice to say...",
                 lines=6
             )
             audio_input = gr.Audio(
                 type="filepath",
                 label="Upload voice reference (WAV or MP3)"
             )
-            submit_btn = gr.Button("✨ Generate Ultra-Realistic Voice", variant="primary")
         with gr.Column(scale=1):
             output_audio = gr.Audio(type="filepath", label="🔊 Generated Voice Output")
             gr.Markdown(
                 """
                 ---
-                ⚡ **Pro Tips**
-                - Use **clean 5–15 sec** reference audio for best results.
-                - Long text is automatically chunked for natural flow.
-                - You can now generate **multi-minute, realistic speech**.
                 ---
                 """
             )
@@ -140,16 +136,16 @@ with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", ne
     with gr.Row():
         gr.Examples(
             examples=[
-                ["Hey! It's me Dorthy, from the Wizard of Oz. Type what you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
-                ["It's me Vito Corleone from The Godfather.", "./audio/Godfather.wav"],
-                ["Hey, it's me Paris Hilton!", "./audio/Paris-Hilton.mp3"],
-                ["Hey, it's me Megan Fox from Transformers.", "./audio/Megan-Fox.mp3"],
-                ["Hey there, it's me Jeff Goldblum.", "./audio/Jeff-Goldblum.mp3"],
-                ["Hey there, it's me Heath Ledger as the Joker.", "./audio/Heath-Ledger.mp3"],
             ],
             inputs=[text_input, audio_input],
             outputs=[output_audio],
-            label="🎭 Try these sample voices"
         )
     submit_btn.click(fn=clone, inputs=[text_input, audio_input], outputs=output_audio)

 import spaces
 import gradio as gr
 import torch
+from TTS.api import TTS
 import os
 from pydub import AudioSegment
 import re
 os.makedirs("audio", exist_ok=True)
+# Agree to Coqui TTS license
+os.environ["COQUI_TOS_AGREED"] = "1"
 # Auto-detect device
 device = "cuda" if torch.cuda.is_available() else "cpu"
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
 # Function for long text voice cloning
 @spaces.GPU(enable_queue=True)
 def clone(text, audio):
+    # Split input into sentences/phrases
     sentences = re.split(r'(?<=[.!?]) +', text)
     final_audio = AudioSegment.silent(duration=0)
         if not chunk.strip():
             continue
         temp_path = f"chunk_{i}.wav"
+        tts.tts_to_file(
             text=chunk,
+            speaker_wav=audio,
             language="en",
+            file_path=temp_path
         )
         final_audio += AudioSegment.from_wav(temp_path)
+    # Merge chunks into one file
     output_path = "./output.wav"
     final_audio.export(output_path, format="wav")
     return output_path
+# UI
 with gr.Blocks(theme=gr.themes.Soft(primary_hue="teal", secondary_hue="cyan", neutral_hue="slate")) as demo:
+    # Custom CSS
     gr.HTML("""
     <style>
         body {
         with gr.Column(scale=1):
             gr.Markdown(
                 """
+                # 🎙️ Voice Clone Studio By Tahir Turk
+                Clone any voice by uploading a short reference audio file
+                and typing what you want it to say.
+                **Powered by XTTS v2 — multilingual voice cloning.**
                 """
             )
             text_input = gr.Textbox(
                 label="Enter your text",
+                placeholder="Type anything you'd like the cloned voice to say...",
                 lines=6
             )
             audio_input = gr.Audio(
                 type="filepath",
                 label="Upload voice reference (WAV or MP3)"
             )
+            submit_btn = gr.Button("✨ Generate Voice", variant="primary")
         with gr.Column(scale=1):
             output_audio = gr.Audio(type="filepath", label="🔊 Generated Voice Output")
             gr.Markdown(
                 """
                 ---
+                ⚡ **Tips for Best Results**
+                - Use a **clean, clear** reference audio (5–15 seconds works best).
+                - Long text will be split automatically for natural speech.
+                - You can generate **minutes of audio** now without cutoff.
                 ---
                 """
             )
     with gr.Row():
         gr.Examples(
             examples=[
+                ["Hey! It's me Dorthy, from the Wizard of Oz. Type in whatever you'd like me to say.", "./audio/Wizard-of-Oz-Dorthy.wav"],
+                ["It's me Vito Corleone, from the Godfather. Type in whatever you'd like me to say.", "./audio/Godfather.wav"],
+                ["Hey, it's me Paris Hilton. Type in whatever you'd like me to say.", "./audio/Paris-Hilton.mp3"],
+                ["Hey, it's me Megan Fox from Transformers. Type in whatever you'd like me to say.", "./audio/Megan-Fox.mp3"],
+                ["Hey there, it's me Jeff Goldblum. Type in whatever you'd like me to say.", "./audio/Jeff-Goldblum.mp3"],
+                ["Hey there, it's me Heath Ledger as the Joker. Type in whatever you'd like me to say.", "./audio/Heath-Ledger.mp3"],
             ],
             inputs=[text_input, audio_input],
             outputs=[output_audio],
+            label="🎭 Try with these sample voices"
         )
     submit_btn.click(fn=clone, inputs=[text_input, audio_input], outputs=output_audio)