Spaces:

Karayakar
/

OuteTTS

Running on Zero

App Files Community

Karayakar committed on Apr 21

Commit

76c6e92

verified ·

1 Parent(s): 2e92c1b

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -130

app.py CHANGED Viewed

@@ -1,43 +1,9 @@
-import spaces
-import outetts
-from outetts.models.config import GenerationConfig
-import torch
-import json
 import gradio as gr
-from datetime import datetime
-import os
-from pathlib import Path
-from scipy.io import wavfile
-# Check if CUDA is available
-device = "cuda" if torch.cuda.is_available() else "cpu"
-# Set Hugging Face cache directory to the 'models' folder
-os.environ["HF_HOME"] = os.path.join(os.getcwd(), "models", "huggingface")
-os.environ["TRANSFORMERS_CACHE"] = os.path.join(os.getcwd(), "models", "huggingface", "transformers")
-os.environ["HF_DATASETS_CACHE"] = os.path.join(os.getcwd(), "models", "huggingface", "datasets")
-TOKENIZERS_PARALLELISM=False
-class OuteTTSGUI:
-    def __init__(self):
-        self.interface = None
-        self.current_speaker = None
-        self.history = []
-        self.models_dir = "models"
-        self.custom_speakers_dir = "custom_speakers"
-        os.makedirs(self.custom_speakers_dir, exist_ok=True)
-        os.makedirs(os.path.join(self.models_dir, "whisper"), exist_ok=True)  # Create whisper directory
-        self.model=None;
-        self.examples=  [
-        ["Bu sabah olanları hatırlayınca kendimi tutamıyorum."],
-        ["Patron yanlış dosyayı gönderince <laugh> hepimiz şok olduk."],
-        ["Sen ciddi misin? Gerçekten öyle mi dedi?"],
-        ["Sahneye çıkmadan önce pantolonun ters olduğunu fark etti."],
-        ["Mikrofona yanlışlıkla nefesini üfleyince"],
-        ["Hayat bazen böyle zor işte, dedi."],
-        ["Sınıfta sessizlik varken aniden gelen, dikkat dağıttı."],
-        ["Özür dilerim, biraz rahatsızım bugün."]  ]
-    def loadModel(self):
-        model_config = outetts.ModelConfig(
             model_path="Karayakar/Oute-TTS",
             tokenizer_path="Karayakar/Oute-TTS",
             interface_version=outetts.InterfaceVersion.V2,
@@ -47,100 +13,43 @@ class OuteTTSGUI:
             dtype=torch.bfloat16,
             )
-        self.interface = outetts.Interface(config=model_config)
-    def get_history(self):
-        if not self.history:
-            return "No generation history available."
-        history_text = "Generation History:\n\n"
-        for item in reversed(self.history):
-            history_text += f"Time: {item['timestamp']}\n"
-            history_text += f"Speaker: {item['speaker']}\n"
-            history_text += f"Text: {item['text']}\n"
-            history_text += f"File: {item['path']}\n"
-            history_text += "-" * 50 + "\n"
-        return history_text
-    @spaces.GPU()
-    def generate_speech(self,text, progress=gr.Progress()):
-         progress(0.1, "Processing text...")
-         gen_cfg = GenerationConfig( text=text, speaker= None )
-         timestamp_str = str(int(datetime.now().timestamp()))
-         file_name=f"{timestamp_str}.wav"
-         output=self.interface.generate(config=gen_cfg)
-         # Save the generated audio
-         timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-         output_path = os.path.join("generated_audio", f"generated_{timestamp}.wav")
-         output.save(output_path)
-         # Add to history
-         self.history.append({
-                    "timestamp": timestamp,
-                    "text": text,
-                    "speaker":  "Default",
-                    "path": output_path
-          })
-         return output_path, "Generation successful!"
-def create_gui():
-    gui = OuteTTSGUI()
-    gui.loadModel()
-    with gr.Blocks(title="Oute-TTS") as demo:
-        gr.Markdown(f"""
-        # 🎵 Oute-TTS Turkish (TTS)  -  [Karay Akar](https://www.linkedin.com/in/karayakar/)
-        Aşağıya metninizi girin ve Oute TTS modeliyle doğal sesli konuşmaya dönüştürülmesini dinleyin.
-        """)
-        with gr.Row():
-            with gr.Column(scale=3):
-                text_input = gr.Textbox(
-                    label="Cevirilecek metin",
-                    placeholder="Metin girin...",
-                    lines=5
-                )
-                with gr.Row():
-                    submit_btn = gr.Button("Generate Speech", variant="primary")
-                    clear_btn = gr.Button("Clear")
-            with gr.Column(scale=2):
-                audio_output = gr.Audio(label="Generated Audio")
-                #generation_output = gr.Textbox(label="Generation Status", lines=2)
-        # Set up examples
-        gr.Examples(
-            examples=gui.examples,
-            inputs=[text_input],
-            outputs=audio_output,
-            fn=gui.generate_speech,
-            cache_examples=True,
-        )
-        # Set up event handlers
-        submit_btn.click(
-            fn=gui.generate_speech,
-            inputs=[text_input],
-            outputs=[audio_output]
-        )
-        clear_btn.click(
-            fn=lambda: (None, None),
-            inputs=[],
-            outputs=[text_input, audio_output]
-        )
-    return demo
-# Launch the app
-if __name__ == "__main__":
-    #demo.queue().launch(share=False, ssr_mode=False)
-    # Try to import get_compatible_dtype outside the function for default value setting
-    try:
-        from outetts.models.config import get_compatible_dtype
-    except ImportError:
-        get_compatible_dtype = None # Define as None if import fails
-    demo = create_gui()
-    #demo.launch(share=False)
-    demo.queue().launch(share=False, ssr_mode=False)

 import gradio as gr
+from outetts.v2.interface import InterfaceHF
+#interface = InterfaceHF("Karayakar/Oute-TTS")
+model_config = outetts.ModelConfig(
             model_path="Karayakar/Oute-TTS",
             tokenizer_path="Karayakar/Oute-TTS",
             interface_version=outetts.InterfaceVersion.V2,
             dtype=torch.bfloat16,
             )
+interface = outetts.Interface(config=model_config)
+def generate_tts(text, temperature, repetition_penalty, reference_audio, reference_text):
+    output = interface.generate(
+        text=text,
+        #speaker=speaker,
+        #temperature=temperature,
+        #repetition_penalty=repetition_penalty
+    )
+    output.save("output.wav")
+    return "output.wav"
+with gr.Blocks() as demo:
+    gr.Markdown("# OuteTTS-0.3-500M Text-to-Speech Demo")
+    with gr.Row():
+        with gr.Column():
+            text_input = gr.Textbox(label="Text to Synthesize", placeholder="Enter text here...")
+            temperature = gr.Slider(0.1, 1.0, value=0.1, label="Temperature")
+            repetition_penalty = gr.Slider(0.5, 2.0, value=1.1, label="Repetition Penalty")
+            gr.Markdown("""
+**Note**: For voice cloning, both a reference audio file and its corresponding transcription must be provided.
+If either the audio file or transcription is missing, the model will generate audio with random characteristics.""")
+            reference_audio = gr.Audio(label="Reference Audio (for voice cloning)", type="filepath")
+            reference_text = gr.Textbox(label="Reference Transcription Text (matching the audio)", placeholder="Enter reference text here if using voice cloning")
+            submit_button = gr.Button("Generate Speech")
+        with gr.Column():
+            audio_output = gr.Audio(label="Generated Audio", type="filepath")
+    submit_button.click(
+        fn=generate_tts,
+        inputs=[text_input, temperature, repetition_penalty, reference_audio, reference_text],
+        outputs=audio_output
+    )
+demo.launch()