Spaces:

Chillarmo
/

Voice_Cloning_with_OuteTTS

Running

App Files Community

Chillarmo committed on Nov 5

Commit

bbcc061

•

1 Parent(s): aac3370

Update app.py

Browse files

Files changed (1) hide show

app.py +5 -12

app.py CHANGED Viewed

@@ -17,15 +17,9 @@ def download_model():
 def initialize_models():
     """Initialize the OuteTTS and Faster-Whisper models"""
-    # Download and initialize GGUF model with adjusted parameters
     model_path = download_model()
-    tts_interface = InterfaceGGUF(
-        model_path,
-        n_ctx=2048,          # Reduced context size
-        n_batch=512,         # Reduced batch size
-        n_threads=4,         # Adjust based on CPU
-        verbose=False,       # Reduce logging
-    )
     # Initialize Whisper
     asr_model = WhisperModel("tiny",
@@ -55,8 +49,8 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
         gr.Info(f"Using reference text: {reference_text}")
         # Limit text lengths to prevent context overflow
-        reference_text = reference_text[:2000]  # Further reduced
-        text_to_speak = text_to_speak[:300]     # Further reduced
         # Create speaker from reference audio
         speaker = TTS_INTERFACE.create_speaker(
@@ -70,7 +64,7 @@ def process_audio_file(audio_path, reference_text, text_to_speak, temperature=0.
             speaker=speaker,
             temperature=temperature,
             repetition_penalty=repetition_penalty,
-            max_lenght=1024  # Reduced from 2048
         )
         # Save to temporary file and return path
@@ -82,7 +76,6 @@ Reference text: {reference_text[:300]}...
     except Exception as e:
         return None, f"Error: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="Voice Cloning with OuteTTS (GGUF)") as demo:
     gr.Markdown("# 🎙️ Voice Cloning with OuteTTS (GGUF)")

 def initialize_models():
     """Initialize the OuteTTS and Faster-Whisper models"""
+    # Download and initialize GGUF model
     model_path = download_model()
+    tts_interface = InterfaceGGUF(model_path)
     # Initialize Whisper
     asr_model = WhisperModel("tiny",
         gr.Info(f"Using reference text: {reference_text}")
         # Limit text lengths to prevent context overflow
+        reference_text = reference_text[:2000]  # Limit reference text
+        text_to_speak = text_to_speak[:300]     # Limit output text
         # Create speaker from reference audio
         speaker = TTS_INTERFACE.create_speaker(
             speaker=speaker,
             temperature=temperature,
             repetition_penalty=repetition_penalty,
+            max_lenght=2048  # Note: Using original typo from docs ('lenght')
         )
         # Save to temporary file and return path
     except Exception as e:
         return None, f"Error: {str(e)}"
 # Create Gradio interface
 with gr.Blocks(title="Voice Cloning with OuteTTS (GGUF)") as demo:
     gr.Markdown("# 🎙️ Voice Cloning with OuteTTS (GGUF)")