Spaces:

DSC-Team
/

TTS

Sleeping

App Files Community

Update app.py

by Hamzasha - opened Jun 3

base: refs/heads/main

←

from: refs/pr/1

Discussion Files changed

+35

-1

Files changed (1) hide show

app.py +35 -1

app.py CHANGED Viewed

@@ -9,6 +9,7 @@ print("Gradio version:", gr.__version__)
 # ─── 1. Read & validate API key from environment ────────────────────────────────
 api_key = os.getenv("GOOGLE_API_KEY")
 if not api_key:
     raise ValueError("Environment variable 'GOOGLE_API_KEY' not found.")
@@ -37,6 +38,7 @@ def generate_audio(
     instructions: str,
     text: str,
     voice_name: str,
     custom_additions: str,
 ) -> str:
     full_prompt = f"""
@@ -68,7 +70,38 @@ def generate_audio(
     pcm_data = response.candidates[0].content.parts[0].inline_data.data
     wav_path = wave_file(pcm_data)
-    return wav_path  # Gradio will serve this as a URL
 # ─── 5. Gradio UI components ──────────────────────────────────────────────────────
 model_choices = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
@@ -115,6 +148,7 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
             clarity = gr.Dropdown(clarity_choices, value="Very clear", label="Clarity")
             style = gr.Dropdown(style_choices, value="Motivational and casual", label="Style")
             voice_name = gr.Dropdown(voice_names, value="Charon", label="Voice Name")
             instructions = gr.Textbox(
                 value="Imagine you're speaking to someone close, giving them an energy boost to start their day. Use expressive intonation and add emotional warmth.",
                 lines=3,

 # ─── 1. Read & validate API key from environment ────────────────────────────────
 api_key = os.getenv("GOOGLE_API_KEY")
+eleven_api_key = os.getenv("ELEVEN_API_KEY")
 if not api_key:
     raise ValueError("Environment variable 'GOOGLE_API_KEY' not found.")
     instructions: str,
     text: str,
     voice_name: str,
+    premium_voice_id: str,
     custom_additions: str,
 ) -> str:
     full_prompt = f"""
     pcm_data = response.candidates[0].content.parts[0].inline_data.data
     wav_path = wave_file(pcm_data)
+    if premium_voice_id is None:
+        return wav_path
+    else :
+        url = f"https://api.elevenlabs.io/v1/speech-to-speech/{premium_voice_id}"
+        headers = {
+            "xi-api-key": eleven_api_key,
+        }
+        # Read the audio file
+        with open(wav_path, "rb") as audio_file:
+            files = {
+                "audio": audio_file,
+            }
+            data = {
+                "model_id": "eleven_multilingual_sts_v2",  # or eleven_english_sts_v2
+                "output_format": "mp3_44100_128",
+                # Optional: "voice_settings": json.dumps({...}),
+                # Optional: "seed": 12345,
+                # Optional: "remove_background_noise": "true",
+            }
+            response = requests.post(url, headers=headers, files=files, data=data)
+        if response.ok:
+            with open("output.mp3", "wb") as f:
+                f.write(response.content)
+                print("✅ Voice converted and saved as output.mp3")
+        else:
+            print("❌ Error:", response.status_code, response.text)
+        return "output.mp3"  # Gradio will serve this as a URL
 # ─── 5. Gradio UI components ──────────────────────────────────────────────────────
 model_choices = ["gemini-2.5-flash-preview-tts", "gemini-2.5-pro-preview-tts"]
             clarity = gr.Dropdown(clarity_choices, value="Very clear", label="Clarity")
             style = gr.Dropdown(style_choices, value="Motivational and casual", label="Style")
             voice_name = gr.Dropdown(voice_names, value="Charon", label="Voice Name")
+            premium_voice_id = gr.Textbox(label="premium Voice ID (optional)")
             instructions = gr.Textbox(
                 value="Imagine you're speaking to someone close, giving them an energy boost to start their day. Use expressive intonation and add emotional warmth.",
                 lines=3,