import os
import tempfile
from typing import Optional, Tuple

import gradio as gr
from elevenlabs.client import ElevenLabs


def _audio_suffix(output_format: Optional[str]) -> str:
    """Return a file suffix (e.g. ".mp3") matching an ElevenLabs output format.

    ElevenLabs format identifiers are written as ``codec_sample_rate_bitrate``
    (for example "mp3_22050_32" or "pcm_16000"), so the codec is the part
    before the first underscore. Falls back to ".mp3" when no usable codec
    prefix can be extracted.
    """
    codec = (output_format or "").strip().split("_", 1)[0].lower()
    if codec and codec.isascii() and codec.isalnum():
        return f".{codec}"
    return ".mp3"


def generate_speech(
    text: str = "",
    api_key: str = "",
    voice_id: str = "JBFqnCBsd6RMkjVDRZzb",
    model_id: str = "eleven_v3",
    language_code: str = "en",
    output_format: str = "mp3_22050_32",
) -> Tuple[Optional[str], str]:
    """
    Convert input text to speech using ElevenLabs API and return the audio file path.

    Args:
        text (str): Required. The input text to be converted into speech.
        api_key (str): Required. The ElevenLabs API key used to authenticate
            the request. This should be kept secret and treated like a
            password. Surrounding whitespace is ignored.
        voice_id (str): The ID of the voice to use for speech generation.
            Default: "JBFqnCBsd6RMkjVDRZzb". Other available voices can be
            found at: https://elevenlabs.io/app/default-voices
        model_id (str): The ID of the ElevenLabs model to use. Typically one
            of: "eleven_v3" (default) or "eleven_multilingual_v2"
        language_code (str): The two-letter ISO 639-1 language code specifying
            the language of the input text. Example: "en" for English, "lv"
            for Latvian. Full list:
            https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes
        output_format (str): The desired output audio format. Default:
            "mp3_22050_32". Other formats are listed here:
            https://elevenlabs.io/docs/api-reference/text-to-speech/convert#request.query.output_format.output_format

    Returns:
        tuple
            A tuple containing:
            audio_file_path : str or None
                The file path to the generated audio file if the speech
                synthesis was successful, or None if an error occurred.
            status_message : str
                A human-readable message indicating the result of the
                operation, such as a success confirmation or an error
                description.
    """
    # Guard against None as well as blank strings: programmatic callers
    # (e.g. MCP tool invocations) may omit a field entirely.
    if not text or not text.strip():
        return None, "❌ Text is empty."
    if not api_key or not api_key.strip():
        return None, "❌ API key is required."

    audio_path = None
    try:
        # Initialize the ElevenLabs client per call with the provided key.
        # The key is stripped so that a pasted trailing newline/space does
        # not cause an authentication failure.
        client = ElevenLabs(api_key=api_key.strip())

        # delete=False keeps the file on disk after the handle is closed so
        # the caller can read it. Writing through this single handle (rather
        # than reopening the path) avoids leaking a descriptor.
        with tempfile.NamedTemporaryFile(
            delete=False, suffix=_audio_suffix(output_format)
        ) as audio_file:
            audio_path = audio_file.name
            # The API yields the audio as a stream of byte chunks.
            for chunk in client.text_to_speech.convert(
                voice_id=voice_id,
                output_format=output_format,
                text=text,
                language_code=language_code,
                model_id=model_id,
            ):
                if chunk:
                    audio_file.write(chunk)
    except Exception as exc:
        # UI boundary: report any failure as a status message instead of
        # raising, and remove the partial/empty temp file so failed requests
        # do not accumulate on disk.
        if audio_path is not None:
            try:
                os.remove(audio_path)
            except OSError:
                pass
        return None, f"❌ Error: {exc}"

    return audio_path, "✅ Speech generated successfully."


# Gradio UI: one input per generate_speech parameter, plus audio/status outputs.
with gr.Blocks(title="ElevenLabs Text-to-Speech") as demo:
    gr.Markdown("## 🗣️ ElevenLabs Text-to-Speech Generator")
    gr.Markdown(
        """
        Convert your text into speech using the [ElevenLabs API](https://elevenlabs.io/).
        Fill in the fields below and click **Generate Speech**.
        """
    )

    with gr.Row():
        api_key = gr.Textbox(
            label="🔑 ElevenLabs API Key",
            type="password",
            placeholder="Enter your ElevenLabs API key",
        )

    with gr.Row():
        text_input = gr.Textbox(
            label="📝 Text to Convert",
            placeholder="Enter the text you want to convert to speech...",
            lines=8,
        )

    with gr.Row():
        voice_id = gr.Textbox(
            label="🎤 Voice ID",
            value="JBFqnCBsd6RMkjVDRZzb",
            info="Default: JBFqnCBsd6RMkjVDRZzb (George). Other voices: https://elevenlabs.io/app/default-voices",
        )

    with gr.Row():
        model_id = gr.Dropdown(
            ["eleven_v3", "eleven_multilingual_v2"],
            value="eleven_v3",
            label="🧠 Model",
            info="Choose ElevenLabs model (default: eleven_v3)",
        )

    with gr.Row():
        language_code = gr.Textbox(
            label="🌐 Language Code",
            value="en",
            info="Language code must follow ISO 639-1 (e.g., en, lv, fr). See: https://en.wikipedia.org/wiki/List_of_ISO_639_language_codes",
        )

    with gr.Row():
        output_format = gr.Textbox(
            label="📁 Output Format",
            value="mp3_22050_32",
            info="Default: mp3_22050_32. Other formats: https://elevenlabs.io/docs/api-reference/text-to-speech/convert#request.query.output_format.output_format",
        )

    with gr.Row():
        generate_btn = gr.Button("🚀 Generate Speech")

    with gr.Row():
        audio_output = gr.Audio(label="🔊 Generated Audio", type="filepath")
        status_msg = gr.Markdown()

    # Input order must match generate_speech's positional parameter order.
    generate_btn.click(
        fn=generate_speech,
        inputs=[text_input, api_key, voice_id, model_id, language_code, output_format],
        outputs=[audio_output, status_msg],
    )


if __name__ == "__main__":
    # mcp_server=True additionally exposes generate_speech as an MCP tool.
    demo.launch(mcp_server=True)