Spaces:
Runtime error
Runtime error
File size: 3,231 Bytes
f7e1683 ec75c79 1689d75 eceecf3 7c14ea7 eceecf3 1689d75 eceecf3 7d70e82 7c14ea7 7d70e82 eceecf3 ec75c79 3cccab6 ec75c79 eceecf3 3cccab6 ec75c79 3cccab6 eceecf3 ec75c79 1689d75 ec75c79 3cccab6 ec75c79 7c14ea7 ec75c79 1689d75 3cccab6 eceecf3 3cccab6 eceecf3 3cccab6 eceecf3 3cccab6 eceecf3 3cccab6 eceecf3 3cccab6 ec75c79 1689d75 ec75c79 eceecf3 1689d75 3cccab6 1689d75 7c14ea7 ec75c79 7c14ea7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import gradio as gr
import edge_tts
import asyncio
import tempfile
import nltk
import os
import srt
from pydub import AudioSegment, silence
import datetime
import nest_asyncio
# Sentence splitting in generate_srt relies on NLTK's Punkt models. Newer NLTK
# releases load them from the "punkt_tab" resource while older ones use
# "punkt", so fetch both to keep sent_tokenize working on either version.
for _punkt_resource in ("punkt", "punkt_tab"):
    nltk.download(_punkt_resource)

# Patch asyncio so the asyncio.run() calls made further down keep working even
# if an event loop is already running in this process.
nest_asyncio.apply()
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize ``text`` to an MP3 file and generate a matching SRT file.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input is
            rejected with a status message instead of raising.
        voice: Voice label of the form ``"<ShortName> - <Locale> (<Gender>)"``;
            only the part before the first ``" - "`` is passed to edge-tts.
        rate: Speech-rate offset in percent (int or float; rounded to an int).
        pitch: Pitch offset in Hz (int or float; rounded to an int).

    Returns:
        A ``(audio_path, srt_path, message)`` tuple. On a validation failure
        both paths are ``None`` and ``message`` explains why; on success
        ``message`` is the empty string.

    Raises:
        Any exception raised by edge-tts or ``generate_srt`` is propagated
        after the partially written audio file has been removed.
    """
    if not text or not text.strip():
        return None, None, "Please enter some text."
    if not voice:
        return None, None, "Please select a voice."

    voice_short = voice.split(" - ")[0]

    # UI sliders can deliver floats (e.g. 2.5); the "+d" format spec only
    # accepts integers and would raise ValueError otherwise.
    rate_str = f"{int(round(rate)):+d}%"
    pitch_str = f"{int(round(pitch)):+d}Hz"

    communicate = edge_tts.Communicate(text, voice_short, rate=rate_str, pitch=pitch_str)

    # Only reserve a unique path here; the handle is closed before edge-tts
    # writes to the file so the write also works where open files are locked.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
        srt_path = generate_srt(tmp_path, text)
    except Exception:
        # Do not leave an orphaned temp file behind when synthesis fails.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

    return tmp_path, srt_path, ""
def generate_srt(audio_path, text):
    """Write an SRT file that roughly aligns the sentences of ``text`` to audio.

    The audio is scanned for pauses (at least 400 ms, 16 dB below the clip's
    average loudness). Sentence *i* is shown from the end of the previous
    pause until the start of pause *i*. Sentences for which no pause was
    detected get a fixed-length cue placed right after the previous one.

    Args:
        audio_path: Path of the audio file to analyse.
        text: The text that was spoken; it is split into sentences with NLTK.

    Returns:
        Path of the generated ``.srt`` temp file (not deleted automatically).
    """
    fallback_seconds = 2.5  # cue length when no pause marks the sentence end

    audio = AudioSegment.from_file(audio_path)
    pauses_ms = silence.detect_silence(
        audio, min_silence_len=400, silence_thresh=audio.dBFS - 16
    )
    pauses = [(begin / 1000.0, finish / 1000.0) for begin, finish in pauses_ms]

    cursor = 0.0
    # A pause at the very beginning is lead-in silence, not a sentence
    # boundary; treating it as one would give the first cue zero length.
    if pauses and pauses[0][0] <= 0.0:
        cursor = pauses.pop(0)[1]

    subtitles = []
    for position, sentence in enumerate(nltk.tokenize.sent_tokenize(text)):
        start = cursor
        if position < len(pauses):
            # The cue ends where the pause starts; the next one begins after it.
            end, cursor = pauses[position]
        else:
            end = start + fallback_seconds
            # Advance the cursor so later fallback cues follow each other
            # instead of all sharing the same time span.
            cursor = end
        subtitles.append(
            srt.Subtitle(
                index=position + 1,
                start=datetime.timedelta(seconds=start),
                end=datetime.timedelta(seconds=end),
                content=sentence,
            )
        )

    srt_data = srt.compose(subtitles)
    # Write UTF-8 explicitly so non-ASCII text does not depend on the
    # platform's default encoding.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".srt", mode="w", encoding="utf-8"
    ) as srt_file:
        srt_file.write(srt_data)
    return srt_file.name
async def tts_interface(text, voice, rate, pitch):
    """UI-facing entry point: forward the inputs to ``text_to_speech``.

    Returns whatever ``text_to_speech`` returns, unchanged.
    """
    outcome = await text_to_speech(text, voice, rate, pitch)
    return outcome
# ⬇️ Create demo synchronously (run async functions in loop)
# The voice catalogue is fetched once, while the module is being loaded.
voices = asyncio.run(edge_tts.list_voices())

# Maps the label shown in the dropdown to the edge-tts short name.
voice_dict = {
    f"{v['ShortName']} - {v['Locale']} ({v['Gender']})": v['ShortName']
    for v in voices
}

with gr.Blocks() as demo:
    gr.Markdown("# 🎙️ Text-to-Speech + Subtitle Generator")
    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(label="Input Text", lines=5)
            # The leading empty choice lets the user start with no voice
            # selected, which text_to_speech reports as a status message.
            voice_dropdown = gr.Dropdown(
                choices=[""] + list(voice_dict.keys()),
                label="Select Voice",
            )
            # step=1 keeps the slider values integral: the rate/pitch strings
            # are built with an integer-only format spec, and without an
            # explicit step the slider may offer fractional positions.
            rate_slider = gr.Slider(
                minimum=-50, maximum=50, value=0, step=1, label="Speech Rate (%)"
            )
            pitch_slider = gr.Slider(
                minimum=-20, maximum=20, value=0, step=1, label="Pitch (Hz)"
            )
            generate_btn = gr.Button("🎧 Generate Audio + SRT")
        with gr.Column():
            audio_output = gr.Audio(label="Generated Audio", type="filepath")
            srt_output = gr.File(label="Download Subtitle (.srt)")
            message_output = gr.Textbox(label="Status", interactive=False)

    # The handler is a plain function that drives the coroutine to completion
    # itself with asyncio.run().
    generate_btn.click(
        fn=lambda text, voice, rate, pitch: asyncio.run(tts_interface(text, voice, rate, pitch)),
        inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
        outputs=[audio_output, srt_output, message_output],
    )
|