insta-maker-2

Sleeping

App Files Files Community

insta-maker-2 / app.py

hivecorp

Update app.py

8ca57cc verified 6 months ago

raw

history blame

4.88 kB

	import gradio as gr
	from pydub import AudioSegment
	import edge_tts
	import os
	import wave
	import asyncio
	import srt

	# Function to calculate audio duration
	def get_audio_length(audio_path):
	    """Return the playback duration, in seconds, of a WAV file.

	    Args:
	        audio_path: Path of a WAV file readable by the ``wave`` module.

	    Returns:
	        The number of frames divided by the frame rate, as a float.
	    """
	    with wave.open(audio_path, 'rb') as wav_reader:
	        frame_count = wav_reader.getnframes()
	        frames_per_second = float(wav_reader.getframerate())
	    return frame_count / frames_per_second

	# Generate precise SRT entries for a text batch
	def generate_accurate_srt(text, start_time, batch_index, batch_duration=None):
	    """Build SRT entries for the non-blank lines of one text batch.

	    The batch's audio duration is shared out between the lines in proportion
	    to their character counts, so the last entry ends exactly at
	    ``start_time + batch_duration`` and consecutive batches stay aligned
	    with the concatenated audio.

	    Args:
	        text: The batch text; each non-blank line becomes one subtitle.
	        start_time: Offset in seconds at which this batch's audio begins.
	        batch_index: Index assigned to the first subtitle of the batch.
	        batch_duration: Duration in seconds of the batch's audio. When
	            omitted, a rough estimate derived from the character count is
	            used instead; pass the measured duration for real timing.

	    Returns:
	        A tuple ``(srt_entries, end_time)`` where ``end_time`` is the offset
	        in seconds at which the next batch should start.
	    """
	    spoken_lines = [line for line in text.splitlines() if line.strip()]
	    if not spoken_lines:
	        # Nothing to caption, but any audio still occupies the timeline.
	        return [], start_time + (batch_duration or 0.0)

	    weights = [len(line.strip()) for line in spoken_lines]
	    total_weight = sum(weights)
	    if batch_duration is None:
	        # Rough fallback only (about 12 characters per second of speech);
	        # it is not derived from any audio.
	        batch_duration = total_weight / 12.0

	    srt_entries = []
	    weight_so_far = 0
	    for offset, (line, weight) in enumerate(zip(spoken_lines, weights)):
	        # Work from cumulative weights so rounding errors do not pile up
	        # and the final entry ends on the batch boundary.
	        line_start = start_time + batch_duration * weight_so_far / total_weight
	        weight_so_far += weight
	        line_end = start_time + batch_duration * weight_so_far / total_weight
	        srt_entries.append(
	            srt.Subtitle(
	                index=batch_index + offset,
	                start=srt.timedelta(seconds=line_start),
	                end=srt.timedelta(seconds=line_end),
	                content=line
	            )
	        )
	    return srt_entries, start_time + batch_duration


	def _split_into_batches(script_text, batch_size):
	    """Group whole lines of script_text into batches of about batch_size characters."""
	    batches = []
	    pending_lines = []
	    pending_length = 0
	    for line in script_text.splitlines():
	        line_length = len(line) + 1  # +1 for the newline that joins lines
	        if pending_lines and pending_length + line_length > batch_size:
	            batches.append("\n".join(pending_lines))
	            pending_lines = []
	            pending_length = 0
	        # A single line longer than batch_size is kept whole as its own batch
	        # rather than being cut in the middle of a word.
	        pending_lines.append(line)
	        pending_length += line_length
	    if pending_lines:
	        batches.append("\n".join(pending_lines))
	    # Batches holding only whitespace contain nothing to synthesize.
	    return [batch for batch in batches if batch.strip()]


	# Process batches and accumulate precise SRT entries
	async def batch_process_srt_and_audio(script_text, voice, batch_size=500, progress=gr.Progress()):
	    """Synthesize a script in batches and write matching audio and SRT files.

	    The script is split on line boundaries into batches, each batch is
	    synthesized with edge-tts, and the measured duration of each batch's
	    audio is used to time that batch's subtitles. The combined audio is
	    written to ``final_audio.wav`` and the subtitles to
	    ``final_subtitles.srt`` in the current working directory.

	    Args:
	        script_text: The full script; each non-blank line becomes a subtitle.
	        voice: edge-tts voice name used for synthesis.
	        batch_size: Approximate maximum number of characters per batch.
	        progress: Gradio progress tracker; accepted for interface
	            compatibility and not used by this function.

	    Returns:
	        The tuple ``("final_subtitles.srt", "final_audio.wav")``.
	    """
	    total_srt_entries = []
	    combined_audio = AudioSegment.empty()
	    cumulative_time = 0.0  # Track total time for accurate SRT start times
	    batch_index = 1

	    for batch_number, batch_text in enumerate(_split_into_batches(script_text, batch_size)):
	        mp3_file = f"audio_batch_{batch_number}.mp3"
	        try:
	            # Generate audio for the batch and load it back into memory
	            tts = edge_tts.Communicate(batch_text, voice, rate="-25%")
	            await tts.save(mp3_file)
	            batch_audio = AudioSegment.from_file(mp3_file, format="mp3")
	        finally:
	            # Remove the temporary MP3 even when synthesis or decoding fails
	            if os.path.exists(mp3_file):
	                os.remove(mp3_file)

	        # Time the subtitles from the decoded audio itself, so no
	        # intermediate WAV file has to be written and cleaned up.
	        srt_entries, cumulative_time = generate_accurate_srt(
	            batch_text, cumulative_time, batch_index, batch_audio.duration_seconds
	        )

	        # Append entries and audio for the batch
	        total_srt_entries.extend(srt_entries)
	        combined_audio += batch_audio
	        batch_index += len(srt_entries)

	    # Export combined audio and SRT
	    combined_audio.export("final_audio.wav", format="wav")
	    with open("final_subtitles.srt", "w") as srt_file:
	        srt_file.write(srt.compose(total_srt_entries))

	    # Final validation check
	    validate_srt_against_audio("final_subtitles.srt", "final_audio.wav")

	    return "final_subtitles.srt", "final_audio.wav"

	# Validate SRT timing with total audio length
	def validate_srt_against_audio(srt_file_path, audio_file_path):
	    """Clamp subtitle timings so that none extends past the end of the audio.

	    The SRT file is parsed, every subtitle whose start or end lies beyond
	    the audio's duration is pulled back to that duration, and the file is
	    rewritten in place.

	    Args:
	        srt_file_path: Path of the SRT file to check and rewrite.
	        audio_file_path: Path of the WAV file the subtitles belong to.
	    """
	    audio_duration = get_audio_length(audio_file_path)
	    audio_end = srt.timedelta(seconds=audio_duration)

	    with open(srt_file_path, 'r') as file:
	        subtitles = list(srt.parse(file.read()))

	    for subtitle in subtitles:
	        # Check every subtitle rather than stopping at the first overrun:
	        # anything after an overrunning entry would overrun as well.
	        if subtitle.end.total_seconds() > audio_duration:
	            subtitle.end = audio_end
	        if subtitle.start.total_seconds() > audio_duration:
	            subtitle.start = audio_end

	    with open(srt_file_path, 'w') as file:
	        file.write(srt.compose(subtitles))

	# Gradio function with error handling and markdown message
	async def process_script(script_text, language, voice):
	    """Gradio handler: generate the SRT and audio files for a script.

	    Args:
	        script_text: Script text entered by the user.
	        language: Selected language code; not used by this handler.
	        voice: Selected voice name, forwarded to the batch processor.

	    Returns:
	        A 4-tuple ``(srt_path, audio_path, audio_path, message)``. On
	        success the message is an empty string; on any exception the three
	        paths are ``None`` and the message is a generic error notice.
	    """
	    try:
	        generated = await batch_process_srt_and_audio(script_text, voice)
	        subtitle_file, audio_file = generated
	    except Exception as exc:
	        # Log the cause on the server; the user sees only a generic notice.
	        print(f"Error: {exc}")
	        failure_notice = "An error occurred. Please check the script text and try again."
	        return None, None, None, failure_notice
	    # The audio path is returned twice: once for download, once for playback.
	    return subtitle_file, audio_file, audio_file, ""

	# Dynamic voice selection based on language
	def update_voice_options(language):
	    """Return a Gradio update holding the voices available for a language.

	    Args:
	        language: Language code chosen in the language dropdown.

	    Returns:
	        A ``gr.update`` whose choices are the voices for ``language`` and
	        whose value is the first of them. For a language with no entry in
	        the table the choices are empty and the value is ``None``.
	    """
	    voices = {
	        "en-US": ["en-US-AndrewNeural", "en-US-JennyNeural"],
	        "es-ES": ["es-ES-AlvaroNeural", "es-ES-ElviraNeural"]
	    }
	    options = voices.get(language, [])
	    # Indexing an empty list would raise IndexError for an unknown language.
	    default_voice = options[0] if options else None
	    return gr.update(choices=options, value=default_voice)

	# Gradio app setup
	with gr.Blocks() as app:
	    # Page heading
	    gr.Markdown("# Text to Speech with Accurate SRT and Audio Generation")

	    # Language picker, and a voice picker that is refreshed whenever the
	    # language changes
	    language = gr.Dropdown(
	        label="Select Language",
	        choices=["en-US", "es-ES"],
	        value="en-US",
	    )
	    voice = gr.Dropdown(
	        label="Select Voice",
	        choices=["en-US-AndrewNeural", "en-US-JennyNeural"],
	    )
	    language.change(update_voice_options, inputs=language, outputs=voice)

	    # Script input
	    script_text = gr.Textbox(lines=10, label="Enter Script Text")

	    # Result widgets, created in the order process_script returns its values
	    srt_download = gr.File(label="Download SRT File")
	    audio_download = gr.File(label="Download Audio File")
	    audio_player = gr.Audio(label="Play Audio")
	    error_message = gr.Markdown(label="Error Message")  # Shows any error text
	    outputs = [srt_download, audio_download, audio_player, error_message]

	    # Button that starts generation
	    submit_button = gr.Button("Generate Audio and SRT")
	    submit_button.click(
	        fn=process_script,
	        inputs=[script_text, language, voice],
	        outputs=outputs,
	    )

	app.launch()