Spaces:

hivecorp
/

orb-audio

Runtime error

App Files Files Community

orb-audio / app.py

hivecorp

Update app.py

81b3ec7 verified 6 months ago

raw

history blame contribute delete

2.49 kB

	import gradio as gr
	import torch
	from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor
	import soundfile as sf
	import os
	import time

	# Load the Kokoro-TTS model and processor once, at import time; the
	# resulting `model` and `processor` objects are reused by generate_tts().
	# NOTE(review): confirm that this repo id exists and can be loaded through
	# AutoModelForSpeechSeq2Seq / AutoProcessor — TODO verify against the
	# model card before relying on it.
	model_name = "hexgrad/Kokoro-TTS"
	model = AutoModelForSpeechSeq2Seq.from_pretrained(model_name)
	processor = AutoProcessor.from_pretrained(model_name)

	# Speaker labels offered in the UI dropdown; the selected label is passed
	# to the processor as `speaker=`. These are placeholder values
	# (update this based on the model's capabilities).
	speakers = ["Speaker 1", "Speaker 2", "Speaker 3"] # Replace with actual speaker names

	# Function to generate TTS
	def generate_tts(text, speaker, *, samplerate=22050):
	    """Synthesize `text` with the module-level model and write a WAV file.

	    Args:
	        text: Text to synthesize. Empty, whitespace-only or None input is
	            rejected without invoking the model.
	        speaker: Speaker label forwarded to the processor as `speaker=`.
	        samplerate: Sample rate passed to `sf.write`. Defaults to 22050,
	            the value previously hard-coded here.
	            NOTE(review): confirm it matches the model's output rate.

	    Returns:
	        On success, the path of the written file (always ends in ".wav").
	        On failure, an error message string instead; exceptions raised
	        while synthesizing or writing are caught and converted to text so
	        the caller can display them.
	    """
	    import uuid  # local import: only used to make output names unique

	    if not text or not text.strip():
	        return "Error: input text is empty."

	    try:
	        # Preprocess input text
	        inputs = processor(text, return_tensors="pt", speaker=speaker)

	        # Generate speech without tracking gradients
	        with torch.no_grad():
	            speech = model.generate(**inputs)

	        # A one-second timestamp alone collides when two requests finish in
	        # the same second, so a short random suffix keeps names unique.
	        timestamp = int(time.time())
	        output_file = f"output_{timestamp}_{uuid.uuid4().hex[:8]}.wav"

	        # detach()/cpu() are no-ops for a plain CPU tensor but keep
	        # .numpy() valid if the model runs on an accelerator.
	        # NOTE(review): assumes generate() returns a waveform tensor that
	        # soundfile can write as-is — TODO confirm shape and dtype.
	        waveform = speech.detach().cpu().numpy()
	        sf.write(output_file, waveform, samplerate=samplerate)

	        return output_file
	    except Exception as e:
	        # UI boundary: report the failure as text rather than crashing the
	        # request; the caller distinguishes this from a path.
	        return str(e)

	# Gradio interface
	def tts_app(text, speaker):
	    """Run TTS for a UI request and map the result onto (audio, status).

	    Args:
	        text: Text entered by the user.
	        speaker: Speaker label chosen by the user.

	    Returns:
	        A 2-tuple. On success: (path to the WAV file,
	        "Generated: <path>"). On failure: (None, error message).
	    """
	    result = generate_tts(text, speaker)

	    # generate_tts() reports failures by returning the error text, so a
	    # suffix check alone would treat a message that happens to end in
	    # ".wav" as a path. Also require that the file really exists.
	    if result.endswith(".wav") and os.path.isfile(result):
	        return result, f"Generated: {result}"

	    return None, result

	# Auto-naming system for downloads
	def get_download_name():
	    """Return a timestamped name of the form ``tts_output_<epoch-seconds>.wav``.

	    Only the name is built; no file is created or checked here.
	    """
	    epoch_seconds = int(time.time())
	    return f"tts_output_{epoch_seconds}.wav"

	# Create the Gradio app
	with gr.Blocks() as demo:
	    gr.Markdown("# Kokoro-TTS v1.9: Long Input TTS Generation")

	    # Inputs: the text to speak and the speaker to use.
	    with gr.Row():
	        text_input = gr.Textbox(label="Input Text", placeholder="Enter your text here...", lines=10)
	        speaker_dropdown = gr.Dropdown(label="Select Speaker", choices=speakers, value=speakers[0])

	    generate_button = gr.Button("Generate TTS")

	    # Outputs: audio player plus a status/error line.
	    with gr.Row():
	        audio_output = gr.Audio(label="Generated Audio")
	        status_output = gr.Textbox(label="Status", placeholder="Generation status will appear here...")

	    download_button = gr.Button("Download Audio")
	    download_output = gr.File(label="Download Generated Audio")

	    # Path of the most recent successful generation (None until then), so
	    # the download button has a real file to hand out.
	    last_audio_path = gr.State(value=None)

	    def _generate_and_remember(text, speaker):
	        """Run tts_app() and also store the resulting path in the state."""
	        audio_path, status = tts_app(text, speaker)
	        return audio_path, status, audio_path

	    def _prepare_download(audio_path):
	        """Copy the last generated file to an auto-generated download name.

	        Returns None (empty file slot) when nothing has been generated yet
	        or the generated file is no longer on disk.
	        """
	        import shutil  # local import: only needed for the download copy

	        if not audio_path or not os.path.isfile(audio_path):
	            return None
	        download_path = get_download_name()
	        shutil.copyfile(audio_path, download_path)
	        return download_path

	    # Link functions to interface
	    generate_button.click(
	        fn=_generate_and_remember,
	        inputs=[text_input, speaker_dropdown],
	        outputs=[audio_output, status_output, last_audio_path],
	    )

	    # Previously this returned only a made-up file name that was never
	    # written to disk; now it serves a copy of the generated audio.
	    download_button.click(
	        fn=_prepare_download,
	        inputs=last_audio_path,
	        outputs=download_output,
	    )

	# Launch the app: calls launch() on the `demo` Blocks object with default
	# arguments. This runs whenever the module is executed or imported.
	demo.launch()