Spaces:

QomSSLab
/

ASR

Running

ASR / app.py

FatimaWaeli

Add application file

17a8b4f 3 months ago

7.35 kB

	import os
	from TTS.utils.synthesizer import Synthesizer
	import gradio as gr
	from huggingface_hub import hf_hub_download
	from huggingface_hub import login
	import time

	# Uncomment for private models if needed
	# login(token=os.environ.get("HF_TOKEN"))

	# Stylesheet handed to gr.Blocks(css=...) when the page is built.
	# The class selectors below match the elem_classes assigned to the layout
	# components in this file: container, main-header, status-panel,
	# input-panel, output-panel, examples-panel, footer, label and primary.
	# textarea/.label are forced right-to-left because the UI text is Persian.
	custom_css = """
	.gradio-container {
	background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
	font-family: 'Vazirmatn', 'Tahoma', sans-serif;
	}

	.main-header {
	color: #2d3748;
	text-align: center;
	margin-bottom: 2rem;
	text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
	}

	.container {
	max-width: 900px;
	margin: 0 auto;
	padding: 20px;
	background-color: white;
	border-radius: 12px;
	box-shadow: 0 10px 25px rgba(0,0,0,0.1);
	}

	.footer {
	text-align: center;
	margin-top: 2rem;
	color: #4a5568;
	font-size: 0.9rem;
	}

	/* Persian text alignment */
	textarea, .label {
	text-align: right;
	direction: rtl;
	}

	/* Button styling */
	button.primary {
	background: linear-gradient(to right, #4776E6, #8E54E9);
	border: none;
	border-radius: 8px;
	color: white;
	font-weight: bold;
	transition: all 0.3s ease;
	}

	button.primary:hover {
	transform: translateY(-2px);
	box-shadow: 0 7px 14px rgba(50, 50, 93, 0.1), 0 3px 6px rgba(0, 0, 0, 0.08);
	}

	.input-panel, .output-panel {
	background-color: rgba(255, 255, 255, 0.9);
	border-radius: 10px;
	padding: 15px;
	margin-bottom: 15px;
	border: 1px solid #e2e8f0;
	}

	.examples-panel {
	background-color: rgba(255, 255, 255, 0.8);
	border-radius: 10px;
	padding: 10px;
	border: 1px solid #e2e8f0;
	}

	.status-panel {
	background-color: #edf2f7;
	border-radius: 8px;
	padding: 10px;
	margin-bottom: 15px;
	text-align: center;
	}
	"""

	def load_synthesizer():
	    """Download the Persian VITS model files and build a CPU ``Synthesizer``.

	    ``best_model.pth`` and ``config.json`` are fetched from the
	    ``QomSSLab/vits-fa-voice`` Hub repository into the local ``models``
	    cache directory and passed to ``Synthesizer`` with CUDA disabled.

	    Progress is written to stdout rather than pushed into a Gradio
	    component: this function is called from module level, not from an
	    event handler, so a component ``.update()`` call here cannot change
	    what the page displays (and that method does not exist on newer
	    Gradio releases).

	    Returns:
	        The constructed ``Synthesizer`` instance.

	    Raises:
	        RuntimeError: If downloading or constructing the model fails. The
	            original exception is chained as ``__cause__``.
	    """
	    repo_id = "QomSSLab/vits-fa-voice"
	    print("در حال بارگذاری مدل... لطفاً منتظر بمانید")

	    try:
	        # Both files come from the same repository and share one cache dir.
	        model_path = hf_hub_download(
	            repo_id=repo_id,
	            filename="best_model.pth",
	            cache_dir="models",
	        )
	        config_path = hf_hub_download(
	            repo_id=repo_id,
	            filename="config.json",
	            cache_dir="models",
	        )

	        loaded = Synthesizer(
	            tts_checkpoint=model_path,
	            tts_config_path=config_path,
	            use_cuda=False,  # Usually no GPU in free Spaces
	        )
	    except Exception as e:
	        error_msg = f"خطا در بارگذاری مدل: {str(e)}"
	        # Chain the cause so the real traceback stays visible in the logs.
	        raise RuntimeError(error_msg) from e

	    print("مدل با موفقیت بارگذاری شد! اکنون می‌توانید از سیستم استفاده کنید.")
	    return loaded

	def tts(text, speed=1.0):
	    """Synthesize ``text`` to a WAV file for the Gradio click handler.

	    Args:
	        text: Text to speak. ``None``, empty, or whitespace-only input is
	            rejected with a prompt instead of being synthesized.
	        speed: Speech-rate value forwarded to ``synthesizer.tts``.

	    Returns:
	        A ``(audio_path, message)`` tuple. On success ``audio_path`` is the
	        path of a freshly written WAV file; on empty input or on any
	        synthesis error it is ``None`` and ``message`` explains why.
	    """
	    import tempfile

	    # Guard against None as well as blank strings so the handler never
	    # crashes on `.strip()`.
	    if not text or not text.strip():
	        return None, "لطفاً متنی وارد کنید."

	    try:
	        # NOTE(review): whether the `speed` keyword is honoured depends on
	        # the installed TTS version and model type — confirm for this model.
	        wav = synthesizer.tts(text, speed=speed)

	        # One file per request: a fixed shared name would let concurrent
	        # requests overwrite each other's audio. delete=False keeps the file
	        # on disk after the handle is closed so it can be served.
	        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
	            output_path = handle.name
	        synthesizer.save_wav(wav, output_path)

	        return output_path, "تبدیل با موفقیت انجام شد."

	    except Exception as e:
	        error_msg = f"خطا در تولید صدا: {str(e)}"
	        # Surface the failure in the server log as well as in the UI.
	        print(f"❌ {error_msg}")
	        return None, error_msg

	# Module-level Markdown component. It is created outside the Blocks layout,
	# so it is never rendered; the name is kept because functions in this file
	# may still refer to it.
	status_block = gr.Markdown("در حال آماده‌سازی سیستم...")

	# Load the model BEFORE building the page so the rendered status panel can
	# show the real outcome. Component values cannot be changed from module
	# level once the layout exists; only event handlers can update them.
	try:
	    synthesizer = load_synthesizer()
	    status_message = "سیستم آماده استفاده است!"
	except Exception as e:
	    # Top-level boundary: keep the UI reachable and report the failure.
	    print(f"Error: {str(e)}")
	    synthesizer = None
	    status_message = f"❌ خطا در بارگذاری مدل: {str(e)}"

	with gr.Blocks(css=custom_css) as demo:
	    with gr.Column(elem_classes="container"):
	        gr.Markdown("# سامانه تبدیل متن فارسی به گفتار", elem_classes="main-header")

	        # Status area: shows the result of the model load above.
	        with gr.Column(elem_classes="status-panel"):
	            status_output = gr.Markdown(status_message, elem_id="status")

	        # Input panel
	        with gr.Column(elem_classes="input-panel"):
	            gr.Markdown("### متن ورودی", elem_classes="label")
	            text_input = gr.Textbox(
	                placeholder="متن فارسی خود را اینجا وارد کنید...",
	                lines=5,
	                label="",
	                elem_classes="input-text",
	            )

	            with gr.Row():
	                speed_slider = gr.Slider(
	                    minimum=0.5,
	                    maximum=2.0,
	                    value=1.0,
	                    step=0.1,
	                    label="سرعت گفتار",
	                    elem_classes="speed-slider",
	                )

	            # Disabled when the model failed to load, since no handler is
	            # attached in that case.
	            submit_btn = gr.Button(
	                "تبدیل به گفتار",
	                variant="primary",
	                elem_classes="primary",
	                interactive=synthesizer is not None,
	            )

	        # Output panel
	        with gr.Column(elem_classes="output-panel"):
	            gr.Markdown("### خروجی صوتی", elem_classes="label")
	            output_audio = gr.Audio(label="")
	            result_text = gr.Markdown("")

	        # Examples panel
	        with gr.Column(elem_classes="examples-panel"):
	            gr.Markdown("### نمونه‌های متنی", elem_classes="label")
	            gr.Examples(
	                examples=[
	                    ["سلام دنیا، این یک آزمایش برای سیستم تبدیل متن به گفتار فارسی است."],
	                    ["امروز هوا بسیار خوب است و من احساس شادی می‌کنم."],
	                    ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است و به زودی در تمام جنبه‌های زندگی ما حضور خواهد داشت."],
	                ],
	                inputs=text_input,
	                label="نمونه‌های متنی را امتحان کنید",
	            )

	        gr.Markdown(
	            "راهنما: متن فارسی خود را در کادر بالا وارد کنید و دکمه تبدیل را فشار دهید. "
	            "می‌توانید سرعت گفتار را با استفاده از نوار لغزنده تنظیم کنید.",
	            elem_classes="footer",
	        )

	        # "\\|" yields the same runtime text as the former "\|" without the
	        # invalid-escape-sequence warning.
	        # NOTE(review): this link names WaeliFatima/vits-fa-voice while the
	        # model is downloaded from QomSSLab/vits-fa-voice — confirm which
	        # repository should be credited.
	        gr.Markdown(
	            "توسعه داده شده با استفاده از مدل VITS فارسی \\| [WaeliFatima/vits-fa-voice](https://huggingface.co/WaeliFatima/vits-fa-voice)",
	            elem_classes="footer",
	        )

	    # Event listeners must be registered while the Blocks context is open.
	    # Only wire the button when there is a model to serve requests.
	    if synthesizer is not None:
	        submit_btn.click(
	            fn=tts,
	            inputs=[text_input, speed_slider],
	            outputs=[output_audio, result_text],
	        )

	# Launch the interface
	demo.launch()