mana-tts / interface.py
abreza's picture
update requirements to include transformers
a81a2ea
import gradio as gr
from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE
MODEL_PATHS = {
"Homo-GE2PE": "./homo-ge2pe",
"Homo-T5": "./homo-t5",
}
_g2p_cache = {}
def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, building it on first use.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The instance stored in ``_g2p_cache`` for this name. A new instance
        is constructed with ``GPU=False`` when none is cached yet.

    Raises:
        ValueError: If no instance is cached and *model_name* is not a key
            of ``MODEL_PATHS``.
    """
    # Fast path: this model was already constructed earlier.
    if model_name in _g2p_cache:
        return _g2p_cache[model_name]

    model_dir = MODEL_PATHS.get(model_name)
    if model_dir is None:
        raise ValueError(f"Unknown model: {model_name}")

    instance = GE2PE(model_path=model_dir, GPU=False)
    _g2p_cache[model_name] = instance
    return instance
def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on *text* for the UI.

    Args:
        model_name: Name of the model to use; resolved through ``_get_g2p``.
        text: Input text. Falsy or whitespace-only input short-circuits.
        use_rules: Forwarded to ``generate`` as ``use_rules``.
        use_dict: Forwarded to ``generate`` as ``use_dict``.

    Returns:
        The first element of the model's ``generate`` result, an empty string
        when the input is blank or the result is empty, or a string starting
        with ``"⚠️ Error: "`` when any exception is raised along the way.
    """
    # Nothing to convert: skip model loading entirely.
    if not (text and text.strip()):
        return ""

    # Every failure (unknown model, load error, inference error) is turned
    # into a message for the output box instead of propagating to the caller.
    try:
        g2p = _get_g2p(model_name)
        outputs = g2p.generate([text], use_rules=use_rules, use_dict=use_dict)
        if outputs:
            return outputs[0]
        return ""
    except Exception as err:
        return f"⚠️ Error: {err!s}"
def create_interface():
    """Build the Gradio UI with a G2P tab and a text-to-speech tab.

    The G2P tab wires its "Convert" button to ``ge2pe_infer``; the TTS tab
    wires its "Generate Speech" button to ``generate_speech``. A Markdown
    block with acknowledgments and citations is added as well.

    Returns:
        The constructed ``gr.Blocks`` object. It is not launched here.
    """
    # NOTE(review): the nesting below (which components sit inside each
    # Row/Tab, and where the acknowledgments block lives) was reconstructed
    # from a copy whose indentation was lost — confirm against the rendered
    # layout before relying on it.
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        # Page heading and one-line description.
        gr.Markdown("# Persian Speech Suite: GE2PE & TTS\n" "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS).")
        with gr.Tabs():
            # ---- Tab 1: grapheme-to-phoneme conversion ----
            with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
                gr.Markdown("Convert Persian text to its phonemic transcription. Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary.")
                with gr.Row():
                    # Choices come straight from MODEL_PATHS so the selected
                    # value is always a name that MODEL_PATHS can resolve.
                    model_selector = gr.Radio(
                        choices=list(MODEL_PATHS.keys()),
                        value="Homo-GE2PE",
                        label="G2P Model",
                    )
                    g2p_input = gr.Textbox(
                        label="Persian Text",
                        placeholder="مثال: این کتابِ علی است",
                        lines=4,
                    )
                with gr.Row():
                    # These two flags are passed to ge2pe_infer as
                    # use_rules / use_dict.
                    g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
                    g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")
                g2p_button = gr.Button("Convert", variant="primary")
                g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)
                # Input order must match ge2pe_infer's positional parameters:
                # (model_name, text, use_rules, use_dict).
                g2p_button.click(
                    fn=ge2pe_infer,
                    inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
                    outputs=[g2p_output],
                )
                # Sample sentences bound to the text box only; no fn is
                # attached to the examples themselves.
                gr.Examples(
                    examples=[
                        ["او مرد خوبی است."],
                        ["او مرد."],
                        ["این کتابِ علی است."],
                        ["به خانه آمد."]
                    ],
                    inputs=[g2p_input],
                )
            # ---- Tab 2: text-to-speech synthesis ----
            with gr.TabItem("Text‑to‑Speech"):
                gr.Markdown("Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.")
                tts_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مدل تولید گفتار با دادگان نسل مانا",
                    lines=5,
                )
                tts_button = gr.Button("Generate Speech", variant="primary")
                tts_output = gr.Audio(label="Generated Speech")
                # generate_speech receives the text box value and its return
                # value is routed to the audio component.
                tts_button.click(
                    fn=generate_speech,
                    inputs=[tts_input],
                    outputs=[tts_output],
                )
                # Sample sentences bound to the TTS text box only.
                gr.Examples(
                    examples=[
                        ["سلام، چطور هستید؟"],
                        ["ایران سرزمین زیبایی‌ها و افتخارات است."],
                        ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                        ["مدل تولید گفتار با دادگان نسل مانا"],
                    ],
                    inputs=[tts_input],
                )
        # Acknowledgments and citations. The string content and its closing
        # quotes are kept at column 0 so the Markdown text is not altered by
        # added indentation.
        gr.Markdown(
            """
### Acknowledgments
- [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
- [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
- [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
- [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
- [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
- [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
- [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
- [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
- [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
### Citation
```bibtex
@misc{qharabagh2025fastfancyrethinkingg2p,
title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
year={2025},
eprint={2505.12973},
archivePrefix={arXiv},
primaryClass={cs.CL},
}
@article{fetrat2024manatts,
title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
journal={arXiv preprint arXiv:2409.07259},
year={2024},
}
```
"""
        )
    return demo