|
import logging

import gradio as gr

from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE
|
|
|
# Selectable G2P model names mapped to the path that is passed to
# ``GE2PE(model_path=...)`` when the model is first requested.
MODEL_PATHS = {
    "Homo-GE2PE": "./homo-ge2pe",
    "Homo-T5": "./homo-t5",
}

# Already-constructed GE2PE instances, keyed by model name. Populated lazily
# by ``_get_g2p`` so each model is built at most once per name.
_g2p_cache = {}
|
|
|
def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, constructing it on first use.

    Instances are stored in the module-level ``_g2p_cache`` so a given model
    name is only constructed once; later calls return the cached object.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached (or newly constructed) ``GE2PE`` instance.

    Raises:
        ValueError: If *model_name* is not a key of ``MODEL_PATHS``.
    """
    if model_name in _g2p_cache:
        return _g2p_cache[model_name]

    path = MODEL_PATHS.get(model_name)
    if path is None:
        raise ValueError(f"Unknown model: {model_name}")

    # Models are always constructed with GPU=False.
    model = GE2PE(model_path=path, GPU=False)
    _g2p_cache[model_name] = model
    return model
|
|
|
|
|
def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on *text* for the UI.

    Args:
        model_name: Name of the G2P model to use; resolved via ``_get_g2p``.
        text: Input text. Empty, ``None`` or whitespace-only input
            short-circuits to an empty result without loading a model.
        use_rules: Forwarded to ``model.generate`` as ``use_rules``.
        use_dict: Forwarded to ``model.generate`` as ``use_dict``.

    Returns:
        The first element of the list returned by ``model.generate`` (or ``""``
        when that result is empty). If anything raises, a string of the form
        ``"⚠️ Error: <message>"`` is returned instead of propagating, so the
        message shows up in the output box.
    """
    if not text or not text.strip():
        return ""

    try:
        model = _get_g2p(model_name)
        result = model.generate([text], use_rules=use_rules, use_dict=use_dict)
        return result[0] if result else ""
    except Exception as e:
        # This is the UI boundary: keep the user-facing message, but also log
        # the traceback so the failure is not lost on the server side.
        logging.getLogger(__name__).exception(
            "G2P inference failed for model %r", model_name
        )
        return f"⚠️ Error: {e}"
|
|
|
|
|
# Markdown rendered below the tabs: acknowledgments and BibTeX citations.
_FOOTER_MD = """
### Acknowledgments

- [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
- [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
- [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
- [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
- [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
- [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
- [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
- [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
- [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
### Citation

```bibtex
@misc{qharabagh2025fastfancyrethinkingg2p,
title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
year={2025},
eprint={2505.12973},
archivePrefix={arXiv},
primaryClass={cs.CL},
}

@article{fetrat2024manatts,
title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
journal={arXiv preprint arXiv:2409.07259},
year={2024},
}
```
"""


def _build_g2p_tab():
    """Add the grapheme-to-phoneme tab to the enclosing ``gr.Tabs`` context."""
    with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
        gr.Markdown(
            "Convert Persian text to its phonemic transcription. Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary."
        )

        with gr.Row():
            model_selector = gr.Radio(
                choices=list(MODEL_PATHS.keys()),
                value="Homo-GE2PE",
                label="G2P Model",
            )

        # NOTE(review): the reviewed source had lost its indentation; the text
        # box is assumed to sit below the model row rather than inside it —
        # confirm against the intended layout.
        g2p_input = gr.Textbox(
            label="Persian Text",
            placeholder="مثال: این کتابِ علی است",
            lines=4,
        )

        with gr.Row():
            g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
            g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")

        g2p_button = gr.Button("Convert", variant="primary")
        g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

        # Input order must match the positional parameters of ge2pe_infer:
        # (model_name, text, use_rules, use_dict).
        g2p_button.click(
            fn=ge2pe_infer,
            inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
            outputs=[g2p_output],
        )

        gr.Examples(
            examples=[
                ["او مرد خوبی است."],
                ["او مرد."],
                ["این کتابِ علی است."],
                ["به خانه آمد."],
            ],
            inputs=[g2p_input],
        )


def _build_tts_tab():
    """Add the text-to-speech tab to the enclosing ``gr.Tabs`` context."""
    with gr.TabItem("Text‑to‑Speech"):
        gr.Markdown("Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.")

        tts_input = gr.Textbox(
            label="Persian Text",
            placeholder="مدل تولید گفتار با دادگان نسل مانا",
            lines=5,
        )

        tts_button = gr.Button("Generate Speech", variant="primary")
        tts_output = gr.Audio(label="Generated Speech")

        # generate_speech receives the text box value; its result is routed to
        # the audio component.
        tts_button.click(
            fn=generate_speech,
            inputs=[tts_input],
            outputs=[tts_output],
        )

        gr.Examples(
            examples=[
                ["سلام، چطور هستید؟"],
                ["ایران سرزمین زیباییها و افتخارات است."],
                ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                ["مدل تولید گفتار با دادگان نسل مانا"],
            ],
            inputs=[tts_input],
        )


def create_interface():
    """Build and return the Gradio ``Blocks`` app.

    The layout is a title, a tab set containing the G2P tab and the TTS tab,
    and a footer with acknowledgments and citations. The app is only
    constructed here; launching it is left to the caller.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        gr.Markdown(
            "# Persian Speech Suite: GE2PE & TTS\n"
            "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS)."
        )

        # Components created inside the helpers attach to whichever Gradio
        # context is active, so they must be called inside this `with`.
        with gr.Tabs():
            _build_g2p_tab()
            _build_tts_tab()

        gr.Markdown(_FOOTER_MD)

    return demo
|
|