File size: 5,872 Bytes
10e72d3 eb57aa1 10e72d3 eb57aa1 10e72d3 eb57aa1 10e72d3 eb57aa1 10e72d3 eb57aa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
import gradio as gr
from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE
# Selectable G2P models: display name -> value passed to GE2PE as ``model_path``.
# Insertion order matters: the keys are listed in this order in the UI selector.
MODEL_PATHS = {
    "Homo-GE2PE": "./homo-ge2pe",
    "Homo-T5": "./homo-t5",
}

# GE2PE instances already constructed, keyed by model name.
# Starts empty and is filled on demand by ``_get_g2p``.
_g2p_cache = {}
def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, building it on first use.

    The instance is stored in the module-level ``_g2p_cache`` so later calls
    with the same name reuse it instead of constructing a new one.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached (or freshly constructed) ``GE2PE`` object.

    Raises:
        ValueError: If *model_name* has no entry in ``MODEL_PATHS``.
    """
    # Fast path: this model was already constructed by an earlier call.
    if model_name in _g2p_cache:
        return _g2p_cache[model_name]

    model_dir = MODEL_PATHS.get(model_name)
    if model_dir is None:
        raise ValueError(f"Unknown model: {model_name}")

    # Models are always constructed with GPU=False.
    instance = GE2PE(model_path=model_dir, GPU=False)
    _g2p_cache[model_name] = instance
    return instance
def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on a single piece of text.

    Args:
        model_name: Name of the G2P model, resolved through ``_get_g2p``.
        text: Input text; ``None``, empty, or whitespace-only yields ``""``.
        use_rules: Forwarded to the model's ``generate`` as ``use_rules``.
        use_dict: Forwarded to the model's ``generate`` as ``use_dict``.

    Returns:
        The first element of the model's output, ``""`` when the input is
        blank or the output is empty, or a string starting with
        ``"⚠️ Error: "`` when model loading or inference raises.
    """
    # Nothing to convert: skip model loading entirely.
    if not (text and text.strip()):
        return ""

    try:
        g2p = _get_g2p(model_name)
        # The model is called with a one-element batch.
        outputs = g2p.generate([text], use_rules=use_rules, use_dict=use_dict)
        if outputs:
            return outputs[0]
        return ""
    except Exception as exc:
        # Failures are reported as the returned text instead of propagating.
        return f"⚠️ Error: {str(exc)}"
def create_interface() -> gr.Blocks:
    """Build the Gradio app with a G2P tab and a text-to-speech tab.

    The function only constructs the interface; it does not launch it.

    Returns:
        The ``gr.Blocks`` object bound as ``demo``.
    """
    # NOTE(review): the `with` nesting below was reconstructed from a copy of
    # the file whose indentation was lost. Confirm which components belong
    # inside each gr.Row and whether the acknowledgments footer sits below the
    # tabs (as written here) or inside the last tab.
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        gr.Markdown("# Persian Speech Suite: GE2PE & TTS\n" "A unified playground for Persian grapheme‑to‑phoneme conversion (GE2PE) **and** text‑to‑speech synthesis (Mana TTS).")
        with gr.Tabs():
            # --- Tab 1: grapheme-to-phoneme conversion ---
            with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
                gr.Markdown("Convert Persian text to its phonemic transcription. Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally applying short‑vowel rules and/or a custom dictionary.")
                with gr.Row():
                    # Choices come straight from MODEL_PATHS, so adding a model
                    # there exposes it here.
                    model_selector = gr.Radio(
                        choices=list(MODEL_PATHS.keys()),
                        value="Homo-GE2PE",
                        label="G2P Model",
                    )
                g2p_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مثال: این کتابِ علی است",
                    lines=4,
                )
                with gr.Row():
                    g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
                    g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")
                g2p_button = gr.Button("Convert", variant="primary")
                g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)
                # Input order must match ge2pe_infer's positional parameters:
                # (model_name, text, use_rules, use_dict).
                g2p_button.click(
                    fn=ge2pe_infer,
                    inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
                    outputs=[g2p_output],
                )
                # Sample sentences bound to the text box only; the model and
                # checkbox selections are not part of the examples.
                gr.Examples(
                    examples=[
                        ["او مرد خوبی است."],
                        ["او مرد."],
                        ["این کتابِ علی است."],
                        ["به خانه آمد."]
                    ],
                    inputs=[g2p_input],
                )
            # --- Tab 2: text-to-speech synthesis ---
            with gr.TabItem("Text‑to‑Speech"):
                gr.Markdown("Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.")
                tts_input = gr.Textbox(
                    label="Persian Text",
                    placeholder="مدل تولید گفتار با دادگان نسل مانا",
                    lines=5,
                )
                tts_button = gr.Button("Generate Speech", variant="primary")
                tts_output = gr.Audio(label="Generated Speech")
                # generate_speech is imported from the synthesis module; its
                # return value is handed to the Audio component (return format
                # not visible from this file).
                tts_button.click(
                    fn=generate_speech,
                    inputs=[tts_input],
                    outputs=[tts_output],
                )
                gr.Examples(
                    examples=[
                        ["سلام، چطور هستید؟"],
                        ["ایران سرزمین زیباییها و افتخارات است."],
                        ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                        ["مدل تولید گفتار با دادگان نسل مانا"],
                    ],
                    inputs=[tts_input],
                )
        # Footer with acknowledgments and citations. The literal is kept flush
        # left so the Markdown text carries no leading indentation.
        gr.Markdown(
            """
### Acknowledgments
- [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
- [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
- [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
- [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
- [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
- [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
- [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
- [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
- [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
### Citation
```bibtex
@misc{qharabagh2025fastfancyrethinkingg2p,
title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
year={2025},
eprint={2505.12973},
archivePrefix={arXiv},
primaryClass={cs.CL},
}
@article{fetrat2024manatts,
title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
journal={arXiv preprint arXiv:2409.07259},
year={2024},
}
```
"""
        )
    return demo
|