# Gradio UI for the Persian Speech Suite: GE2PE grapheme-to-phoneme conversion and text-to-speech.
import gradio as gr
from config import custom_css
from synthesis import generate_speech
from GE2PE import GE2PE

# Maps the model name shown in the UI to the path handed to
# GE2PE(model_path=...). The key order is also the order of the radio
# choices built in create_interface.
MODEL_PATHS = {
    "Homo-GE2PE": "./homo-ge2pe",
    "Homo-T5": "./homo-t5",
}

# GE2PE instances keyed by model name. Filled lazily by _get_g2p so that a
# model already constructed is reused instead of being built again.
_g2p_cache = {}

def _get_g2p(model_name: str) -> GE2PE:
    """Return the GE2PE instance for *model_name*, constructing it on first use.

    Instances are stored in the module-level ``_g2p_cache`` and reused on
    later calls with the same name. New instances are built from the path
    registered in ``MODEL_PATHS`` with ``GPU=False``.

    Args:
        model_name: A key of ``MODEL_PATHS``.

    Returns:
        The cached (or newly constructed and cached) ``GE2PE`` instance.

    Raises:
        ValueError: If ``MODEL_PATHS`` has no path for *model_name*.
    """
    # Fast path: this model has already been constructed.
    if model_name in _g2p_cache:
        return _g2p_cache[model_name]

    checkpoint_dir = MODEL_PATHS.get(model_name)
    if checkpoint_dir is None:
        raise ValueError(f"Unknown model: {model_name}")

    loaded = GE2PE(model_path=checkpoint_dir, GPU=False)
    _g2p_cache[model_name] = loaded
    return loaded


def ge2pe_infer(model_name: str, text: str, use_rules: bool, use_dict: bool):
    """Run grapheme-to-phoneme conversion on *text* for the UI.

    Args:
        model_name: Name of the model to use; resolved through ``_get_g2p``.
        text: Input text. Empty, ``None`` or whitespace-only input
            short-circuits to an empty result without touching the model.
        use_rules: Forwarded to the model's ``generate`` as ``use_rules``.
        use_dict: Forwarded to the model's ``generate`` as ``use_dict``.

    Returns:
        The first element of the model's ``generate`` result, ``""`` when
        the input is blank or the result is empty, or a string starting with
        ``"⚠️ Error: "`` followed by the exception text when anything raised
        during model lookup or generation.
    """
    # Nothing to convert: skip model loading entirely.
    if not (text and text.strip()):
        return ""

    try:
        # generate() takes a batch; a one-element list is passed here.
        phonemes = _get_g2p(model_name).generate(
            [text], use_rules=use_rules, use_dict=use_dict
        )
        if phonemes:
            return phonemes[0]
        return ""
    except Exception as exc:
        # UI boundary: report the failure as the output text instead of raising.
        return f"⚠️ Error: {exc!s}"


def _build_g2p_tab():
    """Populate the "Grapheme → Phoneme (GE2PE)" tab and wire it to ge2pe_infer.

    Creates Gradio components as a side effect, so it must be called while
    the enclosing ``gr.Tabs`` layout context is open.
    """
    with gr.TabItem("Grapheme → Phoneme (GE2PE)"):
        gr.Markdown(
            "Convert Persian text to its phonemic transcription. "
            "Choose between **Homo‑GE2PE** and **Homo‑T5**, optionally "
            "applying short‑vowel rules and/or a custom dictionary."
        )

        with gr.Row():
            model_selector = gr.Radio(
                # One radio choice per registered model, in registry order.
                choices=list(MODEL_PATHS),
                value="Homo-GE2PE",
                label="G2P Model",
            )

        g2p_input = gr.Textbox(
            label="Persian Text",
            placeholder="مثال: این کتابِ علی است",
            lines=4,
        )

        with gr.Row():
            g2p_use_rules = gr.Checkbox(value=True, label="Apply short‑vowel rules (optional)")
            g2p_use_dict = gr.Checkbox(value=False, label="Use custom dictionary (optional)")

        g2p_button = gr.Button("Convert", variant="primary")
        g2p_output = gr.Textbox(label="Phoneme Output", interactive=False)

        # Input order must match ge2pe_infer's positional parameters.
        g2p_button.click(
            fn=ge2pe_infer,
            inputs=[model_selector, g2p_input, g2p_use_rules, g2p_use_dict],
            outputs=[g2p_output],
        )

        gr.Examples(
            examples=[
                ["او مرد خوبی است."],
                ["او مرد."],
                ["این کتابِ علی است."],
                ["به خانه آمد."],
            ],
            inputs=[g2p_input],
        )


def _build_tts_tab():
    """Populate the "Text‑to‑Speech" tab and wire it to generate_speech.

    Creates Gradio components as a side effect, so it must be called while
    the enclosing ``gr.Tabs`` layout context is open.
    """
    with gr.TabItem("Text‑to‑Speech"):
        gr.Markdown("Generate natural‑sounding Persian speech from your text using Tacotron2 + HiFiGAN.")

        tts_input = gr.Textbox(
            label="Persian Text",
            placeholder="مدل تولید گفتار با دادگان نسل مانا",
            lines=5,
        )

        tts_button = gr.Button("Generate Speech", variant="primary")
        tts_output = gr.Audio(label="Generated Speech")

        tts_button.click(
            fn=generate_speech,
            inputs=[tts_input],
            outputs=[tts_output],
        )

        gr.Examples(
            examples=[
                ["سلام، چطور هستید؟"],
                ["ایران سرزمین زیبایی‌ها و افتخارات است."],
                ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است."],
                ["مدل تولید گفتار با دادگان نسل مانا"],
            ],
            inputs=[tts_input],
        )


def create_interface():
    """Build the Persian Speech Suite Gradio app.

    The layout is a title/intro block, a tab set holding the G2P tab and the
    TTS tab, and a closing acknowledgments/citation block. The app is only
    constructed here; it is not launched.

    Returns:
        The ``gr.Blocks`` instance containing the whole interface.
    """
    with gr.Blocks(title="Persian Speech Suite", css=custom_css) as demo:
        gr.Markdown(
            "# Persian Speech Suite: GE2PE & TTS\n"
            "A unified playground for Persian grapheme‑to‑phoneme conversion "
            "(GE2PE) **and** text‑to‑speech synthesis (Mana TTS)."
        )

        with gr.Tabs():
            _build_g2p_tab()
            _build_tts_tab()

        gr.Markdown(
            """
            ### Acknowledgments

            - [**Nasl‑e‑Mana**](https://naslemana.com/), the monthly magazine of the blind community of Iran
            - [ManaTTS Dataset](https://huggingface.co/datasets/MahtaFetrat/Mana-TTS)
            - [Persian‑MultiSpeaker‑Tacotron2](https://github.com/MahtaFetrat/Persian-MultiSpeaker-Tacotron2/)
            - [Homo-GE2PE (Github)](https://github.com/MahtaFetrat/Homo-GE2PE-Persian/)
            - [Base GE2PE Paper](https://aclanthology.org/2024.findings-emnlp.196/)
            - [Base GE2PE Model](https://github.com/Sharif-SLPL/GE2PE)
            - [HomoRich Dataset (Huggingface)](https://huggingface.co/datasets/MahtaFetrat/HomoRich-G2P-Persian)
            - [HomoRich Dataset (Github)](https://github.com/MahtaFetrat/HomoRich-G2P-Persian)
            - [SentenceBench Persian G2P Benchmark](https://huggingface.co/datasets/MahtaFetrat/SentenceBench)
            ### Citation

            ```bibtex
            @misc{qharabagh2025fastfancyrethinkingg2p,
              title={Fast, Not Fancy: Rethinking G2P with Rich Data and Rule-Based Models},
              author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
              year={2025},
              eprint={2505.12973},
              archivePrefix={arXiv},
              primaryClass={cs.CL},
            }

            @article{fetrat2024manatts,
              title={ManaTTS Persian: A Recipe for Creating TTS Datasets for Lower-Resource Languages},
              author={Mahta Fetrat Qharabagh and Zahra Dehghanian and Hamid R. Rabiee},
              journal={arXiv preprint arXiv:2409.07259},
              year={2024},
            }
            ```
            """
        )

    return demo