File size: 5,717 Bytes
3e25102
 
 
 
 
 
 
7da7e15
dbacc9b
3e25102
 
 
 
dbacc9b
964b4e8
 
 
3e25102
dbacc9b
 
 
 
 
 
 
 
 
 
7da7e15
964b4e8
dbacc9b
 
 
 
 
 
3e25102
7da7e15
964b4e8
 
 
7da7e15
 
 
3e25102
 
 
 
 
 
 
 
dbacc9b
 
964b4e8
3e25102
 
 
 
 
 
 
 
 
7da7e15
 
89ccb88
7da7e15
 
 
979aa51
7da7e15
dbacc9b
 
 
 
7da7e15
 
 
 
 
 
 
 
 
 
 
 
dbacc9b
 
 
 
 
 
 
979aa51
dbacc9b
 
 
 
 
7da7e15
 
 
 
979aa51
7da7e15
 
 
 
 
 
3e25102
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import gradio as gr
from transformers import pipeline
import os
import azure.cognitiveservices.speech as speechsdk

# Display name of each supported dialect mapped to the one-letter tag that is
# prefixed to the English text sent to the English -> dialect model.
dialects = {
    "Palestinian/Jordanian": "P",
    "Syrian": "S",
    "Lebanese": "L",
    "Egyptian": "E",
}

# Translation pipelines, instantiated once when the module is loaded.
translator_en2ar = pipeline(model="guymorlan/English2Dialect", task="translation")
translator_ar2en = pipeline(model="guymorlan/Shami2English", task="translation")
transliterator = pipeline(model="guymorlan/DialectTransliterator", task="translation")

# Azure Speech credentials are read from the SPEECH_KEY / SPEECH_REGION
# environment variables.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv('SPEECH_KEY'),
    region=os.getenv('SPEECH_REGION'),
)

def translate_english(input_text, include):
    """Translate English text into Palestinian plus any selected dialects.

    Args:
        input_text: English text to translate.
        include: Collection of enabled option names. "Syrian", "Lebanese" and
            "Egyptian" select the optional dialects; other entries are
            ignored here.

    Returns:
        A 4-tuple ``(palestinian, syrian, lebanese, egyptian)`` of translated
        strings. A dialect that was not selected yields ``""``; all four are
        ``""`` when ``input_text`` is empty.
    """
    if not input_text:
        return "", "", "", ""

    include = include or ()
    selected = [name for name in ("Syrian", "Lebanese", "Egyptian") if name in include]

    # Build the batch from the dialects actually requested. Popping entries
    # off the end of a fixed [P, S, L, E] list removes the wrong dialect
    # whenever an earlier option is disabled while a later one stays enabled.
    tags = [dialects["Palestinian/Jordanian"]]
    tags.extend(dialects[name] for name in selected)

    result = translator_en2ar([f"{tag} {input_text}" for tag in tags])

    # result[0] is always Palestinian; the rest line up with `selected`.
    pal_out = result[0]["translation_text"]
    extras = {
        name: item["translation_text"]
        for name, item in zip(selected, result[1:])
    }

    return (
        pal_out,
        extras.get("Syrian", ""),
        extras.get("Lebanese", ""),
        extras.get("Egyptian", ""),
    )

def translate_arabic(input_text):
    """Translate Arabic text to English with the Arabic -> English pipeline.

    Returns ``""`` for empty input; otherwise the ``translation_text`` of the
    first (and only) item the pipeline returns for the single-element batch.
    """
    if input_text:
        outputs = translator_ar2en([input_text])
        first = outputs[0]
        return first["translation_text"]
    return ""


def get_audio(input_text):
    """Synthesize speech for ``input_text`` with Azure TTS and return the WAV path.

    Args:
        input_text: Text to be spoken.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when there is no text
        to synthesize or the service did not complete the synthesis.
    """
    import tempfile

    if not input_text:
        return None

    # The text comes from the UI, so it must not be used as a file name: it
    # may contain path separators or exceed the file-name length limit.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    audio_config = speechsdk.audio.AudioOutputConfig(filename=wav_path)
    speech_config.speech_synthesis_voice_name = 'ar-SY-AmanyNeural'
    speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_config)

    # .get() blocks until the asynchronous synthesis has finished.
    speech_synthesis_result = speech_synthesizer.speak_text_async(input_text).get()
    if speech_synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        # Cancelled or failed: do not hand back a path to an unusable file.
        return None
    return wav_path

def get_transliteration(input_text, include=("Transliteration",)):
    """Transliterate Arabic text with the transliteration pipeline.

    Args:
        input_text: Arabic text to transliterate.
        include: Collection of enabled option names; transliteration runs only
            when it contains "Transliteration" (the default enables it).

    Returns:
        The transliterated string, or ``""`` when the input is empty or the
        feature is not enabled.
    """
    # Skip the model call for empty text (e.g. when the source box is cleared).
    if not input_text or "Transliteration" not in include:
        return ""
    result = transliterator([input_text])
    return result[0]["translation_text"]


# Custom CSS for the UI, keyed on the "liter" and "trans" element ids:
# both get a larger font, and "trans" textareas are laid out right-to-left.
css = """
#liter textarea, #trans textarea { font-size: 25px;}
#trans textarea { direction: rtl; }
"""

# Three-tab UI: English -> Arabic dialects, Arabic -> English, and
# Arabic transliteration.
with gr.Blocks(title = "English to Levantine Arabic", css=css, theme="default") as demo:
    gr.Markdown("# Levantine Arabic Translator")
    with gr.Tab('En -> Ar'):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter English text", lines=1)
                gr.Examples(["I wanted to go to the store yesterday, but it rained", "How are you feeling today?", "Let's drink coffee"], input_text)
                # A Button's caption is its value; it takes no `label` argument.
                btn = gr.Button("Translate")
                with gr.Row():
                    include = gr.CheckboxGroup(["Transliteration", "Syrian", "Lebanese", "Egyptian"],
                                               label="Disable features to speed up translation",
                                               value=["Transliteration", "Syrian", "Lebanese", "Egyptian"])
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il). Pronunciation model is specifically tailored to urban Palestinian Arabic. Text-to-speech uses Microsoft Azure's API and may provide different result from the transliterated pronunciation.")

            with gr.Column():
                pal = gr.Textbox(lines=1, label="Palestinian", elem_id="trans")
                pal_translit = gr.Textbox(lines=1, label="Palestinian Pronunciation", elem_id="liter")
                sy = gr.Textbox(lines=1, label="Syrian", elem_id="trans")
                lb = gr.Textbox(lines=1, label="Lebanese", elem_id="trans")
                eg = gr.Textbox(lines=1, label="Egyptian", elem_id="trans")
                with gr.Row():
                    audio = gr.Audio(label="Audio - Palestinian", interactive=False)
                    audio_button = gr.Button("Get Audio")
                    audio_button.click(get_audio, inputs=[pal], outputs=[audio])
        btn.click(translate_english,inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        input_text.submit(translate_english, inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        # The pronunciation box follows the Palestinian output whenever it changes.
        pal.change(get_transliteration, inputs=[pal, include], outputs=[pal_translit])
    with gr.Tab('Ar -> En'):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1, elem_id="trans")
                gr.Examples(["خلينا ندور على مطعم تاني", "كان بدي اوكل اشي قبل ما نروح"], input_text)
                btn = gr.Button("Translate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il).")
            with gr.Column():
                eng = gr.Textbox(label="English", lines=1, elem_id="liter")
        btn.click(translate_arabic,inputs=input_text, outputs=[eng])
        # Enter in the input box triggers translation, as in the first tab.
        input_text.submit(translate_arabic, inputs=input_text, outputs=[eng])
    with gr.Tab("Transliterate"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1)
                gr.Examples(["خلينا ندور على مطعم تاني", "كان بدي اوكل اشي قبل ما نروح"], input_text)
                btn = gr.Button("Transliterate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il)")
            with gr.Column():
                translit = gr.Textbox(label="Transliteration", lines=1, elem_id="liter")
        btn.click(get_transliteration, inputs=input_text, outputs=[translit])
        # Enter in the input box triggers transliteration, as in the first tab.
        input_text.submit(get_transliteration, inputs=input_text, outputs=[translit])

demo.launch()