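# File-viewer page header captured with the source (not part of the program):
#   author: guymorlan
#   last commit: "Update app.py" (979aa51)
#   scan status: No virus
#   size: 5.72 kB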
import gradio as gr
from transformers import pipeline
import os
import azure.cognitiveservices.speech as speechsdk
# Dialect display name -> one-letter code that translate_english prepends to
# the English text before handing it to the English-to-dialect model.
dialects = {
    "Palestinian/Jordanian": "P",
    "Syrian": "S",
    "Lebanese": "L",
    "Egyptian": "E",
}

# Translation pipelines, created once when the module is loaded.
translator_en2ar = pipeline(
    task="translation",
    model="guymorlan/English2Dialect",
)
translator_ar2en = pipeline(
    task="translation",
    model="guymorlan/Shami2English",
)
transliterator = pipeline(
    task="translation",
    model="guymorlan/DialectTransliterator",
)

# Azure Speech credentials are read from the environment; os.environ.get
# yields None for a variable that is not set.
speech_config = speechsdk.SpeechConfig(
    subscription=os.environ.get("SPEECH_KEY"),
    region=os.environ.get("SPEECH_REGION"),
)
def translate_english(input_text, include):
    """Translate English text into Palestinian Arabic plus the selected dialects.

    Args:
        input_text: English text to translate. Falsy input (empty string or
            None) short-circuits without calling the model.
        include: Collection of selected option names. "Syrian", "Lebanese"
            and "Egyptian" enable the matching dialect; any other entries
            are ignored here.

    Returns:
        A 4-tuple ``(palestinian, syrian, lebanese, egyptian)`` of strings.
        Dialects that were not selected are returned as "".
    """
    if not input_text:
        return "", "", "", ""

    optional_dialects = ("Syrian", "Lebanese", "Egyptian")
    selected = [name for name in optional_dialects if name in include]

    # Build prompts only for the dialects that were asked for, so that every
    # result position maps to the right dialect for any combination of
    # selections (e.g. Egyptian ticked while Lebanese is not).
    palestinian_code = dialects["Palestinian/Jordanian"]
    prompts = [f"{palestinian_code} {input_text}"]
    prompts.extend(f"{dialects[name]} {input_text}" for name in selected)

    result = translator_en2ar(prompts)
    texts = [item["translation_text"] for item in result]

    # texts[0] is always Palestinian; the rest follow the order of `selected`.
    by_dialect = dict(zip(selected, texts[1:]))
    return (
        texts[0],
        by_dialect.get("Syrian", ""),
        by_dialect.get("Lebanese", ""),
        by_dialect.get("Egyptian", ""),
    )
def translate_arabic(input_text):
    """Translate Arabic text to English with the Arabic-to-English pipeline.

    Returns "" without calling the model when *input_text* is falsy;
    otherwise returns the "translation_text" of the single pipeline output.
    """
    if input_text:
        outputs = translator_ar2en([input_text])
        first = outputs[0]
        return first["translation_text"]
    return ""
def get_audio(input_text):
    """Synthesize *input_text* with Azure text-to-speech and return the WAV path.

    Args:
        input_text: Text to speak, using the 'ar-SY-AmanyNeural' voice.

    Returns:
        Path of the WAV file written in the current working directory, or
        None when there is no text to speak.

    Raises:
        gr.Error: If the service does not report a completed synthesis.
    """
    import hashlib

    if not input_text or not input_text.strip():
        return None

    # Name the file after a hash of the text rather than the text itself:
    # user text may contain path separators or exceed filename length limits.
    digest = hashlib.sha256(input_text.encode("utf-8")).hexdigest()
    wav_path = f"{digest}.wav"

    audio_config = speechsdk.audio.AudioOutputConfig(filename=wav_path)
    speech_config.speech_synthesis_voice_name = 'ar-SY-AmanyNeural'
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )
    speech_synthesis_result = speech_synthesizer.speak_text_async(input_text).get()

    # Surface failures instead of handing the UI a path to an unusable file.
    if speech_synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        raise gr.Error("Text-to-speech synthesis failed.")
    return wav_path
def get_transliteration(input_text, include=("Transliteration",)):
    """Transliterate Arabic text with the transliteration pipeline.

    Args:
        input_text: Arabic text to transliterate.
        include: Collection of selected option names. Transliteration only
            runs when it contains "Transliteration". The default is an
            immutable tuple so the default object can never be mutated
            between calls.

    Returns:
        The transliteration, or "" when the feature is not selected or
        there is no text to transliterate.
    """
    if "Transliteration" not in include:
        return ""
    # The change listener also fires when the textbox is cleared; skip the
    # model call for empty text.
    if not input_text:
        return ""
    result = transliterator([input_text])
    return result[0]["translation_text"]
# Custom styles passed to gr.Blocks: larger text for elements with id "liter"
# or "trans", and right-to-left direction for "trans" textareas.
# No ';' may follow a closing brace: at the top level of a stylesheet it is
# parsed as the start of the next rule and would invalidate that rule.
css = """
#liter textarea, #trans textarea { font-size: 25px;}
#trans textarea { direction: rtl; }
"""
# Example sentences offered on the Arabic-input tabs. They are stored as real
# Arabic text; an earlier mis-decoding had turned them into unreadable
# characters that were shown to users and sent to the models as-is.
arabic_examples = [
    "خلينا ندور على مطعم تاني",
    "كان بدي اوكل اشي قبل ما نروح",
]

# Three-tab interface: English -> Arabic dialects, Arabic -> English, and
# Arabic transliteration.
with gr.Blocks(title = "English to Levantine Arabic", css=css, theme="default") as demo:
    gr.Markdown("# Levantine Arabic Translator")

    with gr.Tab('En -> Ar'):
        with gr.Row():
            # Left column: input, examples, options and credits.
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter English text", lines=1)
                gr.Examples(["I wanted to go to the store yesterday, but it rained", "How are you feeling today?", "Let's drink coffee"], input_text)
                btn = gr.Button("Translate", label="Translate")
                with gr.Row():
                    include = gr.CheckboxGroup(["Transliteration", "Syrian", "Lebanese", "Egyptian"],
                                               label="Disable features to speed up translation",
                                               value=["Transliteration", "Syrian", "Lebanese", "Egyptian"])
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il). Pronunciation model is specifically tailored to urban Palestinian Arabic. Text-to-speech uses Microsoft Azure's API and may provide different result from the transliterated pronunciation.")

            # Right column: one output box per dialect plus audio playback.
            with gr.Column():
                pal = gr.Textbox(lines=1, label="Palestinian", elem_id="trans")
                pal_translit = gr.Textbox(lines=1, label="Palestinian Pronunciation", elem_id="liter")
                sy = gr.Textbox(lines=1, label="Syrian", elem_id="trans")
                lb = gr.Textbox(lines=1, label="Lebanese", elem_id="trans")
                eg = gr.Textbox(lines=1, label="Egyptian", elem_id="trans")
                with gr.Row():
                    audio = gr.Audio(label="Audio - Palestinian", interactive=False)
                    audio_button = gr.Button("Get Audio", label="Click Here to Get Audio")
                audio_button.click(get_audio, inputs=[pal], outputs=[audio])

        # Translate on button click or on Enter in the input box.
        btn.click(translate_english,inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        input_text.submit(translate_english, inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        # Whenever the Palestinian output changes, refresh its transliteration.
        pal.change(get_transliteration, inputs=[pal, include], outputs=[pal_translit])

    with gr.Tab('Ar -> En'):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1, elem_id="trans")
                gr.Examples(arabic_examples, input_text)
                btn = gr.Button("Translate", label="Translate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il).")
            with gr.Column():
                eng = gr.Textbox(label="English", lines=1, elem_id="liter")
        btn.click(translate_arabic,inputs=input_text, outputs=[eng])

    with gr.Tab("Transliterate"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1)
                gr.Examples(arabic_examples, input_text)
                btn = gr.Button("Transliterate", label="Transliterate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il)")
            with gr.Column():
                translit = gr.Textbox(label="Transliteration", lines=1, elem_id="liter")
        # `include` is not wired here, so get_transliteration uses its default.
        btn.click(get_transliteration, inputs=input_text, outputs=[translit])

demo.launch()