Spaces:
Running
Running
File size: 5,717 Bytes
3e25102 7da7e15 dbacc9b 3e25102 dbacc9b 964b4e8 3e25102 dbacc9b 7da7e15 964b4e8 dbacc9b 3e25102 7da7e15 964b4e8 7da7e15 3e25102 dbacc9b 964b4e8 3e25102 7da7e15 89ccb88 7da7e15 979aa51 7da7e15 dbacc9b 7da7e15 dbacc9b 979aa51 dbacc9b 7da7e15 979aa51 7da7e15 3e25102 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
import gradio as gr
from transformers import pipeline
import os
import azure.cognitiveservices.speech as speechsdk
# Dialect display name -> prefix token prepended to the English text before it
# is sent to the English-to-dialect model. Insertion order matters to callers.
dialects = {
    "Palestinian/Jordanian": "P",
    "Syrian": "S",
    "Lebanese": "L",
    "Egyptian": "E",
}

# Hugging Face pipelines, created once at import time and shared by all requests.
translator_en2ar = pipeline(
    task="translation",
    model="guymorlan/English2Dialect",
)
translator_ar2en = pipeline(
    task="translation",
    model="guymorlan/Shami2English",
)
transliterator = pipeline(
    task="translation",
    model="guymorlan/DialectTransliterator",
)

# Azure Speech credentials are read from the environment.
speech_config = speechsdk.SpeechConfig(
    subscription=os.getenv('SPEECH_KEY'),
    region=os.getenv('SPEECH_REGION'),
)
def translate_english(input_text, include):
    """Translate English text into each selected Arabic dialect.

    Palestinian/Jordanian is always translated. Syrian, Lebanese and Egyptian
    are translated only when their name appears in ``include``.

    Args:
        input_text: English text to translate.
        include: Collection of enabled feature names (the checkbox values).
            Names other than the three optional dialects are ignored here.

    Returns:
        A 4-tuple ``(palestinian, syrian, lebanese, egyptian)``. Entries for
        dialects that were not requested are empty strings. All four are
        empty when ``input_text`` is empty.
    """
    if not input_text:
        return "", "", "", ""

    include = include or ()

    # Build prompts only for the requested dialects and remember which dialect
    # each prompt belongs to. Popping from the end of a fixed list removes the
    # wrong prompt whenever a middle dialect is deselected, which shifts every
    # later translation into the wrong output slot.
    selected = ["Palestinian/Jordanian"]
    selected.extend(
        name for name in ("Syrian", "Lebanese", "Egyptian") if name in include
    )
    inputs = [f"{dialects[name]} {input_text}" for name in selected]

    result = translator_en2ar(inputs)
    by_dialect = {
        name: output["translation_text"]
        for name, output in zip(selected, result)
    }

    return (
        by_dialect["Palestinian/Jordanian"],
        by_dialect.get("Syrian", ""),
        by_dialect.get("Lebanese", ""),
        by_dialect.get("Egyptian", ""),
    )
def translate_arabic(input_text):
    """Translate Levantine Arabic text into English.

    Args:
        input_text: Arabic text to translate.

    Returns:
        The English translation, or an empty string when ``input_text`` is
        empty.
    """
    if input_text:
        # The pipeline takes a batch and returns one dict per input.
        translations = translator_ar2en([input_text])
        return translations[0]["translation_text"]
    return ""
def get_audio(input_text):
    """Synthesize speech for ``input_text`` and return the path of the WAV file.

    Args:
        input_text: Arabic text to read aloud.

    Returns:
        Path of the generated ``.wav`` file, or ``None`` when ``input_text``
        is empty (nothing to synthesize).

    Raises:
        RuntimeError: If the speech service does not report a completed
            synthesis.
    """
    import tempfile  # function-scope import keeps this block self-contained

    if not input_text:
        return None

    # Write to a generated temp path rather than one derived from the text:
    # user text may contain path separators or exceed the filename length limit.
    # The file is intentionally kept (delete=False) so the caller can read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as handle:
        wav_path = handle.name

    audio_config = speechsdk.audio.AudioOutputConfig(filename=wav_path)
    speech_config.speech_synthesis_voice_name = 'ar-SY-AmanyNeural'
    speech_synthesizer = speechsdk.SpeechSynthesizer(
        speech_config=speech_config,
        audio_config=audio_config,
    )
    synthesis_result = speech_synthesizer.speak_text_async(input_text).get()
    if synthesis_result.reason != speechsdk.ResultReason.SynthesizingAudioCompleted:
        raise RuntimeError(f"Speech synthesis failed: {synthesis_result.reason}")
    return wav_path
def get_transliteration(input_text, include=("Transliteration",)):
    """Transliterate Arabic text with the transliteration model.

    Args:
        input_text: Arabic text to transliterate.
        include: Collection of enabled feature names. Transliteration runs
            only when it contains ``"Transliteration"``. The default is an
            immutable tuple so it cannot be shared and mutated across calls.

    Returns:
        The transliterated text, or an empty string when transliteration is
        disabled or ``input_text`` is empty.
    """
    # Skip the model for empty text too: this handler also fires when the
    # source textbox is cleared.
    if not input_text or "Transliteration" not in include:
        return ""
    result = transliterator([input_text])
    return result[0]["translation_text"]
# Custom stylesheet passed to gr.Blocks: larger text for the "liter" and
# "trans" textboxes, right-to-left direction for "trans".
# No ';' may follow a closing brace: at top level it is invalid CSS and would
# invalidate any rule added after it.
css = """
#liter textarea, #trans textarea { font-size: 25px;}
#trans textarea { direction: rtl; }
"""
# Build the three-tab Gradio UI and wire each control to its handler.
# NOTE(review): the block nesting below was reconstructed from a copy of this
# file whose indentation had been lost -- confirm the exact Row/Column layout.
with gr.Blocks(title = "English to Levantine Arabic", css=css, theme="default") as demo:
    gr.Markdown("# Levantine Arabic Translator")

    # Tab 1: English -> Arabic dialects, with transliteration and audio.
    with gr.Tab('En -> Ar'):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter English text", lines=1)
                gr.Examples(["I wanted to go to the store yesterday, but it rained", "How are you feeling today?", "Let's drink coffee"], input_text)
                btn = gr.Button("Translate", label="Translate")
                with gr.Row():
                    include = gr.CheckboxGroup(["Transliteration", "Syrian", "Lebanese", "Egyptian"],
                                               label="Disable features to speed up translation",
                                               value=["Transliteration", "Syrian", "Lebanese", "Egyptian"])
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il). Pronunciation model is specifically tailored to urban Palestinian Arabic. Text-to-speech uses Microsoft Azure's API and may provide different result from the transliterated pronunciation.")
            with gr.Column():
                pal = gr.Textbox(lines=1, label="Palestinian", elem_id="trans")
                pal_translit = gr.Textbox(lines=1, label="Palestinian Pronunciation", elem_id="liter")
                sy = gr.Textbox(lines=1, label="Syrian", elem_id="trans")
                lb = gr.Textbox(lines=1, label="Lebanese", elem_id="trans")
                eg = gr.Textbox(lines=1, label="Egyptian", elem_id="trans")
                with gr.Row():
                    audio = gr.Audio(label="Audio - Palestinian", interactive=False)
                    audio_button = gr.Button("Get Audio", label="Click Here to Get Audio")
                audio_button.click(get_audio, inputs=[pal], outputs=[audio])
        # Both the button and pressing Enter in the input trigger translation.
        btn.click(translate_english,inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        input_text.submit(translate_english, inputs=[input_text, include], outputs=[pal, sy, lb, eg])
        # Refresh the pronunciation whenever the Palestinian text changes.
        pal.change(get_transliteration, inputs=[pal, include], outputs=[pal_translit])

    # Tab 2: Levantine Arabic -> English.
    with gr.Tab('Ar -> En'):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1, elem_id="trans")
                # NOTE(review): these Arabic examples were restored from
                # mis-encoded text; the third word of the second sentence could
                # not be recovered with certainty -- confirm against the original.
                gr.Examples(["خلينا ندور على مطعم تاني", "كان بدي اقلك اشي قبل ما نروح"], input_text)
                btn = gr.Button("Translate", label="Translate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il).")
            with gr.Column():
                eng = gr.Textbox(label="English", lines=1, elem_id="liter")
        btn.click(translate_arabic,inputs=input_text, outputs=[eng])

    # Tab 3: Arabic script -> transliteration.
    with gr.Tab("Transliterate"):
        with gr.Row():
            with gr.Column():
                input_text = gr.Textbox(label="Input", placeholder="Enter Levantine Arabic text", lines=1)
                # NOTE(review): same restored examples as the 'Ar -> En' tab;
                # the third word of the second sentence is a best guess.
                gr.Examples(["خلينا ندور على مطعم تاني", "كان بدي اقلك اشي قبل ما نروح"], input_text)
                btn = gr.Button("Transliterate", label="Transliterate")
                gr.Markdown("Built by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il)")
            with gr.Column():
                translit = gr.Textbox(label="Transliteration", lines=1, elem_id="liter")
        btn.click(get_transliteration, inputs=input_text, outputs=[translit])

demo.launch()
|