#%%
# Gradio front end for the Levanti tools: a translation tab, a standalone
# diacritization/transliteration tab and an "About" tab.
#
# NOTE(review): this file was reformatted from a whitespace-collapsed copy.
# The nesting of the layout containers (Row / Column / Group) below was
# inferred from statement order, and the markup inside the gr.HTML strings
# appears to have been stripped -- confirm both against the deployed app.
import gradio as gr
from dotenv import load_dotenv

from translate import run_translate
from diacritize import diacritize, diacritize_if_not_already
from translit import taatik, translit
from semsearch import update_df
from tts import get_audio
from consts import CSS, ABOUT, JS_FUNC

# Load settings from a local .env file before the UI is built.
load_dotenv()

with gr.Blocks(
    title="Levanti - Levantine Arabic Translation Tools",
    css=CSS,
    theme="default",
) as demo:
    gr.HTML("\n\nLevantine Translator\n\nLevantine Arabic Translation Tools")

    # ------------------------------------------------------------------
    # Tab 1: translation, followed by chained diacritization,
    # transliteration and on-demand audio.
    # ------------------------------------------------------------------
    with gr.Tab('Translation', elem_id="tab1"):
        with gr.Row():
            # Left column: user input and translation options.
            with gr.Column():
                input_text = gr.Textbox(
                    label="Input",
                    info="Colloquial Arabic or English",
                    placeholder="Enter text in Arabic or English",
                    lines=2,
                    elem_id="input",
                )
                gr.Examples(
                    [
                        "I called him two times, he's not picking up",
                        "خلينا ندور على مطعم تاني",
                    ],
                    input_text,
                    label="Examples",
                )
                btn = gr.Button("Translation")
                with gr.Row():
                    dialect = gr.Radio(
                        ["Palestinian", "Syrian", "Lebanese", "Egyptian"],
                        label="Dialect",
                        info="Affects translation to Arabic",
                        value="Palestinian",
                    )
                gr.Markdown(
                    "Created by [Guy Mor-Lan](mailto:guy.mor@mail.huji.ac.il) as part of the "
                    "[Levanti](https://huggingface.co/datasets/guymorlan/levanti) project. "
                    "Audio is produced using Azure TTS with predicted diacritics and heuristics.",
                    elem_id="footer",
                )

            # Right column: translation result and everything derived from it.
            with gr.Column():
                with gr.Group(elem_id="grp"):
                    gr.HTML("Translation")
                    translation_output = gr.HTML(
                        "\n",
                        visible=True,
                        label="Translation",
                        elem_id="main",
                    )
                    # Invisible textbox; its change event starts the
                    # diacritization -> transliteration chain wired below.
                    hidden_arabic = gr.Textbox(
                        lines=1,
                        elem_id="trans",
                        visible=False,
                    )
                    diacritized_output = gr.Textbox(
                        label="Diacritization (experimental)",
                        lines=1,
                        elem_id="diacritized",
                        interactive=False,
                    )
                    taatik_output = gr.Textbox(
                        label="Transliteration (Experimental)",
                        lines=1,
                        elem_id="taatik",
                        text_align="right",
                        interactive=False,
                    )
                    audio = gr.Audio(
                        label="Audio (Azure)",
                        interactive=False,
                        autoplay=True,
                    )
                    audio_button = gr.Button("Generate Audio")

        # Audio is generated on request from the diacritized text.
        audio_button.click(
            get_audio,
            inputs=[diacritized_output],
            outputs=[audio],
        )

        # Button click: the js snippet runs in the browser, scrolls the
        # element with id "main" into view and passes both inputs through
        # unchanged. The handler is also exposed under the API name "en2ar".
        btn.click(
            run_translate,
            inputs=[input_text, dialect],
            outputs=[translation_output, hidden_arabic],
            api_name="en2ar",
            js=(
                "function jump(x, y){document.getElementById('main').scrollIntoView(); "
                "return [x, y];}"
            ),
        )
        # Pressing Enter in the input box runs the same translation handler.
        input_text.submit(
            run_translate,
            inputs=[input_text, dialect],
            outputs=[translation_output, hidden_arabic],
            scroll_to_output=True,
        )

        # Chained updates: hidden_arabic -> diacritized_output -> taatik_output.
        hidden_arabic.change(
            diacritize,
            inputs=[hidden_arabic],
            outputs=[diacritized_output],
        )
        diacritized_output.change(
            translit,
            inputs=[diacritized_output],
            outputs=[taatik_output],
        )
        # NOTE: a "nearest results" table fed by update_df on
        # hidden_arabic.change used to be wired here and is currently
        # disabled, which is why update_df is imported but unused.

    # ------------------------------------------------------------------
    # Tab 2: standalone diacritization and transliteration.
    # ------------------------------------------------------------------
    with gr.Tab("Diacritization and Transliteration", elem_id="tab2"):
        with gr.Row():
            with gr.Column():
                diac_text = gr.Textbox(
                    label="Input",
                    placeholder="Insert text in Arabic",
                    lines=1,
                    info="For transliteration only, insert diacritized text",
                    elem_id="diac_input",
                )
                gr.Examples(
                    [
                        "خلينا ندور على مطعم تاني",
                        "قَدِيْش حَقّ الْبَنْدُورَة؟",
                    ],
                    diac_text,
                    label="Examples",
                    elem_id="diac_ex",
                )
                btn2 = gr.Button("Send")
            with gr.Column():
                diacritized_output2 = gr.Textbox(
                    label="Diacritization",
                    lines=1,
                    elem_id="diacritized2",
                )
                taatik_output2 = gr.Textbox(
                    label="Transliteration",
                    lines=1,
                    elem_id="taatik2",
                )

        # Both the button and Enter run the same handler; any change to the
        # diacritized text then triggers transliteration.
        btn2.click(
            diacritize_if_not_already,
            inputs=[diac_text],
            outputs=[diacritized_output2],
        )
        diac_text.submit(
            diacritize_if_not_already,
            inputs=[diac_text],
            outputs=[diacritized_output2],
        )
        diacritized_output2.change(
            translit,
            inputs=[diacritized_output2],
            outputs=[taatik_output2],
        )

    # ------------------------------------------------------------------
    # Tab 3: static project description.
    # ------------------------------------------------------------------
    with gr.Tab("About", elem_id="tab3"):
        with gr.Row():
            gr.HTML("\n\nAbout\n\n")
        gr.Markdown(ABOUT, elem_id="about")

# NOTE(review): ssl_verify=False is kept from the original; confirm that
# turning off certificate verification is intended for this deployment.
demo.launch(ssl_verify=False)