Spaces:
Runtime error
Runtime error
File size: 4,280 Bytes
476e166 a7f2f12 8b8b295 476e166 a7f2f12 476e166 a7f2f12 dfe3477 a7f2f12 316e0e6 8b8b295 316e0e6 e400466 8b8b295 ceaa373 a7f2f12 e400466 a7f2f12 e400466 a7f2f12 e400466 a7f2f12 e400466 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import os
import fasttext
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch
# --- Text shown in the page header of the demo ---
title = "Community Tab Language Detection & Translation"
description = """
When comments are created in the community tab, detect the language of the content.
Then, if the detected language is different from the user's language, display an option to translate it.
"""

# --- Translation model ---
# The model and its tokenizer are loaded from the same checkpoint.
_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)

# 0 when CUDA is available, otherwise -1; handed to the translation pipeline
# as its ``device`` argument.
_cuda_available = torch.cuda.is_available()
device = 0 if _cuda_available else -1
print(f"Is CUDA available: {_cuda_available}")

# Maps the language names offered in the UI to the language codes that are
# compared against the detected language and passed as the translation target.
language_code_map = {
    "English": "eng_Latn",
    "French": "fra_Latn",
    "German": "deu_Latn",
    "Spanish": "spa_Latn",
    "Korean": "kor_Hang",
    "Japanese": "jpn_Jpan",
}
# Lazily populated cache so the fastText model file is read from disk only once.
_LID_MODEL = None


def _get_lid_model():
    """Return the fastText language-identification model, loading it on first use."""
    global _LID_MODEL
    if _LID_MODEL is None:
        model_file = "lid218e.bin"
        model_full_path = os.path.join(os.path.dirname(__file__), model_file)
        _LID_MODEL = fasttext.load_model(model_full_path)
    return _LID_MODEL


def identify_language(text):
    """Return the language code fastText predicts for ``text``.

    Args:
        text: The comment to classify. It may span several lines.

    Returns:
        The top predicted label with its ``__label__`` prefix removed,
        e.g. ``'eng_Latn'``.
    """
    # fastText's predict() processes one line at a time and raises ValueError
    # when the input contains a newline, so collapse line breaks to spaces.
    single_line_text = " ".join(text.splitlines())

    # e.g., (('__label__eng_Latn',), array([0.81148803]))
    predictions = _get_lid_model().predict(single_line_text, k=1)
    top_label = predictions[0][0]

    label_prefix = "__label__"
    if top_label.startswith(label_prefix):
        return top_label[len(label_prefix):]
    return top_label
def display(user_lang, text):
    """Post a comment and report its detected language.

    Args:
        user_lang: A language name that is a key of ``language_code_map``.
        text: The comment text that was submitted.

    Returns:
        A 4-tuple matching the outputs wired to the "Comment" button:
        the comment text unchanged, an update that clears the input box and
        restores its placeholder, an update carrying the detection summary,
        and an update that shows the translate button only when the detected
        code differs from the user's language code.
    """
    user_lang_code = language_code_map[user_lang]
    detected_code = identify_language(text)

    # Translation is only offered when the two language codes differ.
    needs_translation = detected_code != user_lang_code
    if needs_translation:
        same_language_note = ""
    else:
        same_language_note = "[NOT TRANSLATABLE] Detected Language and Content Language are the same"

    detected_language_text = f"""
Detected Language: {detected_code}\n
User Content Language: {user_lang_code}\n
{same_language_note}
"""

    posted_comment = text
    cleared_input = gr.update(value="", placeholder="Leave a comment")
    summary_update = gr.update(value=detected_language_text)
    translate_button_update = gr.update(visible=needs_translation, variant="primary")
    return posted_comment, cleared_input, summary_update, translate_button_update
def translate(text, src_lang, tgt_lang):
    """Translate ``text`` into the user's language.

    Args:
        text: The comment text to translate.
        src_lang: The detection summary text, which must contain
            ``'Detected Language: '`` followed by the language code
            (e.g. ``'eng_Latn'``).
        tgt_lang: A language name that is a key of ``language_code_map``.

    Returns:
        The translated text produced by the translation pipeline.

    Raises:
        ValueError: If ``src_lang`` does not contain the detected-language marker.
        KeyError: If ``tgt_lang`` is not a key of ``language_code_map``.
    """
    MARKER = "Detected Language: "
    LANGUAGE_CODE_LENGTH = 8  # Length of a language code such as 'eng_Latn'

    # Find the code relative to the marker instead of at a fixed offset: a
    # fixed offset of 22 is only right when exactly three characters (such as
    # a '<p>' tag) precede the marker, and breaks when the summary arrives in
    # any other form (e.g. raw Markdown).
    _, marker_found, after_marker = src_lang.partition(MARKER)
    if not marker_found:
        raise ValueError(
            "Could not find 'Detected Language: ' in the detection summary"
        )
    src_lang_code = after_marker[:LANGUAGE_CODE_LENGTH]
    tgt_lang_code = language_code_map[tgt_lang]

    translation_pipeline = pipeline(
        "translation",
        model=model,
        tokenizer=tokenizer,
        src_lang=src_lang_code,
        tgt_lang=tgt_lang_code,
        device=device,
    )
    result = translation_pipeline(text)
    return result[0]['translation_text']
with gr.Blocks() as demo:
    # Centered page header built from the module-level title and description.
    header_html = f"""
<div style="text-align: center; margin: 0 auto;">
<div style=" display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
<h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
{title}
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
{description}
</p>
</div>
"""
    gr.HTML(header_html)

    # Language the user reads; compared against the detected language.
    language_choices = ["English", "Spanish", "Korean", "French", "German", "Japanese"]
    user_langugage_radio = gr.Radio(
        language_choices,
        value="English",
        label="User Content Language",
    )

    # Where a comment is typed, and where the posted comment is shown.
    comment_input_textbox = gr.Textbox(
        lines=3,
        label="Write a Comment",
        placeholder="Leave a comment",
    )
    comment_out_textbox = gr.Textbox(lines=3, label="Comment")
    detected_lang_markdown = gr.Markdown("", elem_id="detect-lang-md")

    comment_btn = gr.Button("Comment")
    # Hidden until display() reports that the languages differ.
    translate_btn = gr.Button("Translate", visible=False)
    # Hidden textbox; neither click handler below reads or writes it.
    detected_language_value = gr.Textbox("", visible=False)

    # "Comment": post the text, clear the input, show the detection summary
    # and toggle the translate button.
    comment_inputs = [user_langugage_radio, comment_input_textbox]
    comment_outputs = [
        comment_out_textbox,
        comment_input_textbox,
        detected_lang_markdown,
        translate_btn,
    ]
    comment_btn.click(display, inputs=comment_inputs, outputs=comment_outputs)

    # "Translate": replace the posted comment with its translation.
    translate_inputs = [
        comment_out_textbox,
        detected_lang_markdown,
        user_langugage_radio,
    ]
    translate_btn.click(
        translate,
        inputs=translate_inputs,
        outputs=comment_out_textbox,
    )

demo.launch()
|