File size: 4,316 Bytes
476e166
a7f2f12
8b8b295
476e166
a7f2f12
 
 
 
 
 
 
 
476e166
a7f2f12
 
 
dfe3477
a7f2f12
 
 
 
 
 
 
582f48e
 
a7f2f12
 
316e0e6
8b8b295
 
 
 
316e0e6
 
e400466
 
 
8b8b295
ceaa373
a7f2f12
e400466
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a7f2f12
e400466
 
 
 
 
a7f2f12
e400466
a7f2f12
 
 
e400466
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582f48e
e400466
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os

import fasttext
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
import torch

# Text rendered in the header of the demo page.
title = "Community Tab Language Detection & Translation"
description = """
When comments are created in the community tab, detect the language of the content.
Then, if the detected language is different from the user's language, display an option to translate it.
"""

# The translation model and its tokenizer come from the same checkpoint; both
# are loaded once at import time and reused for every translation request.
NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
model = AutoModelForSeq2SeqLM.from_pretrained(NLLB_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(NLLB_CHECKPOINT)

# Device index handed to the translation pipeline: 0 when CUDA is available,
# -1 otherwise.
cuda_available = torch.cuda.is_available()
device = 0 if cuda_available else -1
print(f"Is CUDA available: {cuda_available}")

# Maps the language names offered in the UI to the language codes used for
# detection results and as translation source/target.
language_code_map = dict(
    English="eng_Latn",
    French="fra_Latn",
    German="deu_Latn",
    Spanish="spa_Latn",
    Korean="kor_Hang",
    Japanese="jpn_Jpan",
    Polish="pol_Latn",
)


# Lazily-loaded fastText language-identification model, shared by all calls so
# the model file is read from disk only once per process.
_lid_model = None


def _get_lid_model():
    """Return the fastText language-ID model, loading it on first use."""
    global _lid_model
    if _lid_model is None:
        model_file = "lid218e.bin"
        model_full_path = os.path.join(os.path.dirname(__file__), model_file)
        _lid_model = fasttext.load_model(model_full_path)
    return _lid_model


def identify_language(text):
    """Detect the language of ``text`` and return its language code.

    Args:
        text: The comment to classify. It may span several lines; line breaks
            are replaced by spaces before prediction.

    Returns:
        The top predicted label with the ``__label__`` prefix removed,
        e.g. ``"eng_Latn"``.
    """
    # fastText's predict() processes one line at a time and raises ValueError
    # when the input contains a newline. The comment box is multi-line, so
    # flatten the text first.
    single_line = text.replace("\r", " ").replace("\n", " ")

    # e.g., (('__label__eng_Latn',), array([0.81148803]))
    predictions = _get_lid_model().predict(single_line, k=1)

    label_prefix = "__label__"
    top_label = predictions[0][0]
    if top_label.startswith(label_prefix):
        return top_label[len(label_prefix):]
    return top_label


def display(user_lang, text):
    """Post a comment and report which language it was written in.

    Args:
        user_lang: Language name selected in the UI; must be a key of
            ``language_code_map``.
        text: The comment that was submitted.

    Returns:
        A 4-tuple, in the order the click handler's outputs expect it:
        the comment text, an update that clears the input box, an update
        carrying the detection summary, and an update that shows the
        translate button only when the detected language differs from the
        user's language.
    """
    expected_code = language_code_map[user_lang]
    detected_code = identify_language(text)

    # Translation is only offered when the comment is not already written in
    # the user's language.
    needs_translation = detected_code != expected_code
    if needs_translation:
        same_language_note = ""
    else:
        same_language_note = "[NOT TRANSLATABLE] Detected Language and Content Language are the same"

    # NOTE: translate() later recovers the detected code from this text, so
    # the "Detected Language: " label must stay as it is.
    summary = f"""
    Detected Language: {detected_code}\n
    User Content Language: {expected_code}\n
    {same_language_note}
    """

    cleared_input = gr.update(value="", placeholder="Leave a comment")
    summary_update = gr.update(value=summary)
    button_update = gr.update(visible=needs_translation, variant="primary")
    return text, cleared_input, summary_update, button_update


def translate(text, src_lang, tgt_lang):
    """Translate ``text`` into the user's language.

    Args:
        text: The comment to translate.
        src_lang: The detection summary produced by ``display`` (as returned
            by the Markdown component). It must contain
            ``Detected Language: <code>``, where ``<code>`` is an
            8-character language code such as ``eng_Latn``.
        tgt_lang: Language name selected in the UI; must be a key of
            ``language_code_map``.

    Returns:
        The translated text.

    Raises:
        ValueError: If no language code can be found in ``src_lang``.
        KeyError: If ``tgt_lang`` is not a key of ``language_code_map``.
    """
    import re  # local import: only this function needs it

    # Find the code by its label rather than by a fixed character offset.
    # The previous offset (22) only worked when exactly three characters
    # preceded the 19-character "Detected Language: " label, which depends on
    # how the summary text happens to be rendered or returned.
    match = re.search(
        r"Detected Language:\s*([A-Za-z]{3}_[A-Za-z]{4})", src_lang or "")
    if match is None:
        raise ValueError(
            "Could not find a detected language code in the summary text")
    src_lang_code = match.group(1)
    tgt_lang_code = language_code_map[tgt_lang]

    translation_pipeline = pipeline(
        "translation", model=model, tokenizer=tokenizer, src_lang=src_lang_code, tgt_lang=tgt_lang_code, device=device)
    result = translation_pipeline(text)
    return result[0]['translation_text']

# Build the demo page. Components are created in the order they appear on the
# page, then the two buttons are wired to their handlers.
with gr.Blocks() as demo:
    # Page header: title and description.
    gr.HTML(
        f"""
            <div style="text-align: center; margin: 0 auto;">
              <div style=" display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
                <h1 style="font-weight: 900; margin-bottom: 7px;margin-top:5px">
                  {title}
                </h1>
              </div>
              <p style="margin-bottom: 10px; font-size: 94%; line-height: 23px;">
                {description}
              </p>
            </div>
        """
    )

    # Language the reader expects comments to be written in.
    user_language_radio = gr.Radio(
        ["English", "Spanish", "Korean", "French", "German", "Japanese", "Polish"],
        value="English",
        label="User Content Language",
    )

    # Where a comment is typed, where it is shown after posting, and the
    # language-detection summary for the posted comment.
    comment_input_textbox = gr.Textbox(lines=3, label="Write a Comment", placeholder="Leave a comment")
    comment_out_textbox = gr.Textbox(lines=3, label="Comment")
    detected_lang_markdown = gr.Markdown("", elem_id="detect-lang-md")

    comment_btn = gr.Button("Comment")

    # Hidden until display() decides a translation is applicable.
    translate_btn = gr.Button("Translate", visible=False)
    # Hidden textbox; not wired to any event in this file.
    detected_language_value = gr.Textbox("", visible=False)

    # Posting a comment: show it, clear the input box, report the detected
    # language and toggle the translate button.
    comment_btn.click(
        display,
        inputs=[user_language_radio, comment_input_textbox],
        outputs=[
            comment_out_textbox,
            comment_input_textbox,
            detected_lang_markdown,
            translate_btn,
        ],
    )

    # Translating: replace the displayed comment with its translation.
    translate_btn.click(
        translate,
        inputs=[
            comment_out_textbox,
            detected_lang_markdown,
            user_language_radio,
        ],
        outputs=comment_out_textbox,
    )

demo.launch()