File size: 8,454 Bytes
dc149ba 3ad5346 dc149ba 8c5885d 158e38e 8c5885d dc149ba 90b34d1 93baa69 dc149ba 90b34d1 e939d89 b0ae3fd b2d474f 7239594 90b34d1 93baa69 9e1c3ab 90b34d1 9e1c3ab dc149ba 8c5885d 46abd0a 8c5885d 46abd0a 8c5885d 5e8469e 8c5885d 98b9978 31097f0 3ad5346 5e8469e 98b9978 5e8469e 98b9978 8c5885d 98b9978 dc149ba 90b34d1 592978b dc149ba 2866119 dc149ba 2866119 dc149ba 2866119 dc149ba de3cada b52f918 8112e48 f010b24 31097f0 592978b 3ad5346 592978b 9215493 dc149ba cc4bac3 a71436a bccf7be 33e3967 5b11a3e bccf7be 33e3967 bccf7be 33e3967 bccf7be 33e3967 bccf7be a71436a 93baa69 c603514 93baa69 90b34d1 e3d64dc c603514 93baa69 765a96d 90b34d1 765a96d 93baa69 e3a8bf2 90b34d1 765a96d 9e1c3ab 90b34d1 d895aa7 9e1c3ab f64ffeb 9c13f88 f64ffeb 9c13f88 765a96d 90b34d1 e3d64dc 9c13f88 b2974e9 90b34d1 b2974e9 9e1c3ab 90b34d1 d895aa7 9e1c3ab 93baa69 de3cada e3d64dc 72801d9 e3d64dc 93baa69 9e1c3ab 33e3967 de3cada 8c5885d 90e8b1d 76ef102 b2974e9 76ef102 90e8b1d 65fbb2d de3cada d895aa7 de3cada 8c5885d |
|
"""
translation program for simple text
1. detect the language of the text with langid
2. translate to target language given by user
Example from
https://www.thepythoncode.com/article/machine-translation-using-huggingface-transformers-in-python
user_input:
string: string to be translated
target_lang: language to be translated to
Returns:
string: translated string of text
try this : https://pypi.org/project/EasyNMT/
and this : https://huggingface.co/IDEA-CCNL/Randeng-Deltalm-362M-En-Zh
"""
from __future__ import annotations
from typing import Iterable
import gradio as gr
from gradio.themes.base import Base
from gradio.themes.utils import colors, fonts, sizes
import argparse
import langid
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer, logging
from easynmt import EasyNMT
# Configure logging through the transformers logging utilities
# (``logging`` here is ``transformers.logging``, not the standard-library module).
logging.set_verbosity_info()
# Shared logger used by the translation helpers in this file.
logger = logging.get_logger("transformers")
# NLLB-200 initialization is currently disabled (kept for reference):
# tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
# model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
# Initialize mbart50 models at import time. mbart_trans uses the m2en model
# when the target is English and the en2m model otherwise.
# NOTE(review): presumably "m2en" = many-to-English and "en2m" = English-to-many
# -- confirm against the EasyNMT model list.
mbart_m2en_model = EasyNMT("mbart50_m2en")
mbart_en2m_model = EasyNMT("mbart50_en2m")
logger.info("mbart50 models initialized")
# Initialize the m2m_100 model (1.2B checkpoint) used by m2m_trans.
m2m_model = EasyNMT("m2m_100_1.2B")
logger.info("m2m_100 models initialized")
class myTheme(Base):
    """Custom Gradio theme with a red/blue/orange palette and striped page background.

    Every constructor argument is keyword-only and is forwarded unchanged to
    ``Base.__init__``; afterwards a fixed set of style variables is overridden
    through ``set``.
    """

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.red,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.orange,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("handjet"),
            "cursive",
            # "sans-serif",
        ),
        font_mono: fonts.Font | str | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Palette, sizing and fonts are handled by the base theme.
        base_options = dict(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        super().__init__(**base_options)

        # Style variables overridden on top of the base theme; the "*name"
        # values refer to other theme variables.
        style_overrides = dict(
            body_background_fill="repeating-linear-gradient(135deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            button_primary_background_fill="linear-gradient(90deg, *primary_600, *secondary_800)",
            button_primary_background_fill_hover="linear-gradient(45deg, *primary_200, *secondary_300)",
            button_primary_text_color="white",
            slider_color="*secondary_300",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_shadow="*shadow_drop_lg",
            button_large_padding="24px",
        )
        super().set(**style_overrides)
def detect_lang(article):
    """
    Identify the language of a text with the langid library.

    Args:
        article (string): text whose language should be identified
    Returns:
        string: first element of the langid classification result
            (the detected language short form)
    """
    classification = langid.classify(article)
    # The full classification result is logged; only its first element is returned.
    logger.info(f"language detected as {classification}")
    detected_code = classification[0]
    return detected_code
def opus_trans(article, target_language):
    """
    Translation by Helsinki-NLP model

    The source language is detected with detect_lang and the matching
    ``Helsinki-NLP/opus-mt-<source>-<target>`` pipeline is built on every call.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate
            article into; "English" or "Chinese"
    Returns:
        string: translated article, or a message starting with "Error:" when
            the target language is unsupported, equals the detected source
            language, or the translation pipeline fails
    """
    # Validate the selection before doing any work: the UI can send None (or
    # any other value), which previously left the target code unbound.
    target_lang = {"English": "en", "Chinese": "zh"}.get(target_language)
    if target_lang is None:
        return "Error: Unsupported target language. Please select English or Chinese."

    result_lang = detect_lang(article)
    if result_lang == target_lang:
        return "Error: You chose the same language as the article detected language. Please reselect language and try again."

    task_name = f"translation_{result_lang}_to_{target_lang}"
    model_name = f"Helsinki-NLP/opus-mt-{result_lang}-{target_lang}"
    try:
        translator = pipeline(task_name, model=model_name, tokenizer=model_name)
        translated = translator(article)[0]["translation_text"]
    except Exception:
        # Deliberate best-effort at the UI boundary: the user still gets the
        # same message as before, but the real cause is logged and
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        logger.exception(f"OPUS translation with {model_name} failed")
        translated = "Error: Model doesn't exist"
    return translated
# Checkpoint name taken from the (disabled) module-level NLLB initialization.
_NLLB_CHECKPOINT = "facebook/nllb-200-distilled-600M"
# Filled on first use by _load_nllb so importing this module stays cheap.
_nllb_components = {}


def _load_nllb():
    """Return the NLLB ``(tokenizer, model)`` pair, loading it on first use."""
    if not _nllb_components:
        # Load both parts before caching so a failed load never leaves a
        # half-populated cache behind.
        nllb_tokenizer = AutoTokenizer.from_pretrained(_NLLB_CHECKPOINT)
        nllb_model = AutoModelForSeq2SeqLM.from_pretrained(_NLLB_CHECKPOINT)
        _nllb_components.update(tokenizer=nllb_tokenizer, model=nllb_model)
    return _nllb_components["tokenizer"], _nllb_components["model"]


def nllb_trans(article, target_language, max_length=30):
    """
    Translation by the facebook NLLB-200 model

    The tokenizer and model are loaded lazily on the first call instead of
    relying on module-level ``tokenizer``/``model`` globals.

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate
            article into; "English" or "Chinese"
        max_length (int): ``max_length`` passed to ``generate``; defaults to
            the previously hard-coded value of 30
    Returns:
        string: translated article, or a message starting with "Error:" when
            the target language is unsupported or equals the detected source
            language
    """
    # (langid short code, NLLB language token) for each supported selection.
    codes = {
        "English": ("en", "eng_Latn"),
        "Chinese": ("zh", "zho_Hans"),
    }.get(target_language)
    if codes is None:
        return "Error: Unsupported target language. Please select English or Chinese."
    short_code, nllb_code = codes

    result_lang = detect_lang(article)
    if result_lang == short_code:
        return "Error: You chose the same language as the article detected language. Please reselect language and try again."

    nllb_tokenizer, nllb_model = _load_nllb()
    # NOTE(review): the tokenizer's source language is left at its default, as
    # in the original code -- confirm whether src_lang should follow result_lang.
    inputs = nllb_tokenizer(article, return_tensors="pt")
    translated_tokens = nllb_model.generate(
        **inputs,
        # convert_tokens_to_ids replaces the lang_code_to_id mapping, which is
        # deprecated in recent transformers releases.
        forced_bos_token_id=nllb_tokenizer.convert_tokens_to_ids(nllb_code),
        max_length=max_length,
    )
    return nllb_tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)[0]
def mbart_trans(article, target_language):
    """
    Translation by the EasyNMT mbart50 models

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate
            article into; "English" or "Chinese"
    Returns:
        the value returned by the EasyNMT ``translate`` call, or a string
        starting with "Error:" when the target language is unsupported or
        equals the detected source language
    """
    # Validate the selection before doing any work: the UI can send None (or
    # any other value), which previously left the target code unbound.
    target_lang = {"English": "en", "Chinese": "zh"}.get(target_language)
    if target_lang is None:
        return "Error: Unsupported target language. Please select English or Chinese."

    result_lang = detect_lang(article)
    logger.info(f"Article to translate : {article}")
    logger.info("Chose which translation model: mbart model")
    logger.info(f"Language selected: {target_language}")

    if result_lang == target_lang:
        same_language_error = "Error: You chose the same language as the article detected language. Please reselect language and try again."
        logger.warning(same_language_error)
        return same_language_error

    # English targets use the m2en model; every other target uses the en2m model.
    # NOTE(review): en2m presumably expects English source text -- confirm how
    # non-English sources translated to Chinese should be handled.
    engine = mbart_m2en_model if target_lang == "en" else mbart_en2m_model
    translated = engine.translate(article, target_lang=target_lang)
    logger.info(f"Translated Result: {translated}")
    return translated
def m2m_trans(article, target_language):
    """
    Translation by the EasyNMT m2m_100 model

    Args:
        article (string): article that user wishes to translate
        target_language (string): language that user wishes to translate
            article into; "English" or "Chinese"
    Returns:
        the value returned by the EasyNMT ``translate`` call, or a string
        starting with "Error:" when the target language is unsupported or
        equals the detected source language
    """
    # Validate the selection before doing any work: the UI can send None (or
    # any other value), which previously left the target code unbound.
    target_lang = {"English": "en", "Chinese": "zh"}.get(target_language)
    if target_lang is None:
        return "Error: Unsupported target language. Please select English or Chinese."

    result_lang = detect_lang(article)
    logger.info(f"Article to translate : {article}")
    logger.info("Chose which translation model: m2m model")
    logger.info(f"Language selected: {target_language}")

    if result_lang == target_lang:
        same_language_error = "Error: You chose the same language as the article detected language. Please reselect language and try again."
        logger.warning(same_language_error)
        return same_language_error

    translated = m2m_model.translate(article, target_lang)
    logger.info(f"Translation Result: {translated}")
    return translated
def translate(article, toolkit, target_language):
    """
    Dispatch a translation request to the selected translation model

    Args:
        article (string): article that user wishes to translate
        toolkit (string): "OPUS", "MBART" or "M2M"
        target_language (string): language that user wishes to translate
            article into
    Returns:
        the result of the selected translation function, or a string starting
        with "Error:" when toolkit is not one of the supported values
    """
    if toolkit == "OPUS":
        return opus_trans(article, target_language)
    # NLLB is currently disabled:
    # if toolkit == "NLLB":
    #     return nllb_trans(article, target_language)
    if toolkit == "MBART":
        return mbart_trans(article, target_language)
    if toolkit == "M2M":
        return m2m_trans(article, target_language)
    # Previously an unknown/None toolkit fell through every branch and raised
    # UnboundLocalError; report it to the user instead.
    return "Error: Unknown translation model. Please select OPUS, MBART or M2M."
# Instantiate the theme under its own name so the myTheme class is not
# shadowed by its instance.
app_theme = myTheme()

# UI layout: article input, model and language selectors, result box, and a
# button that runs translate on the three inputs.
with gr.Blocks(theme=app_theme) as demo:
    article = gr.Textbox(label="Article")
    toolkit_select = gr.Radio(
        ["OPUS", "MBART", "M2M"], label="Select Translation Model", value="MBART"
    )
    # Preselect a language (like the model selector does) so the callback
    # never receives None when the user clicks Translate right away.
    lang_select = gr.Radio(
        ["English", "Chinese"], label="Select Desired Language", value="English"
    )
    result = gr.Textbox(label="Translated Result")
    trans_btn = gr.Button("Translate")
    trans_btn.click(
        fn=translate, inputs=[article, toolkit_select, lang_select], outputs=result
    )

# Only start the server when run as a script, not when the module is imported.
if __name__ == "__main__":
    demo.launch()
|