Spaces:
Running
Running
File size: 4,229 Bytes
3e6ffc5 c153533 3e6ffc5 c153533 3e6ffc5 27ca4a9 fe02c49 0c7be31 706408b 0c7be31 4661832 fe02c49 706408b fe02c49 b10cb1c fe02c49 706408b fe02c49 706408b fe02c49 b10cb1c fe02c49 0c7be31 fe02c49 0c7be31 fe02c49 0c7be31 fe02c49 0c7be31 fe02c49 3e6ffc5 0c7be31 3e6ffc5 0c7be31 3e6ffc5 200d05c 3e6ffc5 0c7be31 3e6ffc5 0c7be31 200d05c 4661832 0c7be31 c153533 fe02c49 200d05c fe02c49 0c7be31 fe02c49 0c7be31 eadcb10 0c7be31 eadcb10 0c7be31 3e6ffc5 0c7be31 3e6ffc5 0c7be31 3e6ffc5 4661832 3e6ffc5 fe02c49 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 |
"""
File: app.py
Description: Gradio app that translates text with Facebook M2M100 and Google Translate.
Author: Didier Guillevic
Date: 2024-09-07
"""
# NOTE(review): `spaces` is not referenced in the visible part of this file.
import spaces
import logging
# Module-level logger used by the functions below; basicConfig sets the
# root logger level to INFO.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
import gradio as gr
import langdetect
from deep_translator import GoogleTranslator
# NOTE(review): `nlp_xx` is not referenced in the visible part of this file.
from model_spacy import nlp_xx
import model_translation
# M2M100 model wrapper, instantiated once at import time and reused by
# translate_text() for every request.
m2m100 = model_translation.ModelM2M100()
def translate_with_Helsinki(
        chunks, src_lang, tgt_lang, input_max_length, output_max_length) -> str:
    """Translate the chunks with the Helsinki model.

    Args:
        chunks: sequence of text chunks, each translated independently.
        src_lang: source language code, used to select the tokenizer/model.
        tgt_lang: target language code. Currently unused by this function:
            the target is whatever the model selected for `src_lang`
            produces (for 'fa' the '>>fra<<' prefix below forces French).
        input_max_length: max number of tokens kept per chunk (longer
            chunks are truncated by the tokenizer).
        output_max_length: `max_length` passed to `model.generate`.

    Returns:
        The translated chunks joined with newlines, an empty string when
        `chunks` is empty, or an "ISSUE: ..." message when there is no
        model for `src_lang`.
    """
    # NOTE(review): `translation` is not imported in the visible part of this
    # file (only `model_translation` is). Confirm which module provides
    # `src_langs` / `get_tokenizer_model_for_src_lang` before calling this.
    if src_lang not in translation.src_langs:
        return (
            f"ISSUE: currently no model for language '{src_lang}'. "
            "If wrong language, please specify language."
        )

    # Nothing to translate: avoid indexing chunks[0] and loading a model.
    if not chunks:
        return ''

    logger.info("LANG: %s, TEXT: %s...", src_lang, chunks[0][:50])

    tokenizer, model = translation.get_tokenizer_model_for_src_lang(src_lang)

    # NOTE: The 'fa' (Persian) model has multiple target languages to choose
    # from. We need to specify the desired language among: fra ita por ron spa
    #   https://huggingface.co/Helsinki-NLP/opus-mt-tc-big-fa-itc
    # Prepending the text with >>fra<< makes the model translate into French.
    prefix = ">>fra<< " if src_lang == 'fa' else ""

    translated_chunks = []
    for chunk in chunks:
        inputs = tokenizer(
            prefix + chunk, return_tensors="pt", max_length=input_max_length,
            truncation=True, padding="longest").to(model.device)
        outputs = model.generate(**inputs, max_length=output_max_length)
        # A single input sequence is passed, so only the first decoded
        # output is of interest.
        translated_chunk = tokenizer.batch_decode(
            outputs, skip_special_tokens=True)[0]
        translated_chunks.append(translated_chunk)

    return '\n'.join(translated_chunks)
def translate_text(
    text: str,
    src_lang: str,
    tgt_lang: str
) -> tuple[str, str]:
    """Translate the given text with both M2M100 and Google Translate.

    Args:
        text: the text to translate.
        src_lang: source language code; when empty/None or "auto" the
            language is detected with `langdetect`.
        tgt_lang: target language code; falls back to 'en' when it is not
            one of `model_translation.tgt_language_codes` values.

    Returns:
        A pair `(m2m100_translation, google_translation)`. Both are empty
        strings when `text` is empty or whitespace only.
    """
    # Nothing to translate. This also keeps empty input away from
    # langdetect.detect(), which raises when the text has no features.
    if not text or not text.strip():
        return "", ""

    # src_lang: make sure it is set, detecting the language if needed.
    if not src_lang or src_lang == "auto":
        src_lang = langdetect.detect(text)
    if src_lang not in model_translation.language_codes.values():
        # Best effort: report the problem but still attempt the translation.
        logger.error(
            "Language detected %s not among supported language", src_lang)

    # tgt_lang: make sure it is valid. Default to 'en' if not set.
    if tgt_lang not in model_translation.tgt_language_codes.values():
        tgt_lang = 'en'

    # Translate with both engines, honoring the same target language.
    translated_text_m2m100 = m2m100.translate(text, src_lang, tgt_lang)
    translated_text_google_translate = GoogleTranslator(
        source='auto', target=tgt_lang).translate(text=text)

    return (
        translated_text_m2m100,
        translated_text_google_translate
    )
# ---------------------------------------------------------------------------
# Gradio user interface
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    # Page title
    gr.Markdown("\n## Text translation v0.0.3\n")

    # Box receiving the text to translate
    input_text = gr.Textbox(
        label="Text to translate",
        placeholder="Enter text to translate",
        lines=5,
        render=True)

    # One output box per translation engine
    output_text_m2m100 = gr.Textbox(
        label="Facebook m2m100 (418M)", lines=4, render=True)
    output_text_google_translate = gr.Textbox(
        label="Google Translate", lines=4, render=True)

    # Source / target language selectors, placed on the same row
    with gr.Row():
        src_lang = gr.Dropdown(
            label="Source language",
            value="auto",
            choices=model_translation.language_codes.items(),
            render=True)
        tgt_lang = gr.Dropdown(
            label="Target language",
            value="en",
            choices=model_translation.tgt_language_codes.items(),
            render=True)

    # Clicking the button runs translate_text and fills both output boxes
    translate_btn = gr.Button("Translate")
    translate_btn.click(
        fn=translate_text,
        inputs=[input_text, src_lang, tgt_lang],
        outputs=[output_text_m2m100, output_text_google_translate])

    # Collapsed-by-default documentation section
    with gr.Accordion("Documentation", open=False):
        gr.Markdown(
            "\n- Models: serving Facebook M2M100 (418M) and Google Translate.\n")

# Start the app only when the file is executed as a script
if __name__ == "__main__":
    demo.launch()
|