Spaces:

aidoskanapyanov
/

ru-kz-translation

Sleeping

aidoskanapyanov committed on May 3, 2024

Commit

0253f2e

1 Parent(s): dca8da9

split sentences and paragraphs intelligently

Files changed (2) hide show

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 # this model was loaded from https://hf.co/models
 model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
@@ -21,8 +23,30 @@ def translate(text, src_lang, tgt_lang):
         max_length=400,
         device=device,
     )
-    result = translation_pipeline(text)
-    return result[0]["translation_text"]
 demo = gr.Interface(

 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from razdel import sentenize
 # this model was loaded from https://hf.co/models
 model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
         max_length=400,
         device=device,
     )
+    translated_paragraphs = []
+    paragraphs = text.split("\n")
+    for paragraph in paragraphs:
+        sentences = list(sentenize(paragraph))
+        translated_sentences = []
+        for sentence in sentences:
+            output = translation_pipeline(sentence.text)
+            translated_sentence = output[0]["translation_text"]
+            if sentence.text == "":
+                translated_sentence = ""
+            translated_sentences.append(translated_sentence)
+        translated_paragraph = " ".join(translated_sentences)
+        translated_paragraphs.append(translated_paragraph)
+    result_text = "\n".join(translated_paragraphs)
+    return result_text
 demo = gr.Interface(

requirements.txt CHANGED Viewed

@@ -1,2 +1,3 @@
 transformers
 torch

 transformers
 torch
+razdel