aidoskanapyanov committed on
Commit
0253f2e
·
1 Parent(s): dca8da9

split sentences and paragraphs intelligently

Browse files
Files changed (2) hide show
  1. app.py +26 -2
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 
 
3
 
4
  # this model was loaded from https://hf.co/models
5
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
@@ -21,8 +23,30 @@ def translate(text, src_lang, tgt_lang):
21
  max_length=400,
22
  device=device,
23
  )
24
- result = translation_pipeline(text)
25
- return result[0]["translation_text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
 
28
  demo = gr.Interface(
 
1
  import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
3
+ from razdel import sentenize
4
+
5
 
6
  # this model was loaded from https://hf.co/models
7
  model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
 
23
  max_length=400,
24
  device=device,
25
  )
26
+
27
+ translated_paragraphs = []
28
+ paragraphs = text.split("\n")
29
+
30
+ for paragraph in paragraphs:
31
+ sentences = list(sentenize(paragraph))
32
+
33
+ translated_sentences = []
34
+ for sentence in sentences:
35
+
36
+ output = translation_pipeline(sentence.text)
37
+ translated_sentence = output[0]["translation_text"]
38
+
39
+ if sentence.text == "":
40
+ translated_sentence = ""
41
+
42
+ translated_sentences.append(translated_sentence)
43
+
44
+ translated_paragraph = " ".join(translated_sentences)
45
+ translated_paragraphs.append(translated_paragraph)
46
+
47
+ result_text = "\n".join(translated_paragraphs)
48
+
49
+ return result_text
50
 
51
 
52
  demo = gr.Interface(
requirements.txt CHANGED
@@ -1,2 +1,3 @@
1
  transformers
2
  torch
 
 
1
  transformers
2
  torch
3
+ razdel