Spaces:
Sleeping
Sleeping
aidoskanapyanov
committed on
Commit
·
0253f2e
1
Parent(s):
dca8da9
split sentences and paragraphs intelligently
Browse files
- app.py +26 -2
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
|
|
|
|
3 |
|
4 |
# this model was loaded from https://hf.co/models
|
5 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
@@ -21,8 +23,30 @@ def translate(text, src_lang, tgt_lang):
|
|
21 |
max_length=400,
|
22 |
device=device,
|
23 |
)
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
|
28 |
demo = gr.Interface(
|
|
|
1 |
import gradio as gr
|
2 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
|
3 |
+
from razdel import sentenize
|
4 |
+
|
5 |
|
6 |
# this model was loaded from https://hf.co/models
|
7 |
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
|
|
|
23 |
max_length=400,
|
24 |
device=device,
|
25 |
)
|
26 |
+
|
27 |
+
translated_paragraphs = []
|
28 |
+
paragraphs = text.split("\n")
|
29 |
+
|
30 |
+
for paragraph in paragraphs:
|
31 |
+
sentences = list(sentenize(paragraph))
|
32 |
+
|
33 |
+
translated_sentences = []
|
34 |
+
for sentence in sentences:
|
35 |
+
|
36 |
+
output = translation_pipeline(sentence.text)
|
37 |
+
translated_sentence = output[0]["translation_text"]
|
38 |
+
|
39 |
+
if sentence.text == "":
|
40 |
+
translated_sentence = ""
|
41 |
+
|
42 |
+
translated_sentences.append(translated_sentence)
|
43 |
+
|
44 |
+
translated_paragraph = " ".join(translated_sentences)
|
45 |
+
translated_paragraphs.append(translated_paragraph)
|
46 |
+
|
47 |
+
result_text = "\n".join(translated_paragraphs)
|
48 |
+
|
49 |
+
return result_text
|
50 |
|
51 |
|
52 |
demo = gr.Interface(
|
requirements.txt
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
transformers
|
2 |
torch
|
|
|
|
1 |
transformers
|
2 |
torch
|
3 |
+
razdel
|