ambreshrc committed on
Commit
6395882
1 Parent(s): db6630c

Delete streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +0 -123
streamlit_app.py DELETED
@@ -1,123 +0,0 @@
1
- import streamlit as st
2
- from io import BytesIO
3
- # import gradio as gr
4
- # Def_04 Docx file to translated_Docx file
5
- #from transformers import MarianMTModel, MarianTokenizer
6
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
- import nltk
8
- from nltk.tokenize import sent_tokenize
9
- from nltk.tokenize import LineTokenizer
10
- nltk.download('punkt')
11
- import math
12
- import torch
13
- from docx import Document
14
- from time import sleep
15
- from stqdm import stqdm
16
-
17
- import docx
18
def getText(filename):
    """Return the text of a .docx file, one paragraph per line.

    ``filename`` is handed straight to ``docx.Document``; the ``text`` of
    each entry in the document's ``paragraphs`` is joined with newlines.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
24
-
25
-
26
-
27
-
28
- # mname = 'Helsinki-NLP/opus-mt-en-hi'
29
- # tokenizer = MarianTokenizer.from_pretrained(mname)
30
- # model = MarianMTModel.from_pretrained(mname)
31
- # model.to(device)
32
-
33
- #@st.cache
34
def _translate_paragraph(paragraph, tokenizer, model, device, batch_size=64):
    """Translate one paragraph sentence by sentence and return the joined text."""
    sentences = sent_tokenize(paragraph)
    translated = []
    # Sentences are fed to the model in batches of at most ``batch_size``.
    for start in range(0, len(sentences), batch_size):
        sent_batch = sentences[start:start + batch_size]
        model_inputs = tokenizer(
            sent_batch,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=500,
        ).to(device)
        # Inference only: no gradients are needed for generation.
        with torch.no_grad():
            generated = model.generate(**model_inputs)
        translated.extend(
            tokenizer.decode(tokens, skip_special_tokens=True)
            for tokens in generated
        )
    return " ".join(translated)


def btTranslator(docxfile):
    """Back-translate a .docx file through a chain of translation models.

    The document text is read with ``getText`` and passed, in order, through
    the ``opus-mt-en-ru``, ``opus-mt-ru-fr`` and ``opus-mt-fr-en`` models, so
    the output of each pass is the input of the next.

    Args:
        docxfile: Path or file-like object accepted by ``getText``.

    Returns:
        A new ``Document`` whose single paragraph holds the final text, with
        the translated source lines separated by newline characters.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    model_names = [
        "Helsinki-NLP/opus-mt-en-ru",
        "Helsinki-NLP/opus-mt-ru-fr",
        "Helsinki-NLP/opus-mt-fr-en",
    ]
    # LineTokenizer splits on newlines and, by default, drops blank lines,
    # so the raw text can be handed to it without any pre-processing.
    line_tokenizer = LineTokenizer()
    text = getText(docxfile)

    # stqdm draws the progress bars itself while being iterated.
    for model_name in stqdm(model_names):
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        model.to(device)

        paragraphs = line_tokenizer.tokenize(text)
        translated_paragraphs = [
            _translate_paragraph(paragraph, tokenizer, model, device)
            for paragraph in stqdm(paragraphs)
        ]
        # The output of this pass becomes the input of the next model.
        text = "\n".join(translated_paragraphs)

    translated_doc = Document()
    translated_doc.add_paragraph(text)
    return translated_doc
94
-
95
-
96
- #return translated_text
97
# Streamlit page: upload a .docx, back-translate it, and offer the result
# for download.
st.title('Translator App')
st.markdown("Translate from Docx file")
st.subheader("File Upload")

datas = st.file_uploader("Original File")
name = st.text_input('Enter New File Name: ')

# In-memory buffer that receives the translated document.
binary_output = BytesIO()
if st.button(label='Translate'):
    if datas is None:
        # Nothing was uploaded, so there is nothing to translate.
        st.warning("Please upload a .docx file before translating.")
    else:
        # st.spinner only displays while it is entered as a context manager.
        with st.spinner('Waiting...'):
            btTranslator(datas).save(binary_output)
        st.success("Translated")

        # Offer the download only once the buffer holds a real document;
        # an empty buffer would download as an invalid .docx file.
        st.download_button(
            label='Download Translated File',
            file_name=f"{name}_Translated.docx",
            data=binary_output.getvalue(),
        )
114
- #files.save(f"{name}_Translated.docx")
115
- #else:
116
- # st.text('Upload File and Start the process')
117
-
118
-
119
- #f4=binary_output(f3)
120
-
121
- #st.sidebar.download_button(label='Download Translated File',file_name='Translated.docx', data=binary_output.getvalue())
122
- # st.text_area(label="",value=btTranslator(datas),height=100)
123
- # Footer