ambujraj2001 committed on
Commit
e291f2f
1 Parent(s): 34fff5f
Files changed (6) hide show
  1. app.py +31 -0
  2. image.png +0 -0
  3. main.py +28 -0
  4. model.sav +3 -0
  5. requirements,txt +2 -0
  6. tokenizer.sav +3 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ from PIL import Image
4
+
5
# Streamlit re-runs this script on every widget interaction, so the ~300 MB
# pickled model must be loaded through a cache instead of being unpickled on
# each button click. `st.cache_resource` is used when available; older
# Streamlit releases only provide `st.cache(allow_output_mutation=True)`.
_cache_resource = getattr(st, "cache_resource", None) or st.cache(allow_output_mutation=True)


@_cache_resource
def _load_translation_assets():
    """Load the pickled translation model and tokenizer from disk.

    Returns:
        A ``(model, tokenizer)`` tuple read from ``model.sav`` and
        ``tokenizer.sav`` in the current working directory.
    """
    # NOTE: joblib.load unpickles arbitrary Python objects. Only load the
    # artifacts shipped with this app, never files from an untrusted source.
    # Unpickling also needs the libraries that created the objects
    # (transformers / torch) to be importable.
    return joblib.load('model.sav'), joblib.load('tokenizer.sav')


# Sidebar: short description of the project, an illustration and the team.
with st.sidebar:
    st.subheader('English to Hindi Translator')
    st.write('This model is trained on OPUS dataset. This open parallel is the collection of translated texts from the web. It also includes translations of Wikipedia, WikiSource, WikiBooks, WikiNews and WikiQuote web pages.Built using MarianMT model')
    image = Image.open('image.png')
    st.image(image, caption='MarianMT model')
    st.text(
        "Members: \n"
        "\tRajat Sharma\n"
        "\tTanisha Bhargava\n"
        "\tAyush Chourasia\n"
        "\tPallavi\n"
        "\tSmriti\n"
    )


# Main page: read the English text and translate it on demand.
st.header("English to Hindi Translator")
text = st.text_input("Enter text to translate")
if st.button("Translate"):
    if not text.strip():
        # Nothing meaningful to translate; avoid running the model on "".
        st.warning("Please enter some text to translate.")
    else:
        with st.spinner("Translating..."):
            model, tokenizer = _load_translation_assets()
            input_ids = tokenizer.encode(text, return_tensors="pt", padding=True)
            outputs = model.generate(input_ids)
            # A single input string yields a single generated sequence at index 0.
            decoded_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.success("Done!")
        st.write("Hindi Translation: ", decoded_text)
image.png ADDED
main.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
2
+ import joblib
3
+
4
# Fetch the pretrained English -> Hindi checkpoint and its matching tokenizer.
_CHECKPOINT = "Helsinki-NLP/opus-mt-en-hi"
tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT)

# Serialize both objects with joblib, then read each one back so the rest of
# the script runs against the on-disk artifacts.
joblib.dump(model, 'model.sav')
joblib.dump(tokenizer, 'tokenizer.sav')
loaded_model = joblib.load('model.sav')
loaded_tokenizer = joblib.load('tokenizer.sav')
10
+
11
+
12
def translator(text):
    """Translate an English string to Hindi using the reloaded model.

    Args:
        text: English text to translate.

    Returns:
        The decoded translation with special tokens stripped.
    """
    token_ids = loaded_tokenizer.encode(text, padding=True, return_tensors="pt")
    generated = loaded_model.generate(token_ids)
    # Only the first generated sequence is decoded.
    return loaded_tokenizer.decode(generated[0], skip_special_tokens=True)
18
+
19
# Sample English sentences used to smoke-test the translator.
texts = ["I spend a few hours a day maintaining my website.",
         "Where do random thoughts come from?",
         "I can't believe that she is older than my mother.",
         "My Mum tries to be cool by saying that she likes all the same things that I do",
         "A song can make or ruin a person’s day if they let it get to them."]

# Run the demo only when this file is executed as a script, so that importing
# `translator` from this module does not print sample translations.
if __name__ == "__main__":
    for text in texts:
        print("English Text: ", text)
        print("Hindi Translation: ", translator(text))
        print("*"*50,"\n")
model.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9eefa7f19a6d77d5cb59e8e3ba415a5117ec63f4f461ab3f304a075f64487bd5
3
+ size 305880976
requirements,txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ streamlit
2
+ joblib
tokenizer.sav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e242deb2f9fc0ed01de2db8fb16d4923bb7dc1b8b6314de24e52a233ee02cc6a
3
+ size 1687217