# Spaces: Sleeping
import streamlit as st | |
import pickle | |
import re | |
import string | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
from nltk.stem.porter import PorterStemmer | |
# Page setup: set_page_config stays ahead of every other Streamlit call,
# matching the original statement order.
st.set_page_config(page_title="NLP WEB APP")
st.title("LANGUAGE DETECTOR MODEL")
st.sidebar.success("Select a page above")

# NLTK data needed by preprocess(): the stop-word corpus and the 'punkt'
# tokenizer data.
for nltk_package in ('stopwords', 'punkt'):
    nltk.download(nltk_package)

# Single Porter stemmer instance shared by every preprocess() call.
stemmer = PorterStemmer()
def preprocess(text):
    """Return *text* normalised to a space-joined string of stemmed tokens.

    The pipeline, in order: lowercase the input, delete runs of digits,
    strip every character in ``string.punctuation``, tokenize with NLTK's
    ``word_tokenize``, drop English stop words, and stem each remaining
    token with the module-level Porter ``stemmer``.
    """
    lowered = text.lower()
    without_digits = re.sub(r'\d+', '', lowered)
    cleaned = without_digits.translate(
        str.maketrans('', '', string.punctuation)
    )
    english_stop_words = set(stopwords.words("english"))
    return ' '.join(
        stemmer.stem(token)
        for token in word_tokenize(cleaned)
        if token not in english_stop_words
    )
# Load the fitted vectorizer and the trained classifier from disk.
# NOTE: pickle.load can execute arbitrary code embedded in the file, so these
# artifacts must only ever come from a trusted source.
# The context managers close each file handle as soon as it has been read;
# the original left both handles open.
with open('language-detector-models/vectorizer.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)
with open('language-detector-models/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# Predicted class label -> language name shown to the user.
LANGUAGE_NAMES = {
    0: "ARABIC",
    1: "DANISH",
    2: "DUTCH",
    3: "ENGLISH",
    4: "FRENCH",
    5: "GERMAN",
    6: "GREEK",
    7: "HINDI",
    8: "ITALIAN",
    9: "KANNADA",
    10: "MALAYALAM",
    11: "PORTUGUESE",
    12: "RUSSIAN",
    13: "SPANISH",
    14: "SWEDISH",
    15: "TAMIL",
}
# Any label outside 0-15 is displayed as Turkish, as the original
# if/elif chain's final `else` did (presumably label 16 -- TODO confirm).
DEFAULT_LANGUAGE = "TURKISH"

message = st.text_input("ENTER THE MESSAGE")
if st.button("PREDICT"):
    if not message.strip():
        # Blank input carries no signal; avoid showing an arbitrary language.
        st.warning("Please enter a message before predicting.")
    else:
        # PREPROCESS
        transformed_text = preprocess(message)
        # VECTORIZE
        # Use the preprocessed text: the original computed it and then
        # vectorized the raw message, discarding the preprocessing step.
        # NOTE(review): assumes the vectorizer was fitted on text cleaned by
        # preprocess() -- confirm against the training code.
        vector_input = cv.transform([transformed_text])
        # PREDICTION
        result = model.predict(vector_input)[0]
        # DISPLAY
        st.header(LANGUAGE_NAMES.get(result, DEFAULT_LANGUAGE))