File size: 2,100 Bytes
fe5faf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
import pickle
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem.porter import PorterStemmer
# Shared Porter stemmer instance; preprocess() calls stemmer.stem() on each
# token that survives stop-word filtering.
stemmer = PorterStemmer()

# Streamlit page setup: the page title is passed to set_page_config, followed
# by the on-page heading and a message in the sidebar.
st.set_page_config(
    page_title="NLP WEB APP"
)

st.title("LANGUAGE DETECTOR MODEL")
st.sidebar.success("Select a page above")
# Request the NLTK data packages at module level; preprocess() uses
# stopwords.words("english") and word_tokenize().
# NOTE(review): these calls sit at top level, so they presumably run on every
# script execution — confirm whether that is intended.
nltk.download('stopwords')
nltk.download('punkt')

def preprocess(text):
    """Normalize *text* into a space-separated string of stemmed tokens.

    Steps, in order: lowercase the input, delete every run of digits, delete
    all characters listed in ``string.punctuation``, tokenize with
    ``word_tokenize``, drop tokens found in the NLTK English stop-word list,
    and stem each remaining token with the module-level ``stemmer``.

    Returns the stemmed tokens joined by single spaces.
    """
    lowered = text.lower()
    without_digits = re.sub(r'\d+', '', lowered)
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = without_digits.translate(punctuation_table)

    english_stops = set(stopwords.words("english"))
    stemmed_tokens = (
        stemmer.stem(token)
        for token in word_tokenize(cleaned)
        if token not in english_stops
    )
    return ' '.join(stemmed_tokens)



# Load the fitted vectorizer and classifier from disk. The context managers
# close the file handles as soon as unpickling finishes instead of leaking
# them.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only ship pickles produced by a trusted training run.
with open('language-detector-models/vectorizer.pkl', 'rb') as vectorizer_file:
    cv = pickle.load(vectorizer_file)
with open('language-detector-models/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Predicted class index -> label shown to the user. The label strings are
# reproduced exactly as the app has always displayed them.
LANGUAGE_LABELS = {
    0: "ARABIC",
    1: "DANISH",
    2: "DUTCH",
    3: "ENGLISH",
    4: "FRENCH",
    5: "GERMAN",
    6: "GREEK",
    7: "HINDI",
    8: "ITALIAN",
    9: "KANNADA",
    10: "MALYALAM",
    11: "PORTUGESE",
    12: "RUSSIAN",
    13: "SPANISH",
    14: "SWEDISH",
    15: "TAMIL",
}
# Any prediction outside 0-15 is displayed as this label, matching the final
# ``else`` branch of the original if/elif chain.
FALLBACK_LABEL = "TURKISH"

message = st.text_input("ENTER THE MESSAGE")

if st.button("PREDICT"):
    if not message.strip():
        # Nothing to classify; avoid showing a prediction for empty input.
        st.warning("Please enter a message to classify.")
    else:
        # PREPROCESS
        transformed_text = preprocess(message)

        # VECTORIZE
        # Feed the preprocessed text to the vectorizer; previously the raw
        # message was passed and the preprocessing result was discarded.
        # NOTE(review): assumes the model was trained on preprocess() output —
        # confirm against the training pipeline.
        vector_input = cv.transform([transformed_text])

        # PREDICTION
        result = model.predict(vector_input)[0]

        # DISPLAY
        st.header(LANGUAGE_LABELS.get(result, FALLBACK_LABEL))