SSahas committed
Commit ee32637
1 Parent(s): 5a714d7

first commit

Files changed (4)
  1. app.py +47 -0
  2. randomforestmodel.pkl +3 -0
  3. requirements.txt +5 -0
  4. text.csv +0 -0
app.py ADDED
@@ -0,0 +1,47 @@
+ import streamlit as st
+ import joblib
+ import pandas as pd
+ import string
+ import re
+ import nltk
+ from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+ model = joblib.load("randomforestmodel.pkl")
+ data = pd.read_csv("text.csv")
+
+ ps = nltk.PorterStemmer()
+ stopwords = nltk.corpus.stopwords.words('english')
+
+
+ def clean_text(text):
+     text = "".join([word.lower()
+                     for word in text if word not in string.punctuation])
+     tokens = re.split(r'\W+', text)
+     text = [ps.stem(word) for word in tokens if word not in stopwords]
+     return text
+
+
+ vectoriz = TfidfVectorizer(analyzer=clean_text)
+ vectorizer = vectoriz.fit(data["text"])
+
+
+ def count_punct(text):
+     count = sum([1 for char in text if char in string.punctuation])
+     return round(count / max(len(text) - text.count(" "), 1), 3) * 100
+
+
+ st.title("Sentiment analysis classification")
+
+ text = st.text_input("Type the text here")
+ if st.button("Predict"):
+     # Build the same features the model was fit on: body length, punctuation %, TF-IDF.
+     trans = vectorizer.transform([text])
+     body_len = len(text) - text.count(" ")
+     punct = count_punct(text)
+     k = {"body_len": [body_len], "punc%": [punct]}
+     df = pd.DataFrame(k)
+     test_vect = pd.concat([df.reset_index(drop=True),
+                            pd.DataFrame(trans.toarray())], axis=1)
+     prediction = model.predict(test_vect)
+     st.write(prediction[0])
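
Note: the `nltk.corpus.stopwords.words('english')` call above assumes the NLTK stopword corpus is already present in the runtime; if it is missing, the app fails at startup with a LookupError. A minimal one-time setup sketch, assuming the corpus has to be fetched before `streamlit run app.py` (this download step is not part of the commit):

    import nltk

    # Fetch the stopword list once; this is a no-op if it is already cached locally.
    nltk.download("stopwords", quiet=True)
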
randomforestmodel.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:930fc2558572ff43d2cf04309a0c5890ca38211bb2604c7e74fd6b6bbda077d5
+ size 38243005
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ joblib
+ scikit-learn
+ pandas
+ nltk
+ regex
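
Two packaging caveats, both assumptions about the deployment environment rather than facts from this commit: `streamlit` itself is not listed and is presumably supplied by the Space's Streamlit SDK image, and `regex` appears unused since app.py only imports the standard-library `re` module. A requirements sketch that would also work outside that image:

    streamlit
    joblib
    scikit-learn
    pandas
    nltk
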
text.csv ADDED
The diff for this file is too large to render. See raw diff