Spaces:

SSahas
/

sentiment_classifier_airline

Sleeping

SSahas committed on Sep 11, 2022

Commit

ee32637

•

1 Parent(s): 5a714d7

first commit

Files changed (4) hide show

app.py ADDED Viewed

+import streamlit as st
+import joblib
+import pandas as pd
+import string
+import re
+import nltk
+from sklearn.feature_extraction.text import TfidfVectorizer
+model = joblib.load("randomforestmodel.pkl")
+data = pd.read_csv("text.csv")
+ps = nltk.PorterStemmer()
+stopwords = nltk.corpus.stopwords.words('english')
+def clean_text(text):
+    text = "".join([word.lower()
+                   for word in text if word not in string.punctuation])
+    tokens = re.split('\W+', text)
+    text = [ps.stem(word) for word in tokens if word not in stopwords]
+    return text
+# TF-IDF vectorizer that hands each raw document to clean_text and uses the
+# returned token list as its features (a callable analyzer replaces the
+# built-in preprocessing and tokenization).
+vectoriz = TfidfVectorizer(analyzer=clean_text)
+# Fit the vocabulary/IDF weights on the "text" column of text.csv.
+# NOTE(review): presumably this is the same corpus the pickled model was
+# trained on, so that feature columns line up -- confirm.
+vectorizer = vectoriz.fit(data["text"])
+def count_punct(text):
+    count = sum([1 for char in text if char in string.punctuation])
+    return round(count/(len(text) - text.count(" ")), 3)*100
+st.title("Sentiment analysis classification")
+text = st.text_input("Type the text here")
+if st.button("Predict"):
+    #text = str(text)
+    trans = vectorizer.transform([text])
+    body_len = len(text) - text.count(" ")
+    punct = count_punct(text)
+    k = {"body_len": [body_len], "punc%": [punct]}
+    df = pd.DataFrame(k)
+    test_vect = pd.concat([df.reset_index(drop=True),
+                           pd.DataFrame(trans.toarray())], axis=1)
+    prediction = model.predict(test_vect)
+    st.write(prediction[0])

randomforestmodel.pkl ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:930fc2558572ff43d2cf04309a0c5890ca38211bb2604c7e74fd6b6bbda077d5
+size 38243005

requirements.txt ADDED Viewed

+joblib
+scikit-learn
+pandas
+nltk
+regex

text.csv ADDED Viewed

The diff for this file is too large to render. See raw diff