# Page-scrape residue, not part of the program: "Spaces: Sleeping / Sleeping"
from tracemalloc import stop | |
import streamlit as st | |
import numpy as np | |
import pandas as pd | |
import re | |
import string | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.tokenize import word_tokenize | |
from nltk.stem.porter import PorterStemmer | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.tree import DecisionTreeRegressor | |
from sklearn.ensemble import RandomForestClassifier | |
# Fetch the NLTK resources this script relies on: the "punkt" tokenizer data
# and the stop-word corpus.
nltk.download('punkt')
nltk.download('stopwords')

# English stop-word list, handed to the TfidfVectorizer instances as stop_words.
sw = stopwords.words("english")

# Sidebar navigation; `rad` holds the label of the currently selected page.
rad = st.sidebar.radio(
    "Navigation",
    [
        "Home",
        "Sarcasm Detection",
        "Sentiment Analysis",
        "Spam or Ham Detection",
        "Stress Detection",
        "Hate and Offensive Content Detection",
    ],
)
#Home Page
if rad == "Home":
    st.title("Text Analyzer")
    st.image("SEO-articles-V2_Text-Analysis.png")
    # Each entry is rendered as its own st.text element, in this order; the
    # single-space entries serve as blank spacer lines.
    for home_line in (
        " ",
        "The Following Text Analysis Options Are Available->",
        " ",
        "1. Spam or Ham Detection",
        "2. Sentiment Analysis",
        "3. Stress Detection",
        "4. Hate and Offensive Content Detection",
        "5. Sarcasm Detection",
    ):
        st.text(home_line)
#function to clean and transform the user input which is in raw format
def transform_text(text):
    """Normalize raw user text into a space-separated string of stemmed tokens.

    Steps, in order: lowercase the text, tokenize it with NLTK, keep only
    purely alphanumeric tokens, drop English stop words, and Porter-stem
    whatever remains.

    Args:
        text: The raw input string.

    Returns:
        The surviving stemmed tokens joined by single spaces; an empty
        string when no token survives the filters.
    """
    # Load the stop-word list once and keep it in a set. Calling
    # stopwords.words('english') inside the loop re-read the corpus for every
    # token and then scanned the resulting list linearly.
    stop_words = set(stopwords.words('english'))
    stemmer = PorterStemmer()
    tokens = nltk.word_tokenize(text.lower())
    # No separate punctuation check is needed: a token that passes isalnum()
    # contains no punctuation characters.
    return " ".join(
        stemmer.stem(token)
        for token in tokens
        if token.isalnum() and token not in stop_words
    )
#Spam Detection Prediction
# TF-IDF vectorizer capped at 20 features, with the stop words in `sw` removed.
tfidf1 = TfidfVectorizer(max_features=20, stop_words=sw)


def transform1(txt1):
    """Fit `tfidf1` on `txt1` and return the TF-IDF matrix as a dense array."""
    return tfidf1.fit_transform(txt1).toarray()


# Load the dataset and name its two columns.
df1 = pd.read_csv("Spam Detection.csv")
df1.columns = ["Label", "Text"]

# Features come from the raw "Text" column (transform_text is not applied
# here); targets are the "Label" column.
x = transform1(df1["Text"])
y = df1["Label"]

# Hold out 10% of the rows with a fixed seed and fit on the rest.
x_train1, x_test1, y_train1, y_test1 = train_test_split(
    x, y, test_size=0.1, random_state=0
)
model1 = LogisticRegression()
model1.fit(x_train1, y_train1)
#Spam Detection Analysis Page
if rad == "Spam or Ham Detection":
    st.header("Detect Whether A Text Is Spam Or Ham??")
    sent1 = st.text_area("Enter The Text")
    # Preprocess and classify only after "Predict" is pressed. Streamlit
    # re-executes the script on every interaction, so doing this
    # unconditionally repeated the work for results that were never shown.
    if st.button("Predict"):
        transformed_sent1 = transform_text(sent1)
        vector_sent1 = tfidf1.transform([transformed_sent1])
        prediction1 = model1.predict(vector_sent1)[0]
        if prediction1 == "spam":
            st.warning("Spam Text!!")
        elif prediction1 == "ham":
            st.success("Ham Text!!")
#Sentiment Analysis Prediction
# TF-IDF vectorizer capped at 20 features, with the stop words in `sw` removed.
tfidf2 = TfidfVectorizer(max_features=20, stop_words=sw)


def transform2(txt1):
    """Fit `tfidf2` on `txt1` and return the TF-IDF matrix as a dense array."""
    tfidf_matrix = tfidf2.fit_transform(txt1)
    return tfidf_matrix.toarray()


# Load the dataset and name its two columns.
df2 = pd.read_csv("Sentiment Analysis.csv")
df2.columns = ["Text", "Label"]

# Features come from the raw "Text" column; targets are the "Label" column.
x = transform2(df2["Text"])
y = df2["Label"]

# Hold out 10% of the rows with a fixed seed and fit on the rest.
x_train2, x_test2, y_train2, y_test2 = train_test_split(
    x, y, random_state=0, test_size=0.1
)
model2 = LogisticRegression()
model2.fit(x_train2, y_train2)
#Sentiment Analysis Page
if rad == "Sentiment Analysis":
    st.header("Detect The Sentiment Of The Text!!")
    sent2 = st.text_area("Enter The Text")
    # Preprocess and classify only after "Predict" is pressed. Streamlit
    # re-executes the script on every interaction, so doing this
    # unconditionally repeated the work for results that were never shown.
    if st.button("Predict"):
        transformed_sent2 = transform_text(sent2)
        vector_sent2 = tfidf2.transform([transformed_sent2])
        prediction2 = model2.predict(vector_sent2)[0]
        if prediction2 == 0:
            # User-facing message; spelling corrected from "Negetive".
            st.warning("Negative Text!!")
        elif prediction2 == 1:
            st.success("Positive Text!!")
#Stress Detection Prediction
# TF-IDF vectorizer capped at 20 features, with the stop words in `sw` removed.
tfidf3 = TfidfVectorizer(max_features=20, stop_words=sw)


def transform3(txt1):
    """Fit `tfidf3` on `txt1` and return the TF-IDF matrix as a dense array."""
    return tfidf3.fit_transform(txt1).toarray()


# Load the dataset, discard the four unused columns, and name the three
# that remain.
df3 = pd.read_csv("Stress Detection.csv")
df3 = df3.drop(
    ["subreddit", "post_id", "sentence_range", "syntax_fk_grade"],
    axis=1,
)
df3.columns = ["Text", "Sentiment", "Stress Level"]

# Features come from the raw "Text" column; the target is "Stress Level"
# converted to a NumPy array.
x = transform3(df3["Text"])
y = df3["Stress Level"].to_numpy()

# Hold out 10% of the rows with a fixed seed and fit a regression tree
# (at most 2000 leaves) on the rest.
x_train3, x_test3, y_train3, y_test3 = train_test_split(
    x, y, test_size=0.1, random_state=0
)
model3 = DecisionTreeRegressor(max_leaf_nodes=2000)
model3.fit(x_train3, y_train3)
#Stress Detection Page
if rad == "Stress Detection":
    st.header("Detect The Amount Of Stress In The Text!!")
    sent3 = st.text_area("Enter The Text")
    # Preprocess and predict only after "Predict" is pressed. Streamlit
    # re-executes the script on every interaction, so doing this
    # unconditionally repeated the work for results that were never shown.
    if st.button("Predict"):
        transformed_sent3 = transform_text(sent3)
        vector_sent3 = tfidf3.transform([transformed_sent3])
        prediction3 = model3.predict(vector_sent3)[0]
        # NOTE(review): prediction3 comes from a regressor, so it is a
        # continuous value. Whether 0 is the right cut-off depends on the
        # range of the "Stress Level" column, which is not visible here.
        # If those labels are never negative, the `< 0` branch cannot be
        # reached. Confirm against the dataset.
        if prediction3 >= 0:
            st.warning("Stressful Text!!")
        elif prediction3 < 0:
            st.success("Not A Stressful Text!!")
#Hate & Offensive Content Prediction
# TF-IDF vectorizer capped at 20 features, with the stop words in `sw` removed.
tfidf4 = TfidfVectorizer(max_features=20, stop_words=sw)


def transform4(txt1):
    """Fit `tfidf4` on `txt1` and return the TF-IDF matrix as a dense array."""
    tfidf_matrix = tfidf4.fit_transform(txt1)
    return tfidf_matrix.toarray()


# Load the dataset, discard the three unused columns, and name the four
# that remain.
df4 = pd.read_csv("Hate Content Detection.csv").drop(
    ["Unnamed: 0", "count", "neither"], axis=1
)
df4.columns = ["Hate Level", "Offensive Level", "Class Level", "Text"]

# Features come from the raw "Text" column; the target is "Class Level".
x = transform4(df4["Text"])
y = df4["Class Level"]

# Hold out 10% of the rows with a fixed seed and fit a random forest on the rest.
x_train4, x_test4, y_train4, y_test4 = train_test_split(
    x, y, test_size=0.1, random_state=0
)
model4 = RandomForestClassifier()
model4.fit(x_train4, y_train4)
#Hate & Offensive Content Page
if rad == "Hate and Offensive Content Detection":
    st.header("Detect The Level Of Hate & Offensive Content In The Text!!")
    sent4 = st.text_area("Enter The Text")
    # Preprocess and classify only after "Predict" is pressed. Streamlit
    # re-executes the script on every interaction, so doing this
    # unconditionally repeated the work for results that were never shown.
    if st.button("Predict"):
        transformed_sent4 = transform_text(sent4)
        vector_sent4 = tfidf4.transform([transformed_sent4])
        prediction4 = model4.predict(vector_sent4)[0]
        if prediction4 == 0:
            # st.error shows a plain error-styled message. st.exception is
            # meant for exception objects, not for a string.
            st.error("Highly Offensive Text!!")
        elif prediction4 == 1:
            st.warning("Offensive Text!!")
        elif prediction4 == 2:
            st.success("Non Offensive Text!!")
#Sarcasm Detection Prediction
# TF-IDF vectorizer capped at 20 features, with the stop words in `sw` removed.
tfidf5 = TfidfVectorizer(max_features=20, stop_words=sw)


def transform5(txt1):
    """Fit `tfidf5` on `txt1` and return the TF-IDF matrix as a dense array."""
    return tfidf5.fit_transform(txt1).toarray()


# Load the dataset and name its two columns.
df5 = pd.read_csv("Sarcasm Detection.csv")
df5.columns = ["Text", "Label"]

# Features come from the raw "Text" column; targets are the "Label" column.
x = transform5(df5["Text"])
y = df5["Label"]

# Hold out 10% of the rows with a fixed seed and fit on the rest.
x_train5, x_test5, y_train5, y_test5 = train_test_split(
    x, y, random_state=0, test_size=0.1
)
model5 = LogisticRegression()
model5.fit(x_train5, y_train5)
#Sarcasm Detection Page
if rad == "Sarcasm Detection":
    st.header("Detect Whether The Text Is Sarcastic Or Not!!")
    sent5 = st.text_area("Enter The Text")
    # Preprocess and classify only after "Predict" is pressed. Streamlit
    # re-executes the script on every interaction, so doing this
    # unconditionally repeated the work for results that were never shown.
    if st.button("Predict"):
        transformed_sent5 = transform_text(sent5)
        vector_sent5 = tfidf5.transform([transformed_sent5])
        prediction5 = model5.predict(vector_sent5)[0]
        if prediction5 == 1:
            # st.error shows a plain error-styled message. st.exception is
            # meant for exception objects, not for a string.
            st.error("Sarcastic Text!!")
        elif prediction5 == 0:
            st.success("Non Sarcastic Text!!")