# NOTE: page-scrape residue, not part of the program (original text: "Spaces: Sleeping Sleeping").
import streamlit as st | |
import pandas as pd | |
from sklearn.model_selection import train_test_split | |
from sklearn.linear_model import LogisticRegression | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from sklearn.metrics import accuracy_score, classification_report | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.ensemble import RandomForestClassifier | |
from sklearn.svm import SVC | |
import pickle | |
import matplotlib.pyplot as plt | |
# Page heading shown at the top of the app.
APP_TITLE = ":blue[IMDB Dataset of 50k reviews]"
st.title(APP_TITLE)
def load_data(path='IMDB Dataset.csv') -> pd.DataFrame:
    """Load the reviews dataset into a DataFrame.

    Args:
        path: Location of the CSV file, forwarded unchanged to
            ``pandas.read_csv``. Defaults to ``'IMDB Dataset.csv'`` in the
            current working directory, which is what the app uses.

    Returns:
        The parsed CSV contents.
    """
    # NOTE(review): this is called from top-level script code, so the file is
    # re-read each time the script executes; wrapping the function in
    # ``st.cache_data`` would likely avoid that -- confirm before adopting.
    return pd.read_csv(path)
# Seed the session-state slots used further down the script. Each slot is
# only created when missing, so values stored earlier in the session survive.
# Factories (rather than shared literals) give every slot its own fresh object.
_SESSION_DEFAULT_FACTORIES = {
    'models': dict,            # name -> fitted classifier
    'vectorizer': lambda: None,  # filled in when the models are trained
    'accuracy': dict,          # name -> accuracy score
    'report': dict,            # name -> classification report
}
for _slot, _make_default in _SESSION_DEFAULT_FACTORIES.items():
    if _slot not in st.session_state:
        st.session_state[_slot] = _make_default()
# Dataset
st.header("Dataset")
df = load_data()
with st.expander("Show Data"):
    # Displayed before the label encoding below, so the table still shows the
    # original 'positive' / 'negative' strings.
    st.write(df)

# Encode the string labels as integers for the classifiers.
df['sentiment'] = df['sentiment'].map({'positive': 1, 'negative': 0})
X = df['review']
y = df['sentiment']

# A fixed random_state keeps the split identical from one script run to the
# next, so the cached models and a fresh split always agree.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=41
)

# Train only when this session has no fitted models yet. The fitted
# vectorizer, models and metrics are kept in st.session_state and reused.
# (A second, unconditional TfidfVectorizer fit used to run here on every
# execution; its outputs were never read, so it has been removed.)
if not st.session_state.models:
    st.session_state.vectorizer = TfidfVectorizer()
    X_train_tfidf = st.session_state.vectorizer.fit_transform(X_train)
    # The test matrix does not depend on the model, so build it once here
    # instead of once per loop iteration.
    X_test_tfidf = st.session_state.vectorizer.transform(X_test)

    # models
    models = {
        # "SVM": SVC(kernel='linear'),
        "Logistic Regression": LogisticRegression(max_iter=1000),
        "Naive Bayes": MultinomialNB(),
    }

    for name, model in models.items():
        model.fit(X_train_tfidf, y_train)
        st.session_state.models[name] = model

        # Evaluate on the held-out split and keep the metrics for display.
        y_pred = model.predict(X_test_tfidf)
        st.session_state.accuracy[name] = accuracy_score(y_test, y_pred)
        st.session_state.report[name] = classification_report(y_test, y_pred)
# Accuracy comparison chart: one bar per trained model.
if st.session_state.accuracy:
    model_names = list(st.session_state.accuracy)
    scores = [st.session_state.accuracy[label] for label in model_names]

    # Draw on an explicit Figure/Axes instead of pyplot's implicit global
    # figure, hand that figure to Streamlit, then close it so a new figure
    # does not accumulate every time the script runs.
    fig, ax = plt.subplots(figsize=(10, 5))
    ax.bar(model_names, scores, color=['blue', 'orange', 'green'])
    ax.set_ylabel('Accuracy')
    ax.set_title('Model Accuracy Comparison')
    st.pyplot(fig)
    plt.close(fig)

# Per-model classification reports.
for name in st.session_state.report:
    st.write(f"### Classification Report for {name}:")
    report = st.session_state.report[name]
    if isinstance(report, str):
        # classification_report() returns preformatted text by default;
        # st.text shows it as-is, whereas st.dataframe expects tabular data.
        st.text(report)
    else:
        # Anything non-string stored here is passed through as table data.
        st.dataframe(report)
st.header("Manual Tryouts", divider='orange')

# Input text from the user
user_input = st.text_area("Enter your Review", "")

if st.button("Predict"):
    # Treat whitespace-only input like an empty box: there is nothing to
    # classify, so prompt for a review instead of predicting on blank text.
    if user_input.strip():
        # Vectorize once with the vectorizer fitted during training, then
        # reuse the same feature row for every model.
        user_input_tfidf = st.session_state.vectorizer.transform([user_input])

        # Label 1 was mapped from 'positive' when the dataset was encoded.
        predictions = {
            name: "Positive" if model.predict(user_input_tfidf)[0] == 1 else "Negative"
            for name, model in st.session_state.models.items()
        }

        # Display predictions for each model
        st.write("Predicted Sentiment:")
        for name, label in predictions.items():
            st.write(f"{name}: **{label}**")
    else:
        st.write("Please enter a review.")
# # Logistic Regression (the code below uses LogisticRegression, not linear regression)
# st.header('Linear Regression',divider='orange') | |
# model = LogisticRegression() | |
# model.fit(X_train_tfidf, y_train) | |
# y_pred = model.predict(X_test_tfidf) | |
# print("Accuracy:", accuracy_score(y_test, y_pred)) | |
# print(classification_report(y_test, y_pred)) | |
# filename = 'linear_regression_model.pkl' | |
# with open(filename, 'wb') as model_file: | |
# pickle.dump(model, model_file) | |
# st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |
# # Naive Bayes | |
# st.header("Naive Bayes",divider='orange') | |
# model_nb = MultinomialNB() | |
# model_nb.fit(X_train_tfidf, y_train) | |
# # Evaluate the model | |
# y_pred = model_nb.predict(X_test_tfidf) | |
# st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |
# # SVM | |
# st.header("Support Vector Machine") | |
# st.caption("Kernel type is linear.")
# model = SVC(kernel='linear') # You can also try 'rbf', 'poly', etc. | |
# model.fit(X_train_tfidf, y_train) | |
# y_pred = model.predict(X_test_tfidf) | |
# st.write("Accuracy:", accuracy_score(y_test, y_pred)) | |
# st.markdown(body=classification_report(y_test, y_pred),unsafe_allow_html=True) | |