# Hugging Face Spaces page residue ("Spaces: Sleeping") captured along with
# the source; kept as a comment so the file remains valid Python.
import re
import pickle

import streamlit as st
import pandas as pd
import nltk

# Resources required by the preprocessing pipeline.
# NOTE(review): the original also called nltk.download('WordNetLemmatizer'),
# which is not a valid NLTK resource id (the lemmatizer class lives in
# nltk.stem and only needs the 'wordnet' corpus), so that call is dropped.
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
from nltk.corpus import stopwords
from nltk.tokenize import punkt            # unused, kept for compatibility
from nltk.corpus.reader import wordnet     # unused, kept for compatibility
from nltk.stem import WordNetLemmatizer


def main():
    """Streamlit app that predicts the Bloom's Taxonomy level of a question.

    Sidebar offers two activities:
      * "Prediction" — loads a fitted TF-IDF vectoriser plus six pickled
        classifiers, preprocesses the user's question, and reports each
        model's prediction confidence and the overall best prediction.
      * "About" — a one-line description of the app.
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")

    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # Fitted TF-IDF vectoriser produced at training time.
        with open("tfidf.pickle", 'rb') as data:
            tfidf = pickle.load(data)

        # Numeric class id -> human-readable Bloom's level.
        category_codes = {
            'BT1 - Knowledge': 0,
            'BT2 - Comprehension': 1,
            'BT3 - Application': 2,
            'BT4 - Analysis': 3,
            'BT5 - Evaluation': 4,
            'BT6 - Creation': 5
        }
        punctuation_signs = list("?:!.,;")
        stop_words = list(stopwords.words('english'))

        def create_features_from_text(text):
            """Clean and lemmatise *text*, then return its TF-IDF vector.

            Mirrors the preprocessing applied at training time:
            whitespace/quote clean-up, lowercasing, punctuation and
            possessive removal, verb-lemmatisation, stop-word removal.
            """
            df = pd.DataFrame(columns=['Questions'])
            df.loc[0] = text

            # Literal (non-regex) clean-up; regex=False is required because
            # several punctuation signs ('?', '.') are regex metacharacters.
            parsed = (df['Questions']
                      .str.replace("\r", " ", regex=False)
                      .str.replace("\n", " ", regex=False)
                      .str.replace(r"\s+", " ", regex=True)  # collapse runs of spaces
                      .str.replace('"', '', regex=False)
                      .str.lower())
            for punct_sign in punctuation_signs:
                parsed = parsed.str.replace(punct_sign, '', regex=False)
            parsed = parsed.str.replace("'s", "", regex=False)

            # Lemmatise each word as a verb, matching the training pipeline.
            wordnet_lemmatizer = WordNetLemmatizer()
            lemmatized = " ".join(
                wordnet_lemmatizer.lemmatize(word, pos="v")
                for word in parsed.loc[0].split(" ")
            )
            parsed = pd.Series([lemmatized])

            # Remove whole-word stop words; re.escape keeps the pattern literal.
            for stop_word in stop_words:
                pattern = r"\b" + re.escape(stop_word) + r"\b"
                parsed = parsed.str.replace(pattern, '', regex=True)

            return tfidf.transform(parsed).toarray()

        def get_category_name(category_id):
            """Map a numeric class id back to its Bloom's level name."""
            for category, code in category_codes.items():
                if code == category_id:
                    return category
            return None  # unknown id — should not happen with trained models

        def predict_from_text(text):
            """Classify *text* with all six trained models.

            Returns an 8-tuple:
            (best category name, best confidence %, then the per-model
            confidences in fixed order: lr, mnb, gbc, rfc, knn, svc).
            """
            model_paths = {
                'lr': 'best_lrc.pickle',
                'mnb': 'best_mnbc.pickle',
                'gbc': 'best_gbc.pickle',
                'rfc': 'best_rfc.pickle',
                'knn': 'best_knnc.pickle',
                'svc': 'best_svc.pickle',
            }
            models = {}
            for key, path in model_paths.items():
                with open(path, 'rb') as data:
                    models[key] = pickle.load(data)

            # Vectorise ONCE and reuse: the original rebuilt the features
            # for every one of the 12 predict/predict_proba calls.
            features = create_features_from_text(text)

            categories = {}
            confidences = {}
            for key, model in models.items():
                categories[key] = get_category_name(model.predict(features)[0])
                # BUG FIX: the original took the rfc and knn probabilities
                # from svc_model; each model now reports its own confidence.
                confidences[key] = model.predict_proba(features).max() * 100

            order = ['lr', 'mnb', 'gbc', 'rfc', 'knn', 'svc']
            best = max(order, key=lambda k: confidences[k])
            return (categories[best], confidences[best],
                    *(confidences[k] for k in order))

        st.info("Prediction with Various Models")
        bt_text = st.text_area("Question to Predict", "Type Here")
        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))
            prediction = predict_from_text(bt_text)
            st.success("Blooms Taxonomy Level :: {}".format(prediction[0]))
            st.success("Maximum Probability :: {}".format(prediction[1]))
            st.write("Performance of Various Algorithms")
            data = pd.DataFrame({
                'Various Algorithm': [
                    'Logistic Regression',
                    'Multinomial Naive Bayes',
                    'Gradient Boosting Classifier',
                    'Random Forest Classifier',
                    'k-Nearest Neighbors',
                    'Support Vector Machine',
                ],
                'Maximum Accuracy': list(prediction[2:8]),
            }).set_index('Various Algorithm')
            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This is used for classification of Bloom's Taxonomy Levels.")

    # Hide Streamlit's default hamburger menu and footer chrome.
    hide_streamlit_style = """
    <style>
    #MainMenu {visibility: hidden;}
    footer {visibility: hidden;}
    </style>
    """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)


if __name__ == '__main__':
    main()