|
import pickle

import nltk
import pandas as pd
import streamlit as st

# Fetch the NLTK corpora the app needs at startup (no-op if already present).
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
# 'omw-1.4' backs the WordNet lemmatizer on newer NLTK releases; the original
# code requested 'WordNetLemmatizer', which is not a downloadable resource
# (nltk.download prints an error and returns False for it).
nltk.download('omw-1.4')

from nltk.corpus import stopwords
from nltk.corpus.reader import wordnet  # noqa: F401  (unused; kept for compatibility)
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import punkt  # noqa: F401  (unused; kept for compatibility)
|
|
|
|
|
def main():
    """Streamlit app that predicts the Bloom's Taxonomy level of a question.

    Loads a fitted TF-IDF vectorizer and six pickled classifiers, shows the
    prediction of the most confident model, and charts each model's maximum
    class probability for the entered question.
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")

    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # NOTE(review): pickle.load runs arbitrary code — only load trusted
        # artifacts shipped with this app.
        path_tfidf = "tfidf.pickle"
        with open(path_tfidf, 'rb') as data:
            tfidf = pickle.load(data)

        # Numeric codes the classifiers were trained on, keyed by level name.
        category_codes = {
            'BT1 - Knowledge': 0,
            'BT2 - Comprehension': 1,
            'BT3 - Application': 2,
            'BT4 - Analysis': 3,
            'BT5 - Evaluation': 4,
            'BT6 - Creation': 5
        }

        punctuation_signs = list("?:!.,;")
        stop_words = list(stopwords.words('english'))

        def create_features_from_text(text):
            """Clean, lemmatize and TF-IDF-vectorize a single question string.

            Returns a dense (1, n_features) numpy array matching the fitted
            ``tfidf`` vectorizer's vocabulary.
            """
            df = pd.DataFrame(columns=['Questions'])
            df.loc[0] = text

            # Basic clean-up: drop line breaks, collapse double spaces, strip
            # quotes.  regex=False: these are literal replacements (the
            # original relied on pandas' deprecated single-char-literal
            # behavior, which changed in pandas 2.0).
            parsed = df['Questions'].str.replace("\r", " ", regex=False)
            parsed = parsed.str.replace("\n", " ", regex=False)
            # BUG FIX: original replaced a single space with a single space
            # (a no-op); the intent was collapsing double spaces.
            parsed = parsed.str.replace("  ", " ", regex=False)
            parsed = parsed.str.replace('"', '', regex=False)
            parsed = parsed.str.lower()
            for punct_sign in punctuation_signs:
                # Literal punctuation ('?', '.') must not be treated as regex.
                parsed = parsed.str.replace(punct_sign, '', regex=False)
            parsed = parsed.str.replace("'s", "", regex=False)

            # Verb-lemmatize every whitespace-separated token.
            wordnet_lemmatizer = WordNetLemmatizer()
            text_words = parsed.loc[0].split(" ")
            lemmatized_text = " ".join(
                wordnet_lemmatizer.lemmatize(word, pos="v") for word in text_words
            )
            cleaned = pd.Series([lemmatized_text], name='Questions_Parsed')

            # Remove English stopwords as whole words.
            for stop_word in stop_words:
                regex_stopword = r"\b" + stop_word + r"\b"
                cleaned = cleaned.str.replace(regex_stopword, '', regex=True)

            features = tfidf.transform(cleaned).toarray()
            return features

        def get_category_name(category_id):
            """Return the taxonomy level name for a numeric category code.

            Returns None if the code is unknown.
            """
            for category, id_ in category_codes.items():
                if id_ == category_id:
                    return category

        def _load_model(path):
            """Unpickle a fitted classifier from *path*."""
            with open(path, 'rb') as data:
                return pickle.load(data)

        def predict_from_text(text):
            """Classify *text* with all six models.

            Returns an 8-tuple: (best category name, best confidence %, then
            the max-probability % of the LR, MNB, GBC, RFC, KNN and SVC
            models, in that order).
            """
            models = {
                "category_lr": _load_model('best_lrc.pickle'),
                "category_mnb": _load_model('best_mnbc.pickle'),
                "category_gbc": _load_model('best_gbc.pickle'),
                "category_rfc": _load_model('best_rfc.pickle'),
                "category_knn": _load_model('best_knnc.pickle'),
                "category_svc": _load_model('best_svc.pickle'),
            }

            # Vectorize once; the original recomputed the features for every
            # predict/predict_proba call (12 times per prediction).
            features = create_features_from_text(text)

            categories = {}
            best_one = {}
            for key, model in models.items():
                # BUG FIX: the original used svc_model.predict_proba for the
                # RFC and KNN confidences, so those two scores were wrong.
                categories[key] = get_category_name(model.predict(features)[0])
                best_one[key] = model.predict_proba(features)[0].max() * 100

            keymax = max(best_one, key=best_one.get)
            per_model = (
                best_one["category_lr"],
                best_one["category_mnb"],
                best_one["category_gbc"],
                best_one["category_rfc"],
                best_one["category_knn"],
                best_one["category_svc"],
            )
            return (categories[keymax], best_one[keymax]) + per_model

        st.info("Prediction with Various Models")

        bt_text = st.text_area("Question to Predict", "Type Here")

        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))

            prediction = predict_from_text(bt_text)

            st.success("Blooms Taxonomy Level :: {}".format(prediction[0]))
            st.success("Maximum Probability :: {}".format(prediction[1]))
            st.write("Performance of Various Algorithms")

            data = pd.DataFrame({
                'Various Algorithm': [
                    'Logistic Regression',
                    'Multinomial Naive Bayes',
                    'Gradient Boosting Classifier',
                    'Random Forest Classifier',
                    'k-Nearest Neighbors',
                    'Support Vector Machine',
                ],
                'Maximum Accuracy': list(prediction[2:8]),
            }).set_index('Various Algorithm')

            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This is used for classification of Bloom's Taxonomy Levels.")

    # Hide Streamlit's default hamburger menu and footer.
    # NOTE(review): source indentation was mangled; this is assumed to apply
    # to every activity, not just "About" — confirm against the deployed app.
    hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
|
|
|
# Script entry point: launch the Streamlit app.
if __name__ == '__main__':
    main()
|
|