# NOTE(review): the lines above this file in the original scrape were Hugging Face
# Spaces page chrome (status, commit hashes, file size, line-number gutter), not
# source code; replaced with this comment so the module parses.
import streamlit as st
import pickle
import pandas as pd
import nltk

# Fetch the NLTK corpora the preprocessing pipeline needs. These calls are
# no-ops after the first successful download.
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('punkt')
from nltk.tokenize import punkt  # noqa: F401  (unused below; kept — rest of file not visible)
nltk.download('wordnet')
# BUG FIX: 'WordNetLemmatizer' is a class, not a downloadable NLTK resource —
# the old call always failed with an error message. WordNetLemmatizer needs
# the 'wordnet' corpus (above) plus 'omw-1.4' on newer NLTK releases.
nltk.download('omw-1.4')
from nltk.corpus.reader import wordnet  # noqa: F401  (unused below; kept — rest of file not visible)
from nltk.stem import WordNetLemmatizer
def main():
    """Bloom's Taxonomy classifier.

    Streamlit app with two pages:
      * "Prediction" — preprocess a user-supplied question, vectorise it with a
        pre-fitted TF-IDF model (``tfidf.pickle``) and classify it with six
        pickled classifiers, reporting the most confident one.
      * "About" — a short description of the app.
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")
    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # Pre-fitted TF-IDF vectoriser (the one used at training time).
        path_tfidf = "tfidf.pickle"
        with open(path_tfidf, 'rb') as data:
            tfidf = pickle.load(data)

        # Numeric labels the classifiers were trained on, keyed by level name.
        category_codes = {
            'BT1 - Knowledge': 0,
            'BT2 - Comprehension': 1,
            'BT3 - Application': 2,
            'BT4 - Analysis': 3,
            'BT5 - Evaluation': 4,
            'BT6 - Creation': 5
        }
        punctuation_signs = list("?:!.,;")
        stop_words = list(stopwords.words('english'))

        def create_features_from_text(text):
            """Replicate the training-time preprocessing of one question and
            return its TF-IDF feature row as a dense array."""
            lemmatized_text_list = []
            df = pd.DataFrame(columns=['Questions'])
            df.loc[0] = text
            # Literal clean-ups (regex=False: plain-text replacement, robust to
            # pandas versions where the regex default changed). The third call
            # collapses double spaces — the scraped source showed " " -> " ",
            # which is a no-op; double-space collapse is the evident intent.
            df['Questions_Parsed_1'] = df['Questions'].str.replace("\r", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace("\n", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace("  ", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace('"', '', regex=False)
            df['Questions_Parsed_2'] = df['Questions_Parsed_1'].str.lower()
            df['Questions_Parsed_3'] = df['Questions_Parsed_2']
            for punct_sign in punctuation_signs:
                # regex=False: '?' '.' etc. must be treated literally, not as
                # regex metacharacters.
                df['Questions_Parsed_3'] = df['Questions_Parsed_3'].str.replace(punct_sign, '', regex=False)
            df['Questions_Parsed_4'] = df['Questions_Parsed_3'].str.replace("'s", "", regex=False)

            # Lemmatise every word as a verb, matching the training pipeline.
            wordnet_lemmatizer = WordNetLemmatizer()
            text_words = df.loc[0]['Questions_Parsed_4'].split(" ")
            lemmatized_list = [wordnet_lemmatizer.lemmatize(word, pos="v")
                               for word in text_words]
            lemmatized_text_list.append(" ".join(lemmatized_list))
            df['Questions_Parsed_5'] = lemmatized_text_list

            # Remove English stop words; \b anchors keep whole words intact.
            df['Questions_Parsed_6'] = df['Questions_Parsed_5']
            for stop_word in stop_words:
                regex_stopword = r"\b" + stop_word + r"\b"
                df['Questions_Parsed_6'] = df['Questions_Parsed_6'].str.replace(regex_stopword, '', regex=True)

            # Series.rename({...}) in the original mapped *index labels* and was
            # a no-op; rename(str) sets the Series name as intended.
            parsed = df['Questions_Parsed_6'].rename('Questions_Parsed')
            return tfidf.transform(parsed).toarray()

        def get_category_name(category_id):
            """Inverse lookup: numeric label -> Bloom's level name (None if unknown)."""
            for category, id_ in category_codes.items():
                if id_ == category_id:
                    return category

        def predict_from_text(text):
            """Classify *text* with all six models.

            Returns (best_category, best_probability, lr%, mnb%, gbc%, rfc%,
            knn%, svc%) where each percentage is that model's maximum class
            probability for *text*.
            """
            models = {}
            for key, path in [('lr', 'best_lrc.pickle'),
                              ('mnb', 'best_mnbc.pickle'),
                              ('gbc', 'best_gbc.pickle'),
                              ('rfc', 'best_rfc.pickle'),
                              ('knn', 'best_knnc.pickle'),
                              ('svc', 'best_svc.pickle')]:
                with open(path, 'rb') as data:
                    models[key] = pickle.load(data)

            # Compute the feature vector ONCE; the original re-ran the whole
            # preprocessing pipeline for every predict/predict_proba call.
            features = create_features_from_text(text)

            categories = {}
            max_probas = {}
            for key, model in models.items():
                # BUG FIX: rfc and knn probabilities previously came from the
                # SVC model via copy-paste; each model now reports its own.
                categories[key] = get_category_name(model.predict(features)[0])
                max_probas[key] = model.predict_proba(features)[0].max() * 100

            a, b, c, d, e, f = (max_probas[k]
                                for k in ('lr', 'mnb', 'gbc', 'rfc', 'knn', 'svc'))
            best_key = max(max_probas, key=max_probas.get)
            return categories[best_key], max_probas[best_key], a, b, c, d, e, f

        st.info("Prediction with Various Models")
        bt_text = st.text_area("Question to Predict", "Type Here")
        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))
            prediction = predict_from_text(bt_text)
            st.success("Blooms Taxonomy Level :: {}".format(prediction[0]))
            st.success("Maximum Probability :: {}".format(prediction[1]))
            st.write("Performance of Various Algorithms")
            data = pd.DataFrame({
                'Various Algorithm': ['Logistic Regression', 'Multinomial Naive Bayes',
                                      'Gradient Boosting Classifier', 'Random Forest Classifier',
                                      'k-Nearest Neighbors', 'Support Vector Machine'],
                'Maximum Accuracy': [prediction[2], prediction[3], prediction[4],
                                     prediction[5], prediction[6], prediction[7]],
            }).set_index('Various Algorithm')
            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This is used for classification of Bloom's Taxonomy Levels.")

    # Hide Streamlit's default hamburger menu and footer.
    hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
    st.markdown(hide_streamlit_style, unsafe_allow_html=True)
# Script entry point: run the Streamlit app when executed directly.
if __name__ == '__main__':
    main()