File size: 8,335 Bytes
4b3083e
a7fa880
 
 
773e68b
a7fa880
d40f603
 
 
 
773e68b
a7fa880
d49bf2e
4b3083e
a7fa880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fd359dd
a7fa880
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b80ea12
a7fa880
 
 
 
 
 
 
 
 
475febe
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
import streamlit as st
import pickle
import pandas as pd
import nltk

# Download the NLTK resources the preprocessing pipeline needs.
nltk.download('stopwords')
from nltk.corpus import stopwords
nltk.download('punkt')
from nltk.tokenize import punkt
nltk.download('wordnet')
from nltk.corpus.reader import wordnet
# BUG FIX: 'WordNetLemmatizer' is not a valid NLTK resource id (the original
# download call always failed with an error message). WordNetLemmatizer is a
# class backed by the 'wordnet' corpus (downloaded above); 'omw-1.4' is its
# multilingual companion required by recent NLTK releases.
nltk.download('omw-1.4')
from nltk.stem import WordNetLemmatizer


def main():
    """Streamlit app that predicts the Bloom's Taxonomy level of a question.

    Loads a fitted TF-IDF vectorizer and six pickled classifiers, turns the
    user's question into TF-IDF features, and reports the prediction of the
    model with the highest class probability together with every model's
    confidence (shown as a table and a bar chart).
    """
    st.title("Blooms Taxonomy Classifier")
    st.subheader("ML App for Blooms Taxonomy Level Prediction")

    activities = ["Prediction", "About"]
    choice = st.sidebar.selectbox("Choose Activity", activities)

    if choice == "Prediction":
        # NOTE(review): unpickling is unsafe on untrusted files; acceptable
        # here only because these pickles ship with the app.
        path_tfidf = "tfidf.pickle"
        with open(path_tfidf, 'rb') as data:
            tfidf = pickle.load(data)

        # Label encoding used when the classifiers were trained.
        category_codes = {
            'BT1 - Knowledge': 0,
            'BT2 - Comprehension': 1,
            'BT3 - Application': 2,
            'BT4 - Analysis': 3,
            'BT5 - Evaluation': 4,
            'BT6 - Creation': 5,
        }

        punctuation_signs = list("?:!.,;")
        stop_words = list(stopwords.words('english'))

        def create_features_from_text(text):
            """Clean, lemmatize and TF-IDF-vectorize a single question.

            Mirrors the preprocessing used at training time: whitespace
            normalization, lowercasing, punctuation and possessive removal,
            verb lemmatization, then stop-word removal.
            """
            df = pd.DataFrame(columns=['Questions'])
            df.loc[0] = text
            # Literal replacements: regex=False so '?' '.' etc. below are
            # never interpreted as regex metacharacters (which would strip
            # far more than the intended sign) and behavior is stable
            # across pandas versions.
            df['Questions_Parsed_1'] = df['Questions'].str.replace("\r", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace("\n", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace("    ", " ", regex=False)
            df['Questions_Parsed_1'] = df['Questions_Parsed_1'].str.replace('"', '', regex=False)
            df['Questions_Parsed_2'] = df['Questions_Parsed_1'].str.lower()
            df['Questions_Parsed_3'] = df['Questions_Parsed_2']
            for punct_sign in punctuation_signs:
                df['Questions_Parsed_3'] = df['Questions_Parsed_3'].str.replace(punct_sign, '', regex=False)
            df['Questions_Parsed_4'] = df['Questions_Parsed_3'].str.replace("'s", "", regex=False)

            # Lemmatize every word as a verb (matches training pipeline).
            wordnet_lemmatizer = WordNetLemmatizer()
            lemmatized_text = " ".join(
                wordnet_lemmatizer.lemmatize(word, pos="v")
                for word in df.loc[0]['Questions_Parsed_4'].split(" ")
            )
            df['Questions_Parsed_5'] = [lemmatized_text]
            df['Questions_Parsed_6'] = df['Questions_Parsed_5']
            for stop_word in stop_words:
                # regex=True is required: \b word boundaries must be treated
                # as a pattern (pandas >= 2.0 defaults to literal matching,
                # which would make this loop a silent no-op).
                regex_stopword = r"\b" + stop_word + r"\b"
                df['Questions_Parsed_6'] = df['Questions_Parsed_6'].str.replace(regex_stopword, '', regex=True)

            parsed = df['Questions_Parsed_6'].rename('Questions_Parsed')

            # TF-IDF
            return tfidf.transform(parsed).toarray()

        def get_category_name(category_id):
            """Map a numeric class id back to its Bloom's level name."""
            for category, code in category_codes.items():
                if code == category_id:
                    return category

        def predict_from_text(text):
            """Run all six classifiers on *text*.

            Returns an 8-tuple:
            (best category name, best max-probability %, then the max
            probability % of lr, mnb, gbc, rfc, knn, svc in that order).
            """
            model_files = {
                'lr': 'best_lrc.pickle',
                'mnb': 'best_mnbc.pickle',
                'gbc': 'best_gbc.pickle',
                'rfc': 'best_rfc.pickle',
                'knn': 'best_knnc.pickle',
                'svc': 'best_svc.pickle',
            }
            models = {}
            for key, path in model_files.items():
                with open(path, 'rb') as data:
                    models[key] = pickle.load(data)

            # Vectorize once; the original recomputed the features for every
            # one of the 12 predict/predict_proba calls.
            features = create_features_from_text(text)

            predictions = {k: m.predict(features)[0] for k, m in models.items()}
            # BUG FIX: the original took rfc and knn probabilities from
            # svc_model (copy-paste error), so two models' confidences were
            # wrong and could mis-select the "best" model.
            max_probas = {
                k: m.predict_proba(features)[0].max() * 100
                for k, m in models.items()
            }

            order = ['lr', 'mnb', 'gbc', 'rfc', 'knn', 'svc']
            best = max(order, key=lambda k: max_probas[k])
            return (
                get_category_name(predictions[best]),
                max_probas[best],
                *(max_probas[k] for k in order),
            )

        st.info("Prediction with Various Models")

        bt_text = st.text_area("Question to Predict", "Type Here")

        if st.button("Classify"):
            st.text("Original Text ::\n{}".format(bt_text))

            prediction = predict_from_text(bt_text)

            st.success("Blooms Taxonomy Level   ::   {}".format(prediction[0]))
            st.success("Maximum Probability   ::   {}".format(prediction[1]))
            st.write("Performance of Various Algorithms")

            data = pd.DataFrame({
                'Various Algorithm': ['Logistic Regression', 'Multinomial Naive Bayes', 'Gradient Boosting Classifier', 'Random Forest Classifier', 'k-Nearest Neighbors', 'Support Vector Machine'],
                'Maximum Accuracy': list(prediction[2:8]),
            }).set_index('Various Algorithm')

            st.write(data)
            st.bar_chart(data)

    if choice == "About":
        st.success("This is used for classification of Bloom's Taxonomy Levels.")
# CSS injected into the page to hide Streamlit's default chrome (the
# hamburger main menu and the "Made with Streamlit" footer).
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
# unsafe_allow_html is required for raw <style> markup to take effect.
st.markdown(hide_streamlit_style, unsafe_allow_html=True) 

# Run the app only when executed as a script (Streamlit executes this file
# top-to-bottom on every interaction).
if __name__ =='__main__':
    main()