"""Streamlit app: classify Comcast telecom complaints and explore them with spaCy."""

import os

import joblib
import matplotlib

# Select the non-interactive backend before pyplot is imported.
matplotlib.use("Agg")

import matplotlib.pyplot as plt
import pandas as pd
import scipy
import spacy
import streamlit as st
from wordcloud import WordCloud

nlp = spacy.load("en_core_web_sm")

# load Vectorizer
# The handle is closed as soon as the vectorizer has been deserialized.
with open("models/tfidf_vect.joblib", "rb") as complaints_vectorizer:
    complaints_cv = joblib.load(complaints_vectorizer)

# Layout Templates
_HTML_TEMP = """

ML - Telecom Complaints Classifier

Avatar

{}

"""

# NOTE(review): the two templates below are not referenced anywhere in this
# file; they are kept verbatim in case other code or a later feature uses them.
_TITLE_TEMP = """

{Debmalya Ray}

Avatar
Author:{Debmalya Ray}


{}

"""

_ARTICLE_TEMP = """

{Debmalya Ray}

Author:{Debmalya Ray}
Post Date: {}
Avatar

{}

"""

# Model name shown in the UI -> serialized model file.
_MODEL_FILES = {
    "Decision Tree": "models/dtcpred.joblib",
    "GradientBoost": "models/gbcpred.joblib",
}

# Status name -> numeric label compared against the model output.
_PREDICTION_LABELS = {'Closed': 0, 'Open': 1, 'Pending': 2, 'Solved': 3}

_NLP_TASKS = [
    "Tokenization",
    "Lemmatization",
    "Named Entity Recognition(NER)",
    "Parts-of-Speech(POS) Tags",
]

_SEPARATOR = """**************************************************************************"""

# Example complaint records displayed on the About page.
_SAMPLE_COMPLAINTS = (
    """ 361148 || Throttling service and unreasonable data caps || 24-06-2015 || Acworth || Georgia || 30101 || Pending """,
    """ 359792 || Comcast refuses to help troubleshoot and correct my service. || 23-06-2015 || Adrian || Michigan || 49221 || Solved """,
    """ 371214 || Comcast Raising Prices and Not Being Available To Ask Why || 28-06-2015 || Alameda || California || 94501 || Open """,
    """ 242732 || Speed and Service || 18-04-2015 || Acworth || Georgia || 30101 || Closed """,
)


def load_prediction_models(model_file):
    """Deserialize and return the joblib model stored at ``model_file``.

    Args:
        model_file: Path of the serialized model.

    Returns:
        The object produced by ``joblib.load``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE: joblib.load unpickles its input; only load trusted model files.
    with open(model_file, "rb") as model_handle:
        return joblib.load(model_handle)


# Get the Keys
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value equals ``val``.

    Returns ``None`` when no value matches.
    """
    for key, value in my_dict.items():
        if val == value:
            return key
    return None


def _render_prediction():
    """Render the 'Prediction' page: classify one complaint with the chosen model."""
    st.info("Prediction with ML")
    complaints_text = st.text_area("Enter Complaints Here", "Type Here")
    model_choice = st.selectbox("Select Model", list(_MODEL_FILES))

    if not st.button("Classify"):
        return

    st.text("Original Text:\n{}".format(complaints_text))
    vect_text = complaints_cv.transform([complaints_text]).toarray()

    predictor = load_prediction_models(_MODEL_FILES[model_choice])
    prediction = predictor.predict(vect_text)
    st.write(prediction)

    # Exactly one row was passed to predict(), so look up its single label
    # rather than relying on the truthiness of a one-element array.
    # NOTE(review): assumes predict() returns an indexable sequence of labels.
    final_result = get_key(prediction[0], _PREDICTION_LABELS)
    st.success("Complaints Categorized as: {}".format(final_result))


def _run_nlp_task(task_choice, docx):
    """Return the JSON-serializable result of ``task_choice`` applied to ``docx``.

    Raises:
        ValueError: If ``task_choice`` is not one of the known task names.
    """
    if task_choice == 'Tokenization':
        return [token.text for token in docx]
    if task_choice == 'Lemmatization':
        return [
            "'Token':{},'Lemma':{}".format(token.text, token.lemma_)
            for token in docx
        ]
    if task_choice == 'Named Entity Recognition(NER)':
        return [(entity.text, entity.label_) for entity in docx.ents]
    if task_choice == 'Parts-of-Speech(POS) Tags':
        return [
            "'Token':{},'POS':{},'Dependency':{}".format(
                word.text, word.tag_, word.dep_
            )
            for word in docx
        ]
    raise ValueError("Unknown NLP task: {!r}".format(task_choice))


def _render_wordcloud(text):
    """Draw a word cloud of ``text``, or a warning when none can be built."""
    try:
        wordcloud = WordCloud().generate(text)
    except ValueError:
        # wordcloud raises ValueError when the text yields no usable words.
        st.warning("Not enough words to build a word cloud.")
        return

    # Draw on an explicit figure instead of pyplot's global one, so no
    # Streamlit deprecation option has to be toggled.
    fig, ax = plt.subplots()
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis("off")
    st.pyplot(fig)
    # Release the figure; otherwise one accumulates per script rerun.
    plt.close(fig)


def _render_nlp():
    """Render the 'NLP' page: token-level analysis, table view and word cloud."""
    st.info("Natural Language Processing of Text")
    raw_text = st.text_area("Enter Customer Complaints Here", "Type Here")
    task_choice = st.selectbox("Choose NLP Task", _NLP_TASKS)

    if st.button("Analyze"):
        st.info("Original Text:\n{}".format(raw_text))
        st.json(_run_nlp_task(task_choice, nlp(raw_text)))

    if st.button("Tabulize"):
        rows = [
            (token.text, token.lemma_, token.pos_) for token in nlp(raw_text)
        ]
        new_df = pd.DataFrame(rows, columns=['Tokens', 'Lemma', 'POS'])
        st.dataframe(new_df)

    if st.checkbox("WordCloud"):
        _render_wordcloud(raw_text)


def _render_about():
    """Render the 'About' page: project blurb and sample complaint records."""
    st.write("")
    st.subheader("About")
    st.write(_SEPARATOR)
    # Each heading sits on its own line so both '#' runs parse as headings.
    st.markdown(
        "\n"
        "### NLP Complaints Classifier With Different Models (With Streamlit)\n"
        "###### Python Tools Used: spacy, pandas, matplotlib, wordcloud, Pillow(PIL), Joblib\n"
    )
    st.write(_SEPARATOR)
    for record in _SAMPLE_COMPLAINTS:
        st.write(record)
    st.write(_SEPARATOR)


def main():
    """Telecom Complaints Classifier"""
    st.title("Comcast Telecom Complaints App")
    st.markdown(_HTML_TEMP, unsafe_allow_html=True)

    activity = ['Prediction', 'NLP', 'About']
    choice = st.sidebar.selectbox("Select Activity", activity)

    if choice == 'Prediction':
        _render_prediction()
    elif choice == 'NLP':
        _render_nlp()
    else:
        _render_about()


if __name__ == '__main__':
    main()