# File size: 5,690 Bytes
#
# c9665af

import streamlit as st
import joblib,os
import scipy
import spacy
import pandas as pd
nlp = spacy.load("en_core_web_sm")
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use("Agg")
from wordcloud import WordCloud


# Load the serialized vectorizer once at import time. The context manager
# closes the file handle as soon as joblib has finished reading from it
# (the original left the handle open for the life of the process).
# NOTE(review): joblib.load unpickles arbitrary objects -- only point this
# at model files you trust.
with open("models/tfidf_vect.joblib","rb") as complaints_vectorizer:
	complaints_cv = joblib.load(complaints_vectorizer)

def load_prediction_models(model_file):
	"""Load and return the joblib-serialized object stored at ``model_file``.

	Args:
		model_file: Path of the serialized model file to read.

	Returns:
		Whatever object ``joblib.load`` deserializes from the file.

	Raises:
		OSError: If the file cannot be opened (e.g. it does not exist).
	"""
	# The ``with`` block guarantees the handle is closed even if
	# deserialization fails; the original never closed it.
	# NOTE(review): joblib.load unpickles arbitrary objects -- only load
	# trusted model files.
	with open(model_file,"rb") as model_handle:
		return joblib.load(model_handle)

# Get the Keys
def get_key(val,my_dict):
	"""Reverse-lookup: return the first key of ``my_dict`` mapped to ``val``.

	Entries are scanned in the dictionary's iteration order and the first
	key whose value compares equal to ``val`` wins. ``None`` is returned
	when no value matches.
	"""
	matching_keys = (
		name for name, candidate in my_dict.items() if val == candidate
	)
	return next(matching_keys, None)




def _render_prediction_page():
	"""Render the 'Prediction' page: classify the entered complaint text."""
	st.info("Prediction with ML")
	complaints_text = st.text_area("Enter Complaints Here","Type Here")

	# Display name -> serialized estimator on disk. One table replaces the
	# duplicated per-model load/predict branches.
	model_files = {
		"Decision Tree": "models/dtcpred.joblib",
		"GradientBoost": "models/gbcpred.joblib",
	}
	model_choice = st.selectbox("Select Model",list(model_files))

	prediction_labels = {'Closed': 0, 'Open': 1, 'Pending': 2, 'Solved': 3}
	if not st.button("Classify"):
		return

	st.text("Original Text:\n{}".format(complaints_text))
	vect_text = complaints_cv.transform([complaints_text]).toarray()
	predictor = load_prediction_models(model_files[model_choice])
	prediction = predictor.predict(vect_text)
	st.write(prediction)

	# Exactly one row was passed to predict(); take its single entry rather
	# than comparing the whole array against each label code.
	# NOTE(review): assumes predict() returns one label per input row
	# (scikit-learn convention) -- confirm against the saved models.
	predicted_label = prediction[0]
	final_result = get_key(predicted_label,prediction_labels)
	if final_result is None:
		# Unknown code (or the model already emits label names): show the
		# raw prediction instead of the string "None".
		final_result = predicted_label
	st.success("Complaints Categorized as: {}".format(final_result))


def _render_nlp_page():
	"""Render the 'NLP' page: token-level analyses, a table and a word cloud."""
	st.info("Natural Language Processing of Text")
	raw_text = st.text_area("Enter Customer Complaints Here","Type Here")
	nlp_task = ["Tokenization","Lemmatization","Named Entity Recognition(NER)","Parts-of-Speech(POS) Tags"]
	task_choice = st.selectbox("Choose NLP Task",nlp_task)

	if st.button("Analyze"):
		st.info("Original Text:\n{}".format(raw_text))

		docx = nlp(raw_text)
		if task_choice == 'Tokenization':
			result = [token.text for token in docx]
		elif task_choice == 'Lemmatization':
			result = ["'Token':{},'Lemma':{}".format(token.text,token.lemma_) for token in docx]
		elif task_choice == 'Named Entity Recognition(NER)':
			result = [(entity.text,entity.label_) for entity in docx.ents]
		else:
			# Only remaining option: 'Parts-of-Speech(POS) Tags'.
			result = ["'Token':{},'POS':{},'Dependency':{}".format(word.text,word.tag_,word.dep_) for word in docx]

		st.json(result)

	if st.button("Tabulize"):
		docx = nlp(raw_text)
		c_tokens = [token.text for token in docx]
		c_lemma = [token.lemma_ for token in docx]
		c_pos = [token.pos_ for token in docx]

		new_df = pd.DataFrame(zip(c_tokens,c_lemma,c_pos),columns=['Tokens','Lemma','POS'])
		st.dataframe(new_df)

	if st.checkbox("WordCloud"):
		try:
			wordcloud = WordCloud().generate(raw_text)
		except ValueError:
			# WordCloud raises ValueError when the text yields no words
			# (e.g. an empty text area); tell the user instead of crashing.
			st.warning("Enter some text containing at least one word to generate a word cloud.")
		else:
			# Draw on an explicit figure and hand it to st.pyplot: this
			# avoids the deprecated global-figure mode and the
			# 'deprecation.showPyplotGlobalUse' option that mode required.
			fig, ax = plt.subplots()
			ax.imshow(wordcloud,interpolation='bilinear')
			ax.axis("off")
			st.pyplot(fig)
			# Close the figure so repeated renders do not accumulate
			# open pyplot figures.
			plt.close(fig)


def _render_about_page():
	"""Render the 'About' page: tool list and a few sample complaint rows."""
	st.write("")
	st.subheader("About")
	st.write("""**************************************************************************""")
	st.markdown("""
        ### NLP Complaints Classifier With Different Models (With Streamlit)
        ###### Python Tools Used: spacy, pandas, matplotlib, wordcloud, Pillow(PIL), Joblib
        """)
	st.write("""**************************************************************************""")
	st.write("""
        361148 || Throttling service and unreasonable data caps	 || 24-06-2015 || Acworth || Georgia || 30101 || Pending  
        """)
	st.write("""
        359792 || Comcast refuses to help troubleshoot and correct my service. || 23-06-2015 || Adrian || Michigan || 49221 || Solved 
        """)
	st.write("""
        371214 || Comcast Raising Prices and Not Being Available To Ask Why || 28-06-2015 || Alameda || California || 94501 || Open  
        """)
	st.write("""
        242732 || Speed and Service || 18-04-2015 || Acworth || Georgia || 30101 || Closed
        """)
	st.write("""**************************************************************************""")


def main():
	"""Telecom Complaints Classifier.

	Entry point of the Streamlit app: draws the title and banner, then
	renders the page picked in the sidebar ('Prediction', 'NLP' or 'About').
	"""
	st.title("Comcast Telecom Complaints App")

	# Layout Templates
	html_temp = """
	<div style="background-color:#D5CC8F;padding:10px;border-radius:10px;margin:10px;">
	<h1 style="color:white;text-align:center;"> ML - Telecom Complaints Classifier </h1>
	<img src="https://www.w3schools.com/howto/img_avatar.png" alt="Avatar" style="vertical-align: middle;width: 50px;height: 50px;border-radius: 50%;" >
	<p style="text-align:justify">{}</p>
	</div>
	"""

	# The template has a single '{}' slot for body text. No body text is
	# defined, so fill it with an empty string; rendering the raw template
	# would show a literal "{}" in the banner.
	st.markdown(html_temp.format(""),unsafe_allow_html=True)

	activity = ['Prediction','NLP','About']
	choice = st.sidebar.selectbox("Select Activity",activity)

	if choice == 'Prediction':
		_render_prediction_page()
	elif choice == 'NLP':
		_render_nlp_page()
	else:
		_render_about_page()


# Start the app only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
	main()