csm_score_calc / app.py
haiderlawati's picture
added all files: Dockerfile, app, and requirements
a654785
import pandas as pd
import panel as pn
from io import BytesIO
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
import openpyxl
class ExcelProcessorPanel:
def __init__(self):
self.file_input = pn.widgets.FileInput(accept=".xlsx, .xls", width=300)
self.process_button = pn.widgets.Button(name="Process and Save", width=300)
self.selected_file_pane = pn.pane.Markdown()
self.download_link = pn.widgets.FileDownload()
self.file_input.param.watch(self.load_file, 'filename')
self.process_button.on_click(self.process_and_save)
self.app = pn.Column(
"# Score Calculator App",
self.file_input,
self.process_button,
self.selected_file_pane,
self.download_link
)
def load_file(self, event):
if event.new:
file_name = event.new
self.file_path = BytesIO(self.file_input.value)
self.selected_file_pane.object = f"Selected File: {file_name}"
def clean_data(self, df):
# Clean up the sheet
# 1- replace all NaN with an empty string ""
# Replace NaN with ""
clean_data_no_null = df.where(pd.notnull(df), "")
clean_data = clean_data_no_null.copy()
# 2- Replace Pass to 1 and Defect to 0 in these 3 columns
clean_data["Has the Customer Been notified?"] = np.where(clean_data["Has the Customer Been notified?"]=="Pass",1,0)
clean_data["Has the follow up done Correctly?"] = np.where(clean_data["Has the follow up done Correctly?"]=="Pass",1,0)
clean_data["Did the advisor took the ownership in the ticket?"] = \
np.where(clean_data["Did the advisor took the ownership in the ticket?"]=="Pass",1,0)
# 3- replace NaN in column "Communication With the L2 clear and complete?" with 1
clean_data['Communication With the L2 clear and complete?'].replace("",1, inplace=True)
# 4- replace all "Pass" and "Defect" to 1
# 5- replace all "YES" and "NO" to 1
clean_data.replace(to_replace=["Yes", "YES", "NO"], value=1, inplace=True)
clean_data.replace(to_replace=["Pass", "Defect"], value=1, inplace=True)
return clean_data
def feature_extraction(self, data, findings_cols):
# turns body of text into a matrix of features
# split string on semicolons instead of spaces
vectorizer = CountVectorizer(tokenizer = lambda x: x.split(";"), lowercase=False)
for names in findings_cols:
X = vectorizer.fit_transform(data[names].values.astype('U'))
df_temp = pd.DataFrame(X.toarray(), columns=vectorizer.get_feature_names_out())
data.update(df_temp)
return data
def get_scores(self, data, ScoreList, findings_cols, sections_list):
# get the columns with values to be scored
for idx, names in enumerate(findings_cols):
# get the index of column names
idx_col = data.columns.get_loc(names)
temp_arr = data.iloc[:,idx_col+1:idx_col+1+len(sections_list[idx])]
if(idx==0):
array = temp_arr
else:
array = np.concatenate((array, temp_arr), axis=1)
scores_list = []
for idx, items in enumerate(sections_list):
for i in sections_list[idx].values():
scores_list.append(i)
# create negative score numpy array
neg_array = np.array(scores_list)*(-1)
neg_array = np.expand_dims(neg_array,0)
# compute Hadamard product between array and neg_score
# to obtain how much to calculate the negative
# score per line per item
neg_scores = np.multiply(array, neg_array)
Main_Questions = ["All Correspondence Sent/ Received Logged in the ticket\xa0", "Has the Customer Been notified?", \
"Has the follow up done Correctly?", "Communication With the customer clear and complete?",\
"Communication With the L2 clear and complete?", "Is the Advisor used Robotic Language", \
"Did the advisor took the ownership in the ticket?", "Process Adherence was followed in the case?", \
"There is Data Privacy Breach from the Advisor", \
"There is regrettable contacts in the ticket from the advisor?"]
# compute the positive values per line per item
for index, names in enumerate(Main_Questions):
# replace any empty cells in a column with 0
data.replace(to_replace=[""], value=0, inplace=True)
temp_arr = data[names].to_numpy()
temp_arr = np.expand_dims(temp_arr, 1)
if index == 0:
array2 = temp_arr
else:
array2 = np.concatenate((array2, temp_arr), axis=1)
ScoreListArray = np.array(ScoreList)
ScoreListArray = np.expand_dims(ScoreListArray, 0)
# print(ScoreListArray.shape)
pos_scores = np.multiply(array2, ScoreListArray)
total_score = np.sum(pos_scores,1)+np.sum(neg_scores,1)
total_score = np.expand_dims(total_score,1)
return total_score
def process_and_save(self, event):
if hasattr(self, 'file_path'):
try:
# Read Excel file
df = pd.read_excel(self.file_path)
# Read Excel file
df = pd.read_excel(self.file_path)
clean_data = self.clean_data(df)
findings_cols = ["Ticket Logging Finds", "Communication Findings", "Communication With the L2 Findings",\
"Robotic Language Findings", "Process Adherence Findings", "Data Privacy Breach Findings", \
"Regrettable contacts Findings?"]
Sec1TicketLog = {
"Wrong Company": 1,
"Missing Customer Statment" : 1,
"Wrong Summary Format" : 1,
"Wrong Assigned Group" : 1,
"Wrong Template" : 1,
"Description Error" : 1,
"Communication not Logged" : 1,
"Phone call details not logged" : 1,
"Attachment not added" : 1,
"3rd party ticket not logged" : 1,
"Reference ID not added to the correspondance" : 1,
"Email Received Email/ time not added" : 1,
"Wrong Channel/ System Wrong Assignment" : 1,
"Incorrect Resolution Subcategory" : 1,
"No Standardized Global Process for Checking with Customer Prior Resolving/ Closing Ticket" : 1,
"No Standardized Global Process to Update Customer on Open Ticket Progress" : 1,
"Delayed Action taken on the ticket" : 1
}
Sec2CommCustomer = {
"E-mail Format (font used, font color, font size)" : 1,
"Grammar Mistakes" : 1,
"Using Jargons" : 1,
"Negative Language " : 1,
"Invalid Case Thread" : 1,
"Incomplete Information Provided" : 1,
"Use Of personal Mailbox" : 1,
"No Response Sent" : 1,
"Update received on Ticket and was not communicated" : 1,
"Not Acknowledged on the ticket opening" : 1,
"Lack of Customer Empathy" : 1,
"Use of Non professionalized replies" : 1,
"No Extra Mile (Calls/ chat/ etc...)" : 1,
"Ticket Process Flow is not communicated " : 1,
"Delayed response sent to the customer" : 1,
"Missing Response" : 1,
"Delayed Response sent to 3rd party" : 1,
"No Follow up" : 1
}
Sec3CommL2 = {
"Non-Customer email format(font/color/ size)" : 1,
"Non-Customer- Grammer Mistakes" : 1,
"Non-Customer - Using Jargons" : 1,
"Non-Customer- Negative language" : 1,
"Non-Customer- Ordering Language" : 1,
"Non-Customer-Sent on wrong case thread" : 1,
"Non-Customer- Incomplete information provided" : 1,
"Non- Customer - Incorrect information provided" : 1,
"Non-customer Case not sent on most updated thread" : 1,
"Reply Sent from Personal Mailbox to 3rd Party Team" : 1,
"Email body-subject mismatch" : 1,
"Comm L2- Delayed Action taken on the ticket" : 1
}
Sec4RoboticLang = {
"Robotics- Lack of Customer Empathy" : 2,
"Use of non-personalized replies" : 2
}
Sec6ProcAdher = {
"Ticket escalated to wrong 3 party team" : 1,
"Ticket escalation process not followed" : 1,
"Ticket raised with missing info." : 1,
"Task/ Reports not done on time" : 1,
"No Action Taken" : 1,
"Wrong Case priority" : 1,
"Incorrect Ticket Status Assigned" : 1,
"Ticket Set to Resolved and not Closed" : 1,
"Tickets not Related on System" : 1,
"Ticket History not Acknowledged" : 1,
"Ticket SLA stopped without valid Reason" : 1
}
Sec7DataPrivBrch = {
"Password Requested from Customer" : 2,
"Sending Sensitive Customer Data to Incorrect Internal Team" : 2,
"Sending Sensitive Customer Data to Another Customer" : 2,
"Sending Sensitive Customer Data to Incorrect External Team" : 2
}
Sec8RegretContact = {
"Missing Info" : 2,
"Regret Contact- Wrong Template" : 2,
"Unnecessary Pending Reason" : 2,
"Resolution Confirmation" : 2,
"Issue Not Resolved" : 2,
"Received Update By Customer no action taken" : 2,
"Customer Chasing Ticket" : 2
}
sections_list = [Sec1TicketLog, Sec2CommCustomer, Sec3CommL2, Sec4RoboticLang,
Sec6ProcAdher, Sec7DataPrivBrch, Sec8RegretContact]
data = clean_data.copy()
# insert columns to the existing sheet
for idx, names in enumerate(findings_cols):
for idxx, items in enumerate(sections_list[idx].keys()):
data.insert(data.columns.get_loc(names) + (idxx+1), column = items, value=0)
# Score per section
# Section 1:
N1 = sum(Sec1TicketLog.values())
# Section 2:
N2_1 = 5
N2_2 = 5
N2_3 = sum(Sec2CommCustomer.values())
N2 = N2_1 + N2_2 + N2_3
# Section 3:
N3 = sum(Sec3CommL2.values())
# Section 4:
N4 = sum(Sec4RoboticLang.values())
# Section 5:
N5 = 6
# Section 6:
N6 = sum(Sec6ProcAdher.values())
# Section 7:
N7 = sum(Sec7DataPrivBrch.values())
# Section 8:
N8 = sum(Sec8RegretContact.values())
ScoreList = [N1, N2_1, N2_2, N2_3, N3, N4, N5, N6, N7, N8]
data = self.feature_extraction(data, findings_cols)
scores_col = self.get_scores(data, ScoreList, findings_cols, sections_list)
clean_data_no_null = df.where(pd.notnull(df), "")
final_data = clean_data_no_null.copy()
final_data = final_data.assign(Score=scores_col)
final_data.to_excel("final_data.xlsx")
self.download_link.file = "final_data.xlsx"
self.download_link.filename = "processed_data.xlsx"
self.download_link
except Exception as e:
pn.pane.Alert(f"An error occurred: {str(e)}", alert_type='danger').show()
else:
pn.pane.Alert("Please select an Excel file first.", alert_type='warning').show()
#if __name__ == "__main__":
app = ExcelProcessorPanel()
app.app.servable()