"""Gradio app that cleans call center logs, plots statistics, and exports a PDF report."""

import html
import io
import logging
import os

import gradio as gr
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from reportlab.lib.pagesizes import letter
from reportlab.lib.utils import ImageReader
from reportlab.pdfgen import canvas

# Gradio runs event handlers in worker threads; GUI backends may only create
# figures on the main thread, so force the non-interactive backend.
matplotlib.use("Agg")

logger = logging.getLogger(__name__)

# Load the call center logs CSV (assumed to be uploaded to the Space)
CSV_FILE_PATH = "call_center_logs.csv"

# Columns clean_data() reads; checked up front so a bad CSV fails with one
# clear message instead of a bare KeyError halfway through the cleanup.
_CRITICAL_COLUMNS = ['query', 'resolution', 'duration_minutes', 'satisfaction_score']
_NUMERIC_COLUMNS = ['duration_minutes', 'satisfaction_score']
_REQUIRED_COLUMNS = _CRITICAL_COLUMNS + ['call_id', 'timestamp', 'language']


# Data cleanup function
def clean_data(df):
    """Clean raw call center logs and write the result to disk.

    Rows are dropped, in order, when they: have a null (or non-numeric
    duration/score) in a critical column, repeat an earlier ``call_id``, have
    a query or resolution shorter than 5 characters, have a query matching
    the malformed pattern, or have an unparseable ``timestamp``. Missing
    ``language`` values are filled with ``'en'``.

    Args:
        df: Raw logs. Not modified; the function works on a copy.

    Returns:
        A ``(cleaned_df, cleanup_details, cleaned_path)`` tuple where
        ``cleanup_details`` maps each cleanup step to the number of rows it
        removed (plus ``original``, ``cleaned`` and ``removed`` totals) and
        ``cleaned_path`` is the CSV file the cleaned frame was written to.

    Raises:
        KeyError: If any required column is missing from ``df``.
    """
    missing = [column for column in _REQUIRED_COLUMNS if column not in df.columns]
    if missing:
        raise KeyError(f"Missing required column(s): {', '.join(missing)}")

    original_count = len(df)
    cleanup_details = {
        'original': original_count,
        'nulls_removed': 0,
        'duplicates_removed': 0,
        'short_removed': 0,
        'malformed_removed': 0,
        'invalid_timestamps': 0
    }

    # Work on a copy so the caller's frame is untouched and later column
    # assignments never hit a view (SettingWithCopyWarning).
    df = df.copy()

    # Convert duration and satisfaction score to numeric *before* the null
    # check, so non-numeric values are removed instead of surviving as NaN.
    for column in _NUMERIC_COLUMNS:
        df[column] = pd.to_numeric(df[column], errors='coerce')

    # Remove nulls in critical columns
    null_rows = df[_CRITICAL_COLUMNS].isna().any(axis=1)
    cleanup_details['nulls_removed'] = int(null_rows.sum())
    df = df[~null_rows]

    # Remove duplicates based on call_id
    duplicate_rows = df['call_id'].duplicated()
    cleanup_details['duplicates_removed'] = int(duplicate_rows.sum())
    df = df[~duplicate_rows]

    # Remove short queries (cast to str so non-text columns do not raise)
    short_rows = (
        (df['query'].astype(str).str.len() < 5)
        | (df['resolution'].astype(str).str.len() < 5)
    )
    cleanup_details['short_removed'] = int(short_rows.sum())
    df = df[~short_rows]

    # Remove malformed queries; the group is non-capturing because
    # str.contains warns when the pattern has match groups.
    malformed_rows = df['query'].astype(str).str.contains(
        r'[!?]{2,}|\b(?:Invalid|N/A)\b', regex=True, case=False, na=False
    )
    cleanup_details['malformed_removed'] = int(malformed_rows.sum())
    df = df[~malformed_rows]

    # Validate and clean timestamps
    invalid_timestamps = pd.to_datetime(df['timestamp'], errors='coerce').isna()
    cleanup_details['invalid_timestamps'] = int(invalid_timestamps.sum())
    df = df[~invalid_timestamps].copy()

    # Standardize language (fill missing with 'en')
    df['language'] = df['language'].fillna('en')

    cleaned_count = len(df)
    cleanup_details['cleaned'] = cleaned_count
    cleanup_details['removed'] = original_count - cleaned_count

    # Save cleaned CSV for SageMaker/Azure AI
    cleaned_path = 'cleaned_call_center_logs.csv'
    df.to_csv(cleaned_path, index=False)

    return df, cleanup_details, cleaned_path


def _render_plot(draw, title, xlabel, ylabel, path):
    """Draw one chart on a fresh figure, save it to ``path`` and return ``path``."""
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        draw(ax)
        ax.set_title(title)
        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)
        fig.savefig(path)
    finally:
        # Always release the figure, even when plotting raises.
        plt.close(fig)
    return path


# Statistical plotting function
def plot_statistics(df):
    """Render the three summary charts for the cleaned logs as PNG files.

    Args:
        df: Cleaned logs with ``duration_minutes``, ``agent_id``,
            ``satisfaction_score`` and ``language`` columns.

    Returns:
        The PNG paths in the order: duration distribution, satisfaction by
        agent, query frequency by language.
    """
    return [
        # Plot 1: Distribution of Call Durations
        _render_plot(
            lambda ax: sns.histplot(
                df['duration_minutes'], bins=20, kde=True, color='skyblue', ax=ax
            ),
            'Distribution of Call Durations',
            'Duration (minutes)',
            'Frequency',
            'duration_distribution.png',
        ),
        # Plot 2: Satisfaction Scores by Agent
        _render_plot(
            lambda ax: sns.boxplot(
                x='agent_id', y='satisfaction_score', data=df, color='lightblue', ax=ax
            ),
            'Satisfaction Scores by Agent',
            'Agent ID',
            'Satisfaction Score',
            'satisfaction_by_agent.png',
        ),
        # Plot 3: Query Frequency by Language
        _render_plot(
            lambda ax: sns.countplot(x='language', data=df, color='skyblue', ax=ax),
            'Query Frequency by Language',
            'Language',
            'Number of Queries',
            'query_by_language.png',
        ),
    ]


# Generate PDF report
def generate_pdf_report(cleanup_details, plot_paths):
    """Write a PDF containing the cleanup statistics followed by the plots.

    Args:
        cleanup_details: Mapping of statistic name to value; each entry is
            printed as one ``Title Cased Name: value`` line.
        plot_paths: Image files to embed, in order. Paths that do not exist
            are skipped.

    Returns:
        The path of the PDF file that was written.
    """
    pdf_path = 'data_analysis_report.pdf'
    c = canvas.Canvas(pdf_path, pagesize=letter)
    width, height = letter

    # Title
    c.setFont("Helvetica-Bold", 16)
    c.drawString(50, height - 50, "Call Center Data Analysis Report")

    # Cleanup Stats
    c.setFont("Helvetica", 12)
    y_position = height - 80
    c.drawString(50, y_position, "Data Cleanup Statistics:")
    y_position -= 20
    for key, value in cleanup_details.items():
        c.drawString(70, y_position, f"{key.replace('_', ' ').title()}: {value}")
        y_position -= 15

    # Add Plots
    y_position -= 30
    plot_width = 500
    for plot_path in plot_paths:
        if not os.path.exists(plot_path):
            continue
        img = ImageReader(plot_path)
        img_width, img_height = img.getSize()
        # Scale to a fixed width while preserving the image's aspect ratio.
        plot_height = plot_width * (img_height / float(img_width))
        # Start a new page when the image would run into the bottom margin.
        if y_position - plot_height < 50:
            c.showPage()
            y_position = height - 50
        c.drawImage(img, 50, y_position - plot_height, width=plot_width, height=plot_height)
        y_position -= plot_height + 20

    c.save()
    return pdf_path


# Main analysis function
def analyze_data():
    """Run the full pipeline: load the CSV, clean it, plot it and build the PDF.

    Returns:
        A 7-tuple matching the Gradio outputs: HTML table of the first 50
        cleaned rows, cleanup statistics text, three plot image paths, the
        cleaned CSV path and the PDF report path. On failure the first item
        is an ``Error: ...`` message, the second is empty and the remaining
        five are ``None``.
    """
    try:
        # Load the CSV
        df = pd.read_csv(CSV_FILE_PATH)

        # Clean the data
        cleaned_df, cleanup_details, cleaned_path = clean_data(df)

        # Generate statistical plots
        plot_paths = plot_statistics(cleaned_df)

        # Generate PDF report
        pdf_path = generate_pdf_report(cleanup_details, plot_paths)

        # Prepare cleanup stats for display
        cleanup_stats = "\n".join(
            f"{key.replace('_', ' ').title()}: {value}"
            for key, value in cleanup_details.items()
        )

        return (
            cleaned_df.head(50).to_html(),  # Display first 50 rows as a table
            cleanup_stats,
            plot_paths[0],  # Duration distribution
            plot_paths[1],  # Satisfaction by agent
            plot_paths[2],  # Query by language
            cleaned_path,  # File outputs take a plain path as their value
            pdf_path,
        )
    except Exception as e:
        # Top-level UI boundary: keep the traceback in the logs and show the
        # user an escaped message (this string is rendered as HTML).
        logger.exception("Call center analysis failed")
        return f"Error: {html.escape(str(e))}", "", None, None, None, None, None


# Gradio interface
custom_css = """
body {
    background: linear-gradient(135deg, #1a1a1a 0%, #2a2a2a 100%);
    color: #e0e0e0;
    font-family: 'Arial', sans-serif;
    display: flex;
    justify-content: center;
    align-items: center;
    min-height: 100vh;
    margin: 0;
}
.gr-box {
    background: #3a3a3a;
    border: 1px solid #4a4a4a;
    border-radius: 8px;
    padding: 20px;
    box-shadow: 0 2px 4px rgba(0, 0, 0, 0.3);
}
.gr-button {
    background: #1e90ff;
    color: white;
    border-radius: 5px;
    padding: 12px 20px;
    margin: 8px 0;
    width: 100%;
    text-align: center;
    transition: background 0.3s ease;
    font-size: 16px;
}
.gr-button:hover {
    background: #1c86ee;
    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.2);
}
.gr-textbox {
    background: #2f2f2f;
    color: #e0e0e0;
    border: 1px solid #4a4a4a;
    border-radius: 5px;
    margin-bottom: 15px;
    font-size: 16px;
    padding: 15px;
    min-height: 120px;
    width: 100%;
}
.gr-image {
    width: 100%;
    height: auto;
    max-height: 400px;
}
#app-container {
    max-width: 900px;
    width: 100%;
    padding: 20px;
    background: #252525;
    border-radius: 12px;
    box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5);
}
.text-center {
    text-align: center;
    margin-bottom: 20px;
}
"""

with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_id="app-container"):
        gr.Markdown("# Call Center Data Analysis", elem_classes="text-center")
        gr.Markdown(
            "Analyze call center logs, view statistics, and export cleaned data for SageMaker/Azure AI.",
            elem_classes="text-center",
        )

        # Button to trigger analysis
        analyze_button = gr.Button("Analyze Data")

        # Outputs (the table previews the cleaned rows, not the raw input)
        data_preview_output = gr.HTML(label="Cleaned Data (First 50 Rows)")
        cleanup_stats_output = gr.Textbox(label="Data Cleanup Statistics")
        duration_plot_output = gr.Image(label="Distribution of Call Durations")
        satisfaction_plot_output = gr.Image(label="Satisfaction Scores by Agent")
        language_plot_output = gr.Image(label="Query Frequency by Language")
        csv_download = gr.File(label="Download Cleaned CSV")
        pdf_download = gr.File(label="Download PDF Report")

        # Connect the button to the analysis function
        analyze_button.click(
            fn=analyze_data,
            inputs=None,
            outputs=[
                data_preview_output,
                cleanup_stats_output,
                duration_plot_output,
                satisfaction_plot_output,
                language_plot_output,
                csv_download,
                pdf_download,
            ],
        )

# Launch only when run as a script so importing the module has no side effects.
if __name__ == "__main__":
    demo.launch()