RathodHarish committed · verified
Commit dc56f41 · 1 Parent(s): d0b0f06

Update app.py

Files changed (1)
  1. app.py +333 -671
app.py CHANGED
@@ -1,67 +1,233 @@
-"""
-LabOps Log Analyzer Dashboard with CSV file upload, PDF generation, and email alerts
-"""
 import gradio as gr
 import pandas as pd
 from datetime import datetime, timedelta
 import logging
 import plotly.express as px
 from sklearn.ensemble import IsolationForest
 from concurrent.futures import ThreadPoolExecutor
 import os
 import io
-import smtplib
-from email.mime.text import MIMEText
-from email.mime.multipart import MIMEMultipart
-from email.mime.application import MIMEApplication
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
 # Try to import reportlab
 try:
     from reportlab.lib.pagesizes import letter
-    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
     from reportlab.lib.styles import getSampleStyleSheet
     reportlab_available = True
     logging.info("reportlab module successfully imported")
 except ImportError:
     logging.warning("reportlab module not found. PDF generation disabled.")
     reportlab_available = False
 
-# Generate summary and insights without Hugging Face model
-def generate_summary_and_insights(df):
     try:
         total_devices = df["device_id"].nunique()
         most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
-        avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
-        summary = f"Maintenance logs for {total_devices} devices. Most used: {most_used}."
-        insights = f"{total_devices} devices, average usage {avg_usage:.2f} hours."
-        return summary, f"Insights: {insights}"
     except Exception as e:
-        logging.error(f"Summary and insights generation failed: {str(e)}")
-        return f"Failed to generate summary: {str(e)}", f"Failed to generate insights: {str(e)}"
 
 # Anomaly detection
 def detect_anomalies(df):
     try:
         if "usage_hours" not in df.columns or "downtime" not in df.columns:
             return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
-        if len(df) > 1000:
-            df = df.sample(n=1000, random_state=42)
         features = df[["usage_hours", "downtime"]].fillna(0)
-        iso_forest = IsolationForest(contamination=0.1, random_state=42, n_jobs=-1)
         df["anomaly"] = iso_forest.fit_predict(features)
         anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
         if anomalies.empty:
             return "No anomalies detected.", anomalies
-        anomaly_lines = ["Detected Anomalies:"]
-        for _, row in anomalies.head(5).iterrows():
-            anomaly_lines.append(
-                f"- Device ID: {row['device_id']}, Usage Hours: {row['usage_hours']}, "
-                f"Downtime: {row['downtime']}, Timestamp: {row['timestamp']}"
-            )
-        return "\n".join(anomaly_lines), anomalies
     except Exception as e:
         logging.error(f"Anomaly detection failed: {str(e)}")
         return f"Anomaly detection failed: {str(e)}", pd.DataFrame()
@@ -69,387 +235,153 @@ def detect_anomalies(df):
 # AMC reminders
 def check_amc_reminders(df, current_date):
     try:
-        logging.info(f"Input DataFrame for AMC reminders:\n{df.head().to_string()}")
         if "device_id" not in df.columns or "amc_date" not in df.columns:
-            logging.warning("Missing 'device_id' or 'amc_date' columns for AMC reminders.")
             return "AMC reminders require 'device_id' and 'amc_date' columns.", pd.DataFrame()
-
         df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
-        if df["amc_date"].dt.tz is None:
-            logging.info("Localizing naive AMC dates to IST")
-            df["amc_date"] = df["amc_date"].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
-
-        current_date = pd.to_datetime(current_date).tz_localize('Asia/Kolkata')
-        logging.info(f"Current date for AMC check: {current_date}")
-
         df["days_to_amc"] = (df["amc_date"] - current_date).dt.days
-        logging.info(f"Days to AMC:\n{df[['device_id', 'amc_date', 'days_to_amc']].to_string()}")
-
         reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]]
         if reminders.empty:
-            logging.info("No AMC reminders found within the next 30 days.")
             return "No AMC reminders due within the next 30 days.", reminders
-
-        reminder_lines = ["Upcoming AMC Reminders:"]
-        for _, row in reminders.head(5).iterrows():
-            reminder_lines.append(f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}")
-        logging.info(f"Found {len(reminders)} AMC reminders: {reminder_lines}")
-        return "\n".join(reminder_lines), reminders
     except Exception as e:
         logging.error(f"AMC reminder generation failed: {str(e)}")
         return f"AMC reminder generation failed: {str(e)}", pd.DataFrame()
 
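Stripped of logging, both versions compute the same 0–30 day reminder window over `amc_date`; the removed lines additionally localized everything to Asia/Kolkata before subtracting. A standalone sketch of the window computation (dates invented):

```python
import pandas as pd

today = pd.Timestamp("2025-05-01")
df = pd.DataFrame({
    "device_id": ["D1", "D2", "D3"],
    "amc_date": ["2025-05-10", "2025-07-01", "not a date"],
})

# errors='coerce' turns unparseable dates into NaT, which then fail both bounds
df["amc_date"] = pd.to_datetime(df["amc_date"], errors="coerce")
df["days_to_amc"] = (df["amc_date"] - today).dt.days
due = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)]
print(due)  # only D1 lands inside the 30-day window
```
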
 # Create usage chart
-def create_usage_chart(agg_data):
     try:
-        usage_data = agg_data['usage_per_device']
-        logging.info(f"Usage data for chart: {usage_data.to_string()}")
-        if usage_data.empty:
-            logging.warning("Usage data is empty.")
             return None
         if len(usage_data) > 5:
             usage_data = usage_data.nlargest(5, "usage_hours")
-
-        q75, q25 = usage_data["usage_hours"].quantile([0.75, 0.25])
-        iqr = q75 - q25
-        spike_threshold = q75 + 1.5 * iqr
-        usage_data["color"] = usage_data["usage_hours"].apply(
-            lambda x: "red" if x > spike_threshold else "teal"
-        )
-
         fig = px.bar(
             usage_data,
             x="device_id",
             y="usage_hours",
-            title="Usage Hours per Device (Red = Usage Spike)",
-            labels={"device_id": "Device ID", "usage_hours": "Usage Hours"},
-            color="color",
-            color_discrete_map={"teal": "#4ECDC4", "red": "#FF0000"}
-        )
-        fig.update_traces(
-            marker_line_color='#333333',
-            marker_line_width=1.5,
-            opacity=0.9
-        )
-        fig.update_layout(
-            title_font=dict(size=18, family="Arial", color="#333333"),
-            font=dict(family="Arial", size=12, color="#333333"),
-            plot_bgcolor="white",
-            paper_bgcolor="white",
-            margin=dict(l=30, r=30, t=50, b=30),
-            xaxis=dict(
-                title="Device ID",
-                showgrid=False,
-                tickangle=45,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            yaxis=dict(
-                title="Usage Hours",
-                gridcolor="#E5E5E5",
-                gridwidth=1,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            showlegend=False,
-            bargap=0.2
         )
         return fig
     except Exception as e:
         logging.error(f"Failed to create usage chart: {str(e)}")
         return None
 
-# Create downtime chart (fixed syntax error)
-def create_downtime_chart(agg_data):
     try:
-        downtime_data = agg_data['downtime_per_device']
-        logging.info(f"Downtime data for chart: {downtime_data.to_string()}")
-        if downtime_data.empty:
-            logging.warning("Downtime data is empty.")
-            return None
         if len(downtime_data) > 5:
             downtime_data = downtime_data.nlargest(5, "downtime")
-
-        q75, q25 = downtime_data["downtime"].quantile([0.75, 0.25])
-        iqr = q75 - q25
-        spike_threshold = q75 + 1.5 * iqr
-        downtime_data["color"] = downtime_data["downtime"].apply(
-            lambda x: "red" if x > spike_threshold else "green"
-        )
-
         fig = px.bar(
             downtime_data,
             x="device_id",
             y="downtime",
-            title="Downtime per Device (Red = Downtime Spike)",
-            labels={"device_id": "Device ID", "downtime": "Downtime (Hours)"},
-            color="color",
-            color_discrete_map={"green": "#96CEB4", "red": "#FF0000"}
-        )
-        fig.update_traces(
-            marker_line_color='#333333',
-            marker_line_width=1.5,
-            opacity=0.9
-        )
-        fig.update_layout(
-            title_font=dict(size=18, family="Arial", color="#333333"),
-            font=dict(family="Arial", size=12, color="#333333"),
-            plot_bgcolor="white",
-            paper_bgcolor="white",
-            margin=dict(l=30, r=30, t=50, b=30),
-            xaxis=dict(
-                title="Device ID",
-                showgrid=False,
-                tickangle=45,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            yaxis=dict(
-                title="Downtime (Hours)",
-                gridcolor="#E5E5E5",
-                gridwidth=1,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            showlegend=False,
-            bargap=0.2
         )
         return fig
     except Exception as e:
         logging.error(f"Failed to create downtime chart: {str(e)}")
         return None
 
-# Create Daily Log Trends chart (area chart with markers)
 def create_daily_log_trends_chart(df):
     try:
-        if df.empty or 'timestamp' not in df.columns:
-            logging.warning("DataFrame is empty or missing 'timestamp' column for Daily Log Trends.")
-            return None
-
-        # Group by date to count logs per day
         df['date'] = df['timestamp'].dt.date
-        log_counts = df.groupby('date').size().reset_index(name='log_count')
-
-        fig = px.area(
-            log_counts,
             x='date',
             y='log_count',
             title="Daily Log Trends",
             labels={"date": "Date", "log_count": "Number of Logs"}
         )
-        # Add markers
-        fig.update_traces(
-            fill='tozeroy',
-            line_color='#4ECDC4',
-            line_width=2,
-            mode='lines+markers',
-            marker=dict(size=8, color='#4ECDC4', line=dict(width=1, color='#333333')),
-            fillcolor='rgba(78, 205, 196, 0.3)'  # Gradient fill with transparency
-        )
-        fig.update_layout(
-            title_font=dict(size=18, family="Arial", color="#333333"),
-            font=dict(family="Arial", size=12, color="#333333"),
-            plot_bgcolor="white",
-            paper_bgcolor="white",
-            margin=dict(l=30, r=30, t=50, b=30),
-            xaxis=dict(
-                title="Date",
-                showgrid=False,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            yaxis=dict(
-                title="Number of Logs",
-                gridcolor="#E5E5E5",
-                gridwidth=1,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            )
-        )
         return fig
     except Exception as e:
-        logging.error(f"Failed to create Daily Log Trends chart: {str(e)}")
         return None
 
-# Create Weekly Uptime Percentage chart
 def create_weekly_uptime_chart(df):
     try:
-        if df.empty or 'timestamp' not in df.columns or 'downtime' not in df.columns:
-            logging.warning("DataFrame is empty or missing required columns for Weekly Uptime Percentage.")
-            return None
-
-        logging.info(f"DataFrame for Weekly Uptime:\n{df[['timestamp', 'downtime']].to_string()}")
-
-        # Group by week (handle pandas 2.x compatibility)
-        try:
-            df['week'] = df['timestamp'].dt.isocalendar().week
-        except AttributeError:
-            # For pandas 2.x, use .dt.weekofyear or manual calculation
-            df['week'] = df['timestamp'].dt.isocalendar()['week']
         df['year'] = df['timestamp'].dt.year
         weekly_data = df.groupby(['year', 'week']).agg({
             'downtime': 'sum'
         }).reset_index()
-
-        logging.info(f"Weekly data:\n{weekly_data.to_string()}")
-
-        # Calculate uptime percentage (assuming 24*7 = 168 hours per week)
-        total_hours_per_week = 168
-        weekly_data['uptime_percentage'] = ((total_hours_per_week - weekly_data['downtime']) / total_hours_per_week) * 100
-        weekly_data['uptime_percentage'] = weekly_data['uptime_percentage'].clip(0, 100)  # Ensure percentage is between 0 and 100
-        weekly_data['week_label'] = weekly_data.apply(lambda x: f"{x['year']}-W{x['week']:02d}", axis=1)
-
-        if weekly_data.empty:
-            logging.warning("No weekly data available for Weekly Uptime Percentage chart.")
-            return None
-
         fig = px.bar(
             weekly_data,
-            x='week_label',
-            y='uptime_percentage',
             title="Weekly Uptime Percentage",
-            labels={"week_label": "Week", "uptime_percentage": "Uptime Percentage (%)"},
-            color='uptime_percentage',
-            color_continuous_scale=['#FF0000', '#96CEB4']
-        )
-        fig.update_traces(
-            marker_line_color='#333333',
-            marker_line_width=1.5,
-            opacity=0.9
-        )
-        fig.update_layout(
-            title_font=dict(size=18, family="Arial", color="#333333"),
-            font=dict(family="Arial", size=12, color="#333333"),
-            plot_bgcolor="white",
-            paper_bgcolor="white",
-            margin=dict(l=30, r=30, t=50, b=30),
-            xaxis=dict(
-                title="Week",
-                showgrid=False,
-                tickangle=45,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            yaxis=dict(
-                title="Uptime Percentage (%)",
-                gridcolor="#E5E5E5",
-                gridwidth=1,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            showlegend=False,
-            bargap=0.2
         )
         return fig
     except Exception as e:
-        logging.error(f"Failed to create Weekly Uptime Percentage chart: {str(e)}")
         return None
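The removed branch derived uptime from a fixed 168-hour week. A worked example of that formula (downtime value invented):

```python
# Old definition: uptime% = (168 - weekly downtime) / 168 * 100, clipped to [0, 100]
total_hours_per_week = 24 * 7           # 168
weekly_downtime = 16.8                  # hypothetical summed downtime for one ISO week
uptime = (total_hours_per_week - weekly_downtime) / total_hours_per_week * 100
uptime = max(0.0, min(100.0, uptime))   # same effect as Series.clip(0, 100)
print(f"{uptime:.1f}%")                 # 90.0%
```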
 
-# Create Anomaly Alerts chart (bubble chart)
-def create_anomaly_alerts_chart(df, anomalies_df):
     try:
-        if df.empty or anomalies_df.empty:
-            logging.warning("DataFrame or anomalies DataFrame is empty for Anomaly Alerts chart.")
             return None
-
-        # Prepare data for bubble chart
-        df['is_anomaly'] = df.index.isin(anomalies_df.index)
-        df['color'] = df['is_anomaly'].map({True: 'red', False: 'blue'})
-
         fig = px.scatter(
-            df,
-            x='usage_hours',
-            y='downtime',
-            size='usage_hours',  # Bubble size based on usage hours
-            color='color',
-            title="Anomaly Alerts (Red = Anomaly)",
-            labels={"usage_hours": "Usage Hours", "downtime": "Downtime (Hours)"},
-            color_discrete_map={'blue': '#4ECDC4', 'red': '#FF0000'}
-        )
-        fig.update_traces(
-            marker=dict(
-                sizemode='area',
-                sizeref=0.1,  # Adjust bubble size scaling
-                line=dict(width=1, color='#333333')
-            ),
-            opacity=0.7
-        )
-        fig.update_layout(
-            title_font=dict(size=18, family="Arial", color="#333333"),
-            font=dict(family="Arial", size=12, color="#333333"),
-            plot_bgcolor="white",
-            paper_bgcolor="white",
-            margin=dict(l=30, r=30, t=50, b=30),
-            xaxis=dict(
-                title="Usage Hours",
-                showgrid=False,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            yaxis=dict(
-                title="Downtime (Hours)",
-                gridcolor="#E5E5E5",
-                gridwidth=1,
-                title_font=dict(size=14),
-                tickfont=dict(size=12)
-            ),
-            showlegend=False
         )
         return fig
     except Exception as e:
-        logging.error(f"Failed to create Anomaly Alerts chart: {str(e)}")
         return None
 
-# Generate Device Cards HTML
 def generate_device_cards(df):
     try:
         if df.empty:
-            logging.warning("DataFrame is empty in generate_device_cards.")
             return '<p>No devices available to display.</p>'
-
-        required_columns = ['device_id', 'status', 'timestamp']
-        missing_columns = [col for col in required_columns if col not in df.columns]
-        if missing_columns:
-            logging.error(f"Missing required columns in DataFrame: {missing_columns}")
-            return f'<p>Error: Missing required columns: {missing_columns}</p>'
-
-        if df['timestamp'].isna().all():
-            logging.warning("All timestamps are NaT. Cannot generate device cards.")
-            return '<p>Error: All timestamps are invalid.</p>'
-
-        df_clean = df.dropna(subset=['timestamp']).copy()
-        if df_clean.empty:
-            logging.warning("DataFrame is empty after dropping NaT timestamps.")
-            return '<p>No valid timestamps available to display.</p>'
-
-        device_stats = df_clean.groupby('device_id').agg({
             'status': 'last',
             'timestamp': 'max',
         }).reset_index()
-
-        counts = df_clean.groupby('device_id').size().reset_index(name='count')
-        device_stats = device_stats.merge(counts, on='device_id')
-
-        # Limit to top 10 devices by count
-        device_stats = device_stats.nlargest(10, 'count')
-        logging.info(f"Limited device cards to top {len(device_stats)} devices by usage count.")
-
         device_stats['health'] = device_stats['status'].map({
             'Active': 'Healthy',
             'Inactive': 'Unhealthy',
             'Pending': 'Warning'
         }).fillna('Unknown')
-
         cards_html = '<div style="display: flex; flex-wrap: wrap; gap: 20px;">'
         for _, row in device_stats.iterrows():
-            health_color = {
-                'Healthy': 'green',
-                'Unhealthy': 'red',
-                'Warning': 'orange',
-                'Unknown': 'gray'
-            }.get(row['health'], 'gray')
             timestamp_str = str(row['timestamp']) if pd.notna(row['timestamp']) else 'Unknown'
-            card = f"""
             <div style="border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; width: 200px;">
                 <h4>Device: {row['device_id']}</h4>
                 <p><b>Health:</b> <span style="color: {health_color}">{row['health']}</span></p>
@@ -457,57 +389,50 @@ def generate_device_cards(df):
                 <p><b>Last Log:</b> {timestamp_str}</p>
             </div>
             """
-            cards_html += card
         cards_html += '</div>'
-        logging.info("Device cards generated successfully")
         return cards_html
     except Exception as e:
-        logging.error(f"Failed to generate device cards: {str(e)}", exc_info=True)
         return f'<p>Error generating device cards: {str(e)}</p>'
 
-# Generate monthly status summary for PDF
 def generate_monthly_status(df, selected_month):
     try:
         total_devices = df['device_id'].nunique()
         total_usage_hours = df['usage_hours'].sum()
         total_downtime = df['downtime'].sum()
-        avg_usage_per_device = total_usage_hours / total_devices if total_devices > 0 else 0
-        avg_downtime_per_device = total_downtime / total_devices if total_devices > 0 else 0
-
-        summary = f"""
         Monthly Status for {selected_month}:
         - Total Devices: {total_devices}
         - Total Usage Hours: {total_usage_hours:.2f}
         - Total Downtime Hours: {total_downtime:.2f}
-        - Average Usage per Device: {avg_usage_per_device:.2f} hours
-        - Average Downtime per Device: {avg_downtime_per_device:.2f} hours
        """
-        return summary
     except Exception as e:
         logging.error(f"Failed to generate monthly status: {str(e)}")
         return f"Failed to generate monthly status: {str(e)}"
 
 # Generate PDF content
-def generate_pdf_content(summary, preview, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, df, selected_month):
     if not reportlab_available:
-        logging.error("reportlab not available. PDF generation is disabled.")
         return None
     try:
-        logging.info("Starting PDF generation...")
         pdf_path = f"monthly_status_report_{selected_month.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
         doc = SimpleDocTemplate(pdf_path, pagesize=letter)
         styles = getSampleStyleSheet()
         story = []
 
         def safe_paragraph(text, style):
-            text_str = str(text) if text else ""
-            return Paragraph(text_str.replace('\n', '<br/>'), style) if text_str else Paragraph("", style)
 
         story.append(Paragraph("LabOps Monthly Status Report", styles['Title']))
         story.append(Paragraph(f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
         story.append(Spacer(1, 12))
 
-        if selected_month != "All" and df is not None:
             monthly_status = generate_monthly_status(df, selected_month)
             story.append(Paragraph("Monthly Status Summary", styles['Heading2']))
             story.append(safe_paragraph(monthly_status, styles['Normal']))
@@ -518,11 +443,29 @@ def generate_pdf_content(summary, preview, anomalies, amc_reminders, insights, d
         story.append(Spacer(1, 12))
 
         story.append(Paragraph("Log Preview", styles['Heading2']))
-        story.append(safe_paragraph(preview, styles['Normal']))
         story.append(Spacer(1, 12))
 
         story.append(Paragraph("Device Cards", styles['Heading2']))
-        device_cards_text = device_cards_html.replace('<div>', '').replace('</div>', '\n').replace('<h4>', '').replace('</h4>', '\n').replace('<p>', '').replace('</p>', '\n').replace('<b>', '').replace('</b>', '').replace('<span style="color: green">', '').replace('<span style="color: red">', '').replace('<span style="color: orange">', '').replace('<span style="color: gray">', '').replace('</span>', '') if device_cards_html else "No device cards available."
         story.append(safe_paragraph(device_cards_text, styles['Normal']))
         story.append(Spacer(1, 12))
 
@@ -538,105 +481,31 @@ def generate_pdf_content(summary, preview, anomalies, amc_reminders, insights, d
         story.append(safe_paragraph(insights, styles['Normal']))
         story.append(Spacer(1, 12))
 
-        story.append(Paragraph("Daily Log Trends Chart", styles['Heading2']))
-        story.append(Paragraph("[Chart placeholder - see dashboard for Daily Log Trends]" if daily_log_chart is None else "[Chart included in dashboard]", styles['Normal']))
-        story.append(Spacer(1, 12))
-
-        story.append(Paragraph("Weekly Uptime Percentage Chart", styles['Heading2']))
-        story.append(Paragraph("[Chart placeholder - see dashboard for Weekly Uptime Percentage]" if weekly_uptime_chart is None else "[Chart included in dashboard]", styles['Normal']))
-        story.append(Spacer(1, 12))
-
-        story.append(Paragraph("Anomaly Alerts Chart", styles['Heading2']))
-        story.append(Paragraph("[Chart placeholder - see dashboard for Anomaly Alerts]" if anomaly_alerts_chart is None else "[Chart included in dashboard]", styles['Normal']))
-        story.append(Spacer(1, 12))
-
-        story.append(Paragraph("Downtime Chart", styles['Heading2']))
-        story.append(Paragraph("[Chart placeholder - see dashboard for Downtime per Device]" if downtime_chart is None else "[Chart included in dashboard]", styles['Normal']))
 
         doc.build(story)
         logging.info(f"PDF generated at {pdf_path}")
         return pdf_path
     except Exception as e:
-        logging.error(f"Failed to generate PDF: {str(e)}", exc_info=True)
         return None
 
-# Send email alert with analysis summary and PDF attachment
-def send_email_alert(summary, anomalies, amc_reminders, pdf_path, recipient_email="recipient@example.com"):
-    try:
-        # Email configuration
-        sender_email = "your_email@gmail.com"  # Replace with your email
-        sender_password = "your_app_password"  # Replace with your app-specific password
-        smtp_server = "smtp.gmail.com"
-        smtp_port = 587
-
-        # Create email message
-        subject = "LabOps Log Analyzer Report - Analysis Completed"
-        body = f"""
-        Dear Recipient,
-
-        The LabOps Log Analyzer has completed its analysis. Below are the key findings:
-
-        **Summary:**
-        {summary}
-
-        **Anomalies Detected:**
-        {anomalies}
-
-        **AMC Reminders:**
-        {amc_reminders}
-
-        The full report is attached as a PDF for your review.
-
-        Regards,
-        LabOps Team
-        """
-
-        msg = MIMEMultipart()
-        msg['From'] = sender_email
-        msg['To'] = recipient_email
-        msg['Subject'] = subject
-        msg.attach(MIMEText(body, 'plain'))
-
-        # Attach the PDF if it exists
-        if pdf_path and os.path.exists(pdf_path):
-            with open(pdf_path, 'rb') as f:
-                pdf_attachment = MIMEApplication(f.read(), _subtype="pdf")
-            pdf_attachment.add_header(
-                'Content-Disposition', 'attachment', filename=os.path.basename(pdf_path)
-            )
-            msg.attach(pdf_attachment)
-            logging.info(f"Attached PDF to email: {pdf_path}")
-        else:
-            logging.warning("No PDF file to attach to email.")
-
-        # Send the email
-        with smtplib.SMTP(smtp_server, smtp_port) as server:
-            server.starttls()
-            server.login(sender_email, sender_password)
-            server.sendmail(sender_email, recipient_email, msg.as_string())
-
-        logging.info(f"Email alert sent to {recipient_email}")
-    except Exception as e:
-        logging.error(f"Failed to send email alert: {str(e)}")
-
-# Main Gradio function
 async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, month_filter, last_modified_state):
     try:
-        start_time = datetime.now()
-
         if not file_obj:
-            return "No file uploaded.", "No data to preview.", None, '<p>No device cards available.</p>', None, None, None, None, "No anomalies detected.", "No AMC reminders.", "No insights generated.", None, last_modified_state, None, None, None, None, None, None, "Please upload a CSV file to analyze."
-
         file_path = file_obj.name
         current_modified_time = os.path.getmtime(file_path)
-
         if last_modified_state and current_modified_time == last_modified_state:
-            return None, None, None, None, None, None, None, None, None, None, None, None, last_modified_state, None, None, None, None, None, None, "No changes detected in the file."
 
-        logging.info(f"Processing file: {file_path}, last modified: {current_modified_time}")
-
         if not file_path.endswith(".csv"):
-            return "Please upload a CSV file.", "", None, '<p>No device cards available.</p>', None, None, None, None, "", "", "", None, last_modified_state, None, None, None, None, None, None, "Invalid file format. Please upload a CSV file."
 
         required_columns = ["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]
         dtypes = {
@@ -648,235 +517,96 @@ async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_ra
             "amc_date": "string"
         }
         df = pd.read_csv(file_path, dtype=dtypes)
-        # Downsample early if dataset is too large
-        if len(df) > 5000:
-            df = df.sample(n=5000, random_state=42)
-            logging.info(f"Downsampled DataFrame to 5,000 rows immediately after loading.")
         missing_columns = [col for col in required_columns if col not in df.columns]
         if missing_columns:
-            return f"Missing columns: {missing_columns}", None, None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, None, None, None, None, None, None, f"Missing required columns: {missing_columns}"
-
         df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
         df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
         if df["timestamp"].dt.tz is None:
-            logging.info("Localizing naive timestamps to IST")
             df["timestamp"] = df["timestamp"].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
         if df.empty:
-            return "No data available.", None, None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, None, None, None, None, None, None, "No data available in the uploaded file."
-
-        logging.info(f"DataFrame before filtering:\n{df.head().to_string()}")
-
-        # Apply filters directly on df
-        filtered_df = df
 
         if lab_site_filter and lab_site_filter != 'All' and 'lab_site' in filtered_df.columns:
             filtered_df = filtered_df[filtered_df['lab_site'] == lab_site_filter]
-            logging.info(f"After lab_site filter ({lab_site_filter}): {filtered_df.shape[0]} rows")
-
         if equipment_type_filter and equipment_type_filter != 'All' and 'equipment_type' in filtered_df.columns:
             filtered_df = filtered_df[filtered_df['equipment_type'] == equipment_type_filter]
-            logging.info(f"After equipment_type filter ({equipment_type_filter}): {filtered_df.shape[0]} rows")
-
         if date_range and len(date_range) == 2:
             days_start, days_end = date_range
             today = pd.to_datetime(datetime.now().date()).tz_localize('Asia/Kolkata')
             start_date = today + pd.Timedelta(days=days_start)
-            end_date = today + pd.Timedelta(days=days_end)
-            end_date = end_date + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
-            logging.info(f"Applying date range filter: {start_date} to {end_date}")
             filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
-            logging.info(f"After date range filter: {filtered_df.shape[0]} rows")
-
         if month_filter and month_filter != "All":
             selected_date = pd.to_datetime(month_filter, format="%B %Y")
             filtered_df = filtered_df[
                 (filtered_df['timestamp'].dt.year == selected_date.year) &
                (filtered_df['timestamp'].dt.month == selected_date.month)
             ]
-            logging.info(f"After month filter ({month_filter}): {filtered_df.shape[0]} rows")
 
         if filtered_df.empty:
-            logging.warning("Filtered DataFrame is empty after applying filters.")
-            return "No data after applying filters.", None, None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state, None, None, None, None, None, None, "No data available after applying filters."
-
-        logging.info(f"Filtered DataFrame before AMC check:\n{filtered_df[['device_id', 'amc_date']].to_string()}")
 
-        if len(filtered_df) > 1000:
-            filtered_df = filtered_df.sample(n=1000, random_state=42)
-            logging.info(f"Downsampled filtered DataFrame to 1,000 rows for chart generation.")
-
-        # Pre-aggregate data for charts
-        agg_data = {
-            'usage_per_device': filtered_df.groupby("device_id")["usage_hours"].sum().reset_index(),
-            'downtime_per_device': filtered_df.groupby("device_id")["downtime"].sum().reset_index(),
-        }
 
         # Run tasks concurrently
-        with ThreadPoolExecutor(max_workers=3) as executor:
-            future_summary_insights = executor.submit(generate_summary_and_insights, filtered_df)
            future_anomalies = executor.submit(detect_anomalies, filtered_df)
            future_amc = executor.submit(check_amc_reminders, filtered_df, datetime.now())
-            future_usage_chart = executor.submit(create_usage_chart, agg_data)
-            future_downtime_chart = executor.submit(create_downtime_chart, agg_data)
            future_daily_log_chart = executor.submit(create_daily_log_trends_chart, filtered_df)
            future_weekly_uptime_chart = executor.submit(create_weekly_uptime_chart, filtered_df)
            future_device_cards = executor.submit(generate_device_cards, filtered_df)
 
-        summary, insights = future_summary_insights.result()
-        summary = f"Step 1: Summary Report\n{summary}"
-        insights = f"Dashboard Insights (AI)\n{insights}"
         anomalies, anomalies_df = future_anomalies.result()
         anomalies = f"Anomaly Detection\n{anomalies}"
         amc_reminders, reminders_df = future_amc.result()
         amc_reminders = f"AMC Reminders\n{amc_reminders}"
         usage_chart = future_usage_chart.result()
         downtime_chart = future_downtime_chart.result()
         daily_log_chart = future_daily_log_chart.result()
         weekly_uptime_chart = future_weekly_uptime_chart.result()
         device_cards = future_device_cards.result()
 
-        # Generate Anomaly Alerts chart after anomalies are detected
-        anomaly_alerts_chart = create_anomaly_alerts_chart(filtered_df, anomalies_df)
 
-        # Generate the log preview as an HTML table
-        preview_html = """
-        <style>
-        .log-preview-table {
-            width: 100%;
-            border-collapse: collapse;
-            font-family: Arial, sans-serif;
-            margin-top: 10px;
-        }
-        .log-preview-table th, .log-preview-table td {
-            border: 1px solid #ddd;
-            padding: 8px;
-            text-align: left;
-        }
-        .log-preview-table th {
-            background-color: #4ECDC4;
-            color: white;
-        }
-        .log-preview-table tr:nth-child(even) {
-            background-color: #f2f2f2;
-        }
-        .log-preview-table tr:hover {
-            background-color: #ddd;
-        }
-        </style>
-        <h3>Step 2: Log Preview (First 5 Rows)</h3>
-        <table class='log-preview-table'>
-            <thead>
-                <tr>
-                    <th>Row</th>
-                    <th>Device ID</th>
-                    <th>Log Type</th>
-                    <th>Status</th>
-                    <th>Timestamp</th>
-                    <th>Usage Hours</th>
-                    <th>Downtime</th>
-                    <th>AMC Date</th>
-                </tr>
-            </thead>
-            <tbody>
-        """
-        if filtered_df.empty:
-            preview_html += "<tr><td colspan='8'>No data to preview.</td></tr>"
-        else:
-            for idx, row in filtered_df.head(5).iterrows():
-                preview_html += f"""
-                <tr>
-                    <td>{idx + 1}</td>
-                    <td>{row['device_id']}</td>
-                    <td>{row['log_type']}</td>
-                    <td>{row['status']}</td>
-                    <td>{row['timestamp']}</td>
-                    <td>{row['usage_hours']}</td>
-                    <td>{row['downtime']}</td>
-                    <td>{row['amc_date']}</td>
-                </tr>
-                """
-        preview_html += """
-            </tbody>
-        </table>
-        """
-
-        preview_lines = ["Step 2: Log Preview (First 5 Rows)"]
-        for idx, row in filtered_df.head(5).iterrows():
-            preview_lines.append(
-                f"Row {idx + 1}: Device ID: {row['device_id']}, "
-                f"Log Type: {row['log_type']}, Status: {row['status']}, "
-                f"Timestamp: {row['timestamp']}, Usage Hours: {row['usage_hours']}, "
-                f"Downtime: {row['downtime']}, AMC Date: {row['amc_date']}"
-            )
-        preview_text = "\n".join(preview_lines)
-
-        # Auto-generate PDF after analysis
-        pdf_file = None
-        status_msg = "Analysis completed successfully."
-        if all([summary, preview_text, anomalies, amc_reminders, insights, device_cards, filtered_df is not None]):
-            pdf_file = generate_pdf_content(
-                summary, preview_text, anomalies, amc_reminders, insights, device_cards,
-                daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart,
-                filtered_df, month_filter
-            )
-            if pdf_file:
-                status_msg = "Analysis completed successfully. PDF report generated and available for download."
-            else:
-                status_msg = "Analysis completed successfully, but failed to generate PDF. Check logs for details."
-        else:
-            status_msg = "Analysis completed, but some data is missing for PDF generation."
-
-        # Send email alert
-        send_email_alert(summary, anomalies, amc_reminders, pdf_file)
-
-        elapsed_time = (datetime.now() - start_time).total_seconds()
         logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
         if elapsed_time > 10:
             logging.warning(f"Processing time exceeded 10 seconds: {elapsed_time:.2f} seconds")
 
-        return (summary, preview_html, usage_chart, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, anomalies, amc_reminders, insights, pdf_file, current_modified_time, summary, preview_text, anomalies, amc_reminders, insights, device_cards, filtered_df, status_msg)
     except Exception as e:
         logging.error(f"Failed to process file: {str(e)}")
-        return f"Error: {str(e)}", None, None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state, None, None, None, None, None, None, f"Failed to process file: {str(e)}"
 
-# Update filter options
 def update_filters(file_obj):
     if not file_obj:
-        logging.info("No file uploaded for filter update, returning default options.")
         return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
-
     try:
-        logging.info(f"Attempting to read CSV file: {file_obj.name}")
         with open(file_obj.name, 'rb') as f:
             csv_content = f.read().decode('utf-8')
         df = pd.read_csv(io.StringIO(csv_content))
         df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
-        logging.info(f"CSV file read successfully. Columns found: {list(df.columns)}")
-
-        lab_site_options = ['All']
-        if 'lab_site' in df.columns:
-            unique_lab_sites = df['lab_site'].dropna().astype(str).unique().tolist()
-            lab_site_options.extend([site for site in unique_lab_sites if site.strip()])
-            logging.info(f"Lab site options extracted: {lab_site_options}")
-        else:
-            logging.warning("Column 'lab_site' not found in CSV.")
-
-        equipment_type_options = ['All']
-        if 'equipment_type' in df.columns:
-            unique_equipment_types = df['equipment_type'].dropna().astype(str).unique().tolist()
-            equipment_type_options.extend([equip for equip in unique_equipment_types if equip.strip()])
-            logging.info(f"Equipment type options extracted: {equipment_type_options}")
-        else:
-            logging.warning("Column 'equipment_type' not found in CSV.")
-
-        month_options = ['All']
-        if 'timestamp' in df.columns:
-            df['month_year'] = df['timestamp'].dt.strftime('%B %Y')
-            unique_months = df['month_year'].dropna().unique().tolist()
-            month_options.extend(sorted(unique_months))
-            logging.info(f"Month options extracted: {month_options}")
-        else:
-            logging.warning("Column 'timestamp' not found in CSV.")
 
         return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), gr.update(choices=month_options, value='All')
     except Exception as e:
@@ -893,74 +623,39 @@ try:
     .dashboard-section h3 {font-size: 18px; margin-bottom: 2px;}
     .dashboard-section p {margin: 1px 0; line-height: 1.2;}
     .dashboard-section ul {margin: 2px 0; padding-left: 20px;}
     """) as iface:
-    gr.Markdown("<h1>LabOps Log Analyzer Dashboard</h1>")
-    gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard with the latest data. A PDF report will be generated automatically, and an email alert will be sent.")
 
     last_modified_state = gr.State(value=None)
-    summary_state = gr.State()
-    preview_state = gr.State()
-    anomalies_state = gr.State()
-    amc_reminders_state = gr.State()
-    insights_state = gr.State()
-    device_cards_state = gr.State()
-    df_state = gr.State()
 
     with gr.Row():
         with gr.Column(scale=1):
             file_input = gr.File(label="Upload Logs (CSV)", file_types=[".csv"])
-
             with gr.Group():
                 gr.Markdown("### Filters")
-                lab_site_filter = gr.Dropdown(
-                    label="Lab Site",
-                    choices=['All'],
-                    value='All',
-                    interactive=True
-                )
-                equipment_type_filter = gr.Dropdown(
-                    label="Equipment Type",
-                    choices=['All'],
-                    value='All',
-                    interactive=True
-                )
-                date_range_filter = gr.Slider(
-                    label="Date Range (Days from Today)",
-                    minimum=-365,
-                    maximum=0,
-                    step=1,
-                    value=[-30, 0],
-                    info="Select the range of days relative to today (e.g., -30 to 0 for the last 30 days)."
-                )
-                month_filter = gr.Dropdown(
-                    label="Select Month for Report",
-                    choices=['All'],
-                    value='All',
-                    interactive=True
-                )
-
             submit_button = gr.Button("Analyze", variant="primary")
 
         with gr.Column(scale=2):
             with gr.Group(elem_classes="dashboard-container"):
                 gr.Markdown("<div class='dashboard-title'>Analysis Results</div>")
-
-                with gr.Group(elem_classes="dashboard-section"):
-                    gr.Markdown("### Status Message")
-                    status_message = gr.Markdown("Please upload a CSV file and click 'Analyze' to begin.")
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Step 1: Summary Report")
                     summary_output = gr.Markdown()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Step 2: Log Preview")
                     preview_output = gr.HTML()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Device Cards")
                     device_cards_output = gr.HTML()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Charts")
                     with gr.Tab("Usage Hours per Device"):
@@ -973,19 +668,15 @@ try:
                         weekly_uptime_output = gr.Plot()
                     with gr.Tab("Anomaly Alerts"):
                         anomaly_alerts_output = gr.Plot()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Step 4: Anomaly Detection")
                     anomaly_output = gr.Markdown()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Step 5: AMC Reminders")
                     amc_output = gr.Markdown()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Step 6: Insights (AI)")
                     insights_output = gr.Markdown()
-
                 with gr.Group(elem_classes="dashboard-section"):
                     gr.Markdown("### Export Report")
                     pdf_output = gr.File(label="Download Monthly Status Report as PDF")
@@ -999,37 +690,8 @@
 
     submit_button.click(
         fn=process_logs,
-        inputs=[
-            file_input,
-            lab_site_filter,
-            equipment_type_filter,
-            date_range_filter,
-            month_filter,
-            last_modified_state
-        ],
-        outputs=[
-            summary_output,
-            preview_output,
-            usage_chart_output,
-            device_cards_output,
-            daily_log_trends_output,
-            weekly_uptime_output,
-            anomaly_alerts_output,
-            downtime_chart_output,
-            anomaly_output,
-            amc_output,
-            insights_output,
-            pdf_output,
-            last_modified_state,
-            summary_state,
-            preview_state,
-            anomalies_state,
-            amc_reminders_state,
-            insights_state,
-            device_cards_state,
-            df_state,
-            status_message
-        ]
     )
 
     logging.info("Gradio interface initialized successfully")
app.py (after)

 import gradio as gr
 import pandas as pd
 from datetime import datetime, timedelta
 import logging
 import plotly.express as px
 from sklearn.ensemble import IsolationForest
+from transformers import pipeline
+import torch
 from concurrent.futures import ThreadPoolExecutor
+from simple_salesforce import Salesforce
 import os
+import json
 import io
+import time
 
 # Configure logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
+# Salesforce configuration
+try:
+    sf = Salesforce(
+        username='multi-devicelabopsdashboard@sathkrutha.com',
+        password='Team@1234',
+        security_token=os.getenv('SF_SECURITY_TOKEN', ''),
+        domain='login'
+    )
+    logging.info("Salesforce connection established")
+except Exception as e:
+    logging.error(f"Failed to connect to Salesforce: {str(e)}")
+    sf = None
+
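Reviewer note: the Salesforce username and password above are committed in plain text; only the security token comes from the environment. A sketch of the same connection with all three secrets read from environment variables (SF_USERNAME and SF_PASSWORD are hypothetical names, not part of this commit):

```python
import os
from simple_salesforce import Salesforce

# SF_USERNAME and SF_PASSWORD are illustrative; only SF_SECURITY_TOKEN exists in the commit.
sf = Salesforce(
    username=os.environ["SF_USERNAME"],
    password=os.environ["SF_PASSWORD"],
    security_token=os.getenv("SF_SECURITY_TOKEN", ""),
    domain="login",
)
```
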
 # Try to import reportlab
 try:
     from reportlab.lib.pagesizes import letter
+    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
     from reportlab.lib.styles import getSampleStyleSheet
+    from reportlab.lib import colors
     reportlab_available = True
     logging.info("reportlab module successfully imported")
 except ImportError:
     logging.warning("reportlab module not found. PDF generation disabled.")
     reportlab_available = False
 
+# Preload Hugging Face model with optimization
+logging.info("Preloading Hugging Face model...")
+try:
+    device = 0 if torch.cuda.is_available() else -1
+    # Use a smaller model for faster inference
+    summarizer = pipeline(
+        "summarization",
+        model="t5-small",
+        device=device,
+        max_length=50,
+        min_length=10,
+        num_beams=2
+    )
+    logging.info(f"Hugging Face model preloaded on {'GPU' if device == 0 else 'CPU'}")
+except Exception as e:
+    logging.error(f"Failed to preload model: {str(e)}")
+    raise e
+
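For context, the preloaded pipeline is invoked later (in summarize_logs and generate_dashboard_insights) with a one-line prompt. A minimal usage sketch, assuming transformers and a t5-small download are available (prompt text invented):

```python
from transformers import pipeline

summarizer = pipeline("summarization", model="t5-small", device=-1)  # -1 = CPU
prompt = "Maintenance logs: 12 devices. Most used: D7."
result = summarizer(prompt, max_length=50, min_length=10, do_sample=False)
print(result[0]["summary_text"])  # the pipeline returns a list of dicts, one per input
```
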
+# Cache picklist values at startup
+def get_picklist_values(field_name):
+    if sf is None:
+        return []
+    try:
+        obj_desc = sf.SmartLog__c.describe()
+        for field in obj_desc['fields']:
+            if field['name'] == field_name:
+                return [value['value'] for value in field['picklistValues'] if value['active']]
+        return []
+    except Exception as e:
+        logging.error(f"Failed to fetch picklist values for {field_name}: {str(e)}")
+        return []
+
+status_values = get_picklist_values('Status__c') or ["Active", "Inactive", "Pending"]
+log_type_values = get_picklist_values('Log_Type__c') or ["Smart Log", "Cell Analysis", "UV Verification"]
+logging.info(f"Valid Status__c values: {status_values}")
+logging.info(f"Valid Log_Type__c values: {log_type_values}")
+
+# Map invalid picklist values
+picklist_mapping = {
+    'Status__c': {
+        'normal': 'Active',
+        'error': 'Inactive',
+        'warning': 'Pending',
+        'ok': 'Active',
+        'failed': 'Inactive'
+    },
+    'Log_Type__c': {
+        'maint': 'Smart Log',
+        'error': 'Cell Analysis',
+        'ops': 'UV Verification',
+        'maintenance': 'Smart Log',
+        'cell': 'Cell Analysis',
+        'uv': 'UV Verification',
+        'weight log': 'Smart Log'
+    }
+}
+
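The mapping above is consumed further down in save_to_salesforce via a lower-cased dict lookup with a fallback to the first valid picklist entry. Shown standalone (sample value invented):

```python
picklist_mapping = {
    'Status__c': {'normal': 'Active', 'error': 'Inactive', 'warning': 'Pending'},
}
status_values = ["Active", "Inactive", "Pending"]

raw = "Normal"  # hypothetical CSV value
# Unknown values fall back to the first valid picklist entry (or None if the list is empty).
status = picklist_mapping['Status__c'].get(raw.lower(), status_values[0] if status_values else None)
print(status)  # Active
```
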
+# Cache folder ID
+def get_folder_id(folder_name):
+    if sf is None:
+        return None
+    try:
+        query = f"SELECT Id FROM Folder WHERE Name = '{folder_name}' AND Type = 'Report'"
+        result = sf.query(query)
+        if result['totalSize'] > 0:
+            folder_id = result['records'][0]['Id']
+            logging.info(f"Found folder ID for '{folder_name}': {folder_id}")
+            return folder_id
+        else:
+            logging.error(f"Folder '{folder_name}' not found in Salesforce.")
+            return None
+    except Exception as e:
+        logging.error(f"Failed to fetch folder ID for '{folder_name}': {str(e)}")
+        return None
+
+LABOPS_REPORTS_FOLDER_ID = get_folder_id('LabOps Reports')
+
+# Salesforce report creation
+def create_salesforce_reports(df):
+    if sf is None or not LABOPS_REPORTS_FOLDER_ID:
+        return
+    try:
+        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+        reports = [
+            {
+                "reportMetadata": {
+                    "name": f"SmartLog_Usage_Report_{timestamp}",
+                    "developerName": f"SmartLog_Usage_Report_{timestamp}",
+                    "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
+                    "reportFormat": "TABULAR",
+                    "reportBooleanFilter": None,
+                    "reportFilters": [],
+                    "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.Usage_Hours__c"],
+                    "folderId": LABOPS_REPORTS_FOLDER_ID
+                }
+            },
+            {
+                "reportMetadata": {
+                    "name": f"SmartLog_AMC_Reminders_{timestamp}",
+                    "developerName": f"SmartLog_AMC_Reminders_{timestamp}",
+                    "reportType": {"type": "CustomEntity", "value": "SmartLog__c"},
+                    "reportFormat": "TABULAR",
+                    "reportBooleanFilter": None,
+                    "reportFilters": [],
+                    "detailColumns": ["SmartLog__c.Device_Id__c", "SmartLog__c.AMC_Date__c"],
+                    "folderId": LABOPS_REPORTS_FOLDER_ID
+                }
+            }
+        ]
+        for report in reports:
+            sf.restful('analytics/reports', method='POST', json=report)
+        logging.info("Salesforce reports created")
+    except Exception as e:
+        logging.error(f"Failed to create Salesforce reports: {str(e)}")
+
+# Save to Salesforce
+def save_to_salesforce(df, reminders_df):
+    if sf is None:
+        return
+    try:
+        current_date = datetime.now()
+        next_30_days = current_date + timedelta(days=30)
+        records = []
+        reminder_device_ids = set(reminders_df['device_id']) if not reminders_df.empty else set()
+
+        for _, row in df.iterrows():
+            status = str(row['status'])
+            log_type = str(row['log_type'])
+            status = picklist_mapping['Status__c'].get(status.lower(), status_values[0] if status_values else None)
+            log_type = picklist_mapping['Log_Type__c'].get(log_type.lower(), log_type_values[0] if log_type_values else None)
+            if status is None or log_type is None:
+                continue
+
+            amc_date_str = None
+            if pd.notna(row['amc_date']):
+                try:
+                    amc_date = pd.to_datetime(row['amc_date']).strftime('%Y-%m-%d')
+                    amc_date_dt = datetime.strptime(amc_date, '%Y-%m-%d')
+                    if status == "Active" and current_date.date() <= amc_date_dt.date() <= next_30_days.date():
+                        logging.info(f"AMC Reminder for Device ID {row['device_id']}")
+                except:
+                    amc_date_str = None
+
+            record = {
+                'Device_Id__c': str(row['device_id'])[:50],
+                'Log_Type__c': log_type,
+                'Status__c': status,
+                'Timestamp__c': row['timestamp'].isoformat() if pd.notna(row['timestamp']) else None,
+                'Usage_Hours__c': float(row['usage_hours']) if pd.notna(row['usage_hours']) else 0.0,
+                'Downtime__c': float(row['downtime']) if pd.notna(row['downtime']) else 0.0,
+                'AMC_Date__c': amc_date_str
+            }
+            if row['device_id'] not in reminder_device_ids:
+                records.append(record)
+
+        if records:
+            sf.bulk.SmartLog__c.insert(records)
+            logging.info(f"Saved {len(records)} records to Salesforce")
+    except Exception as e:
+        logging.error(f"Failed to save to Salesforce: {str(e)}")
+
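The write at the end of save_to_salesforce goes through simple_salesforce's bulk API, one dict per SmartLog__c record. A minimal sketch, assuming an authenticated sf client and the custom fields shown in this commit:

```python
# Field values invented; sf is assumed to be an authenticated Salesforce instance.
records = [
    {
        'Device_Id__c': 'D1',
        'Log_Type__c': 'Smart Log',
        'Status__c': 'Active',
        'Usage_Hours__c': 5.0,
        'Downtime__c': 0.5,
    },
]
results = sf.bulk.SmartLog__c.insert(records)  # returns one result dict per record
print(results)
```
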
+# Summarize logs
+def summarize_logs(df):
     try:
         total_devices = df["device_id"].nunique()
         most_used = df.groupby("device_id")["usage_hours"].sum().idxmax() if not df.empty else "N/A"
+        prompt = f"Maintenance logs: {total_devices} devices. Most used: {most_used}."
+        summary = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
+        return summary
     except Exception as e:
+        logging.error(f"Summary generation failed: {str(e)}")
+        return f"Failed to generate summary: {str(e)}"
 
 # Anomaly detection
 def detect_anomalies(df):
     try:
         if "usage_hours" not in df.columns or "downtime" not in df.columns:
             return "Anomaly detection requires 'usage_hours' and 'downtime' columns.", pd.DataFrame()
         features = df[["usage_hours", "downtime"]].fillna(0)
+        if len(features) > 500:  # Reduced sample size
+            features = features.sample(n=500, random_state=42)
+        iso_forest = IsolationForest(contamination=0.1, random_state=42)
         df["anomaly"] = iso_forest.fit_predict(features)
         anomalies = df[df["anomaly"] == -1][["device_id", "usage_hours", "downtime", "timestamp"]]
         if anomalies.empty:
             return "No anomalies detected.", anomalies
+        return "\n".join([f"- Device ID: {row['device_id']}, Usage: {row['usage_hours']}, Downtime: {row['downtime']}, Timestamp: {row['timestamp']}" for _, row in anomalies.head(5).iterrows()]), anomalies
     except Exception as e:
         logging.error(f"Anomaly detection failed: {str(e)}")
         return f"Anomaly detection failed: {str(e)}", pd.DataFrame()
 
 # AMC reminders
 def check_amc_reminders(df, current_date):
     try:
         if "device_id" not in df.columns or "amc_date" not in df.columns:
             return "AMC reminders require 'device_id' and 'amc_date' columns.", pd.DataFrame()
         df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
+        current_date = pd.to_datetime(current_date)
         df["days_to_amc"] = (df["amc_date"] - current_date).dt.days
         reminders = df[(df["days_to_amc"] >= 0) & (df["days_to_amc"] <= 30)][["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]]
         if reminders.empty:
             return "No AMC reminders due within the next 30 days.", reminders
+        return "\n".join([f"- Device ID: {row['device_id']}, AMC Date: {row['amc_date']}" for _, row in reminders.head(5).iterrows()]), reminders
     except Exception as e:
         logging.error(f"AMC reminder generation failed: {str(e)}")
         return f"AMC reminder generation failed: {str(e)}", pd.DataFrame()
 
+# Dashboard insights
+def generate_dashboard_insights(df):
+    try:
+        total_devices = df["device_id"].nunique()
+        avg_usage = df["usage_hours"].mean() if "usage_hours" in df.columns else 0
+        prompt = f"Insights: {total_devices} devices, avg usage {avg_usage:.2f} hours."
+        insights = summarizer(prompt, max_length=50, min_length=10, do_sample=False)[0]["summary_text"]
+        return insights
+    except Exception as e:
+        logging.error(f"Dashboard insights generation failed: {str(e)}")
+        return f"Dashboard insights generation failed: {str(e)}"
 
 # Create usage chart
+def create_usage_chart(df):
     try:
+        if df.empty:
             return None
+        usage_data = df.groupby("device_id")["usage_hours"].sum().reset_index()
         if len(usage_data) > 5:
             usage_data = usage_data.nlargest(5, "usage_hours")
         fig = px.bar(
             usage_data,
             x="device_id",
             y="usage_hours",
+            title="Usage Hours per Device",
+            labels={"device_id": "Device ID", "usage_hours": "Usage Hours"}
         )
+        fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20))
         return fig
     except Exception as e:
         logging.error(f"Failed to create usage chart: {str(e)}")
         return None
 
+# Create downtime chart
+def create_downtime_chart(df):
     try:
+        downtime_data = df.groupby("device_id")["downtime"].sum().reset_index()
         if len(downtime_data) > 5:
             downtime_data = downtime_data.nlargest(5, "downtime")
         fig = px.bar(
             downtime_data,
             x="device_id",
             y="downtime",
+            title="Downtime per Device",
+            labels={"device_id": "Device ID", "downtime": "Downtime (Hours)"}
         )
+        fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20))
         return fig
     except Exception as e:
         logging.error(f"Failed to create downtime chart: {str(e)}")
         return None
 
+# Create daily log trends chart
 def create_daily_log_trends_chart(df):
     try:
         df['date'] = df['timestamp'].dt.date
+        daily_logs = df.groupby('date').size().reset_index(name='log_count')
+        fig = px.line(
+            daily_logs,
             x='date',
             y='log_count',
             title="Daily Log Trends",
             labels={"date": "Date", "log_count": "Number of Logs"}
         )
+        fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20))
         return fig
     except Exception as e:
+        logging.error(f"Failed to create daily log trends chart: {str(e)}")
         return None
 
+# Create weekly uptime chart
 def create_weekly_uptime_chart(df):
     try:
+        df['week'] = df['timestamp'].dt.isocalendar().week
         df['year'] = df['timestamp'].dt.year
         weekly_data = df.groupby(['year', 'week']).agg({
+            'usage_hours': 'sum',
             'downtime': 'sum'
         }).reset_index()
+        weekly_data['uptime_percent'] = (weekly_data['usage_hours'] / (weekly_data['usage_hours'] + weekly_data['downtime'])) * 100
+        weekly_data['year_week'] = weekly_data['year'].astype(str) + '-W' + weekly_data['week'].astype(str)
         fig = px.bar(
             weekly_data,
+            x='year_week',
+            y='uptime_percent',
             title="Weekly Uptime Percentage",
+            labels={"year_week": "Year-Week", "uptime_percent": "Uptime %"}
         )
+        fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20))
         return fig
     except Exception as e:
+        logging.error(f"Failed to create weekly uptime chart: {str(e)}")
         return None
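The new definition computes uptime as usage / (usage + downtime) per ISO week. A worked example, including the all-zero week the committed line would turn into a 0/0 division:

```python
import pandas as pd

weekly = pd.DataFrame({"usage_hours": [150.0, 0.0], "downtime": [18.0, 0.0]})
denom = weekly["usage_hours"] + weekly["downtime"]
# The committed line divides by denom directly; replacing 0 with NaN avoids 0/0.
weekly["uptime_percent"] = (weekly["usage_hours"] / denom.replace(0, float("nan"))) * 100
print(weekly)  # ~89.3% for the active week, NaN instead of a spurious value for the idle one
```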
 
345
+ # Create anomaly alerts chart
346
+ def create_anomaly_alerts_chart(anomalies_df):
347
  try:
348
+ if anomalies_df.empty:
 
349
  return None
350
+ anomalies_df['date'] = anomalies_df['timestamp'].dt.date
351
+ anomaly_counts = anomalies_df.groupby('date').size().reset_index(name='anomaly_count')
 
 
 
352
  fig = px.scatter(
353
+ anomaly_counts,
354
+ x='date',
355
+ y='anomaly_count',
356
+ title="Anomaly Alerts Over Time",
357
+ labels={"date": "Date", "anomaly_count": "Number of Anomalies"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
  )
359
+ fig.update_layout(title_font_size=16, margin=dict(l=20, r=20, t=40, b=20))
360
  return fig
361
  except Exception as e:
362
+ logging.error(f"Failed to create anomaly alerts chart: {str(e)}")
363
  return None

# Generate device cards
def generate_device_cards(df):
    try:
        if df.empty:
            return '<p>No devices available to display.</p>'
        device_stats = df.groupby('device_id').agg({
            'status': 'last',
            'timestamp': 'max',
        }).reset_index()
        device_stats['count'] = df.groupby('device_id').size().reindex(device_stats['device_id']).values
        device_stats['health'] = device_stats['status'].map({
            'Active': 'Healthy',
            'Inactive': 'Unhealthy',
            'Pending': 'Warning'
        }).fillna('Unknown')
        cards_html = '<div style="display: flex; flex-wrap: wrap; gap: 20px;">'
        for _, row in device_stats.iterrows():
            health_color = {'Healthy': 'green', 'Unhealthy': 'red', 'Warning': 'orange', 'Unknown': 'gray'}.get(row['health'], 'gray')
            timestamp_str = str(row['timestamp']) if pd.notna(row['timestamp']) else 'Unknown'
            cards_html += f"""
            <div style="border: 1px solid #e0e0e0; padding: 10px; border-radius: 5px; width: 200px;">
                <h4>Device: {row['device_id']}</h4>
                <p><b>Health:</b> <span style="color: {health_color}">{row['health']}</span></p>
                <p><b>Logs:</b> {row['count']}</p>
                <p><b>Last Log:</b> {timestamp_str}</p>
            </div>
            """
        cards_html += '</div>'
        return cards_html
    except Exception as e:
        logging.error(f"Failed to generate device cards: {str(e)}")
        return f'<p>Error generating device cards: {str(e)}</p>'
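
# Illustrative output (comment only): a device whose most recent status is
# 'Active' renders a card titled "Device: <id>" with its Health shown in green;
# statuses other than Active/Inactive/Pending fall back to 'Unknown' in gray.
# The "Logs" line above is reconstructed from the computed 'count' column,
# which the rest of the visible hunk never displays otherwise.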

# Generate monthly status
def generate_monthly_status(df, selected_month):
    try:
        total_devices = df['device_id'].nunique()
        total_usage_hours = df['usage_hours'].sum()
        total_downtime = df['downtime'].sum()
        avg_usage = total_usage_hours / total_devices if total_devices > 0 else 0
        avg_downtime = total_downtime / total_devices if total_devices > 0 else 0
        return f"""
        Monthly Status for {selected_month}:
        - Total Devices: {total_devices}
        - Total Usage Hours: {total_usage_hours:.2f}
        - Total Downtime Hours: {total_downtime:.2f}
        - Average Usage per Device: {avg_usage:.2f} hours
        - Average Downtime per Device: {avg_downtime:.2f} hours
        """
    except Exception as e:
        logging.error(f"Failed to generate monthly status: {str(e)}")
        return f"Failed to generate monthly status: {str(e)}"

# Generate PDF content
def generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards_html, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, df, selected_month):
    if not reportlab_available:
        return None
    try:
        # Table, TableStyle, and colors are needed for the preview table but are
        # not part of the module-level reportlab import, so import them here
        from reportlab.platypus import Table, TableStyle
        from reportlab.lib import colors
        import re

        pdf_path = f"monthly_status_report_{selected_month.replace(' ', '_')}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.pdf"
        doc = SimpleDocTemplate(pdf_path, pagesize=letter)
        styles = getSampleStyleSheet()
        story = []

        def safe_paragraph(text, style):
            return Paragraph(str(text).replace('\n', '<br/>'), style) if text else Paragraph("", style)

        story.append(Paragraph("LabOps Monthly Status Report", styles['Title']))
        story.append(Paragraph(f"Generated on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}", styles['Normal']))
        story.append(Spacer(1, 12))

        if selected_month != "All":
            monthly_status = generate_monthly_status(df, selected_month)
            story.append(Paragraph("Monthly Status Summary", styles['Heading2']))
            story.append(safe_paragraph(monthly_status, styles['Normal']))
        # (a few lines of this hunk are elided in the diff at this point)
        story.append(Spacer(1, 12))

        story.append(Paragraph("Log Preview", styles['Heading2']))
        if not preview_df.empty:
            data = [preview_df.columns.tolist()] + preview_df.head(5).values.tolist()
            table = Table(data)
            table.setStyle(TableStyle([
                ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                ('FONTSIZE', (0, 0), (-1, 0), 12),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                ('TEXTCOLOR', (0, 1), (-1, -1), colors.black),
                ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
                ('FONTSIZE', (0, 1), (-1, -1), 10),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]))
            story.append(table)
        else:
            story.append(safe_paragraph("No preview available.", styles['Normal']))
        story.append(Spacer(1, 12))

        story.append(Paragraph("Device Cards", styles['Heading2']))
        # Strip all HTML tags; the original chain of str.replace calls missed tags
        # with inline style attributes, which can break reportlab's markup parser
        device_cards_text = re.sub(r'<[^>]+>', '', device_cards_html)
        story.append(safe_paragraph(device_cards_text, styles['Normal']))
        story.append(Spacer(1, 12))

        # (the sections between "Device Cards" and the insights paragraph are
        # elided in the diff)
        story.append(safe_paragraph(insights, styles['Normal']))
        story.append(Spacer(1, 12))

        story.append(Paragraph("Charts", styles['Heading2']))
        story.append(Paragraph("[Chart placeholders - see dashboard for visuals]", styles['Normal']))

        doc.build(story)
        logging.info(f"PDF generated at {pdf_path}")
        return pdf_path
    except Exception as e:
        logging.error(f"Failed to generate PDF: {str(e)}")
        return None
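
# Sketch (assumption, not part of the original code): the "Charts" section above
# only adds a text placeholder. If the kaleido package were installed, the Plotly
# figures could be embedded as images instead, e.g.:
#   from reportlab.platypus import Image
#   png_bytes = downtime_chart.to_image(format="png")  # requires kaleido
#   story.append(Image(io.BytesIO(png_bytes), width=400, height=250))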

# Main processing function
async def process_logs(file_obj, lab_site_filter, equipment_type_filter, date_range, month_filter, last_modified_state):
    import time  # timing helper; 'time' is not among the module-level imports shown above
    start_time = time.time()
    try:
        if not file_obj:
            return "No file uploaded.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "No anomalies detected.", "No AMC reminders.", "No insights generated.", None, last_modified_state

        file_path = file_obj.name
        current_modified_time = os.path.getmtime(file_path)
        # Skip reprocessing when the uploaded file has not changed since the last run
        if last_modified_state and current_modified_time == last_modified_state:
            return None, None, None, None, None, None, None, None, None, None, None, None, last_modified_state

        logging.info(f"Processing file: {file_path}")
        if not file_path.endswith(".csv"):
            return "Please upload a CSV file.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, "", "", "", None, last_modified_state

        required_columns = ["device_id", "log_type", "status", "timestamp", "usage_hours", "downtime", "amc_date"]
        dtypes = {
            # the middle of this dict is elided in the diff; these entries are
            # reconstructed from required_columns ("timestamp" is parsed separately below)
            "device_id": "string",
            "log_type": "string",
            "status": "string",
            "usage_hours": "float64",
            "downtime": "float64",
            "amc_date": "string"
        }
        df = pd.read_csv(file_path, dtype=dtypes)
        missing_columns = [col for col in required_columns if col not in df.columns]
        if missing_columns:
            return f"Missing columns: {missing_columns}", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state

        df["timestamp"] = pd.to_datetime(df["timestamp"], errors='coerce')
        df["amc_date"] = pd.to_datetime(df["amc_date"], errors='coerce')
        if df["timestamp"].dt.tz is None:
            df["timestamp"] = df["timestamp"].dt.tz_localize('UTC').dt.tz_convert('Asia/Kolkata')
        if df.empty:
            return "No data available.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state

        # Apply filters
        filtered_df = df.copy()
        if lab_site_filter and lab_site_filter != 'All' and 'lab_site' in filtered_df.columns:
            filtered_df = filtered_df[filtered_df['lab_site'] == lab_site_filter]
        if equipment_type_filter and equipment_type_filter != 'All' and 'equipment_type' in filtered_df.columns:
            filtered_df = filtered_df[filtered_df['equipment_type'] == equipment_type_filter]
        if date_range and len(date_range) == 2:
            days_start, days_end = date_range
            today = pd.to_datetime(datetime.now().date()).tz_localize('Asia/Kolkata')
            start_date = today + pd.Timedelta(days=days_start)
            # make the window inclusive of the final day (up to 23:59:59)
            end_date = today + pd.Timedelta(days=days_end) + pd.Timedelta(days=1) - pd.Timedelta(seconds=1)
            filtered_df = filtered_df[(filtered_df['timestamp'] >= start_date) & (filtered_df['timestamp'] <= end_date)]
        if month_filter and month_filter != "All":
            selected_date = pd.to_datetime(month_filter, format="%B %Y")
            filtered_df = filtered_df[
                (filtered_df['timestamp'].dt.year == selected_date.year) &
                (filtered_df['timestamp'].dt.month == selected_date.month)
            ]

        if filtered_df.empty:
            return "No data after applying filters.", pd.DataFrame(), None, '<p>No device cards available.</p>', None, None, None, None, None, None, None, None, last_modified_state

        # Generate table for preview
        preview_df = filtered_df[['device_id', 'log_type', 'status', 'timestamp', 'usage_hours', 'downtime', 'amc_date']].head(5)
        preview_html = preview_df.to_html(index=False, classes='table table-striped', border=0)
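        # The 'table table-striped' classes hook into the .table CSS rules defined
        # on the gr.Blocks container below, so the preview renders as a styled
        # HTML table inside the dashboard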

        # Run tasks concurrently
        with ThreadPoolExecutor(max_workers=6) as executor:
            future_summary = executor.submit(summarize_logs, filtered_df)
            future_anomalies = executor.submit(detect_anomalies, filtered_df)
            future_amc = executor.submit(check_amc_reminders, filtered_df, datetime.now())
            future_insights = executor.submit(generate_dashboard_insights, filtered_df)
            future_usage_chart = executor.submit(create_usage_chart, filtered_df)
            future_downtime_chart = executor.submit(create_downtime_chart, filtered_df)
            future_daily_log_chart = executor.submit(create_daily_log_trends_chart, filtered_df)
            future_weekly_uptime_chart = executor.submit(create_weekly_uptime_chart, filtered_df)
            future_device_cards = executor.submit(generate_device_cards, filtered_df)
            future_reports = executor.submit(create_salesforce_reports, filtered_df)

            summary = f"Step 1: Summary Report\n{future_summary.result()}"
            anomalies, anomalies_df = future_anomalies.result()
            anomalies = f"Anomaly Detection\n{anomalies}"
            amc_reminders, reminders_df = future_amc.result()
            amc_reminders = f"AMC Reminders\n{amc_reminders}"
            insights = f"Dashboard Insights (AI)\n{future_insights.result()}"
            usage_chart = future_usage_chart.result()
            downtime_chart = future_downtime_chart.result()
            daily_log_chart = future_daily_log_chart.result()
            weekly_uptime_chart = future_weekly_uptime_chart.result()
            # Build the anomaly chart from the anomalies actually detected; the
            # original submitted an empty pd.DataFrame(), so this chart was always None
            anomaly_alerts_chart = create_anomaly_alerts_chart(anomalies_df)
            device_cards = future_device_cards.result()

        save_to_salesforce(filtered_df, reminders_df)
        pdf_file = generate_pdf_content(summary, preview_df, anomalies, amc_reminders, insights, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, filtered_df, month_filter)

        elapsed_time = time.time() - start_time
        logging.info(f"Processing completed in {elapsed_time:.2f} seconds")
        if elapsed_time > 10:
            logging.warning(f"Processing time exceeded 10 seconds: {elapsed_time:.2f} seconds")

        return (summary, preview_html, usage_chart, device_cards, daily_log_chart, weekly_uptime_chart, anomaly_alerts_chart, downtime_chart, anomalies, amc_reminders, insights, pdf_file, current_modified_time)
    except Exception as e:
        logging.error(f"Failed to process file: {str(e)}")
        return f"Error: {str(e)}", pd.DataFrame(), None, '<p>Error processing data.</p>', None, None, None, None, None, None, None, None, last_modified_state

# Update filters
def update_filters(file_obj):
    if not file_obj:
        return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')
    try:
        with open(file_obj.name, 'rb') as f:
            csv_content = f.read().decode('utf-8')
        df = pd.read_csv(io.StringIO(csv_content))
        df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')

        lab_site_options = ['All'] + [site for site in df['lab_site'].dropna().astype(str).unique().tolist() if site.strip()] if 'lab_site' in df.columns else ['All']
        equipment_type_options = ['All'] + [equip for equip in df['equipment_type'].dropna().astype(str).unique().tolist() if equip.strip()] if 'equipment_type' in df.columns else ['All']
        month_options = ['All'] + sorted(df['timestamp'].dt.strftime('%B %Y').dropna().unique().tolist()) if 'timestamp' in df.columns else ['All']

        return gr.update(choices=lab_site_options, value='All'), gr.update(choices=equipment_type_options, value='All'), gr.update(choices=month_options, value='All')
    except Exception as e:
        # The handler body is elided in the diff; logging and falling back to the
        # defaults matches the pattern used elsewhere in this file
        logging.error(f"Failed to update filters: {str(e)}")
        return gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All'), gr.update(choices=['All'], value='All')

# Build the Gradio interface (reconstructed opening; the first CSS rules are elided in the diff)
with gr.Blocks(css="""
    /* (earlier rules for .dashboard-container, .dashboard-title, and .dashboard-section are elided) */
    .dashboard-section h3 {font-size: 18px; margin-bottom: 2px;}
    .dashboard-section p {margin: 1px 0; line-height: 1.2;}
    .dashboard-section ul {margin: 2px 0; padding-left: 20px;}
    .table {width: 100%; border-collapse: collapse;}
    .table th, .table td {border: 1px solid #ddd; padding: 8px; text-align: left;}
    .table th {background-color: #f2f2f2;}
    .table tr:nth-child(even) {background-color: #f9f9f9;}
""") as iface:
    gr.Markdown("<h1>LabOps Log Analyzer Dashboard (Hugging Face AI)</h1>")
    gr.Markdown("Upload a CSV file to analyze. Click 'Analyze' to refresh the dashboard with the latest data.")

    last_modified_state = gr.State(value=None)

    with gr.Row():
        with gr.Column(scale=1):
            file_input = gr.File(label="Upload Logs (CSV)", file_types=[".csv"])
            with gr.Group():
                gr.Markdown("### Filters")
                lab_site_filter = gr.Dropdown(label="Lab Site", choices=['All'], value='All', interactive=True)
                equipment_type_filter = gr.Dropdown(label="Equipment Type", choices=['All'], value='All', interactive=True)
                # NOTE: a stock gr.Slider holds a single number, while process_logs
                # expects a (start, end) pair, so the list default assumes a
                # range-style slider component
                date_range_filter = gr.Slider(label="Date Range (Days from Today)", minimum=-365, maximum=0, step=1, value=[-30, 0])
                month_filter = gr.Dropdown(label="Select Month for Report", choices=['All'], value='All', interactive=True)
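
                # Example of the day-offset semantics (illustrative): a value of
                # [-30, 0] asks process_logs for logs between 30 days ago and the
                # end of today (23:59:59) in Asia/Kolkata time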
            submit_button = gr.Button("Analyze", variant="primary")

        with gr.Column(scale=2):
            with gr.Group(elem_classes="dashboard-container"):
                gr.Markdown("<div class='dashboard-title'>Analysis Results</div>")
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Step 1: Summary Report")
                    summary_output = gr.Markdown()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Step 2: Log Preview")
                    preview_output = gr.HTML()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Device Cards")
                    device_cards_output = gr.HTML()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Charts")
                    with gr.Tab("Usage Hours per Device"):
                        usage_chart_output = gr.Plot()
                    # The tabs between here and "Weekly Uptime" are elided in the
                    # diff; they are reconstructed from the outputs wired below
                    with gr.Tab("Downtime per Device"):
                        downtime_chart_output = gr.Plot()
                    with gr.Tab("Daily Log Trends"):
                        daily_log_trends_output = gr.Plot()
                    with gr.Tab("Weekly Uptime"):
                        weekly_uptime_output = gr.Plot()
                    with gr.Tab("Anomaly Alerts"):
                        anomaly_alerts_output = gr.Plot()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Step 4: Anomaly Detection")
                    anomaly_output = gr.Markdown()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Step 5: AMC Reminders")
                    amc_output = gr.Markdown()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Step 6: Insights (AI)")
                    insights_output = gr.Markdown()
                with gr.Group(elem_classes="dashboard-section"):
                    gr.Markdown("### Export Report")
                    pdf_output = gr.File(label="Download Monthly Status Report as PDF")

    # (this hunk is elided in the diff; wiring update_filters to file uploads is
    # an assumption based on its three-update return signature)
    file_input.change(
        fn=update_filters,
        inputs=[file_input],
        outputs=[lab_site_filter, equipment_type_filter, month_filter]
    )

    # Wire the Analyze button; the outputs list must match the 13-item tuple
    # returned by process_logs (the original listed "anomaly_alerts_chart", a
    # processing variable, where the anomaly_alerts_output Plot belongs)
    submit_button.click(
        fn=process_logs,
        inputs=[file_input, lab_site_filter, equipment_type_filter, date_range_filter, month_filter, last_modified_state],
        outputs=[summary_output, preview_output, usage_chart_output, device_cards_output, daily_log_trends_output, weekly_uptime_output, anomaly_alerts_output, downtime_chart_output, anomaly_output, amc_output, insights_output, pdf_output, last_modified_state]
    )

logging.info("Gradio interface initialized successfully")