"""AI-enhanced data dashboard built with Dash.

The module defines:

* ``AIVisualizationEngine`` - rule-based chart recommendations, IQR outlier
  detection, simple descriptive insights and keyword-based query parsing.
* ``app.layout`` - an upload card, a query box, quick-analytics buttons,
  chart controls, the main graph and a data preview table.
* Four Dash callbacks that wire the layout to the engine.
"""
import base64
import io
import os
import re

import dash_bootstrap_components as dbc
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context
from scipy import stats

# Initialize Dash app
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server


class AIVisualizationEngine:
    """Rule-based helper that inspects a DataFrame and suggests visualizations.

    Attributes set in ``__init__``:
        df: the DataFrame being analysed.
        numeric_cols: names of columns with a numeric dtype.
        categorical_cols: names of columns with ``object`` dtype.
        datetime_cols: names of columns with ``datetime64`` dtype.
    """

    # Chart type -> words/phrases in a free-text query that hint at it.
    _CHART_KEYWORDS = {
        'scatter': ['scatter', 'correlation', 'relationship'],
        'bar': ['bar', 'compare', 'comparison', 'by'],
        'histogram': ['histogram', 'distribution', 'freq'],
        'line': ['line', 'trend', 'over time', 'timeline'],
        'box': ['box', 'quartile', 'median'],
        'pie': ['pie', 'proportion', 'percentage'],
        'heatmap': ['heatmap', 'correlation matrix'],
    }

    # Chart type -> colour scale name returned by get_smart_color_scheme.
    _COLOR_SCHEMES = {
        'scatter': 'Viridis',
        'line': 'Blues',
        'bar': 'Set3',
        'histogram': 'Plasma',
        'box': 'Set2',
        'pie': 'Pastel',
        'heatmap': 'RdBu_r',
    }

    def __init__(self, df):
        self.df = df
        self.numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        self.categorical_cols = df.select_dtypes(include=['object']).columns.tolist()
        self.datetime_cols = df.select_dtypes(include=['datetime64']).columns.tolist()

    def recommend_chart_type(self, x_col=None, y_col=None):
        """Return chart recommendations for the given column selection.

        A column counts as numeric when it is listed in ``numeric_cols``;
        anything else is treated as categorical.

        Args:
            x_col: name of the X column, or None.
            y_col: name of the Y column, or None.

        Returns:
            A list of dicts with keys ``type``, ``confidence`` and ``reason``.
            Callers use the first entry as the preferred chart. The list is
            empty when ``x_col`` is not given.
        """
        recommendations = []

        if x_col and y_col:
            x_type = 'numeric' if x_col in self.numeric_cols else 'categorical'
            y_type = 'numeric' if y_col in self.numeric_cols else 'categorical'

            if x_type == 'numeric' and y_type == 'numeric':
                recommendations = [
                    {'type': 'scatter', 'confidence': 0.9,
                     'reason': 'Both variables are numeric - scatter plot shows correlation'},
                    {'type': 'line', 'confidence': 0.7,
                     'reason': 'Line chart good for trends if X is ordered'},
                ]
            elif x_type == 'categorical' and y_type == 'numeric':
                recommendations = [
                    {'type': 'bar', 'confidence': 0.9,
                     'reason': 'Categorical vs numeric - bar chart shows comparisons'},
                    {'type': 'box', 'confidence': 0.8,
                     'reason': 'Box plot shows distribution across categories'},
                ]
            elif x_type == 'numeric' and y_type == 'categorical':
                # Previously this combination produced no recommendation at
                # all, leaving the dashboard without a chart.
                recommendations = [
                    {'type': 'bar', 'confidence': 0.7,
                     'reason': 'Numeric vs categorical - horizontal bar chart shows comparisons'},
                ]
            else:
                recommendations = [
                    {'type': 'bar', 'confidence': 0.8,
                     'reason': 'Count relationships between categories'},
                ]
        elif x_col and not y_col:
            if x_col in self.numeric_cols:
                recommendations = [
                    {'type': 'histogram', 'confidence': 0.9,
                     'reason': 'Single numeric variable - histogram shows distribution'},
                    {'type': 'box', 'confidence': 0.7,
                     'reason': 'Box plot shows statistical summary'},
                ]
            else:
                recommendations = [
                    {'type': 'pie', 'confidence': 0.8,
                     'reason': 'Categorical variable - pie chart shows proportions'},
                    {'type': 'bar', 'confidence': 0.9,
                     'reason': 'Bar chart shows category frequencies'},
                ]

        return recommendations

    def detect_outliers(self, column):
        """Return index labels of rows outside 1.5 * IQR for ``column``.

        Args:
            column: column name to inspect.

        Returns:
            A list of index labels; empty when ``column`` is not numeric.
        """
        if column not in self.numeric_cols:
            return []

        values = self.df[column]
        q1 = values.quantile(0.25)
        q3 = values.quantile(0.75)
        iqr = q3 - q1
        lower_bound = q1 - 1.5 * iqr
        upper_bound = q3 + 1.5 * iqr

        outliers = self.df[(values < lower_bound) | (values > upper_bound)]
        return outliers.index.tolist()

    def generate_insights(self, x_col, y_col=None):
        """Build human-readable observations about one or two columns.

        For a numeric ``x_col`` this reports mean/median, flags a possible
        skew when they differ by more than half a standard deviation, and
        counts IQR outliers. When both columns are numeric it also reports a
        strong/moderate (|r| > 0.7) or weak (|r| < 0.3) correlation.

        Returns:
            A list of strings; may be empty.
        """
        insights = []

        if x_col in self.numeric_cols:
            mean_val = self.df[x_col].mean()
            median_val = self.df[x_col].median()
            std_val = self.df[x_col].std()

            insights.append(f"📊 {x_col}: Mean = {mean_val:.2f}, Median = {median_val:.2f}")

            if abs(mean_val - median_val) > std_val * 0.5:
                insights.append(f"⚠️ {x_col} distribution appears skewed")

            outliers = self.detect_outliers(x_col)
            if outliers:
                insights.append(f"🎯 Found {len(outliers)} potential outliers in {x_col}")

        if y_col and x_col in self.numeric_cols and y_col in self.numeric_cols:
            correlation = self.df[x_col].corr(self.df[y_col])
            # A NaN correlation fails both comparisons and adds nothing.
            if abs(correlation) > 0.7:
                strength = "strong" if abs(correlation) > 0.8 else "moderate"
                direction = "positive" if correlation > 0 else "negative"
                insights.append(
                    f"🔗 {strength.title()} {direction} correlation "
                    f"({correlation:.3f}) between {x_col} and {y_col}"
                )
            elif abs(correlation) < 0.3:
                insights.append(
                    f"📈 Weak correlation ({correlation:.3f}) between {x_col} and {y_col}"
                )

        return insights

    @staticmethod
    def _mentions(keyword, text):
        """Return True when ``keyword`` starts at a word boundary in ``text``.

        Matching only the start of a word keeps prefix keywords such as
        'freq' working while stopping 'by' from matching inside 'ruby'.
        """
        return re.search(r'\b' + re.escape(keyword), text) is not None

    def parse_natural_language_query(self, query):
        """Extract a chart type and column names from a free-text request.

        Chart type resolution order:
          1. an explicit chart name ('scatter', 'bar', ...), in table order;
          2. otherwise the longest matching hint keyword, so that
             'correlation matrix' selects the heatmap instead of being
             shadowed by the shorter 'correlation' hint.

        Args:
            query: the user's text; None is treated as an empty string.

        Returns:
            A dict with ``chart_type`` (str or None), ``columns`` (column
            names whose lower-cased name, or that name with underscores
            replaced by spaces, occurs in the query) and ``query`` (the
            normalised text).
        """
        query = (query or "").lower().strip()

        suggested_chart = next(
            (chart for chart in self._CHART_KEYWORDS if self._mentions(chart, query)),
            None,
        )
        if suggested_chart is None:
            hints = sorted(
                (
                    (keyword, chart)
                    for chart, keywords in self._CHART_KEYWORDS.items()
                    for keyword in keywords
                ),
                key=lambda pair: len(pair[0]),
                reverse=True,  # stable sort: ties keep table order
            )
            suggested_chart = next(
                (chart for keyword, chart in hints if self._mentions(keyword, query)),
                None,
            )

        mentioned_cols = []
        for col in self.df.columns:
            # Column labels are not guaranteed to be strings (e.g. Excel
            # sheets with numeric headers), so coerce before lower-casing.
            name = str(col).lower()
            if name and (name in query or name.replace('_', ' ') in query):
                mentioned_cols.append(col)

        return {
            'chart_type': suggested_chart,
            'columns': mentioned_cols,
            'query': query,
        }

    def get_smart_color_scheme(self, chart_type, column=None):
        """Return the colour scale name for ``chart_type``.

        ``column`` is accepted for interface compatibility but is not used.
        Unknown chart types fall back to 'Viridis'.
        """
        return self._COLOR_SCHEMES.get(chart_type, 'Viridis')


# App layout with AI features
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Enhanced Data Dashboard", className="text-center mb-4"),
            html.P("Upload data and let AI help you create intelligent visualizations!",
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),

    dbc.Row([
        # Left column: upload, query box and quick analytics
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📁 Data Upload", className="card-title"),
                    dcc.Upload(
                        id='upload-data',
                        children=html.Div([
                            'Drag and Drop or ',
                            html.A('Select Files')
                        ]),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px'
                        },
                        multiple=False,
                        accept='.csv,.xlsx,.txt'
                    ),
                    html.Div(id='upload-status', className="mt-2"),

                    html.Hr(),

                    html.H4("🎯 AI Query Interface", className="card-title"),
                    dbc.InputGroup([
                        dbc.Input(
                            id="ai-query",
                            placeholder="Try: 'Show scatter plot of age vs salary' or 'Bar chart of departments'",
                            type="text",
                        ),
                        dbc.Button(
                            "🤖 AI Create",
                            id="ai-create-btn",
                            color="primary",
                            n_clicks=0
                        )
                    ]),
                    html.Div(id="ai-recommendations", className="mt-3"),

                    html.Hr(),

                    html.H4("📊 Quick Analytics", className="card-title"),
                    dbc.ButtonGroup([
                        dbc.Button("Summary Stats", id="stats-btn", size="sm"),
                        dbc.Button("AI Insights", id="insights-btn", size="sm"),
                        dbc.Button("Outliers", id="outliers-btn", size="sm"),
                    ], className="w-100"),
                    html.Div(id="quick-analytics", className="mt-3")
                ])
            ])
        ], width=4),

        # Right column: chart controls, graph and data preview
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("📈 AI-Enhanced Visualizations", className="card-title"),

                    # Chart controls
                    dbc.Row([
                        dbc.Col([
                            html.Label("Chart Type:", className="form-label"),
                            dcc.Dropdown(
                                id='chart-type',
                                options=[
                                    {'label': 'AI Recommend', 'value': 'ai_recommend'},
                                    {'label': 'Scatter Plot', 'value': 'scatter'},
                                    {'label': 'Line Chart', 'value': 'line'},
                                    {'label': 'Bar Chart', 'value': 'bar'},
                                    {'label': 'Histogram', 'value': 'histogram'},
                                    {'label': 'Box Plot', 'value': 'box'},
                                    {'label': 'Heatmap', 'value': 'heatmap'},
                                    {'label': 'Pie Chart', 'value': 'pie'}
                                ],
                                value='ai_recommend',
                                className="mb-2"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Color By:", className="form-label"),
                            dcc.Dropdown(
                                id='color-column',
                                placeholder="AI will suggest colors",
                                className="mb-2"
                            )
                        ], width=6)
                    ]),

                    dbc.Row([
                        dbc.Col([
                            html.Label("X-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='x-column',
                                placeholder="Select X column"
                            )
                        ], width=6),
                        dbc.Col([
                            html.Label("Y-Axis:", className="form-label"),
                            dcc.Dropdown(
                                id='y-column',
                                placeholder="Select Y column"
                            )
                        ], width=6)
                    ], className="mb-3"),

                    dcc.Graph(id='main-graph', style={'height': '500px'}),

                    html.Div(id='ai-insights-display', className="mt-3")
                ])
            ]),

            dbc.Card([
                dbc.CardBody([
                    html.H4("🔍 Data Explorer", className="card-title"),
                    html.Div(id='data-table')
                ])
            ], className="mt-3")
        ], width=8)
    ], className="mt-4"),

    # Store components
    dcc.Store(id='stored-data'),
    dcc.Store(id='ai-engine'),
], fluid=True)


def _message_figure(text, font=None):
    """Return an empty figure showing ``text`` centred in the plot area.

    Paper coordinates are used so the message is centred regardless of the
    default axis ranges of an empty figure.
    """
    annotation = dict(text=text, x=0.5, y=0.5, xref="paper", yref="paper",
                      showarrow=False)
    if font is not None:
        annotation["font"] = font
    fig = go.Figure()
    fig.add_annotation(**annotation)
    return fig


def _insights_alert(insights):
    """Render a list of insight strings as an info alert with a bullet list."""
    return dbc.Alert([
        html.H6("🤖 AI Insights"),
        html.Ul([html.Li(insight) for insight in insights])
    ], color="info")


def parse_contents(contents, filename):
    """Parse the payload delivered by ``dcc.Upload`` into a DataFrame.

    The file type is chosen from the lower-cased filename extension:
    ``.csv`` is read as CSV, ``.txt`` as delimited text with the separator
    sniffed by pandas, and ``.xls``/``.xlsx`` as Excel.

    Args:
        contents: string of the form ``"<content type>,<base64 payload>"``.
        filename: original name of the uploaded file.

    Returns:
        ``(df, None)`` on success, or ``(None, message)`` when the type is
        unsupported or decoding/parsing fails.
    """
    try:
        # Only the first comma separates the header from the payload.
        _content_type, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)
        extension = os.path.splitext(filename or '')[1].lower()

        if extension == '.csv':
            # utf-8-sig also strips a leading byte-order mark if present.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        elif extension == '.txt':
            # The upload widget accepts .txt; let pandas detect the delimiter.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')),
                             sep=None, engine='python')
        elif extension in ('.xls', '.xlsx'):
            df = pd.read_excel(io.BytesIO(decoded))
        else:
            return None, "Unsupported file type"

        return df, None
    except Exception as e:  # boundary: report any parse failure to the UI
        return None, f"Error processing file: {str(e)}"


@app.callback(
    [Output('stored-data', 'data'),
     Output('upload-status', 'children'),
     Output('data-table', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value')],
    [Input('upload-data', 'contents')],
    [State('upload-data', 'filename')]
)
def update_data(contents, filename):
    """Store an uploaded file and populate the preview and column dropdowns.

    Returns, in output order: the records for ``stored-data``, a status
    alert, a preview table of the first 10 rows, the option lists for the
    X / Y / colour dropdowns, and default X and Y selections (the first two
    numeric columns when any exist, otherwise the first two columns).
    """
    if contents is None:
        return None, "", "", [], [], [], None, None

    df, error = parse_contents(contents, filename)

    if error:
        return None, dbc.Alert(error, color="danger"), "", [], [], [], None, None

    # Create data table preview
    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    # AI analysis of dataset
    ai_engine = AIVisualizationEngine(df)

    success_msg = dbc.Alert([
        html.H6("✅ File uploaded successfully! 🤖 AI Ready"),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        html.P(f"📊 Numeric: {len(ai_engine.numeric_cols)}, "
               f"📝 Categorical: {len(ai_engine.categorical_cols)}")
    ], color="success")

    # Create column options for dropdowns
    all_columns = [{'label': str(col), 'value': col} for col in df.columns]

    # AI recommends default columns
    if ai_engine.numeric_cols:
        default_x = ai_engine.numeric_cols[0]
        default_y = ai_engine.numeric_cols[1] if len(ai_engine.numeric_cols) > 1 else None
    else:
        default_x = all_columns[0]['value'] if all_columns else None
        default_y = all_columns[1]['value'] if len(all_columns) > 1 else None

    return (df.to_dict('records'), success_msg, table,
            all_columns, all_columns, all_columns, default_x, default_y)


@app.callback(
    [Output('chart-type', 'value'),
     Output('ai-recommendations', 'children')],
    [Input('ai-create-btn', 'n_clicks')],
    [State('ai-query', 'value'),
     State('stored-data', 'data')]
)
def handle_ai_query(n_clicks, query, data):
    """Translate the free-text query into a chart type and a summary alert.

    Returns ``('ai_recommend', "")`` until the button has been clicked with
    both a query and stored data available. Otherwise returns the detected
    chart type (or 'ai_recommend' when none was detected) and an info alert
    listing what was recognised.
    """
    if not n_clicks or not query or not data:
        return 'ai_recommend', ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # Parse the natural language query
    parsed = ai_engine.parse_natural_language_query(query)

    # One paragraph per message; html.Strong is used for emphasis because
    # markdown asterisks inside a plain string are shown literally.
    recommendations = []
    if parsed['chart_type']:
        recommendations.append(html.P(
            ["🎯 Suggested chart type: ", html.Strong(parsed['chart_type'].title())],
            className="mb-1",
        ))

    if parsed['columns']:
        detected = ', '.join(str(col) for col in parsed['columns'])
        recommendations.append(html.P(f"📊 Detected columns: {detected}", className="mb-1"))

    if not recommendations:
        recommendations.append(html.P(
            "🤖 Try queries like: 'scatter age salary', 'bar chart departments', 'histogram of scores'",
            className="mb-1",
        ))

    return parsed['chart_type'] or 'ai_recommend', dbc.Alert(recommendations, color="info")


@app.callback(
    Output('quick-analytics', 'children'),
    [Input('stats-btn', 'n_clicks'),
     Input('insights-btn', 'n_clicks'),
     Input('outliers-btn', 'n_clicks')],
    [State('stored-data', 'data'),
     State('x-column', 'value'),
     State('y-column', 'value')]
)
def quick_analytics(stats_clicks, insights_clicks, outliers_clicks, data, x_col, y_col):
    """Render the panel for whichever quick-analytics button was pressed.

    The click counts are only triggers; the pressed button is read from
    ``callback_context``. Returns an empty string when there is no data or
    no triggering button.
    """
    if not data:
        return ""

    ctx = callback_context
    if not ctx.triggered:
        return ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)
    button_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if button_id == 'stats-btn':
        summary = df.describe()
        return dbc.Alert([
            html.H6("📊 Summary Statistics"),
            dbc.Table.from_dataframe(summary.reset_index(), size='sm')
        ], color="light")

    if button_id == 'insights-btn':
        if not x_col:
            return dbc.Alert("Select columns to get AI insights", color="warning")
        insights = ai_engine.generate_insights(x_col, y_col)
        if not insights:
            # generate_insights yields nothing for non-numeric columns.
            return dbc.Alert("No insights available for the selected columns",
                             color="warning")
        return _insights_alert(insights)

    if button_id == 'outliers-btn':
        if not x_col or x_col not in ai_engine.numeric_cols:
            return dbc.Alert("Select a numeric column to detect outliers", color="warning")
        outliers = ai_engine.detect_outliers(x_col)
        if not outliers:
            return dbc.Alert(f"✅ No outliers detected in {x_col}", color="success")
        outlier_data = df.loc[outliers, [x_col]]
        return dbc.Alert([
            html.H6(f"🎯 Outliers in {x_col}"),
            dbc.Table.from_dataframe(outlier_data.reset_index(), size='sm')
        ], color="warning")

    return ""


def _build_figure(df, ai_engine, chart_type, x_col, y_col, color_col):
    """Create the figure for ``chart_type`` and any accompanying insights.

    Returns ``(figure, insights_component)``; the second item is an empty
    string for every chart type except a fully specified scatter plot.
    """
    insights_display = ""

    # Get AI-powered color scheme
    color_scheme = ai_engine.get_smart_color_scheme(chart_type, color_col)

    if chart_type == 'scatter':
        if x_col and y_col:
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col,
                             title=f"🤖 AI Scatter Plot: {y_col} vs {x_col}",
                             color_continuous_scale=color_scheme)
            insights_display = _insights_alert(ai_engine.generate_insights(x_col, y_col))
        else:
            fig = _message_figure("Select both X and Y columns for scatter plot")

    elif chart_type == 'line':
        if x_col and y_col:
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"🤖 AI Line Chart: {y_col} vs {x_col}",
                          color_discrete_sequence=px.colors.qualitative.Set3)
        else:
            fig = _message_figure("Select both X and Y columns for line chart")

    elif chart_type == 'bar':
        if x_col and y_col:
            fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                         title=f"🤖 AI Bar Chart: {y_col} by {x_col}",
                         color_discrete_sequence=px.colors.qualitative.Set3)
        elif x_col:
            # Pass arrays with explicit labels rather than relying on the
            # column names produced by value_counts().reset_index(), which
            # differ between pandas versions.
            value_counts = df[x_col].value_counts()
            fig = px.bar(x=value_counts.index, y=value_counts.values,
                         labels={'x': str(x_col), 'y': 'count'},
                         title=f"🤖 AI Value Counts: {x_col}",
                         color_discrete_sequence=px.colors.qualitative.Set3)
        else:
            fig = _message_figure("Select at least X column for bar chart")

    elif chart_type == 'histogram':
        if x_col:
            fig = px.histogram(df, x=x_col, color=color_col,
                               title=f"🤖 AI Histogram: {x_col}",
                               color_discrete_sequence=px.colors.qualitative.Pastel)
            # Add statistical annotations
            if x_col in ai_engine.numeric_cols:
                mean_val = df[x_col].mean()
                # Test for NaN rather than truthiness so a mean of exactly 0
                # is still drawn.
                if pd.notna(mean_val):
                    fig.add_vline(x=mean_val, line_dash="dash", line_color="red",
                                  annotation_text=f"Mean: {mean_val:.2f}")
        else:
            fig = _message_figure("Select X column for histogram")

    elif chart_type == 'box':
        if y_col:
            # Group by the colour column when given; otherwise fall back to
            # a non-numeric X column so "Y by X" selections are honoured.
            group_col = color_col
            if (group_col is None and x_col and x_col != y_col
                    and x_col not in ai_engine.numeric_cols):
                group_col = x_col
            fig = px.box(df, x=group_col, y=y_col,
                         title=f"🤖 AI Box Plot: {y_col}" + (f" by {group_col}" if group_col else ""),
                         color_discrete_sequence=px.colors.qualitative.Set2)
        elif x_col:
            fig = px.box(df, y=x_col,
                         title=f"🤖 AI Box Plot: {x_col}",
                         color_discrete_sequence=px.colors.qualitative.Set2)
        else:
            fig = _message_figure("Select a column for box plot")

    elif chart_type == 'heatmap':
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            fig = px.imshow(corr_matrix, text_auto=True, aspect="auto",
                            title="🤖 AI Correlation Heatmap",
                            color_continuous_scale='RdBu_r')
        else:
            fig = _message_figure("Need at least 2 numeric columns for heatmap")

    elif chart_type == 'pie':
        if x_col:
            value_counts = df[x_col].value_counts()
            fig = px.pie(values=value_counts.values, names=value_counts.index,
                         title=f"🤖 AI Pie Chart: {x_col}",
                         color_discrete_sequence=px.colors.qualitative.Pastel)
        else:
            fig = _message_figure("Select X column for pie chart")

    else:
        fig = _message_figure("🤖 AI is analyzing... Select chart type or use AI Query")

    return fig, insights_display


@app.callback(
    [Output('main-graph', 'figure'),
     Output('ai-insights-display', 'children')],
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Redraw the main graph whenever the data or a chart control changes.

    Returns ``(figure, insights_component)``. A placeholder figure is
    returned when no data is stored or when no column is selected (except
    for the heatmap, which uses every numeric column). Any exception raised
    while building the chart is shown as a red message in the figure.
    """
    if not data:
        fig = _message_figure("Upload data to see AI-powered visualizations",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig, ""

    df = pd.DataFrame(data)
    ai_engine = AIVisualizationEngine(df)

    # Selections can be stale after a new upload (the colour dropdown is not
    # reset), so ignore any column that is not in the current frame.
    x_col, y_col, color_col = (
        col if col is not None and col in df.columns else None
        for col in (x_col, y_col, color_col)
    )

    # AI recommendation system
    if chart_type == 'ai_recommend' and x_col:
        recommendations = ai_engine.recommend_chart_type(x_col, y_col)
        if recommendations:
            chart_type = recommendations[0]['type']

    # Handle cases where columns aren't selected yet. The heatmap does not
    # depend on the X/Y selection, so it is exempt.
    if chart_type != 'heatmap' and not x_col and not y_col:
        fig = _message_figure("Select columns or use AI Query to create visualization",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig, ""

    try:
        fig, insights_display = _build_figure(
            df, ai_engine, chart_type, x_col, y_col, color_col
        )

        # Apply AI styling enhancements
        fig.update_layout(
            template="plotly_white",
            height=500,
            font=dict(size=12),
            title_font_size=16,
        )
        return fig, insights_display

    except Exception as e:  # boundary: surface chart errors in the UI
        fig = _message_figure(f"AI Error: {str(e)}", font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig, ""


if __name__ == '__main__':
    # NOTE(review): debug mode on 0.0.0.0 exposes the development debugger
    # to the network. The default is unchanged; set DASH_DEBUG=0 to turn it
    # off for any non-local run.
    debug_enabled = os.environ.get('DASH_DEBUG', '1') != '0'
    app.run(host='0.0.0.0', port=8051, debug=debug_enabled)