import os
import base64
import io
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, html, dcc, Input, Output, State, callback_context
import dash_bootstrap_components as dbc
from typing import Optional
from dotenv import load_dotenv
from pydantic import Field, SecretStr
import numpy as np

# Langchain imports - simplified without embeddings
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain_core.prompts import PromptTemplate

# Load environment variables
load_dotenv()

# Simplified - no OpenRouter for now
AI_AVAILABLE = False
openrouter_model = None

# Initialize Dash app
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server

# Global variables
vector_store = None

# Continent assigned to each synthetic Gapminder country; any country not
# listed here is labelled 'Oceania'.
_CONTINENT_BY_COUNTRY = {
    'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia',
    'Germany': 'Europe', 'UK': 'Europe', 'France': 'Europe',
    'USA': 'Americas', 'Brazil': 'Americas', 'Canada': 'Americas',
}


def _build_gapminder():
    """Return a randomly generated Gapminder-like frame (10 countries, 2000-2020)."""
    countries = ['USA', 'China', 'India', 'Germany', 'UK', 'France',
                 'Japan', 'Brazil', 'Canada', 'Australia']
    years = list(range(2000, 2021))

    rows = []
    for country in countries:
        # Per-country baselines, drawn once and then perturbed per year.
        base_gdp = np.random.uniform(20000, 80000)
        base_life_exp = np.random.uniform(70, 85)
        base_pop = np.random.uniform(10000000, 100000000)
        continent = _CONTINENT_BY_COUNTRY.get(country, 'Oceania')

        for year in years:
            elapsed = year - 2000
            # Key order fixes the order of the random draws; keep it stable so
            # the seeded output does not change.
            rows.append({
                'country': country,
                'year': year,
                'gdpPercap': base_gdp * (1 + np.random.uniform(-0.1, 0.15)) * (elapsed * 0.02 + 1),
                'lifeExp': base_life_exp + np.random.uniform(-2, 3) + elapsed * 0.1,
                'pop': base_pop * (1.01 + np.random.uniform(-0.005, 0.015)) ** elapsed,
                'continent': continent,
            })
    return pd.DataFrame(rows)


def _build_iris():
    """Return the scikit-learn Iris data, or random stand-in data without scikit-learn."""
    try:
        # Imported inside the try so a missing scikit-learn reaches the
        # fallback below instead of failing the whole module import.
        from sklearn.datasets import load_iris

        iris = load_iris()
        frame = pd.DataFrame(iris.data, columns=iris.feature_names)
        frame['species'] = [iris.target_names[i] for i in iris.target]
        return frame
    except ImportError:
        # Fallback if sklearn not available
        return pd.DataFrame({
            'sepal_length': np.random.normal(5.8, 0.8, 150),
            'sepal_width': np.random.normal(3.0, 0.4, 150),
            'petal_length': np.random.normal(3.8, 1.8, 150),
            'petal_width': np.random.normal(1.2, 0.8, 150),
            'species': ['setosa'] * 50 + ['versicolor'] * 50 + ['virginica'] * 50,
        })


def _build_tips():
    """Return a randomly generated 200-row restaurant-tips frame."""
    return pd.DataFrame({
        'total_bill': np.random.uniform(10, 50, 200),
        'tip': np.random.uniform(1, 10, 200),
        'sex': np.random.choice(['Male', 'Female'], 200),
        'smoker': np.random.choice(['Yes', 'No'], 200),
        'day': np.random.choice(['Thur', 'Fri', 'Sat', 'Sun'], 200),
        'time': np.random.choice(['Lunch', 'Dinner'], 200),
        'size': np.random.choice([1, 2, 3, 4, 5, 6], 200),
    })


def _build_stock_data():
    """Return a daily random-walk price series for 2020-01-01 through 2023-12-31."""
    stock_price = 100
    rows = []
    for date in pd.date_range('2020-01-01', '2023-12-31', freq='D'):
        daily_return = np.random.normal(0.001, 0.02)
        stock_price *= (1 + daily_return)
        rows.append({
            'date': date,
            'price': stock_price,
            'volume': np.random.randint(1000000, 5000000),
            # abs() keeps 'high' at or above and 'low' at or below the price.
            'high': stock_price * (1 + abs(np.random.normal(0, 0.01))),
            'low': stock_price * (1 - abs(np.random.normal(0, 0.01))),
            'open': stock_price * (1 + np.random.normal(0, 0.005)),
        })
    return pd.DataFrame(rows)


def _build_wind_data():
    """Return randomly generated hourly weather rows for 12 months x 28 days."""
    rows = []
    for month in range(1, 13):
        for day in range(1, 29):
            for hour in range(24):
                rows.append({
                    'month': month,
                    'day': day,
                    'hour': hour,
                    'wind_speed': abs(np.random.normal(15, 8)) + 5 * np.sin(hour / 24 * 2 * np.pi),
                    'temperature': np.random.normal(20, 15) + 10 * np.cos(month / 12 * 2 * np.pi),
                    'humidity': np.random.uniform(30, 90),
                    'pressure': np.random.normal(1013, 20),
                })
    return pd.DataFrame(rows)


# Built-in datasets
def create_builtin_datasets():
    """Create the built-in sample datasets.

    Seeds NumPy's global random generator with 42 and then builds the
    datasets in a fixed order, so the generated values are reproducible for a
    given environment.

    Returns:
        dict mapping dataset name ('Gapminder', 'Iris', 'Tips', 'Stock Data',
        'Wind Data') to a pandas DataFrame.
    """
    np.random.seed(42)

    # Build order matters: every builder draws from the same global RNG.
    datasets = {}
    datasets['Gapminder'] = _build_gapminder()
    datasets['Iris'] = _build_iris()
    datasets['Tips'] = _build_tips()
    datasets['Stock Data'] = _build_stock_data()
    datasets['Wind Data'] = _build_wind_data()
    return datasets


# Initialize built-in datasets
builtin_datasets = create_builtin_datasets()
# Shown in the AI tab whenever no dataset is available.
_NO_DATASET_TEXT = "No dataset loaded. Please load a dataset in the Dataset Management tab first."


def _no_dataset_alert():
    """Return the warning alert displayed when no dataset is loaded."""
    return dbc.Alert(_NO_DATASET_TEXT, color="warning", className="mb-3")


def _full_width_card(children):
    """Wrap *children* in the Row > Col(12) > Card > CardBody shell used by the single-card tabs."""
    return dbc.Row([
        dbc.Col([
            dbc.Card([
                dbc.CardBody(children)
            ])
        ], width=12)
    ], className="mt-4")


def _dataset_management_tab():
    """Build the tab holding dataset loading controls, the preview and quick analytics."""
    controls = dbc.CardBody([
        html.H4("Load Built-in Dataset", className="card-title"),
        dcc.Dropdown(
            id="builtin-choice",
            options=[
                {"label": "Gapminder", "value": "Gapminder"},
                {"label": "Iris", "value": "Iris"},
                {"label": "Tips", "value": "Tips"},
                {"label": "Stock Data", "value": "Stock Data"},
                {"label": "Wind Data", "value": "Wind Data"}
            ],
            value="Gapminder",
            className="mb-2"
        ),
        dbc.Button("Load Dataset", id="load-builtin-btn", color="primary", className="mb-3"),
        html.Hr(),
        html.H4("Upload Custom Dataset", className="card-title"),
        dcc.Upload(
            id='file-upload',
            children=html.Div([
                'Drag and Drop or ',
                html.A('Select CSV/Excel Files')
            ]),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px'
            },
            multiple=False,
            accept='.csv,.xlsx,.xls'
        ),
        dbc.Input(
            id="custom-name",
            placeholder="Dataset Name (optional)",
            type="text",
            className="mb-2"
        ),
        # NOTE(review): no callback in this file listens to 'upload-btn'; the
        # upload is processed as soon as 'file-upload' receives contents.
        dbc.Button("Upload", id="upload-btn", color="primary", className="mb-3"),
        html.Hr(),
        html.H4("Active Datasets", className="card-title"),
        dcc.Dropdown(
            id="dataset-selector",
            options=[{"label": "Gapminder", "value": "Gapminder"}],
            value="Gapminder",
            className="mb-2"
        ),
        html.Hr(),
        html.Div(id="status-msg", children=[
            dbc.Alert("Ready to load data", color="info")
        ]),
        html.Div(id="data-info")
    ])

    preview = dbc.CardBody([
        html.H4("Data Preview (First 10 rows)", className="card-title"),
        html.Div(id="data-preview", className="mb-4"),
        html.H4("Quick Analytics", className="card-title"),
        html.Div(id="auto-analytics")
    ])

    return dbc.Tab(label="📁 Dataset Management", tab_id="dataset-management", children=[
        dbc.Row([
            dbc.Col([dbc.Card([controls])], width=4),
            dbc.Col([dbc.Card([preview])], width=8)
        ], className="mt-4")
    ])


def _ai_assistant_tab():
    """Build the tab with the question box and the AI response area."""
    return dbc.Tab(label="🤖 AI Assistant", tab_id="ai-assistant", children=[
        _full_width_card([
            html.H4("🤖 AI Assistant", className="card-title"),
            html.Div(id="ai-dataset-info", className="mb-3", children=[
                _no_dataset_alert()
            ]),
            dbc.InputGroup([
                dbc.Input(
                    id="ai-question",
                    placeholder="Ask questions about your data...",
                    type="text",
                    style={"fontSize": "14px"}
                ),
                dbc.Button(
                    "Ask AI",
                    id="ask-button",
                    color="primary",
                    n_clicks=0
                )
            ]),
            html.Div(id="ai-response", className="mt-3")
        ])
    ])


def _visualizations_tab():
    """Build the tab with chart controls and the main graph."""
    chart_type_row = dbc.Row([
        dbc.Col([
            html.Label("Chart Type:", className="form-label"),
            dcc.Dropdown(
                id='chart-type',
                options=[
                    {'label': 'Scatter Plot', 'value': 'scatter'},
                    {'label': 'Line Chart', 'value': 'line'},
                    {'label': 'Bar Chart', 'value': 'bar'},
                    {'label': 'Histogram', 'value': 'histogram'},
                    {'label': 'Box Plot', 'value': 'box'},
                    {'label': 'Heatmap', 'value': 'heatmap'},
                    {'label': 'Pie Chart', 'value': 'pie'}
                ],
                value='scatter',
                className="mb-2"
            )
        ], width=6),
        dbc.Col([
            html.Label("Color By:", className="form-label"),
            dcc.Dropdown(
                id='color-column',
                placeholder="Select column (optional)",
                className="mb-2"
            )
        ], width=6)
    ])

    axis_row = dbc.Row([
        dbc.Col([
            html.Label("X-Axis:", className="form-label"),
            dcc.Dropdown(
                id='x-column',
                placeholder="Select X column"
            )
        ], width=6),
        dbc.Col([
            html.Label("Y-Axis:", className="form-label"),
            dcc.Dropdown(
                id='y-column',
                placeholder="Select Y column"
            )
        ], width=6)
    ], className="mb-3")

    return dbc.Tab(label="📈 Visualizations", tab_id="visualizations", children=[
        _full_width_card([
            html.H4("📈 Visualizations", className="card-title"),
            # Chart controls
            chart_type_row,
            axis_row,
            dcc.Graph(id='main-graph', style={'height': '500px'}),
        ])
    ])


def _data_explorer_tab():
    """Build the tab that shows the stored data as a table."""
    return dbc.Tab(label="🔍 Data Explorer", tab_id="data-explorer", children=[
        _full_width_card([
            html.H4("🔍 Data Explorer", className="card-title"),
            html.Div(id='data-table')
        ])
    ])


# App layout
app.layout = dbc.Container([
    dbc.Row([
        dbc.Col([
            html.H1("🤖 AI-Powered Data Analytics", className="text-center mb-4"),
            html.P("Upload data, ask questions, and get AI-powered insights!",
                   className="text-center text-muted"),
            html.Hr(),
        ], width=12)
    ]),

    # Tabbed interface
    dbc.Tabs([
        _dataset_management_tab(),
        _ai_assistant_tab(),
        _visualizations_tab(),
        _data_explorer_tab(),
    ], id="main-tabs", active_tab="dataset-management"),

    # Store components
    dcc.Store(id='stored-data'),
    dcc.Store(id='data-context'),
    dcc.Store(id='dataset-registry', data={"Gapminder": "builtin"}),
    dcc.Store(id='current-dataset-name', data="Gapminder")
], fluid=True)


def create_vector_store(df):
    """Simplified - just return True for now"""
    return True


# Import AI assistant module
from ai_assistant import get_ai_response


def _strong_correlations(df, numeric_cols, threshold=0.5, limit=5):
    """Return up to *limit* (col_a, col_b, r) tuples with |r| > threshold, strongest first."""
    corr_matrix = df[numeric_cols].corr()
    names = list(corr_matrix.columns)

    pairs = []
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            corr_val = corr_matrix.iloc[i, j]
            # NaN correlations compare False here and are skipped.
            if abs(corr_val) > threshold:
                pairs.append((names[i], names[j], corr_val))

    # Sort so the pairs shown really are the strongest ones, not just the
    # first ones encountered.
    pairs.sort(key=lambda pair: abs(pair[2]), reverse=True)
    return pairs[:limit]


def create_auto_analytics(df):
    """Create the automatic analytics display for a dataset.

    Args:
        df: DataFrame to summarise.

    Returns:
        List of Dash components: summary statistics (when numeric columns
        exist), a missing-data report, a column-type breakdown and, when at
        least two numeric columns exist, the strongest correlations.
    """
    analytics_components = []
    numeric_cols = df.select_dtypes(include=['number']).columns

    # Summary Statistics
    if len(numeric_cols) > 0:
        stats = df[numeric_cols].describe()
        analytics_components.extend([
            html.H6("📊 Summary Statistics", className="mt-2"),
            dbc.Table.from_dataframe(
                stats.reset_index().round(2),
                size='sm',
                striped=True,
                hover=True
            )
        ])

    # Missing Data Analysis
    missing_data = df.isnull().sum()
    missing_data = missing_data[missing_data > 0]
    if not missing_data.empty:
        analytics_components.extend([
            html.H6("⚠️ Missing Data", className="mt-3"),
            dbc.Alert([
                html.Pre(missing_data.to_string())
            ], color="warning")
        ])
    else:
        analytics_components.extend([
            html.H6("✅ Data Quality", className="mt-3"),
            dbc.Alert("No missing values found!", color="success")
        ])

    # Data Types Analysis
    analytics_components.extend([
        html.H6("🔍 Data Types", className="mt-3"),
        dbc.Alert([
            html.P(f"📈 Numeric columns: {len(numeric_cols)}"),
            html.P(f"📝 Text columns: {len(df.select_dtypes(include=['object']).columns)}"),
            html.P(f"📅 DateTime columns: {len(df.select_dtypes(include=['datetime64']).columns)}"),
            html.P(f"🔢 Boolean columns: {len(df.select_dtypes(include=['bool']).columns)}")
        ], color="light")
    ])

    # Correlation Analysis for numeric columns
    if len(numeric_cols) > 1:
        corr_pairs = _strong_correlations(df, numeric_cols)
        if corr_pairs:
            analytics_components.extend([
                html.H6("🔗 Strong Correlations (>0.5)", className="mt-3"),
                dbc.Alert([
                    html.P(f"{pair[0]} ↔ {pair[1]}: {pair[2]:.3f}")
                    for pair in corr_pairs
                ], color="info")
            ])

    return analytics_components


def parse_contents(contents, filename):
    """Parse uploaded file contents into a DataFrame.

    Args:
        contents: Upload payload of the form '<header>,<base64 data>'.
        filename: Name of the uploaded file; its extension selects the parser
            ('.csv' for CSV, '.xls'/'.xlsx' for Excel, case-insensitive).

    Returns:
        (DataFrame, None) on success, or (None, error message) when the file
        type is unsupported or the payload cannot be decoded or parsed.
    """
    # Decide by the real extension: a substring test would treat a file such
    # as 'csv_export.xlsx' as CSV and reject 'DATA.CSV'.
    extension = os.path.splitext(filename or "")[1].lower()
    if extension not in ('.csv', '.xls', '.xlsx'):
        return None, "Unsupported file type"

    try:
        # maxsplit=1 keeps the whole payload together after the header.
        _content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        if extension == '.csv':
            # utf-8-sig also accepts plain UTF-8 and drops a leading BOM.
            df = pd.read_csv(io.StringIO(decoded.decode('utf-8-sig')))
        else:
            df = pd.read_excel(io.BytesIO(decoded))
        return df, None
    except Exception as e:
        # Callback boundary: report any decoding/parsing failure to the user.
        return None, f"Error processing file: {str(e)}"


def _registry_options(registry):
    """Return dropdown options listing every dataset name in the registry."""
    return [{"label": name, "value": name} for name in registry]


def _default_axes(all_columns, numeric_columns):
    """Pick default X/Y column names, preferring numeric columns."""
    if numeric_columns:
        default_x = numeric_columns[0]['value']
    elif all_columns:
        default_x = all_columns[0]['value']
    else:
        default_x = None

    if len(numeric_columns) > 1:
        default_y = numeric_columns[1]['value']
    elif numeric_columns:
        default_y = numeric_columns[0]['value']
    elif len(all_columns) > 1:
        default_y = all_columns[1]['value']
    else:
        default_y = None

    return default_x, default_y


def _dataset_outputs(df, dataset_name, registry, status_text, info_title):
    """Build the 13 outputs of ``manage_datasets`` for a successfully loaded dataset."""
    # Create vector store for AI
    vector_success = create_vector_store(df)
    ai_status = "🤖 AI Ready" if vector_success else "⚠️ AI Limited"
    status_msg = dbc.Alert(f"{status_text} {ai_status}", color="success")

    # Create data table preview
    table = dbc.Table.from_dataframe(
        df.head(10),
        striped=True,
        bordered=True,
        hover=True,
        size='sm'
    )

    numeric_names = df.select_dtypes(include=['number']).columns
    categorical_count = len(df.select_dtypes(include=['object']).columns)
    data_info = dbc.Alert([
        html.H6(info_title),
        html.P(f"Shape: {df.shape[0]} rows × {df.shape[1]} columns"),
        html.P(f"Columns: {', '.join(df.columns.tolist())}"),
        html.P(f"Data types: {len(numeric_names)} numeric, {categorical_count} categorical")
    ], color="light")

    # Create automatic analytics
    auto_analytics = create_auto_analytics(df)

    # Create column options for dropdowns
    all_columns = [{'label': col, 'value': col} for col in df.columns]
    numeric_columns = [{'label': col, 'value': col} for col in numeric_names]
    default_x, default_y = _default_axes(all_columns, numeric_columns)

    return (
        df.to_dict('records'), status_msg, table, data_info, auto_analytics,
        all_columns, all_columns, all_columns, default_x, default_y,
        registry, _registry_options(registry), dataset_name,
    )


def _empty_outputs(registry, status_msg):
    """Build the 13 outputs of ``manage_datasets`` when no dataset is available."""
    return (
        None, status_msg, "", "", "",
        [], [], [], None, None,
        registry, _registry_options(registry), None,
    )


# Dataset management callbacks
@app.callback(
    [Output('stored-data', 'data'),
     Output('status-msg', 'children'),
     Output('data-preview', 'children'),
     Output('data-info', 'children'),
     Output('auto-analytics', 'children'),
     Output('x-column', 'options'),
     Output('y-column', 'options'),
     Output('color-column', 'options'),
     Output('x-column', 'value'),
     Output('y-column', 'value'),
     Output('dataset-registry', 'data'),
     Output('dataset-selector', 'options'),
     Output('current-dataset-name', 'data')],
    [Input('load-builtin-btn', 'n_clicks'),
     Input('file-upload', 'contents'),
     Input('dataset-selector', 'value')],
    [State('builtin-choice', 'value'),
     State('file-upload', 'filename'),
     State('custom-name', 'value'),
     State('dataset-registry', 'data')]
)
def manage_datasets(builtin_clicks, file_contents, selected_dataset,
                    builtin_choice, filename, custom_name, registry):
    """Handle dataset loading and switching.

    Dispatches on which input fired: nothing (first render) loads Gapminder,
    the load button loads the chosen built-in dataset, an upload parses the
    file, and the selector switches datasets.

    Returns:
        A 13-item tuple matching the callback outputs: records, status alert,
        preview table, dataset info, analytics, three column-option lists,
        default X and Y columns, registry, selector options and the current
        dataset name. When nothing could be loaded, the data, defaults and
        name are None and the option lists are empty.
    """
    ctx = callback_context

    # Initialize defaults
    registry = registry or {"Gapminder": "builtin"}

    if not ctx.triggered:
        # Initial load - load Gapminder dataset
        return _dataset_outputs(
            builtin_datasets["Gapminder"], "Gapminder", registry,
            "✅ Gapminder dataset loaded!", "Dataset Information:",
        )

    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'load-builtin-btn' and builtin_clicks:
        # Load built-in dataset
        if builtin_choice in builtin_datasets:
            registry[builtin_choice] = "builtin"
            return _dataset_outputs(
                builtin_datasets[builtin_choice], builtin_choice, registry,
                f"✅ {builtin_choice} dataset loaded!",
                f"{builtin_choice} Dataset Information:",
            )

    elif trigger_id == 'file-upload' and file_contents:
        # Upload custom dataset
        df, error = parse_contents(file_contents, filename)
        if error:
            return _empty_outputs(registry, dbc.Alert(error, color="danger"))

        # Determine dataset name; splitext drops only the final extension so
        # 'my.data.csv' becomes 'my.data' rather than 'my'.
        dataset_name = custom_name if custom_name else os.path.splitext(filename)[0]
        registry[dataset_name] = "custom"
        return _dataset_outputs(
            df, dataset_name, registry,
            f"✅ {dataset_name} uploaded successfully!",
            f"{dataset_name} Dataset Information:",
        )

    elif trigger_id == 'dataset-selector' and selected_dataset:
        # Switch between datasets. Uploaded frames are not kept server-side,
        # so only built-in datasets can be re-opened; anything else falls
        # back to Gapminder instead of leaving the frame undefined.
        if selected_dataset not in registry or selected_dataset not in builtin_datasets:
            selected_dataset = "Gapminder"
        return _dataset_outputs(
            builtin_datasets[selected_dataset], selected_dataset, registry,
            f"✅ Switched to {selected_dataset} dataset!",
            f"{selected_dataset} Dataset Information:",
        )

    # Default fallback
    return _empty_outputs(registry, "")


# Updated callback for data table (now shared across tabs)
@app.callback(
    Output('data-table', 'children'),
    [Input('stored-data', 'data')]
)
def update_data_table(data):
    """Update data table for data explorer tab.

    Returns a table of the first 20 stored rows, or a muted placeholder
    paragraph when no data is stored.
    """
    if not data:
        return html.P("No data loaded", className="text-muted")

    df = pd.DataFrame(data)
    return dbc.Table.from_dataframe(
        df.head(20),
        striped=True,
        bordered=True,
        hover=True,
        size='sm',
        responsive=True
    )


# Callback to update AI assistant tab with current dataset info
@app.callback(
    Output('ai-dataset-info', 'children'),
    [Input('stored-data', 'data'),
     Input('current-dataset-name', 'data')]
)
def update_ai_dataset_info(data, dataset_name):
    """Update AI assistant tab with current dataset information.

    Returns a success alert summarising the dataset (name, shape, up to five
    column names, type counts), or the no-dataset warning when either the
    data or the name is missing.
    """
    if not data or not dataset_name:
        return _no_dataset_alert()

    df = pd.DataFrame(data)
    numeric_count = len(df.select_dtypes(include=['number']).columns)
    categorical_count = len(df.select_dtypes(include=['object']).columns)
    shown_columns = ', '.join(df.columns.tolist()[:5])
    ellipsis = '...' if len(df.columns) > 5 else ''

    return dbc.Alert([
        html.H6(f"📊 Current Dataset: {dataset_name}"),
        html.P(f"Shape: {df.shape[0]:,} rows × {df.shape[1]} columns"),
        html.P(f"Columns: {shown_columns}{ellipsis}"),
        html.P(f"Data types: {numeric_count} numeric, {categorical_count} categorical"),
        html.Small("✨ AI is ready to answer questions about this data!", className="text-muted")
    ], color="success", className="mb-3")


@app.callback(
    Output('ai-response', 'children'),
    [Input('ask-button', 'n_clicks')],
    [State('ai-question', 'value'),
     State('stored-data', 'data'),
     State('current-dataset-name', 'data')]
)
def handle_ai_question(n_clicks, question, data, dataset_name):
    """Handle AI question.

    Returns an empty string until the button has been clicked with a question
    and stored data present, a warning when no dataset name is set, and
    otherwise the ``get_ai_response`` result rendered as Markdown.
    """
    if not n_clicks or not question or not data:
        return ""

    if not dataset_name:
        return dbc.Alert("Please load a dataset first in the Dataset Management tab.", color="warning")

    df = pd.DataFrame(data)
    response = get_ai_response(question, df)

    return dbc.Alert(
        dcc.Markdown(response),
        color="info"
    )


def _message_figure(text, **annotation_kwargs):
    """Return an empty figure showing *text* as a centred annotation."""
    fig = go.Figure()
    fig.add_annotation(text=text, x=0.5, y=0.5, showarrow=False, **annotation_kwargs)
    return fig


def _build_chart(df, chart_type, x_col, y_col, color_col):
    """Return the figure for *chart_type*, or a hint figure when required columns are missing."""
    if chart_type == 'scatter':
        if x_col and y_col:
            return px.scatter(df, x=x_col, y=y_col, color=color_col,
                              title=f"Scatter Plot: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for scatter plot")

    if chart_type == 'line':
        if x_col and y_col:
            return px.line(df, x=x_col, y=y_col, color=color_col,
                           title=f"Line Chart: {y_col} vs {x_col}")
        return _message_figure("Select both X and Y columns for line chart")

    if chart_type == 'bar':
        if x_col and y_col:
            return px.bar(df, x=x_col, y=y_col, color=color_col,
                          title=f"Bar Chart: {y_col} by {x_col}")
        if x_col:
            # Name the columns explicitly: the names produced by
            # value_counts().reset_index() differ between pandas versions.
            counts = df[x_col].value_counts()
            counts_frame = pd.DataFrame({x_col: counts.index, 'count': counts.values})
            return px.bar(counts_frame, x=x_col, y='count',
                          title=f"Value Counts: {x_col}")
        return _message_figure("Select at least X column for bar chart")

    if chart_type == 'histogram':
        if x_col:
            return px.histogram(df, x=x_col, color=color_col,
                                title=f"Histogram: {x_col}")
        return _message_figure("Select X column for histogram")

    if chart_type == 'box':
        if y_col:
            # The colour column doubles as the grouping axis for box plots.
            return px.box(df, x=color_col, y=y_col,
                          title=f"Box Plot: {y_col}" + (f" by {color_col}" if color_col else ""))
        if x_col:
            return px.box(df, y=x_col, title=f"Box Plot: {x_col}")
        return _message_figure("Select a column for box plot")

    if chart_type == 'heatmap':
        numeric_cols = df.select_dtypes(include=['number']).columns
        if len(numeric_cols) > 1:
            corr_matrix = df[numeric_cols].corr()
            return px.imshow(corr_matrix, text_auto=True, aspect="auto",
                             title="Correlation Heatmap",
                             color_continuous_scale='RdBu_r')
        return _message_figure("Need at least 2 numeric columns for heatmap")

    if chart_type == 'pie':
        if x_col:
            value_counts = df[x_col].value_counts()
            return px.pie(values=value_counts.values, names=value_counts.index,
                          title=f"Pie Chart: {x_col}")
        return _message_figure("Select X column for pie chart")

    return _message_figure("Select a chart type")


@app.callback(
    Output('main-graph', 'figure'),
    [Input('stored-data', 'data'),
     Input('chart-type', 'value'),
     Input('x-column', 'value'),
     Input('y-column', 'value'),
     Input('color-column', 'value')]
)
def update_main_graph(data, chart_type, x_col, y_col, color_col):
    """Update main visualization based on user selections.

    Returns a Plotly figure: the requested chart, a grey hint when data or
    columns are missing, or a red error annotation if chart creation raises.
    """
    if not data:
        fig = _message_figure("Upload data to see visualizations",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    df = pd.DataFrame(data)

    # Handle cases where columns aren't selected yet
    if not x_col and not y_col:
        fig = _message_figure("Select columns to create visualization",
                              font=dict(size=16, color="gray"))
        fig.update_layout(template="plotly_white")
        return fig

    try:
        # Create visualization based on chart type
        fig = _build_chart(df, chart_type, x_col, y_col, color_col)
        fig.update_layout(template="plotly_white", height=500)
        return fig
    except Exception as e:
        # Callback boundary: show the failure in the graph instead of raising.
        fig = _message_figure(f"Error creating chart: {str(e)}",
                              font=dict(color="red"))
        fig.update_layout(template="plotly_white")
        return fig


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860, debug=False)