import gradio as gr
import plotly.express as px
import pandas as pd

# Store datasets in a dictionary (acts as our "database")
datasets = {}

# Load default dataset
default_df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
datasets['Gapminder'] = default_df


# Function to load different built-in datasets
def load_builtin_dataset(dataset_name):
    """Load various built-in datasets"""
    try:
        if dataset_name == "Gapminder":
            df = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/gapminder_unfiltered.csv')
            datasets[dataset_name] = df
            return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
        elif dataset_name == "Iris":
            df = px.data.iris()
            datasets[dataset_name] = df
            return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
        elif dataset_name == "Tips":
            df = px.data.tips()
            datasets[dataset_name] = df
            return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
        elif dataset_name == "Stock Data":
            df = px.data.stocks()
            # Reshape from wide to long format for better analysis
            df = df.melt(id_vars='date', var_name='company', value_name='stock_price')
            df['date'] = pd.to_datetime(df['date'])
            datasets[dataset_name] = df
            return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
        elif dataset_name == "Wind Data":
            df = px.data.wind()
            datasets[dataset_name] = df
            return df, f"✅ Loaded {dataset_name} dataset: {len(df)} rows, {len(df.columns)} columns"
    except Exception as e:
        return None, f"❌ Error loading {dataset_name}: {str(e)}"


# Function to handle file uploads
def upload_dataset(file, custom_name):
    """Handle CSV/Excel file uploads"""
    if file is None:
        return None, "Please upload a file", gr.update(choices=list(datasets.keys()))

    try:
        # Determine file type and read accordingly
        if file.name.endswith('.csv'):
            df = pd.read_csv(file.name)
        elif file.name.endswith(('.xlsx', '.xls')):
            df = pd.read_excel(file.name)
        else:
            return None, "❌ Unsupported file format. Please upload CSV or Excel.", gr.update()

        # Store with custom name or filename
        dataset_name = custom_name if custom_name else file.name.split('/')[-1].split('.')[0]
        datasets[dataset_name] = df

        return df, f"✅ Uploaded {dataset_name}: {len(df)} rows, {len(df.columns)} columns", gr.update(choices=list(datasets.keys()), value=dataset_name)
    except Exception as e:
        return None, f"❌ Error reading file: {str(e)}", gr.update()


# Function to switch between datasets
def switch_dataset(dataset_name):
    """Switch to a different dataset"""
    if dataset_name in datasets:
        df = datasets[dataset_name]

        # Get column info
        numeric_cols = df.select_dtypes(include=['number']).columns.tolist()
        categorical_cols = df.select_dtypes(include=['object', 'category']).columns.tolist()
        all_cols = df.columns.tolist()

        info = f"""
### Dataset: {dataset_name}
- **Rows**: {len(df)}
- **Columns**: {len(df.columns)}
- **Numeric columns**: {', '.join(numeric_cols[:5])}{'...' if len(numeric_cols) > 5 else ''}
- **Categorical columns**: {', '.join(categorical_cols[:5])}{'...' if len(categorical_cols) > 5 else ''}
"""

        return (
            df.head(10),  # Preview
            info,  # Info
            gr.update(choices=all_cols, value=all_cols[0] if all_cols else None),  # X-axis
            gr.update(choices=numeric_cols, value=numeric_cols[0] if numeric_cols else None),  # Y-axis
            gr.update(choices=[""] + categorical_cols, value=""),  # Color
            gr.update(choices=[""] + numeric_cols, value=""),  # Size
            df  # Store current df
        )
    else:
        return None, "Dataset not found", gr.update(), gr.update(), gr.update(), gr.update(), None


# Dynamic plotting function
def create_plot(df, plot_type, x_col, y_col, color_col, size_col):
    """Create different plot types based on current dataset and selections"""
    if df is None or x_col is None:
        return None

    try:
        # Handle empty string selections
        color_col = None if color_col == "" else color_col
        size_col = None if size_col == "" else size_col

        # Create different plot types
        if plot_type == "Scatter":
            fig = px.scatter(df, x=x_col, y=y_col, color=color_col, size=size_col,
                             title=f"Scatter: {x_col} vs {y_col}")
        elif plot_type == "Line":
            fig = px.line(df, x=x_col, y=y_col, color=color_col,
                          title=f"Line: {x_col} vs {y_col}")
        elif plot_type == "Bar":
            # For bar charts, aggregate if necessary
            if color_col:
                fig = px.bar(df, x=x_col, y=y_col, color=color_col,
                             title=f"Bar: {x_col} vs {y_col}")
            else:
                fig = px.bar(df, x=x_col, y=y_col, title=f"Bar: {x_col} vs {y_col}")
        elif plot_type == "Histogram":
            fig = px.histogram(df, x=x_col, color=color_col, title=f"Histogram of {x_col}")
        elif plot_type == "Box":
            fig = px.box(df, x=x_col, y=y_col, color=color_col,
                         title=f"Box plot: {x_col} vs {y_col}")
        elif plot_type == "Heatmap":
            # Create correlation matrix for numeric columns
            numeric_df = df.select_dtypes(include=['number'])
            if len(numeric_df.columns) > 1:
                corr = numeric_df.corr()
                fig = px.imshow(corr, text_auto=True, title="Correlation Heatmap")
            else:
                return None

        fig.update_layout(height=500)
        return fig
    except Exception as e:
        print(f"Plot error: {e}")
        return None


# Create the Gradio interface
with gr.Blocks(title="Dynamic Dataset Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
    # 📊 Dynamic Dataset Explorer
    Upload your own data or explore built-in datasets with automatic visualization
    """)

    # Hidden state to store current dataframe
    current_df = gr.State(value=default_df)

    with gr.Tabs():
        # Tab 1: Dataset Management
        with gr.TabItem("📁 Dataset Management"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Load Built-in Dataset")
                    builtin_choice = gr.Dropdown(
                        choices=["Gapminder", "Iris", "Tips", "Stock Data", "Wind Data"],
                        value="Gapminder",
                        label="Select Dataset"
                    )
                    load_builtin_btn = gr.Button("Load Dataset", variant="primary")

                    gr.Markdown("### Upload Custom Dataset")
                    file_upload = gr.File(label="Upload CSV or Excel", file_types=[".csv", ".xlsx", ".xls"])
                    custom_name = gr.Textbox(label="Dataset Name (optional)", placeholder="My Dataset")
                    upload_btn = gr.Button("Upload", variant="primary")

                    gr.Markdown("### Active Datasets")
                    dataset_selector = gr.Dropdown(
                        choices=list(datasets.keys()),
                        value="Gapminder",
                        label="Switch Dataset"
                    )

                with gr.Column(scale=2):
                    status_msg = gr.Markdown("Ready to load data")
                    data_info = gr.Markdown()
                    data_preview = gr.Dataframe(label="Data Preview (first 10 rows)")

        # Tab 2: Dynamic Visualization
        with gr.TabItem("📈 Visualization"):
            with gr.Row():
                with gr.Column(scale=1):
                    plot_type = gr.Radio(
                        choices=["Scatter", "Line", "Bar", "Histogram", "Box", "Heatmap"],
                        value="Scatter",
                        label="Plot Type"
                    )
                    x_axis = gr.Dropdown(label="X Axis", choices=[], interactive=True)
                    y_axis = gr.Dropdown(label="Y Axis", choices=[], interactive=True)
                    color_by = gr.Dropdown(label="Color By (optional)", choices=[], interactive=True)
                    size_by = gr.Dropdown(label="Size By (optional)", choices=[], interactive=True)
                    plot_btn = gr.Button("Create Plot", variant="primary")

                with gr.Column(scale=2):
                    plot_output = gr.Plot(label="Visualization")

        # Tab 3: Data Analysis
        with gr.TabItem("🔍 Data Analysis"):
            with gr.Row():
                with gr.Column():
                    analysis_type = gr.Radio(
                        choices=["Summary Statistics", "Missing Values", "Data Types", "Unique Values"],
                        value="Summary Statistics",
                        label="Analysis Type"
                    )
                    analyze_btn = gr.Button("Analyze", variant="primary")

                with gr.Column():
                    analysis_output = gr.Markdown()

    def analyze_data(df, analysis_type):
        """Perform different types of data analysis"""
        if df is None:
            return "No dataset loaded"

        if analysis_type == "Summary Statistics":
            return f"```\n{df.describe().to_string()}\n```"
        elif analysis_type == "Missing Values":
            missing = df.isnull().sum()
            return f"```\n{missing[missing > 0].to_string()}\n```" if missing.any() else "No missing values!"
        elif analysis_type == "Data Types":
            return f"```\n{df.dtypes.to_string()}\n```"
        elif analysis_type == "Unique Values":
            unique_counts = df.nunique()
            return f"```\n{unique_counts.to_string()}\n```"

    # Event handlers
    load_builtin_btn.click(
        load_builtin_dataset,
        inputs=[builtin_choice],
        outputs=[data_preview, status_msg]
    ).then(
        lambda: gr.update(choices=list(datasets.keys())),
        outputs=[dataset_selector]
    )

    upload_btn.click(
        upload_dataset,
        inputs=[file_upload, custom_name],
        outputs=[data_preview, status_msg, dataset_selector]
    )

    # When dataset is switched, update everything
    dataset_selector.change(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

    # Create plot based on selections
    plot_btn.click(
        create_plot,
        inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
        outputs=[plot_output]
    )

    # Auto-update plot when parameters change
    for component in [plot_type, x_axis, y_axis, color_by, size_by]:
        component.change(
            create_plot,
            inputs=[current_df, plot_type, x_axis, y_axis, color_by, size_by],
            outputs=[plot_output]
        )

    # Analysis
    analyze_btn.click(
        analyze_data,
        inputs=[current_df, analysis_type],
        outputs=[analysis_output]
    )

    # Load initial dataset
    demo.load(
        switch_dataset,
        inputs=[dataset_selector],
        outputs=[data_preview, data_info, x_axis, y_axis, color_by, size_by, current_df]
    )

if __name__ == "__main__":
    demo.launch(share=False, debug=True)