# Install required packages
#!pip install gradio pandas numpy plotly scikit-learn matplotlib seaborn openpyxl
import io
from typing import Optional

import gradio as gr
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler


class DataVisualizationPlatform:
    """Hold one uploaded dataset and build summaries and Plotly figures from it.

    Attributes are plain instance state shared by all UI callbacks:
    ``df`` is the frame as loaded, ``processed_df`` is the imputed and
    standardised copy produced by :meth:`preprocess_data`, and ``scaler`` is
    the ``StandardScaler`` that is refitted on every preprocessing run.
    """

    # dtypes treated as numeric for imputation, scaling and correlation.
    _NUMERIC_DTYPES = ["float64", "int64"]

    def __init__(self):
        self.df = None
        self.processed_df = None
        self.scaler = StandardScaler()

    def _numeric_columns(self):
        """Return the labels of the columns of ``self.df`` with a numeric dtype."""
        return self.df.select_dtypes(include=self._NUMERIC_DTYPES).columns

    @staticmethod
    def _load_result(status: str, columns: Optional[list] = None) -> dict:
        """Build the dict shape returned by :meth:`load_and_update_columns`."""
        columns = [] if columns is None else columns
        return {
            "status": status,
            "columns": columns,
            # "None" is the sentinel choice for "no colour column".
            "columns_with_none": ["None"] + columns,
        }

    def load_and_update_columns(self, file) -> dict:
        """Load a CSV or Excel file and return a status plus column choices.

        Args:
            file: Either an object exposing the path as ``.name`` or a plain
                path string. ``None`` (nothing uploaded) is reported as an
                error rather than raising.

        Returns:
            A dict with keys ``"status"`` (message for the UI), ``"columns"``
            (list of column labels) and ``"columns_with_none"`` (the same list
            prefixed with ``"None"``). On failure both lists carry no columns.
        """
        if file is None:
            return self._load_result("Error loading data: no file was uploaded")

        try:
            path = getattr(file, "name", file)
            # Case-insensitive so "DATA.CSV" is not handed to the Excel reader.
            if str(path).lower().endswith(".csv"):
                loaded = pd.read_csv(path)
            else:
                loaded = pd.read_excel(path)
        except Exception as e:
            # UI boundary: any reader failure becomes a status message.
            return self._load_result(f"Error loading data: {str(e)}")

        self.df = loaded
        # A previously preprocessed frame belongs to the old dataset.
        self.processed_df = None
        return self._load_result(
            f"Data loaded successfully. Shape: {self.df.shape}",
            list(self.df.columns),
        )

    def preprocess_data(self) -> str:
        """Impute and standardise numeric columns into ``self.processed_df``.

        Missing numeric values are replaced by the column mean, then the
        numeric columns are scaled with ``self.scaler``. Non-numeric columns
        are copied unchanged. ``self.processed_df`` is only replaced when the
        whole run succeeds.

        Returns:
            A status message for the UI.
        """
        if self.df is None:
            return "Please load data first"

        try:
            processed = self.df.copy()
            numeric_cols = self._numeric_columns()
            # StandardScaler rejects a zero-feature input, so a dataset with
            # no numeric columns is simply copied.
            if not numeric_cols.empty:
                numeric_part = processed[numeric_cols]
                filled = numeric_part.fillna(numeric_part.mean())
                processed[numeric_cols] = self.scaler.fit_transform(filled)
            self.processed_df = processed
            return "Data preprocessing completed successfully"
        except Exception as e:
            return f"Error during preprocessing: {str(e)}"

    def generate_summary(self) -> str:
        """Return a text report: shape, ``DataFrame.info`` and ``describe``."""
        if self.df is None:
            return "Please load data first"

        try:
            # DataFrame.info writes to a stream instead of returning a string.
            buffer = io.StringIO()
            self.df.info(buf=buffer)

            # Joined explicitly so source indentation does not leak into the
            # text shown in the UI.
            sections = [
                "Dataset Summary:",
                "----------------",
                f"Shape: {self.df.shape}",
                "",
                "Data Info:",
                buffer.getvalue(),
                "",
                "Basic Statistics:",
                self.df.describe().to_string(),
            ]
            return "\n".join(sections)
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def create_correlation_heatmap(self):
        """Return a heatmap figure of the numeric columns' correlation matrix.

        Returns ``None`` when no data is loaded, when there are no numeric
        columns, or when building the figure fails (the error is printed).
        """
        if self.df is None:
            return None

        try:
            numeric_cols = self._numeric_columns()
            if numeric_cols.empty:
                return None

            corr = self.df[numeric_cols].corr()
            return px.imshow(
                corr,
                labels=dict(color="Correlation"),
                title="Correlation Heatmap",
            )
        except Exception as e:
            print(f"Error creating heatmap: {str(e)}")
            return None

    def create_scatter_plot(self, x_col, y_col, color_col):
        """Return a scatter figure of ``y_col`` against ``x_col``.

        Args:
            x_col: Column label for the x axis.
            y_col: Column label for the y axis.
            color_col: Column label used for colouring, or the sentinel
                string ``"None"`` for no colouring.

        Returns ``None`` when no data is loaded, when either axis is unset,
        or when building the figure fails (the error is printed).
        """
        if self.df is None or not x_col or not y_col:
            return None

        try:
            if color_col == "None":
                color_col = None

            return px.scatter(
                self.df,
                x=x_col,
                y=y_col,
                color=color_col,
                title=f"Scatter Plot: {x_col} vs {y_col}",
            )
        except Exception as e:
            print(f"Error creating scatter plot: {str(e)}")
            return None

    def create_time_series(self, date_col, value_col):
        """Return a line figure of ``value_col`` plotted over ``date_col``.

        The columns are passed to Plotly as stored; no date parsing or
        sorting is done here. Returns ``None`` when no data is loaded, when
        either column is unset, or when building the figure fails (the error
        is printed).
        """
        if self.df is None or not date_col or not value_col:
            return None

        try:
            return px.line(
                self.df,
                x=date_col,
                y=value_col,
                title=f"Time Series: {value_col} over {date_col}",
            )
        except Exception as e:
            print(f"Error creating time series: {str(e)}")
            return None


def create_visualization_interface():
    """Build the Gradio Blocks UI and wire it to a fresh platform instance.

    Returns:
        The ``gr.Blocks`` interface, not yet launched.
    """
    dvp = DataVisualizationPlatform()

    with gr.Blocks(title="Data Visualization Platform") as interface:
        gr.Markdown("# Interactive Data Visualization Platform")

        with gr.Tab("Data Loading & Preprocessing"):
            file_input = gr.File(label="Upload CSV or Excel file")
            load_btn = gr.Button("Load Data")
            load_output = gr.Textbox(label="Loading Status")

            preprocess_btn = gr.Button("Preprocess Data")
            preprocess_output = gr.Textbox(label="Preprocessing Status")

            summary_btn = gr.Button("Generate Summary")
            summary_output = gr.Textbox(label="Data Summary", lines=10)

        with gr.Tab("Visualizations"):
            with gr.Row():
                with gr.Column():
                    # Correlation Heatmap
                    heatmap_btn = gr.Button("Generate Correlation Heatmap")
                    heatmap_plot = gr.Plot(label="Correlation Heatmap")

                with gr.Column():
                    # Scatter Plot
                    x_col = gr.Dropdown(label="X Column", choices=[])
                    y_col = gr.Dropdown(label="Y Column", choices=[])
                    color_col = gr.Dropdown(
                        label="Color Column (optional)", choices=["None"]
                    )
                    scatter_btn = gr.Button("Generate Scatter Plot")
                    scatter_plot = gr.Plot(label="Scatter Plot")

            with gr.Row():
                # Time Series
                date_col = gr.Dropdown(label="Date Column", choices=[])
                value_col = gr.Dropdown(label="Value Column", choices=[])
                timeseries_btn = gr.Button("Generate Time Series")
                timeseries_plot = gr.Plot(label="Time Series Plot")

        def update_interface(file):
            """Load ``file`` and refresh the status box and column dropdowns."""
            result = dvp.load_and_update_columns(file)
            columns = result["columns"]
            # Selections are reset along with the choices so a dropdown never
            # keeps pointing at a column of the previously loaded file.
            return {
                load_output: result["status"],
                x_col: gr.Dropdown(choices=columns, value=None),
                y_col: gr.Dropdown(choices=columns, value=None),
                color_col: gr.Dropdown(
                    choices=result["columns_with_none"], value="None"
                ),
                date_col: gr.Dropdown(choices=columns, value=None),
                value_col: gr.Dropdown(choices=columns, value=None),
            }

        # Event handlers
        load_btn.click(
            fn=update_interface,
            inputs=[file_input],
            outputs=[load_output, x_col, y_col, color_col, date_col, value_col],
        )
        preprocess_btn.click(fn=dvp.preprocess_data, outputs=preprocess_output)
        summary_btn.click(fn=dvp.generate_summary, outputs=summary_output)
        heatmap_btn.click(fn=dvp.create_correlation_heatmap, outputs=heatmap_plot)
        scatter_btn.click(
            fn=dvp.create_scatter_plot,
            inputs=[x_col, y_col, color_col],
            outputs=scatter_plot,
        )
        timeseries_btn.click(
            fn=dvp.create_time_series,
            inputs=[date_col, value_col],
            outputs=timeseries_plot,
        )

    return interface


# `demo` stays a module-level name; the server is only started when the file
# is executed directly, so importing the module does not launch it.
demo = create_visualization_interface()

if __name__ == "__main__":
    demo.launch()