# Install required packages (uncomment and run the next line in a notebook):
# !pip install gradio pandas numpy plotly scikit-learn matplotlib seaborn openpyxl

import gradio as gr
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import io

class DataVisualizationPlatform:
    """Hold one loaded dataset and build summaries and plotly figures from it.

    Attributes are plain instance state:
      * ``df`` - the DataFrame most recently loaded, or ``None``.
      * ``processed_df`` - imputed and scaled copy of ``df`` produced by
        ``preprocess_data``, or ``None`` until that has run for the current
        ``df``.
      * ``scaler`` - the ``StandardScaler`` used by ``preprocess_data``.
    """

    def __init__(self):
        self.df = None
        self.processed_df = None
        self.scaler = StandardScaler()

    def load_and_update_columns(self, file):
        """Load a CSV or Excel file and return its column choices.

        Args:
            file: Either a path string or an object exposing the path as
                ``.name``. ``None`` is reported as an error.

        Returns:
            A dict with ``"status"`` (human-readable message), ``"columns"``
            (list of column labels) and ``"columns_with_none"`` (the same
            list prefixed with ``"None"``). On failure the lists are empty
            apart from the ``"None"`` entry and the previously loaded data is
            left untouched.
        """
        try:
            if file is None:
                raise ValueError("no file was provided")

            # Accept a bare path as well as an upload wrapper with ``.name``.
            path = file if isinstance(file, str) else file.name

            # Compare case-insensitively so "DATA.CSV" is not sent to the
            # Excel reader.
            if path.lower().endswith('.csv'):
                loaded = pd.read_csv(path)
            else:
                loaded = pd.read_excel(path)
        except Exception as e:
            return {
                "status": f"Error loading data: {str(e)}",
                "columns": [],
                "columns_with_none": ["None"]
            }

        self.df = loaded
        # Anything preprocessed earlier belonged to the previous dataset.
        self.processed_df = None

        columns = list(loaded.columns)
        return {
            "status": f"Data loaded successfully. Shape: {loaded.shape}",
            "columns": columns,
            # Extra "None" entry lets the colour dropdown be left unset.
            "columns_with_none": ["None"] + columns
        }

    def preprocess_data(self):
        """Mean-impute and standardise the numeric columns of the loaded data.

        The result is stored in ``self.processed_df``; ``self.df`` is not
        modified. ``processed_df`` is only replaced once every step succeeded.

        Returns:
            A status string describing success or the error encountered.
        """
        if self.df is None:
            return "Please load data first"

        try:
            working = self.df.copy()
            numeric_cols = working.select_dtypes(include='number').columns

            if len(numeric_cols) == 0:
                # The scaler rejects zero-feature input, and there is nothing
                # to impute or scale anyway.
                self.processed_df = working
                return ("Data preprocessing completed successfully "
                        "(no numeric columns to scale)")

            # Handle missing values
            numeric_part = working[numeric_cols]
            working[numeric_cols] = numeric_part.fillna(numeric_part.mean())

            # Scale numeric features
            working[numeric_cols] = self.scaler.fit_transform(working[numeric_cols])

            self.processed_df = working
            return "Data preprocessing completed successfully"
        except Exception as e:
            return f"Error during preprocessing: {str(e)}"

    def generate_summary(self):
        """Return a text report: shape, ``DataFrame.info`` and ``describe``.

        Returns:
            The report as a single string, or a status message when no data is
            loaded or the report could not be built.
        """
        if self.df is None:
            return "Please load data first"

        try:
            buffer = io.StringIO()
            self.df.info(buf=buffer)

            # Joined line by line rather than written as an indented
            # triple-quoted f-string: the interpolated blocks span several
            # lines, so source indentation would only reach their first line.
            sections = [
                "Dataset Summary:",
                "----------------",
                f"Shape: {self.df.shape}",
                "",
                "Data Info:",
                buffer.getvalue().rstrip(),
                "",
                "Basic Statistics:",
                self.df.describe().to_string(),
            ]
            return "\n".join(sections)
        except Exception as e:
            return f"Error generating summary: {str(e)}"

    def create_correlation_heatmap(self):
        """Create a correlation heatmap over the numeric columns.

        Returns:
            A plotly figure, or ``None`` when no data is loaded, there are no
            numeric columns, or figure creation fails (the error is printed).
        """
        if self.df is None:
            return None

        try:
            numeric_df = self.df.select_dtypes(include='number')
            if numeric_df.shape[1] == 0:
                return None

            corr = numeric_df.corr()
            fig = px.imshow(corr,
                            labels=dict(color="Correlation"),
                            title="Correlation Heatmap")
            return fig
        except Exception as e:
            print(f"Error creating heatmap: {str(e)}")
            return None

    def create_scatter_plot(self, x_col, y_col, color_col):
        """Create an interactive scatter plot of ``y_col`` against ``x_col``.

        Args:
            x_col: Column for the x axis.
            y_col: Column for the y axis.
            color_col: Column used to colour the points; the string ``"None"``
                or any empty value means no colouring.

        Returns:
            A plotly figure, or ``None`` when data or an axis column is
            missing or figure creation fails (the error is printed).
        """
        if self.df is None or not x_col or not y_col:
            return None

        try:
            # The dropdown's "None" placeholder (or an empty selection) means
            # "do not colour by any column".
            if not color_col or color_col == "None":
                color_col = None

            fig = px.scatter(self.df, x=x_col, y=y_col, color=color_col,
                             title=f"Scatter Plot: {x_col} vs {y_col}")
            return fig
        except Exception as e:
            print(f"Error creating scatter plot: {str(e)}")
            return None

    def create_time_series(self, date_col, value_col):
        """Create a line plot of ``value_col`` over ``date_col``.

        Rows are plotted in the order they appear in the loaded data.

        Returns:
            A plotly figure, or ``None`` when data or a column is missing or
            figure creation fails (the error is printed).
        """
        if self.df is None or not date_col or not value_col:
            return None

        try:
            fig = px.line(self.df, x=date_col, y=value_col,
                          title=f"Time Series: {value_col} over {date_col}")
            return fig
        except Exception as e:
            print(f"Error creating time series: {str(e)}")
            return None

def create_visualization_interface():
    """Build the Gradio Blocks UI for loading, preprocessing and plotting data.

    The layout has two tabs: one for loading a file, preprocessing it and
    showing a text summary, and one for the correlation heatmap, scatter plot
    and time series plot.

    Returns:
        The assembled ``gr.Blocks`` interface. It is not launched here.
    """
    # A single platform object is captured by every handler below, so the
    # loaded DataFrame is shared by all events fired on this interface.
    dvp = DataVisualizationPlatform()

    with gr.Blocks(title="Data Visualization Platform") as interface:
        gr.Markdown("# Interactive Data Visualization Platform")

        with gr.Tab("Data Loading & Preprocessing"):
            file_input = gr.File(label="Upload CSV or Excel file")
            load_btn = gr.Button("Load Data")
            load_output = gr.Textbox(label="Loading Status")
            preprocess_btn = gr.Button("Preprocess Data")
            preprocess_output = gr.Textbox(label="Preprocessing Status")
            summary_btn = gr.Button("Generate Summary")
            summary_output = gr.Textbox(label="Data Summary", lines=10)

        with gr.Tab("Visualizations"):
            with gr.Row():
                with gr.Column():
                    # Correlation Heatmap
                    heatmap_btn = gr.Button("Generate Correlation Heatmap")
                    heatmap_plot = gr.Plot(label="Correlation Heatmap")

                with gr.Column():
                    # Scatter Plot
                    x_col = gr.Dropdown(label="X Column", choices=[])
                    y_col = gr.Dropdown(label="Y Column", choices=[])
                    color_col = gr.Dropdown(label="Color Column (optional)", choices=["None"])
                    scatter_btn = gr.Button("Generate Scatter Plot")
                    scatter_plot = gr.Plot(label="Scatter Plot")

            with gr.Row():
                # Time Series
                date_col = gr.Dropdown(label="Date Column", choices=[])
                value_col = gr.Dropdown(label="Value Column", choices=[])
                timeseries_btn = gr.Button("Generate Time Series")
                timeseries_plot = gr.Plot(label="Time Series Plot")

        def update_interface(file):
            """Load ``file`` and refresh the status box and column dropdowns."""
            result = dvp.load_and_update_columns(file)
            columns = result["columns"]
            # Selections are cleared together with the choices so that a
            # column picked for a previously loaded file cannot linger as a
            # value that is no longer among the available choices.
            return {
                load_output: result["status"],
                x_col: gr.Dropdown(choices=columns, value=None),
                y_col: gr.Dropdown(choices=columns, value=None),
                color_col: gr.Dropdown(choices=result["columns_with_none"], value="None"),
                date_col: gr.Dropdown(choices=columns, value=None),
                value_col: gr.Dropdown(choices=columns, value=None)
            }

        # Event handlers
        load_btn.click(
            fn=update_interface,
            inputs=[file_input],
            outputs=[load_output, x_col, y_col, color_col, date_col, value_col]
        )

        preprocess_btn.click(fn=dvp.preprocess_data, outputs=preprocess_output)
        summary_btn.click(fn=dvp.generate_summary, outputs=summary_output)
        heatmap_btn.click(fn=dvp.create_correlation_heatmap, outputs=heatmap_plot)
        scatter_btn.click(
            fn=dvp.create_scatter_plot,
            inputs=[x_col, y_col, color_col],
            outputs=scatter_plot
        )
        timeseries_btn.click(
            fn=dvp.create_time_series,
            inputs=[date_col, value_col],
            outputs=timeseries_plot
        )

    return interface

# Build the interface at module level so ``demo`` stays importable, but only
# start the server when this file is executed directly rather than imported.
demo = create_visualization_interface()

if __name__ == "__main__":
    demo.launch()