Spaces:

arad1367
/

pejmanai_data_analysis_regression_classification_v1

Runtime error

pejmanai_data_analysis_regression_classification_v1

File size: 6,424 Bytes

b0e246d

import gradio as gr
import io
import pandas as pd
import matplotlib.pyplot as plt
from contextlib import redirect_stdout
from pejmanai_data_analysis.app import (
    read_csv, data_description, data_preprocessing,
    data_visualization, data_prediction, data_classification
)

# Function to capture printed output with error handling
def capture_output(func, *args, **kwargs):
    """Run ``func`` and return everything it printed to stdout.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The text written to stdout while ``func`` ran. If ``func`` raises,
        the captured text is dropped and ``"Error occurred: <message>"`` is
        returned instead. The return value of ``func`` itself is ignored.
    """
    buffer = io.StringIO()
    try:
        # stdout is redirected only for the duration of the call.
        with redirect_stdout(buffer):
            func(*args, **kwargs)
    except Exception as exc:
        return f"Error occurred: {str(exc)}"
    else:
        return buffer.getvalue()

# Shared helpers for the regression and classification workflows
def _resolve_csv_path(csv_file):
    """Return the filesystem path of the uploaded CSV file.

    Accepts either an object exposing the path through ``.name`` (what this
    app has always relied on) or a plain path string.

    Raises:
        ValueError: If ``csv_file`` is ``None``, i.e. nothing was uploaded.
    """
    if csv_file is None:
        raise ValueError("No CSV file was uploaded.")
    return getattr(csv_file, "name", csv_file)


def _build_visualization(df_preprocessed, csv_path, x_column, y_column):
    """Return the current matplotlib figure for the visualization step.

    When both selected columns are numeric the plot is delegated to
    ``data_visualization``; otherwise a figure holding an explanatory message
    is produced.
    """
    both_numeric = (
        pd.api.types.is_numeric_dtype(df_preprocessed[x_column])
        and pd.api.types.is_numeric_dtype(df_preprocessed[y_column])
    )
    # NOTE(review): figures created here are never closed, so pyplot keeps a
    # reference to every one of them for the lifetime of the process.
    if both_numeric:
        plt.figure(figsize=(16, 12))
        data_visualization(csv_path, x_column, y_column)
    else:
        plt.figure()
        plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
    return plt.gcf()


def _run_workflow(kind, csv_file, x_column, y_column, target_column):
    """Run description, preprocessing, visualization and model training.

    Args:
        kind: ``"regression"`` or ``"classification"``; selects the model
            function and is used in the error message.
        csv_file: Uploaded file (object with ``.name`` or a path string).
        x_column: Column name used on the X axis of the plot.
        y_column: Column name used on the Y axis of the plot.
        target_column: Column name passed to the model function.

    Returns:
        ``(description_text, preprocessed_data, figure, model_text)`` on
        success, or ``(error_message, None, None, None)`` if any step raises.
    """
    try:
        # Validate the upload first so a missing file yields a clear message.
        csv_path = _resolve_csv_path(csv_file)
        model_func = data_prediction if kind == "regression" else data_classification

        # Step a) Data description (captured from stdout)
        data_desc = capture_output(data_description, csv_path)

        # Step b) Data Preprocessing
        df_preprocessed = data_preprocessing(csv_path)

        # Step c) Data Visualization
        visualization_output = _build_visualization(
            df_preprocessed, csv_path, x_column, y_column
        )

        # Step d) Model training output (captured from stdout)
        model_output = capture_output(model_func, csv_path, target_column)

        return data_desc, df_preprocessed, visualization_output, model_output
    except Exception as e:
        return f"Error occurred during {kind} workflow: {str(e)}", None, None, None


# Function to handle regression workflow with error handling
def regression_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_prediction`` as the model step.

    Returns a 4-tuple ``(description, preprocessed_data, figure,
    regression_output)``; on failure the first element is an error message
    and the remaining three are ``None``.
    """
    return _run_workflow("regression", csv_file, x_column, y_column, target_column)


# Function to handle classification workflow with error handling
def classification_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_classification`` as the model step.

    Returns a 4-tuple ``(description, preprocessed_data, figure,
    classification_output)``; on failure the first element is an error
    message and the remaining three are ``None``.
    """
    return _run_workflow("classification", csv_file, x_column, y_column, target_column)

# Main Gradio interface function with error handling
def gradio_interface(option, csv_file, x_column, y_column, target_column):
    """Dispatch the "Run Analysis" click to the selected workflow.

    Args:
        option: Selected problem type, ``"Regression Problem"`` or
            ``"Classification Problem"``; may be ``None`` when nothing is
            selected.
        csv_file: Uploaded CSV file.
        x_column: Column name for the X axis of the visualization.
        y_column: Column name for the Y axis of the visualization.
        target_column: Column name used as the model target.

    Returns:
        A 4-tuple matching the four output components (description text,
        preprocessed data, figure, model text). When no known problem type
        is selected, the first element is a prompt and the rest are ``None``.
    """
    if option == "Regression Problem":
        return regression_workflow(csv_file, x_column, y_column, target_column)
    if option == "Classification Problem":
        return classification_workflow(csv_file, x_column, y_column, target_column)
    # Falling through used to return None, which cannot be unpacked into the
    # four outputs this handler is wired to; always return a full 4-tuple.
    return "Please select a problem type before running the analysis.", None, None, None

# Reset function to clear outputs
def reset_all():
    """Return cleared values for the four outputs.

    The order is: description text, preprocessed data, figure, model text.
    """
    cleared_text = ""
    cleared_widget = None
    return cleared_text, cleared_widget, cleared_widget, cleared_text

# Explanation text: Markdown passed to gr.Markdown at the top of the page.
# It introduces the tool, links to the package on PyPI and GitHub, and lists
# usage notes for the visualization and target-column inputs.
explanation = """

### PejmanAI Data Analysis Tool



This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/). 

The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).



**About the app:**

- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.

- The target column is the dependent variable on which you want to make predictions.

- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).

"""

# Footer HTML: centered block of author links, passed to gr.HTML at the
# bottom of the page.
footer = """

<div style="text-align: center; margin-top: 20px;">

    <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |

    <a href="https://github.com/arad1367" target="_blank">GitHub</a> |

    <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>

    <br>

    Made with 💖 by Pejman Ebrahimi

</div>

"""

# Set up the Gradio interface with UI adjustments.
# Components are declared top to bottom, each inside its own gr.Row: the
# inputs, the "Run Analysis" button, the four outputs, and the reset button.
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as interface:
    # Introductory Markdown
    gr.Markdown(explanation)
    
    # --- Inputs ---
    # The radio labels must match the strings compared in gradio_interface.
    with gr.Row():
        problem_type = gr.Radio(["Regression Problem", "Classification Problem"], label="Select Problem Type")
    with gr.Row():
        csv_file = gr.File(label="Upload CSV File")
    # Column names are typed in as free text.
    with gr.Row():
        x_column = gr.Textbox(label="Enter X Column for Visualization")
    with gr.Row():
        y_column = gr.Textbox(label="Enter Y Column for Visualization")
    with gr.Row():
        target_column = gr.Textbox(label="Enter Target Column for Model Training")
    
    with gr.Row():
        submit_button = gr.Button("Run Analysis")
    
    # --- Outputs ---
    with gr.Row():
        data_desc_output = gr.Textbox(label="Data Description", lines=20, placeholder="Data Description Output")
    with gr.Row():
        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")
    with gr.Row():
        visualization_output = gr.Plot(label="Data Visualization Output")
    with gr.Row():
        model_output = gr.Textbox(label="Model Output", lines=20, placeholder="Model Output")
    
    with gr.Row():
        reset_button = gr.Button("Reset Outputs")
    
    # --- Event wiring ---
    # Both handlers write to the same four outputs, in the same order as the
    # 4-tuples returned by reset_all and the workflow functions.
    reset_button.click(
        fn=reset_all,
        inputs=[],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )
    
    # The input order matches the parameter order of gradio_interface.
    submit_button.click(
        fn=gradio_interface,
        inputs=[problem_type, csv_file, x_column, y_column, target_column],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )
    
    # Footer with author links
    gr.HTML(footer)

# Launch the Gradio interface.
# NOTE: this call is unconditional (no `if __name__ == "__main__"` guard), so
# it also runs whenever this module is imported.
interface.launch()