# File size: 6,424 Bytes
# b0e246d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
import gradio as gr
import io
import pandas as pd
import matplotlib.pyplot as plt
from contextlib import redirect_stdout
from pejmanai_data_analysis.app import (
    read_csv, data_description, data_preprocessing,
    data_visualization, data_prediction, data_classification
)

# Function to capture printed output with error handling
def capture_output(func, *args, **kwargs):
    """Run ``func`` and return the text it printed to standard output.

    The callable's own return value is discarded; only what it writes to
    stdout while running is collected.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The captured stdout text. If ``func`` raises an ``Exception``, the
        string ``"Error occurred: <message>"`` is returned instead and any
        text printed before the failure is dropped.
    """
    buffer = io.StringIO()
    try:
        with redirect_stdout(buffer):
            func(*args, **kwargs)
    except Exception as exc:
        return f"Error occurred: {exc!s}"
    else:
        return buffer.getvalue()

# Shared steps used by both the regression and classification workflows
def _make_visualization(df_preprocessed, csv_path, x_column, y_column):
    """Build the figure for the visualization output.

    If both selected columns of ``df_preprocessed`` are numeric, a 16x12
    figure is opened and ``data_visualization`` is called (presumably it
    draws on the current pyplot figure -- confirm against the package).
    Otherwise a small figure carrying an explanatory message is produced.

    Args:
        df_preprocessed: Object indexable by column name whose columns can
            be checked with ``pd.api.types.is_numeric_dtype``.
        csv_path: Path of the uploaded CSV, passed to ``data_visualization``.
        x_column: Name of the column selected for the X axis.
        y_column: Name of the column selected for the Y axis.

    Returns:
        The figure that is current in pyplot once drawing has finished.

    Raises:
        Whatever the column lookup (e.g. unknown column name) or
        ``data_visualization`` raises.
    """
    is_numeric = pd.api.types.is_numeric_dtype
    both_numeric = (
        is_numeric(df_preprocessed[x_column])
        and is_numeric(df_preprocessed[y_column])
    )

    opened = plt.figure(figsize=(16, 12)) if both_numeric else plt.figure()
    try:
        if both_numeric:
            data_visualization(csv_path, x_column, y_column)
        else:
            plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
        # Ask pyplot for the current figure rather than reusing ``opened``:
        # the plotting call may have switched to a figure of its own.
        figure = plt.gcf()
    finally:
        # pyplot keeps every figure it creates registered until it is
        # closed; without this each request would leave one more figure
        # alive for the lifetime of the server.
        plt.close(opened)
    # Closing only removes the figure from pyplot's registry; the Figure
    # object itself is still returned so it can be rendered.
    plt.close(figure)
    return figure


def _run_workflow(csv_file, x_column, y_column, target_column, model_func):
    """Run description, preprocessing, visualization and one model step.

    Args:
        csv_file: Uploaded file object; only its ``name`` attribute (used
            as the CSV path) is read.
        x_column: Column name for the X axis of the visualization.
        y_column: Column name for the Y axis of the visualization.
        target_column: Column name passed to ``model_func``.
        model_func: Callable invoked as ``model_func(csv_path, target_column)``
            whose printed output becomes the model output text.

    Returns:
        A 4-tuple ``(data description text, preprocessed data, figure,
        model output text)``.

    Raises:
        Any exception raised outside the ``capture_output`` calls; the
        public workflow functions turn these into an error message.
    """
    csv_path = csv_file.name

    # The package prints its report instead of returning it, so the text is
    # collected from stdout.
    data_desc = capture_output(data_description, csv_path)
    df_preprocessed = data_preprocessing(csv_path)
    figure = _make_visualization(df_preprocessed, csv_path, x_column, y_column)
    model_output = capture_output(model_func, csv_path, target_column)

    return data_desc, df_preprocessed, figure, model_output


# Function to handle regression workflow with error handling
def regression_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_prediction`` as the model step.

    Args:
        csv_file: Uploaded file object exposing the CSV path as ``.name``.
        x_column: Column name for the X axis of the visualization.
        y_column: Column name for the Y axis of the visualization.
        target_column: Column name passed to ``data_prediction``.

    Returns:
        ``(data description text, preprocessed data, figure, regression
        output text)`` on success, or
        ``("Error occurred during regression workflow: <message>", None,
        None, None)`` if any step raises.
    """
    try:
        return _run_workflow(csv_file, x_column, y_column, target_column, data_prediction)
    except Exception as e:
        return f"Error occurred during regression workflow: {str(e)}", None, None, None


# Function to handle classification workflow with error handling
def classification_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_classification`` as the model step.

    Args:
        csv_file: Uploaded file object exposing the CSV path as ``.name``.
        x_column: Column name for the X axis of the visualization.
        y_column: Column name for the Y axis of the visualization.
        target_column: Column name passed to ``data_classification``.

    Returns:
        ``(data description text, preprocessed data, figure, classification
        output text)`` on success, or
        ``("Error occurred during classification workflow: <message>", None,
        None, None)`` if any step raises.
    """
    try:
        return _run_workflow(csv_file, x_column, y_column, target_column, data_classification)
    except Exception as e:
        return f"Error occurred during classification workflow: {str(e)}", None, None, None

# Main Gradio interface function with error handling
def gradio_interface(option, csv_file, x_column, y_column, target_column):
    """Dispatch a "Run Analysis" click to the selected workflow.

    Always returns a 4-tuple so the result lines up with the four output
    components the click handler is wired to, even when the form is
    incomplete.

    Args:
        option: Selected problem type, ``"Regression Problem"`` or
            ``"Classification Problem"``; anything else (including ``None``
            when nothing is selected) yields an error message.
        csv_file: Uploaded file object, or ``None`` if nothing was uploaded.
        x_column: Column name for the X axis of the visualization.
        y_column: Column name for the Y axis of the visualization.
        target_column: Column name used for model training.

    Returns:
        The selected workflow's 4-tuple, or ``(error message, None, None,
        None)`` when the problem type or the file is missing.
    """
    if csv_file is None:
        return "Error occurred: please upload a CSV file.", None, None, None

    if option == "Regression Problem":
        return regression_workflow(csv_file, x_column, y_column, target_column)
    if option == "Classification Problem":
        return classification_workflow(csv_file, x_column, y_column, target_column)

    # No (or an unknown) problem type: previously this fell through and
    # returned a bare None, which cannot be spread over four outputs.
    return "Error occurred: please select a problem type.", None, None, None

# Reset function to clear outputs
def reset_all():
    """Return blank values for the four output components.

    Returns:
        ``("", None, None, "")`` -- empty text for the two text outputs
        (first and last) and ``None`` for the two outputs in between.
    """
    blank_text = ""
    blank_widget = None
    return blank_text, blank_widget, blank_widget, blank_text

# Explanation text: Markdown passed to gr.Markdown at the top of the page.
# It links to the pejmanai_data_analysis package (PyPI and GitHub) and lists
# usage notes for the visualization columns, the target column and the
# captured text outputs.
explanation = """

### PejmanAI Data Analysis Tool



This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/). 

The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).



**About the app:**

- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.

- The target column is the dependent variable on which you want to make predictions.

- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).

"""

# Footer HTML: passed to gr.HTML as the last element of the page. A centered
# block with the author's LinkedIn, GitHub and demo links plus a credit line.
footer = """

<div style="text-align: center; margin-top: 20px;">

    <a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |

    <a href="https://github.com/arad1367" target="_blank">GitHub</a> |

    <a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>

    <br>

    Made with 💖 by Pejman Ebrahimi

</div>

"""

# Set up the Gradio interface with UI adjustments.
# Everything is declared inside one gr.Blocks context; each gr.Row below
# wraps a single component.
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as interface:
    # Introductory Markdown (package links and usage notes).
    gr.Markdown(explanation)
    
    # --- Inputs: problem type, CSV upload and the three column names ---
    with gr.Row():
        problem_type = gr.Radio(["Regression Problem", "Classification Problem"], label="Select Problem Type")
    with gr.Row():
        csv_file = gr.File(label="Upload CSV File")
    with gr.Row():
        x_column = gr.Textbox(label="Enter X Column for Visualization")
    with gr.Row():
        y_column = gr.Textbox(label="Enter Y Column for Visualization")
    with gr.Row():
        target_column = gr.Textbox(label="Enter Target Column for Model Training")
    
    with gr.Row():
        submit_button = gr.Button("Run Analysis")
    
    # --- Outputs: description text, preprocessed table, plot, model text ---
    with gr.Row():
        data_desc_output = gr.Textbox(label="Data Description", lines=20, placeholder="Data Description Output")
    with gr.Row():
        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")
    with gr.Row():
        visualization_output = gr.Plot(label="Data Visualization Output")
    with gr.Row():
        model_output = gr.Textbox(label="Model Output", lines=20, placeholder="Model Output")
    
    with gr.Row():
        reset_button = gr.Button("Reset Outputs")
    
    # Reset takes no inputs and writes reset_all()'s four values to the four
    # output components; the input widgets are not listed, so they are not
    # targeted by this handler.
    reset_button.click(
        fn=reset_all,
        inputs=[],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )
    
    # Run Analysis passes the five input values to gradio_interface in this
    # order and writes its result to the same four output components.
    submit_button.click(
        fn=gradio_interface,
        inputs=[problem_type, csv_file, x_column, y_column, target_column],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )
    
    # Author links and credit line at the bottom of the page.
    gr.HTML(footer)

# Launch the Gradio interface. This runs at module level (there is no
# __main__ guard), so it executes whenever this file is run or imported.
interface.launch()