# File size: 6,424 Bytes
# b0e246d
import gradio as gr
import io
import pandas as pd
import matplotlib.pyplot as plt
from contextlib import redirect_stdout
from pejmanai_data_analysis.app import (
read_csv, data_description, data_preprocessing,
data_visualization, data_prediction, data_classification
)
# Function to capture printed output with error handling
def capture_output(func, *args, **kwargs):
    """Call ``func`` and return everything it printed to stdout as one string.

    Args:
        func: Callable to invoke.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The text written to stdout while ``func`` ran. If ``func`` raises,
        the string ``"Error occurred: <message>"`` is returned instead and
        anything printed before the failure is discarded. The return value of
        ``func`` itself is ignored.
    """
    buffer = io.StringIO()
    try:
        # sys.stdout points at the in-memory buffer only for the duration of the call.
        with redirect_stdout(buffer):
            func(*args, **kwargs)
    except Exception as exc:
        return f"Error occurred: {str(exc)}"
    return buffer.getvalue()
# Function to handle regression workflow with error handling
def regression_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis pipeline with ``data_prediction`` as the model step.

    Args:
        csv_file: Uploaded CSV as delivered by the file component.
        x_column: Column name used for the x axis of the plot.
        y_column: Column name used for the y axis of the plot.
        target_column: Column name passed to ``data_prediction``.

    Returns:
        ``(data_desc, df_preprocessed, figure, regression_output)`` on success,
        or ``(error_message, None, None, None)`` on failure.
    """
    if csv_file is None:
        return _workflow_error("regression", "No CSV file was uploaded.")
    return _run_workflow(
        csv_file, x_column, y_column, target_column, data_prediction, "regression"
    )


# Function to handle classification workflow with error handling
def classification_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis pipeline with ``data_classification`` as the model step.

    Args:
        csv_file: Uploaded CSV as delivered by the file component.
        x_column: Column name used for the x axis of the plot.
        y_column: Column name used for the y axis of the plot.
        target_column: Column name passed to ``data_classification``.

    Returns:
        ``(data_desc, df_preprocessed, figure, classification_output)`` on
        success, or ``(error_message, None, None, None)`` on failure.
    """
    if csv_file is None:
        return _workflow_error("classification", "No CSV file was uploaded.")
    return _run_workflow(
        csv_file, x_column, y_column, target_column, data_classification, "classification"
    )


def _workflow_error(workflow_name, message):
    """Build the 4-tuple returned when a workflow fails (message first, rest empty)."""
    return f"Error occurred during {workflow_name} workflow: {message}", None, None, None


def _run_workflow(csv_file, x_column, y_column, target_column, model_func, workflow_name):
    """Describe, preprocess, plot and model the uploaded CSV (shared by both workflows).

    Args:
        csv_file: Uploaded file; either an object exposing its path as ``.name``
            or a plain path string.
        x_column: Column name used for the x axis of the plot.
        y_column: Column name used for the y axis of the plot.
        target_column: Column name handed to ``model_func``.
        model_func: Called as ``model_func(csv_path, target_column)``; its printed
            output becomes the fourth element of the result.
        workflow_name: Word inserted into error messages
            (``"regression"`` or ``"classification"``).

    Returns:
        ``(data_desc, df_preprocessed, figure, model_output)`` on success, or
        ``(error_message, None, None, None)`` if any step fails.
    """
    try:
        # Accept both a file-like object with ``.name`` and a bare path string.
        csv_path = getattr(csv_file, "name", csv_file)

        # Capture data description output
        data_desc = capture_output(data_description, csv_path)

        # Step b) Data Preprocessing
        df_preprocessed = data_preprocessing(csv_path)

        # Report mistyped column names explicitly; a bare KeyError message would
        # only show the quoted column name.
        missing = [col for col in (x_column, y_column) if col not in df_preprocessed.columns]
        if missing:
            return _workflow_error(
                workflow_name,
                f"Column(s) not found in the data: {', '.join(map(repr, missing))}",
            )

        # Step c) Data Visualization
        # pyplot keeps every figure it creates registered until it is closed, so
        # drop figures left over from earlier runs before creating a new one.
        plt.close("all")
        is_numeric = pd.api.types.is_numeric_dtype
        if is_numeric(df_preprocessed[x_column]) and is_numeric(df_preprocessed[y_column]):
            plt.figure(figsize=(16, 12))
            # NOTE(review): assumes data_visualization draws on the current pyplot
            # figure - confirm against the package.
            data_visualization(csv_path, x_column, y_column)
        else:
            # Placeholder figure so the plot pane explains why nothing was drawn.
            plt.figure()
            plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
        visualization_output = plt.gcf()

        # Capture model output
        model_output = capture_output(model_func, csv_path, target_column)

        return data_desc, df_preprocessed, visualization_output, model_output
    except Exception as e:
        # UI boundary: surface the failure in the first output instead of crashing.
        return _workflow_error(workflow_name, str(e))
# Main Gradio interface function with error handling
def gradio_interface(option, csv_file, x_column, y_column, target_column):
    """Dispatch the submitted form to the workflow matching the selected problem type.

    Args:
        option: Selected problem type; ``"Regression Problem"`` or
            ``"Classification Problem"``.
        csv_file: Uploaded CSV, forwarded unchanged to the workflow.
        x_column: Column name for the x axis of the plot.
        y_column: Column name for the y axis of the plot.
        target_column: Column name the model should predict.

    Returns:
        The 4-tuple produced by the chosen workflow. If ``option`` is neither
        of the two known values (for example nothing was selected), a 4-tuple
        with an error message in the first slot and ``None`` in the others, so
        that every wired output still receives a value.
    """
    if option == "Regression Problem":
        return regression_workflow(csv_file, x_column, y_column, target_column)
    if option == "Classification Problem":
        return classification_workflow(csv_file, x_column, y_column, target_column)
    # Previously this path fell off the end and returned None, which does not
    # match the four outputs the submit handler is connected to.
    return (
        "Error occurred: Please select a problem type "
        "(Regression Problem or Classification Problem).",
        None,
        None,
        None,
    )
# Reset function to clear outputs
def reset_all():
    """Return the values that blank the four result components.

    Returns:
        ``("", None, None, "")``: an empty string for each of the two text
        outputs and ``None`` for the table and the plot.
    """
    cleared_text = ""
    return cleared_text, None, None, cleared_text
# Markdown introduction for the page (passed to gr.Markdown when the UI is built):
# links to the package on PyPI and GitHub, then usage notes for the
# visualization columns, the target column and the captured text outputs.
explanation = """
### PejmanAI Data Analysis Tool
This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/).
The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).
**About the app:**
- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.
- The target column is the dependent variable on which you want to make predictions.
- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).
"""
# Footer HTML (passed to gr.HTML as the last element of the page):
# a centered block with the author's profile links and a credit line.
footer = """
<div style="text-align: center; margin-top: 20px;">
<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
<a href="https://github.com/arad1367" target="_blank">GitHub</a> |
<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
<br>
Made with 💖 by Pejman Ebrahimi
</div>
"""
# Build the page: intro text, one input per row, the run button, the four
# result panes, the reset button and finally the footer.
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as interface:
    gr.Markdown(explanation)

    # Inputs
    with gr.Row():
        problem_type = gr.Radio(
            ["Regression Problem", "Classification Problem"],
            label="Select Problem Type",
        )
    with gr.Row():
        csv_file = gr.File(label="Upload CSV File")
    with gr.Row():
        x_column = gr.Textbox(label="Enter X Column for Visualization")
    with gr.Row():
        y_column = gr.Textbox(label="Enter Y Column for Visualization")
    with gr.Row():
        target_column = gr.Textbox(label="Enter Target Column for Model Training")
    with gr.Row():
        submit_button = gr.Button("Run Analysis")

    # Result panes
    with gr.Row():
        data_desc_output = gr.Textbox(
            label="Data Description",
            lines=20,
            placeholder="Data Description Output",
        )
    with gr.Row():
        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")
    with gr.Row():
        visualization_output = gr.Plot(label="Data Visualization Output")
    with gr.Row():
        model_output = gr.Textbox(label="Model Output", lines=20, placeholder="Model Output")
    with gr.Row():
        reset_button = gr.Button("Reset Outputs")

    # Both buttons write to the same four components, in this order; it matches
    # the order of the tuples returned by reset_all and gradio_interface.
    result_components = [
        data_desc_output,
        df_preprocessed_output,
        visualization_output,
        model_output,
    ]
    reset_button.click(fn=reset_all, inputs=[], outputs=result_components)
    submit_button.click(
        fn=gradio_interface,
        inputs=[problem_type, csv_file, x_column, y_column, target_column],
        outputs=result_components,
    )

    gr.HTML(footer)

# Start the app
interface.launch()
|