"""Gradio front end for the ``pejmanai_data_analysis`` package.

The user uploads a CSV file, names two columns to plot and a target column,
and picks a regression or classification run. The app then shows the data
description, the preprocessed data, a plot and the model output.
"""

import io
from contextlib import redirect_stdout

import gradio as gr
import matplotlib.pyplot as plt
import pandas as pd
from pejmanai_data_analysis.app import (
    read_csv,
    data_description,
    data_preprocessing,
    data_visualization,
    data_prediction,
    data_classification,
)

# Radio-button labels; also used to pick the workflow in ``gradio_interface``.
_REGRESSION = "Regression Problem"
_CLASSIFICATION = "Classification Problem"


# Function to capture printed output with error handling
def capture_output(func, *args, **kwargs):
    """Run ``func(*args, **kwargs)`` and return whatever it printed to stdout.

    The return value of ``func`` is discarded. If ``func`` raises, the text
    ``"Error occurred: <message>"`` is returned instead of the captured output,
    so the caller always receives a string it can display.
    """
    buffer = io.StringIO()
    try:
        with redirect_stdout(buffer):
            func(*args, **kwargs)
    except Exception as e:  # UI boundary: show the failure instead of crashing
        return f"Error occurred: {e}"
    return buffer.getvalue()


def _csv_path(csv_file):
    """Return the filesystem path of the uploaded file.

    Accepts either an object exposing the path as ``.name`` (what the original
    code relied on) or a plain path string.

    Raises:
        ValueError: if nothing was uploaded (``csv_file`` is ``None``).
    """
    if csv_file is None:
        raise ValueError("no CSV file was uploaded.")
    return getattr(csv_file, "name", csv_file)


def _build_visualization(csv_path, df_preprocessed, x_column, y_column):
    """Return a matplotlib figure for the two selected columns.

    When both columns are numeric the package's ``data_visualization`` draws
    onto a 16x12 figure; otherwise a placeholder figure carrying an
    explanatory message is returned.

    NOTE(review): assumes ``df_preprocessed`` is a pandas DataFrame (it is
    indexed by column name and shown in a ``gr.Dataframe``) - confirm against
    ``data_preprocessing``.

    Raises:
        ValueError: if a selected column is not present in ``df_preprocessed``.
    """
    missing = [
        column
        for column in (x_column, y_column)
        if column not in df_preprocessed.columns
    ]
    if missing:
        raise ValueError(f"column(s) not found in the preprocessed data: {missing}")

    both_numeric = all(
        pd.api.types.is_numeric_dtype(df_preprocessed[column])
        for column in (x_column, y_column)
    )

    opened = []
    try:
        if both_numeric:
            opened.append(plt.figure(figsize=(16, 12)))
            data_visualization(csv_path, x_column, y_column)
        else:
            opened.append(plt.figure())
            plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
        # data_visualization may have switched to a figure of its own, so ask
        # pyplot for the current one rather than assuming it is ours.
        figure = plt.gcf()
        opened.append(figure)
        return figure
    finally:
        # Unregister the figures from pyplot so a long-running server does not
        # accumulate one per request. The Figure object itself stays usable
        # and can still be rendered by the caller.
        for fig in opened:
            plt.close(fig)


def _run_workflow(csv_file, x_column, y_column, target_column, model_func, label):
    """Run description, preprocessing, visualization and one model step.

    Args:
        csv_file: the uploaded file (object with ``.name``) or a path string.
        x_column, y_column: column names used for the plot.
        target_column: column name handed to ``model_func``.
        model_func: package function called as ``model_func(path, target_column)``;
            its printed output becomes the model output text.
        label: workflow name used in the error message.

    Returns:
        ``(description_text, preprocessed_data, figure, model_text)``, or
        ``(error_message, None, None, None)`` if any step raised.
    """
    try:
        csv_path = _csv_path(csv_file)

        # Capture data description output
        data_desc = capture_output(data_description, csv_path)

        # Step b) Data Preprocessing
        df_preprocessed = data_preprocessing(csv_path)

        # Step c) Data Visualization
        figure = _build_visualization(csv_path, df_preprocessed, x_column, y_column)

        # Capture model output
        model_text = capture_output(model_func, csv_path, target_column)

        return data_desc, df_preprocessed, figure, model_text
    except Exception as e:  # UI boundary: report the failure in the first output
        return f"Error occurred during {label} workflow: {e}", None, None, None


# Function to handle regression workflow with error handling
def regression_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_prediction`` as the model step.

    Returns the four values bound to the UI outputs; see ``_run_workflow``.
    """
    return _run_workflow(
        csv_file, x_column, y_column, target_column,
        model_func=data_prediction, label="regression",
    )


# Function to handle classification workflow with error handling
def classification_workflow(csv_file, x_column, y_column, target_column):
    """Run the full analysis using ``data_classification`` as the model step.

    Returns the four values bound to the UI outputs; see ``_run_workflow``.
    """
    return _run_workflow(
        csv_file, x_column, y_column, target_column,
        model_func=data_classification, label="classification",
    )


# Main Gradio interface function with error handling
def gradio_interface(option, csv_file, x_column, y_column, target_column):
    """Dispatch to the workflow matching the selected problem type.

    Always returns a 4-tuple so the four bound output components can be
    updated, including when no (or an unknown) problem type is selected.
    """
    workflows = {
        _REGRESSION: regression_workflow,
        _CLASSIFICATION: classification_workflow,
    }
    workflow = workflows.get(option)
    if workflow is None:
        return (
            "Error occurred: please select a problem type before running the analysis.",
            None,
            None,
            None,
        )
    return workflow(csv_file, x_column, y_column, target_column)


# Reset function to clear outputs
def reset_all():
    """Return blank values for the four output components."""
    return "", None, None, ""


# Explanation text
explanation = """
### PejmanAI Data Analysis Tool
This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/).
The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).

**About the app:**
- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.
- The target column is the dependent variable on which you want to make predictions.
- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).
"""

# Footer HTML
footer = """
"""

# Set up the Gradio interface with UI adjustments
with gr.Blocks(theme='JohnSmith9982/small_and_pretty') as interface:
    gr.Markdown(explanation)

    # Inputs
    with gr.Row():
        problem_type = gr.Radio([_REGRESSION, _CLASSIFICATION], label="Select Problem Type")

    with gr.Row():
        csv_file = gr.File(label="Upload CSV File")

    with gr.Row():
        x_column = gr.Textbox(label="Enter X Column for Visualization")

    with gr.Row():
        y_column = gr.Textbox(label="Enter Y Column for Visualization")

    with gr.Row():
        target_column = gr.Textbox(label="Enter Target Column for Model Training")

    with gr.Row():
        submit_button = gr.Button("Run Analysis")

    # Outputs
    with gr.Row():
        data_desc_output = gr.Textbox(label="Data Description", lines=20, placeholder="Data Description Output")

    with gr.Row():
        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")

    with gr.Row():
        visualization_output = gr.Plot(label="Data Visualization Output")

    with gr.Row():
        model_output = gr.Textbox(label="Model Output", lines=20, placeholder="Model Output")

    with gr.Row():
        reset_button = gr.Button("Reset Outputs")

    # Event wiring: both handlers write to the same four output components.
    reset_button.click(
        fn=reset_all,
        inputs=[],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )

    submit_button.click(
        fn=gradio_interface,
        inputs=[problem_type, csv_file, x_column, y_column, target_column],
        outputs=[data_desc_output, df_preprocessed_output, visualization_output, model_output]
    )

    gr.HTML(footer)

# Launch the Gradio interface
interface.launch()