Spaces:

arad1367
/

pejmanai_data_analysis_regression_classification_v1

Runtime error

App Files Files Community

pejmanai_data_analysis_regression_classification_v1 / app.py

arad1367

Upload 2 files

b0e246d verified 11 months ago

raw

history blame contribute delete

6.42 kB

	import gradio as gr
	import io
	import pandas as pd
	import matplotlib.pyplot as plt
	from contextlib import redirect_stdout
	from pejmanai_data_analysis.app import (
	read_csv, data_description, data_preprocessing,
	data_visualization, data_prediction, data_classification
	)

	# Function to capture printed output with error handling
	def capture_output(func, *args, **kwargs):
	    """Call ``func`` and return whatever it printed to stdout as a string.

	    Args:
	        func: Callable whose printed output should be captured.
	        *args: Positional arguments forwarded to ``func`` unchanged.
	        **kwargs: Keyword arguments forwarded to ``func`` unchanged.

	    Returns:
	        The text written to ``sys.stdout`` while ``func`` ran. If ``func``
	        raises, the string ``"Error occurred: <message>"`` is returned
	        instead and any output printed before the failure is discarded.
	        The return value of ``func`` itself is ignored.
	    """
	    buffer = io.StringIO()
	    try:
	        # NOTE: redirect_stdout swaps the process-wide sys.stdout, so output
	        # from other threads printing at the same time can end up in here.
	        with redirect_stdout(buffer):
	            func(*args, **kwargs)
	    except Exception as e:
	        # Deliberate best-effort: the UI shows the message instead of crashing.
	        return f"Error occurred: {str(e)}"
	    return buffer.getvalue()

	# Shared pipeline behind the regression and classification workflows
	def _run_workflow(csv_file, x_column, y_column, target_column, model_func, label):
	    """Run description, preprocessing, visualization and one model step.

	    Args:
	        csv_file: Uploaded file object; its ``.name`` attribute is used as
	            the CSV path. ``None`` means nothing was uploaded.
	        x_column: Column name used for the x axis of the plot.
	        y_column: Column name used for the y axis of the plot.
	        target_column: Column name passed to the model function.
	        model_func: Callable invoked as ``model_func(path, target_column)``;
	            its printed output becomes the model text.
	        label: Word inserted into the error message (e.g. ``"regression"``).

	    Returns:
	        A 4-tuple ``(description_text, preprocessed_data, figure, model_text)``.
	        On any failure the tuple is ``(error_message, None, None, None)``.
	    """
	    if csv_file is None:
	        # Give a readable message instead of the AttributeError text that
	        # ``None.name`` would otherwise produce.
	        return f"Error occurred during {label} workflow: no CSV file was uploaded.", None, None, None

	    try:
	        csv_path = csv_file.name

	        # Step a) Data description: the function prints, so capture stdout
	        data_desc = capture_output(data_description, csv_path)

	        # Step b) Data Preprocessing
	        df_preprocessed = data_preprocessing(csv_path)

	        # Step c) Data Visualization: only plotted when both columns are numeric
	        both_numeric = all(
	            pd.api.types.is_numeric_dtype(df_preprocessed[column])
	            for column in (x_column, y_column)
	        )
	        if both_numeric:
	            plt.figure(figsize=(16, 12))
	            data_visualization(csv_path, x_column, y_column)
	        else:
	            # Placeholder figure carrying an explanatory message
	            plt.figure()
	            plt.text(0.5, 0.5, 'Selected columns are not numeric.', fontsize=12, ha='center')
	        # NOTE(review): figures are never closed here, so pyplot keeps a
	        # reference to each one for the lifetime of the process.
	        visualization_output = plt.gcf()

	        # Step d) Model training: printed report captured as text
	        model_output = capture_output(model_func, csv_path, target_column)

	        return data_desc, df_preprocessed, visualization_output, model_output
	    except Exception as e:
	        # Top-level UI boundary: report the problem in the first output box.
	        return f"Error occurred during {label} workflow: {str(e)}", None, None, None


	# Function to handle regression workflow with error handling
	def regression_workflow(csv_file, x_column, y_column, target_column):
	    """Run the full analysis using ``data_prediction`` as the model step.

	    Returns:
	        ``(description_text, preprocessed_data, figure, regression_text)``,
	        or ``(error_message, None, None, None)`` if anything fails.
	    """
	    return _run_workflow(
	        csv_file, x_column, y_column, target_column,
	        model_func=data_prediction, label="regression",
	    )


	# Function to handle classification workflow with error handling
	def classification_workflow(csv_file, x_column, y_column, target_column):
	    """Run the full analysis using ``data_classification`` as the model step.

	    Returns:
	        ``(description_text, preprocessed_data, figure, classification_text)``,
	        or ``(error_message, None, None, None)`` if anything fails.
	    """
	    return _run_workflow(
	        csv_file, x_column, y_column, target_column,
	        model_func=data_classification, label="classification",
	    )

	# Main Gradio interface function with error handling
	def gradio_interface(option, csv_file, x_column, y_column, target_column):
	    """Dispatch the "Run Analysis" click to the selected workflow.

	    Args:
	        option: Selected problem type, ``"Regression Problem"`` or
	            ``"Classification Problem"``; any other value (including
	            ``None`` when nothing is selected) is reported to the user.
	        csv_file: Uploaded CSV file object, forwarded to the workflow.
	        x_column: X column name for the visualization.
	        y_column: Y column name for the visualization.
	        target_column: Target column name for model training.

	    Returns:
	        A 4-tuple matching the four output components wired to the submit
	        button: description text, preprocessed data, figure, model text.
	    """
	    if option == "Regression Problem":
	        return regression_workflow(csv_file, x_column, y_column, target_column)
	    if option == "Classification Problem":
	        return classification_workflow(csv_file, x_column, y_column, target_column)
	    # No (or an unknown) problem type: still return one value per output
	    # component rather than falling through and returning None.
	    return "Please select a problem type (Regression Problem or Classification Problem).", None, None, None

	# Reset function to clear outputs
	def reset_all():
	    """Return blank values for the four output components.

	    The order matches the outputs wired to the reset button: description
	    text, preprocessed data, plot, model text.
	    """
	    blank_text = ""
	    blank_widget = None
	    return (blank_text, blank_widget, blank_widget, blank_text)

	# Explanation text: Markdown shown at the top of the page via gr.Markdown.
	# It links to the package on PyPI and GitHub and lists usage notes
	# (numeric columns for the plot, meaning of the target column).
	explanation = """
	### PejmanAI Data Analysis Tool

	This app uses the `pejmanai_data_analysis` package, available on [PyPI](https://pypi.org/project/pejmanai-data-analysis/).
	The GitHub repository for the project is available [here](https://github.com/arad1367/pejmanai_data_analysis_pypi_package).

	About the app:
	- In the visualization part, you must use two numerical columns. If you select string columns, you will not see any output.
	- The target column is the dependent variable on which you want to make predictions.
	- Due to the nature of the `pejmanai_data_analysis` package, the data description and model output are shown in a captured format (this will be addressed in the next version).
	"""

	# Footer HTML: centered links rendered at the bottom of the page via gr.HTML.
	# The separators are plain "|" characters; a backslash before them would be
	# an invalid string escape and would show up literally in the page.
	footer = """
	<div style="text-align: center; margin-top: 20px;">
	<a href="https://www.linkedin.com/in/pejman-ebrahimi-4a60151a7/" target="_blank">LinkedIn</a> |
	<a href="https://github.com/arad1367" target="_blank">GitHub</a> |
	<a href="https://arad1367.pythonanywhere.com/" target="_blank">Live demo of my PhD defense</a>
	<br>
	Made with 💖 by Pejman Ebrahimi
	</div>
	"""

	# Set up the Gradio interface with UI adjustments
	with gr.Blocks(theme="JohnSmith9982/small_and_pretty") as interface:
	    # Introductory Markdown at the top of the page
	    gr.Markdown(explanation)

	    # --- Inputs: one component per row ---
	    with gr.Row():
	        problem_type = gr.Radio(
	            ["Regression Problem", "Classification Problem"],
	            label="Select Problem Type",
	        )
	    with gr.Row():
	        csv_file = gr.File(label="Upload CSV File")
	    with gr.Row():
	        x_column = gr.Textbox(label="Enter X Column for Visualization")
	    with gr.Row():
	        y_column = gr.Textbox(label="Enter Y Column for Visualization")
	    with gr.Row():
	        target_column = gr.Textbox(label="Enter Target Column for Model Training")

	    with gr.Row():
	        submit_button = gr.Button("Run Analysis")

	    # --- Outputs: description text, preprocessed table, plot, model text ---
	    with gr.Row():
	        data_desc_output = gr.Textbox(
	            label="Data Description",
	            lines=20,
	            placeholder="Data Description Output",
	        )
	    with gr.Row():
	        df_preprocessed_output = gr.Dataframe(label="Data Preprocessing Output")
	    with gr.Row():
	        visualization_output = gr.Plot(label="Data Visualization Output")
	    with gr.Row():
	        model_output = gr.Textbox(
	            label="Model Output",
	            lines=20,
	            placeholder="Model Output",
	        )

	    with gr.Row():
	        reset_button = gr.Button("Reset Outputs")

	    # Both buttons write to the same four components, in this order.
	    _result_components = (
	        data_desc_output,
	        df_preprocessed_output,
	        visualization_output,
	        model_output,
	    )

	    # "Reset Outputs" takes no inputs and blanks every output component.
	    reset_button.click(
	        fn=reset_all,
	        inputs=[],
	        outputs=list(_result_components),
	    )

	    # "Run Analysis" feeds all five inputs to the dispatcher.
	    submit_button.click(
	        fn=gradio_interface,
	        inputs=[problem_type, csv_file, x_column, y_column, target_column],
	        outputs=list(_result_components),
	    )

	    # Footer links at the bottom of the page
	    gr.HTML(footer)

	# Launch the Gradio interface built above. This call sits at module level
	# with no ``if __name__ == "__main__"`` guard, so it runs whenever this
	# file is executed or imported.
	interface.launch()