# Spaces status (page residue captured with this file): Runtime error
import gradio as gr | |
import xgboost as xgb | |
import pandas as pd | |
from datasets import load_dataset | |
from sklearn.model_selection import train_test_split | |
import subprocess | |
from sklearn.metrics import classification_report | |
# Summarise the distribution of the 'Hair Loss' column.
def show_hair_loss_counts():
    """Return the frequency of each 'Hair Loss' value as a JSON string.

    Reads the module-level ``df`` and serialises the per-value counts so the
    result can be handed to a JSON display component.
    """
    hair_loss_counts = df['Hair Loss'].value_counts()
    return hair_loss_counts.to_json()
# SECURITY NOTE(review): the command string is handed to the shell unchanged,
# so anyone who can call this function can execute arbitrary commands with
# the privileges of this process. Only expose it to trusted operators.
def run_command(command):
    """Run a shell command and return its textual output.

    Args:
        command: Command line passed to the shell exactly as given.

    Returns:
        The captured stdout when the command exits with status 0.
        On a non-zero exit: stderr if it has content, otherwise stdout,
        otherwise a short message carrying the exit status, so a failure is
        never reported as an empty string.
        If the command cannot be started at all, the text of the exception.
    """
    try:
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
    except Exception as e:
        # Boundary handler: report the problem to the caller as text
        # instead of letting it propagate.
        return str(e)

    if result.returncode == 0:
        return result.stdout

    # Not every failing program writes to stderr; fall back so the caller
    # always sees some evidence of the failure.
    return (
        result.stderr
        or result.stdout
        or f"Command exited with status {result.returncode}"
    )
# --- Data loading ------------------------------------------------------------
# Fetch the dataset and materialise its 'train' split as a pandas DataFrame.
dataset = load_dataset("Ammok/hair_health")
df = pd.DataFrame(dataset['train'])

### PREPROCESSING
# "No Data" placeholders are turned into real missing values so the
# imputation steps below can see them.
df = df.replace("No Data", pd.NA)

# Numeric columns: fill gaps with the column mean.
numeric_means = df.select_dtypes(include=['number']).mean()
df = df.fillna(numeric_means)

# Object (string) columns: fill gaps with the most frequent value.
object_columns = df.select_dtypes(include=['object']).columns
for col in object_columns:
    most_frequent = df[col].mode()[0]
    df[col] = df[col].fillna(most_frequent)

# Columns to one-hot encode.
# NOTE(review): several names end with a space; presumably they mirror the
# dataset's own column headers -- verify before "cleaning" them.
categorical_cols = [
    'Genetics',
    'Hormonal Changes',
    'Medical Conditions',
    'Medications & Treatments',
    'Nutritional Deficiencies ',
    'Stress',
    'Poor Hair Care Habits ',
    'Environmental Factors',
    'Smoking',
    'Weight Loss ',
]
# drop_first=True omits the first level of each category from the dummies.
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# --- Features / target -------------------------------------------------------
y = df["Hair Loss"]
X = df.drop(columns=["Hair Loss"])

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model -------------------------------------------------------------------
# Baseline XGBoost classifier with default hyper-parameters.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)
def predict(*inputs):
    """Predict the 'Hair Loss' label for a single set of feature values.

    Args:
        *inputs: Feature values in the same order as ``X.columns``. An entry
            that is ``None``, or that is absent because fewer values than
            columns were supplied, is imputed from the feature matrix ``X``.

    Returns:
        The first (and only) element of ``model.predict`` for the one-row
        frame built from the inputs.
    """
    # Pair each supplied value with its column name; extra values are ignored
    # and missing trailing values are filled in below.
    input_data = dict(zip(X.columns, inputs))

    for col in X.columns:
        if input_data.get(col) is not None:
            continue

        column = X[col]
        # Every numeric dtype (integer as well as float) gets the mean, the
        # same notion of "number" the preprocessing uses. Boolean indicator
        # columns count as categorical: pandas reports bool as numeric, so it
        # is excluded explicitly and such columns get the mode.
        is_number = pd.api.types.is_numeric_dtype(
            column.dtype
        ) and not pd.api.types.is_bool_dtype(column.dtype)
        input_data[col] = column.mean() if is_number else column.mode()[0]

    # Single-row frame with the exact column order the model was fitted on.
    data = pd.DataFrame([input_data], columns=X.columns)
    prediction = model.predict(data)
    return prediction[0]
# Row lookup used by the data-exploration section of the UI.
def explore_data(row_number):
    """Return one row of the module-level ``df`` as a dictionary.

    Args:
        row_number: Positional row index. Numeric input widgets commonly
            deliver floats (e.g. ``3.0``), so the value is converted to an
            integer before indexing; a fractional part is truncated.
            ``None`` (an empty input field) is accepted.

    Returns:
        A mapping of column name to value for the requested row, or an empty
        dict when ``row_number`` is ``None``.

    Raises:
        IndexError: If the position lies outside the frame.
    """
    if row_number is None:
        return {}
    # ``iloc`` rejects float positions with a TypeError, hence the cast.
    return df.iloc[int(row_number)].to_dict()
# Gradio UI: dataset exploration, prediction, target distribution and a
# command runner, all on one page.
with gr.Blocks() as demo:
    gr.Markdown("# Hair Health Dataset Exploration")
    # precision=0 makes the component emit an integer; a float row number
    # cannot be used for positional DataFrame indexing.
    row_number_input = gr.Number(label="Row Number", precision=0)
    data_output = gr.JSON(label="Row Data")
    row_number_input.change(
        explore_data,
        inputs=[row_number_input],
        outputs=[data_output],
    )

    gr.Markdown("## Make a Prediction")
    # One numeric input per model feature, keyed by column name. The dict
    # keeps insertion order, so the values line up with X.columns when they
    # are passed positionally to predict().
    input_components = {col: gr.Number(label=col) for col in X.columns}
    output = gr.Textbox(label="Prediction")
    submit_button = gr.Button("Predict")
    submit_button.click(
        predict,
        inputs=list(input_components.values()),
        outputs=[output],
    )

    gr.Markdown("## Hair Loss Value Counts")
    value_counts_output = gr.JSON(label="Hair Loss Value Counts")
    value_counts_button = gr.Button("Show Hair Loss Counts")
    value_counts_button.click(show_hair_loss_counts, outputs=[value_counts_output])

    # SECURITY NOTE(review): this section lets any visitor of the page run
    # arbitrary shell commands on the host. Remove it or put it behind
    # authentication before exposing the app publicly.
    gr.Markdown("## Command Runner")
    command_input = gr.Textbox(label="Enter Command")
    command_output = gr.Textbox(label="Command Output")
    run_button = gr.Button("Run Command")
    run_button.click(run_command, inputs=command_input, outputs=command_output)

# Report hold-out performance on stdout before the server starts.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

demo.launch()