"""Gradio demo: explore the hair-health dataset and predict hair loss with XGBoost.

Running this script loads the "Ammok/hair_health" dataset, preprocesses it,
trains an XGBoost classifier, and launches a Gradio UI with two parts:
a row explorer and a prediction form.
"""

import gradio as gr
import xgboost as xgb
import pandas as pd
from datasets import load_dataset
from sklearn.model_selection import train_test_split

TARGET_COL = "Hair Loss"

# Load the dataset
dataset = load_dataset("Ammok/hair_health")

# Convert to Pandas DataFrame for exploration
df = pd.DataFrame(dataset['train'])

### PREPROCESSING
# Replace "No Data" entries with NaN for missing values handling
df.replace("No Data", pd.NA, inplace=True)

# Handle missing numerical values with mean
df.fillna(df.select_dtypes(include=['number']).mean(), inplace=True)

# Handle missing categorical values with mode
for col in df.select_dtypes(include=['object']).columns:
    df[col] = df[col].fillna(df[col].mode()[0])

# Remember which features are numerical *before* one-hot encoding; afterwards
# the dummy columns may themselves have a numeric dtype and could no longer be
# told apart from real numerical features by dtype alone.
numeric_feature_cols = set(df.select_dtypes(include=['number']).columns) - {TARGET_COL}

# One-hot encoding for categorical variables
# (the trailing spaces in some names are part of the column names used here)
categorical_cols = [
    'Genetics',
    'Hormonal Changes',
    'Medical Conditions',
    'Medications & Treatments',
    'Nutritional Deficiencies ',
    'Stress',
    'Poor Hair Care Habits ',
    'Environmental Factors',
    'Smoking',
    'Weight Loss ',
]
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# Extract features and target
X = df.drop(columns=[TARGET_COL])
y = df[TARGET_COL]

# Split the dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a basic XGBoost model
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

# Fallback value per feature, computed once instead of on every request:
# mean for numerical features, most frequent value for everything else
# (including the one-hot encoded columns).
FEATURE_DEFAULTS = {
    col: X[col].mean() if col in numeric_feature_cols else X[col].mode()[0]
    for col in X.columns
}


def predict(input_data):
    """Predict the target class for a single observation.

    Args:
        input_data: Mapping from feature column name (as in ``X.columns``) to
            its value. Features that are absent, ``None`` or NaN are replaced
            by the precomputed default for that column. The mapping itself is
            not modified.

    Returns:
        The first (and only) element of ``model.predict`` for the row.
    """
    row = {}
    for col in X.columns:
        value = input_data.get(col)
        if pd.isna(value):  # True for None as well as NaN
            value = FEATURE_DEFAULTS[col]
        row[col] = value

    # Convert input data to a DataFrame and cast it to the training dtypes so
    # the model sees the same column types it was fitted on.
    data = pd.DataFrame([row], columns=X.columns).astype(X.dtypes.to_dict())
    prediction = model.predict(data)
    return prediction[0]


def predict_from_inputs(*values):
    """Adapt Gradio's positional component values to ``predict``.

    Gradio passes one positional argument per input component, in the order
    the components were registered; here that order is ``X.columns``.
    """
    return predict(dict(zip(X.columns, values)))


# Set up Gradio interface for data exploration
def explore_data(row_number):
    """Return one row of the preprocessed DataFrame as a dict.

    Args:
        row_number: Positional row index. May arrive as a float from the
            Gradio number field, or as ``None`` when the field is cleared.

    Returns:
        ``{column: value}`` for the requested row, or an empty dict when no
        row number is given.

    Raises:
        gr.Error: If the row number is outside the DataFrame.
    """
    if row_number is None:
        return {}

    index = int(row_number)  # .iloc rejects float positions
    try:
        return df.iloc[index].to_dict()
    except IndexError:
        raise gr.Error(
            f"Row number {index} is out of range; the dataset has {len(df)} rows."
        ) from None


# Gradio UI
with gr.Blocks() as demo:
    gr.Markdown("# Hair Health Dataset Exploration")
    # precision=0 makes the component deliver an integer row index
    row_number_input = gr.Number(label="Row Number", precision=0)
    data_output = gr.JSON(label="Row Data")
    row_number_input.change(explore_data, inputs=[row_number_input], outputs=[data_output])

    gr.Markdown("## Make a Prediction")
    # Generate number inputs for each column, keyed by column name
    input_components = {col: gr.Number(label=col) for col in X.columns}
    output = gr.Textbox(label="Prediction")
    submit_button = gr.Button("Predict")
    # Event listeners take a list of components (not a dict wrapped in a list);
    # the values arrive positionally in the same order as X.columns.
    submit_button.click(
        predict_from_inputs,
        inputs=list(input_components.values()),
        outputs=[output],
    )

demo.launch()