Spaces:
Runtime error
Runtime error
danielritchie
committed on
Commit
•
5d54022
1
Parent(s):
d50344a
Update app.py
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
|
|
2 |
import xgboost as xgb
|
3 |
import pandas as pd
|
4 |
from datasets import load_dataset
|
|
|
5 |
|
6 |
# Load the dataset
|
7 |
dataset = load_dataset("Ammok/hair_health")
|
@@ -9,13 +10,36 @@ dataset = load_dataset("Ammok/hair_health")
|
|
9 |
# Convert to Pandas DataFrame for exploration
|
10 |
df = pd.DataFrame(dataset['train'])
|
11 |
|
12 |
-
|
13 |
-
X = df.drop(columns=["target_column"]) # Replace with your feature columns
|
14 |
-
y = df["target_column"] # Replace with your target column
|
15 |
|
16 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
model = xgb.XGBClassifier()
|
18 |
-
model.fit(
|
19 |
|
20 |
# Function for making predictions
|
21 |
def predict(input_data):
|
|
|
2 |
import xgboost as xgb
|
3 |
import pandas as pd
|
4 |
from datasets import load_dataset
|
5 |
+
from sklearn.model_selection import train_test_split
|
6 |
|
7 |
# Load the dataset
|
8 |
dataset = load_dataset("Ammok/hair_health")
|
|
|
10 |
# Convert to Pandas DataFrame for exploration
|
11 |
df = pd.DataFrame(dataset['train'])
|
12 |
|
13 |
+
### PREPROCESSING
|
|
|
|
|
14 |
|
15 |
+
# Replace "No Data" entries with NaN for missing values handling
|
16 |
+
df.replace("No Data", pd.NA, inplace=True)
|
17 |
+
|
18 |
+
# Handle missing numerical values with mean
|
19 |
+
# numeric_only=True: pandas >= 2.0 raises TypeError when mean() meets object (string) columns
df.fillna(df.mean(numeric_only=True), inplace=True)
|
20 |
+
|
21 |
+
# Handle missing categorical values with mode
|
22 |
+
for col in df.select_dtypes(include=['object']).columns:
|
23 |
+
# Assign back instead of chained inplace=True, which does not update df under pandas copy-on-write
df[col] = df[col].fillna(df[col].mode()[0])
|
24 |
+
|
25 |
+
# One-hot encoding for categorical variables
|
26 |
+
categorical_cols = [
|
27 |
+
'Genetics', 'Hormonal Changes', 'Medical Conditions',
|
28 |
+
'Medications & Treatments', 'Nutritional Deficiencies ', 'Stress',
|
29 |
+
'Poor Hair Care Habits ', 'Environmental Factors', 'Smoking', 'Weight Loss '
|
30 |
+
]
|
31 |
+
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
|
32 |
+
|
33 |
+
# Extract features and target
|
34 |
+
X = df.drop(columns=["Hair Loss"])
|
35 |
+
y = df["Hair Loss"]
|
36 |
+
|
37 |
+
# Split the dataset into train and test sets
|
38 |
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
39 |
+
|
40 |
+
# Train a basic XGBoost model
|
41 |
model = xgb.XGBClassifier()
|
42 |
+
model.fit(X_train, y_train)
|
43 |
|
44 |
# Function for making predictions
|
45 |
def predict(input_data):
|