danielritchie committed on
Commit
5d54022
1 Parent(s): d50344a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +29 -5
app.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
2
  import xgboost as xgb
3
  import pandas as pd
4
  from datasets import load_dataset
 
5
 
6
  # Load the dataset
7
  dataset = load_dataset("Ammok/hair_health")
@@ -9,13 +10,36 @@ dataset = load_dataset("Ammok/hair_health")
9
  # Convert to Pandas DataFrame for exploration
10
  df = pd.DataFrame(dataset['train'])
11
 
12
- # Example: Train a simple XGBoost model
13
- X = df.drop(columns=["target_column"]) # Replace with your feature columns
14
- y = df["target_column"] # Replace with your target column
15
 
16
- # Train a basic XGBoost model (replace with custom model training code)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  model = xgb.XGBClassifier()
18
- model.fit(X, y)
19
 
20
  # Function for making predictions
21
  def predict(input_data):
 
2
  import xgboost as xgb
3
  import pandas as pd
4
  from datasets import load_dataset
5
+ from sklearn.model_selection import train_test_split
6
 
7
  # Load the dataset
8
  dataset = load_dataset("Ammok/hair_health")
 
10
  # Convert to Pandas DataFrame for exploration
11
  df = pd.DataFrame(dataset['train'])
12
 
13
+ ### PREPROCESSING
 
 
14
 
15
+ # Replace "No Data" entries with NaN for missing values handling
16
+ df.replace("No Data", pd.NA, inplace=True)
17
+
18
+ # Handle missing numerical values with mean
19
+ df.fillna(df.mean(), inplace=True)
20
+
21
+ # Handle missing categorical values with mode
22
+ for col in df.select_dtypes(include=['object']).columns:
23
+ df[col].fillna(df[col].mode()[0], inplace=True)
24
+
25
+ # One-hot encoding for categorical variables
26
+ categorical_cols = [
27
+ 'Genetics', 'Hormonal Changes', 'Medical Conditions',
28
+ 'Medications & Treatments', 'Nutritional Deficiencies ', 'Stress',
29
+ 'Poor Hair Care Habits ', 'Environmental Factors', 'Smoking', 'Weight Loss '
30
+ ]
31
+ df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)
32
+
33
+ # Extract features and target
34
+ X = df.drop(columns=["Hair Loss"])
35
+ y = df["Hair Loss"]
36
+
37
+ # Split the dataset into train and test sets
38
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
39
+
40
+ # Train a basic XGBoost model
41
  model = xgb.XGBClassifier()
42
+ model.fit(X_train, y_train)
43
 
44
  # Function for making predictions
45
  def predict(input_data):