File size: 4,078 Bytes
d50344a
 
 
 
5d54022
a66da94
 
cb7e45c
 
 
 
 
a2ada34
 
 
 
a66da94
 
 
 
 
 
 
d50344a
 
 
 
 
 
 
5d54022
d50344a
5d54022
 
 
 
857bde5
5d54022
 
 
16b217f
5d54022
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d50344a
5d54022
d50344a
55a248d
 
 
 
de576e2
100fe27
 
de576e2
 
 
 
100fe27
de576e2
 
d50344a
 
 
100fe27
de576e2
55a248d
d50344a
 
 
a66da94
a2ada34
d50344a
 
 
 
 
 
 
 
 
de576e2
 
 
 
d50344a
 
 
a6d130c
 
d50344a
a2ada34
 
 
 
 
 
a66da94
 
 
 
 
 
 
 
cb7e45c
 
 
 
 
 
 
d50344a
16b217f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import gradio as gr
import xgboost as xgb
import pandas as pd
from datasets import load_dataset
from sklearn.model_selection import train_test_split
import subprocess

from sklearn.metrics import classification_report




def show_hair_loss_counts():
    """Return how often each 'Hair Loss' value occurs, as a JSON string.

    Reads the module-level ``df``. The result is the JSON object produced
    by ``Series.to_json`` on the value counts (distinct value -> count),
    which the JSON output component can display directly.
    """
    hair_loss_counts = df['Hair Loss'].value_counts()
    return hair_loss_counts.to_json()

def run_command(command, timeout=None):
    """Run *command* through the system shell and return its text output.

    SECURITY NOTE(review): the command string is handed to the shell
    unchanged (``shell=True``), so anyone who can reach this function can
    execute arbitrary commands with the privileges of this process. That is
    the purpose of the "Command Runner" panel, so it is kept, but the app
    should only be exposed to trusted users.

    Args:
        command: Shell command line to execute.
        timeout: Optional limit in seconds, forwarded to ``subprocess.run``.
            ``None`` (the default) waits indefinitely.

    Returns:
        The command's stdout when it exits with status 0. On a non-zero
        exit: stderr, falling back to stdout, falling back to a message
        naming the exit status, so a failure is never reported as an empty
        string. A short notice when no command was given. The exception
        text when the command could not be run at all (timeouts included).
    """
    # An empty textbox yields "" (or None); there is nothing to execute.
    if command is None or (isinstance(command, str) and not command.strip()):
        return "No command provided."

    try:
        result = subprocess.run(
            command,
            shell=True,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return str(e)

    if result.returncode == 0:
        return result.stdout

    # Many tools fail silently or write their diagnostics to stdout, so do
    # not rely on stderr alone to describe the failure.
    return (
        result.stderr
        or result.stdout
        or f"Command exited with status {result.returncode}"
    )

# ---- Data loading ---------------------------------------------------------
# Fetch the dataset and turn its 'train' split into a pandas DataFrame so it
# can be explored and preprocessed with the usual DataFrame tooling.
dataset = load_dataset("Ammok/hair_health")
df = pd.DataFrame(dataset['train'])

# ---- Preprocessing --------------------------------------------------------

# "No Data" entries stand for missing values; convert them to real NA
# markers so the imputation steps below can find them.
df = df.replace("No Data", pd.NA)

# Numerical gaps are filled with the mean of their column.
numeric_means = df.select_dtypes(include=['number']).mean()
df = df.fillna(numeric_means)

# Categorical (object-typed) gaps are filled with the column's most frequent
# value.
for text_col in df.select_dtypes(include=['object']).columns:
    most_frequent = df[text_col].mode()[0]
    df[text_col] = df[text_col].fillna(most_frequent)

# Columns to one-hot encode. Several names end with a space; presumably this
# mirrors the dataset's own headers -- keep them byte-for-byte.
categorical_cols = [
    'Genetics',
    'Hormonal Changes',
    'Medical Conditions',
    'Medications & Treatments',
    'Nutritional Deficiencies ',
    'Stress',
    'Poor Hair Care Habits ',
    'Environmental Factors',
    'Smoking',
    'Weight Loss ',
]
# drop_first=True drops one indicator per category to avoid a redundant column.
df = pd.get_dummies(df, columns=categorical_cols, drop_first=True)

# ---- Features / target ----------------------------------------------------
# NOTE(review): X keeps every column except the target, so any identifier-like
# column present in the dataset is also used as a feature -- confirm intended.
y = df["Hair Loss"]
X = df.drop(columns=["Hair Loss"])

# ---- Train / test split ---------------------------------------------------
# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# ---- Model ----------------------------------------------------------------
# Baseline XGBoost classifier with default hyperparameters.
model = xgb.XGBClassifier()
model.fit(X_train, y_train)

def predict(*inputs):
    """Predict the 'Hair Loss' label for a single row of feature values.

    Args:
        *inputs: Feature values in the same order as ``X.columns``. A value
            of ``None`` (or a missing trailing value) is replaced by a
            default taken from the training features: the column mean for
            numeric columns, the column mode for everything else
            (boolean indicator columns included).

    Returns:
        The model's prediction for the row, i.e. the first element of
        ``model.predict`` on a one-row DataFrame.
    """
    # Pair each positional input with its feature name; zip stops at the
    # shorter sequence, so omitted trailing inputs are simply absent here.
    input_data = dict(zip(X.columns, inputs))

    # Fill in every feature that was left empty or omitted.
    for col in X.columns:
        if input_data.get(col) is not None:
            continue
        column = X[col]
        # Accept any numeric dtype, not only float64: integer-typed numeric
        # columns must get the mean as well. Booleans count as numeric for
        # pandas, so exclude them explicitly and give them the mode.
        is_numeric = (
            pd.api.types.is_numeric_dtype(column)
            and not pd.api.types.is_bool_dtype(column)
        )
        input_data[col] = column.mean() if is_numeric else column.mode()[0]

    # Build a one-row frame with the exact training column order.
    data = pd.DataFrame([input_data], columns=X.columns)
    prediction = model.predict(data)
    return prediction[0]




def explore_data(row_number):
    """Return one row of ``df`` as a ``{column: value}`` dict for display.

    Args:
        row_number: Positional index of the row. A number input may deliver
            this as a float (e.g. ``3.0``) or as ``None`` when the field is
            empty, so it is coerced to ``int`` before indexing; a fractional
            value is truncated toward zero.

    Returns:
        The selected row as a dict, or ``None`` when no row number was
        given (which leaves the output empty).

    Raises:
        IndexError: If the row number is outside the DataFrame.
        ValueError: If the row number is NaN.
    """
    # The change event also fires when the field is cleared.
    if row_number is None:
        return None
    # ``iloc`` only accepts integer positions; a float would raise TypeError.
    return df.iloc[int(row_number)].to_dict()
    

# Gradio UI: one page with four stacked sections -- row explorer, prediction
# form, target value counts, and a shell command runner. Components appear
# in the order they are created inside the Blocks context.
with gr.Blocks() as demo:
    # --- Row explorer: show the row at the typed position as JSON ---------
    gr.Markdown("# Hair Health Dataset Exploration")

    row_number_input = gr.Number(label="Row Number")
    data_output = gr.JSON(label="Row Data")
    # Refresh the JSON view whenever the row number changes.
    row_number_input.change(
        fn=explore_data,
        inputs=[row_number_input],
        outputs=[data_output],
    )

    # --- Prediction form: one numeric field per model feature -------------
    gr.Markdown("## Make a Prediction")

    # Keyed by feature name; insertion order follows X.columns, which is the
    # positional order predict() expects.
    input_components = {
        feature: gr.Number(label=feature) for feature in X.columns
    }

    output = gr.Textbox(label="Prediction")

    submit_button = gr.Button("Predict")
    # Each field's value is passed to predict() as a positional argument.
    submit_button.click(
        fn=predict,
        inputs=list(input_components.values()),
        outputs=[output],
    )

    # --- Target distribution ----------------------------------------------
    gr.Markdown("## Hair Loss Value Counts")

    value_counts_output = gr.JSON(label="Hair Loss Value Counts")
    value_counts_button = gr.Button("Show Hair Loss Counts")
    value_counts_button.click(
        fn=show_hair_loss_counts,
        outputs=[value_counts_output],
    )

    # --- Command runner: executes the typed text via run_command() --------
    gr.Markdown("## Command Runner")

    command_input = gr.Textbox(label="Enter Command")
    command_output = gr.Textbox(label="Command Output")

    run_button = gr.Button("Run Command")
    run_button.click(
        fn=run_command,
        inputs=command_input,
        outputs=command_output,
    )




# Score the trained model on the held-out split and print the per-class
# precision/recall/F1 report to stdout before the server starts.
y_pred = model.predict(X_test)
evaluation_report = classification_report(y_test, y_pred)
print(evaluation_report)


# Start serving the Gradio app defined above.
demo.launch()