# COPD / app.py
# MLML202512's picture
# Update app.py
# a97ac2a verified
from pathlib import Path

import gradio as gr
import joblib
import numpy as np
import pandas as pd
# Load the model and scaler.
# Paths are resolved relative to this file instead of the current working
# directory, so the app also starts when it is launched from another directory.
# NOTE: joblib artifacts are pickle-based; only load files from a trusted source.
_ARTIFACT_DIR = Path(__file__).resolve().parent
model = joblib.load(_ARTIFACT_DIR / 'catboost_model.pkl')
scaler = joblib.load(_ARTIFACT_DIR / 'scaler.pkl')
# Feature columns (the order must match the order used at training time).
cat_features = [
    "Gender",
    "Race",
    "Education",
    "Marital status",
    "Smoke",
]
num_features = [
    "Age",
    "Family income",
    "BMI",
    "MPAH",
    "PFDE",
    "PFHxS",
    "PFNA",
    "PFOA",
    "PFOS",
    "PFUA",
]
# Categorical columns first, then numeric columns.
all_features = [*cat_features, *num_features]
# Define the prediction function
def _is_finite_number(value):
    """Return True when *value* can be read as a finite real number.

    ``None``, NaN, infinities, and anything ``float()`` rejects count as invalid.
    """
    if value is None:
        return False
    try:
        return bool(np.isfinite(float(value)))
    except (TypeError, ValueError):
        return False


def predict_copd(age, gender, race, education, marital_status, family_income, bmi, smoke,
                 mpah, pfde, pfhxs, pfna, pfoa, pfos, pfua):
    """Score one patient record and return a human-readable COPD risk summary.

    The arguments are the raw values of the UI inputs: numeric codes for the
    categorical fields and plain numbers for the continuous fields.

    Returns:
        For valid input, a three-line string holding the predicted class, the
        probability read from column 1 of ``predict_proba`` (formatted as a
        percentage), and a risk band ("Low Risk" below 0.3, "Medium Risk"
        below 0.7, otherwise "High Risk").
        If any field is missing or not a finite number (for example a cleared
        numeric field, which arrives as ``None``), a single-line message that
        starts with "Error:" and names the offending fields is returned
        instead, and neither the scaler nor the model is called.
    """
    # Keys are the column names the scaler and model are indexed with.
    record = {
        'Gender': gender,
        'Race': race,
        'Education': education,
        'Marital status': marital_status,
        'Smoke': smoke,
        'Age': age,
        'Family income': family_income,
        'BMI': bmi,
        'MPAH': mpah,
        'PFDE': pfde,
        'PFHxS': pfhxs,
        'PFNA': pfna,
        'PFOA': pfoa,
        'PFOS': pfos,
        'PFUA': pfua,
    }

    # Reject unusable input up front so the user sees which fields to fix
    # rather than an opaque failure from the scaler or the model.
    invalid_fields = [name for name, value in record.items() if not _is_finite_number(value)]
    if invalid_fields:
        return "Error: missing or non-numeric value for: " + ", ".join(invalid_fields)

    # Step 1: Create a single-row input DataFrame
    input_data = pd.DataFrame([record])

    # Step 2: Standardize numeric features only
    input_data[num_features] = scaler.transform(input_data[num_features])

    # Step 3: Ensure column order matches model training
    input_data = input_data[all_features]

    # Step 4: Make prediction
    prediction = model.predict(input_data)[0]
    probability = model.predict_proba(input_data)[0][1]

    # Step 5: Format output
    result_text = "Prediction: COPD" if prediction == 1 else "Prediction: Healthy"
    if probability < 0.3:
        risk_level = "Low Risk"
    elif probability < 0.7:
        risk_level = "Medium Risk"
    else:
        risk_level = "High Risk"

    return f"{result_text}\nRisk Probability: {probability:.2%}\nRisk Level: {risk_level}"
# Gradio UI
with gr.Blocks() as iface:
    gr.Markdown("# COPD Risk Prediction Calculator")
    gr.Markdown("Enter patient information to predict COPD risk.")

    with gr.Row():
        # Left column: demographic inputs.
        with gr.Column():
            gr.Markdown("## Demographic Information")
            age = gr.Number(label="Age", value=40)
            gender = gr.Radio([1, 2], label="Gender", info="1: Male, 2: Female", value=1)
            race = gr.Dropdown(
                [1, 2, 3, 4, 5],
                label="Race",
                info="1: Mexican American, 2: Other Hispanic, 3: Non-Hispanic White, 4: Non-Hispanic Black, 5: Other Race",
                value=3,
            )
            education = gr.Radio(
                [0, 1],
                label="Education Level",
                info="0: High school or below, 1: Above high school",
                value=0,
            )
            marital_status = gr.Dropdown(
                [1, 2, 3, 4, 5, 6],
                label="Marital Status",
                info="1: Married, 2: Widowed, 3: Divorced, 4: Separated, 5: Never married, 6: Living with partner",
                value=1,
            )
            family_income = gr.Number(
                label="Family Income (PIR)",
                value=3.0,
                info="Poverty Income Ratio",
            )

        # Right column: health indicators and biomarker concentrations.
        with gr.Column():
            gr.Markdown("## Health Indicators")
            bmi = gr.Number(label="BMI", value=25.0)
            smoke = gr.Radio([0, 1], label="Smoking", info="0: No, 1: Yes", value=0)

            gr.Markdown("## Biomarkers (ng/mL)")
            mpah = gr.Number(label="MPAH", value=0.5)
            pfde = gr.Number(label="PFDE", value=0.5)
            pfhxs = gr.Number(label="PFHxS", value=1.0)
            pfna = gr.Number(label="PFNA", value=0.5)
            pfoa = gr.Number(label="PFOA", value=2.0)
            pfos = gr.Number(label="PFOS", value=5.0)
            pfua = gr.Number(label="PFUA", value=0.5)

    predict_button = gr.Button("Predict")
    output_textbox = gr.Textbox(label="Prediction Result")

    # The order of `inputs` must match the positional parameters of predict_copd.
    predict_button.click(
        predict_copd,
        inputs=[age, gender, race, education, marital_status, family_income, bmi, smoke,
                mpah, pfde, pfhxs, pfna, pfoa, pfos, pfua],
        outputs=output_textbox,
    )

    gr.Markdown("### Variable Information")
    # Blank lines separate each bold heading from the neighbouring bullet lists;
    # without them Markdown folds a heading into the preceding list item.
    gr.Markdown(
        "**Demographic Variables:**\n"
        "\n"
        "- **Gender**: 1=Male, 2=Female\n"
        "- **Race**: 1=Mexican American, 2=Other Hispanic, 3=Non-Hispanic White, 4=Non-Hispanic Black, 5=Other Race\n"
        "- **Education**: 0=High school or below, 1=Above high school\n"
        "- **Marital Status**: 1=Married, 2=Widowed, 3=Divorced, 4=Separated, 5=Never married, 6=Living with partner\n"
        "\n"
        "**Health Variables:**\n"
        "\n"
        "- **BMI**: Body Mass Index (kg/m²)\n"
        "- **Smoke**: 0=No, 1=Yes\n"
        "\n"
        "**Biomarkers:**\n"
        "\n"
        "- **MPAH, PFDE, PFHxS, PFNA, PFOA, PFOS, PFUA**: Per- and polyfluoroalkyl substances (PFAS) in ng/mL\n"
    )
# Launch the app only when this file is run as a script, so that importing the
# module (for example to reuse predict_copd) does not start a web server.
if __name__ == "__main__":
    iface.launch()