# diabetes_model / app.py
# amissah1's picture
# Update app.py
# a1f8700 verified
import gradio as gr
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
# Read the dataset from the CSV file in the working directory.
df = pd.read_csv('diabetes_risk_prediction_dataset.csv')

# Replace every column's values with integer codes, fitting a fresh
# LabelEncoder for each column.
for col_name in df.columns:
    df[col_name] = LabelEncoder().fit_transform(df[col_name])

# Swap the space-separated feature names for identifier-style names.
renamed_columns = {
    'sudden weight loss': 'sudden_weight_loss',
    'visual blurring': 'visual_blurring',
    'partial paresis': 'partial_paresis',
}
df.rename(columns=renamed_columns, inplace=True)

# Remove the columns that are not used as model features.
cols = [
    'Age', 'Gender', 'Genital thrush', 'Itching', 'delayed healing',
    'muscle stiffness', 'Alopecia', 'Obesity',
]
df.drop(columns=cols, inplace=True)

# Split the 'class' target off from the features, then hold out 10% of the
# rows as a test set (fixed seed for reproducibility).
y = df['class']
X = df.drop('class', axis=1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.1
)
# Hyper-parameter grid for the logistic regression search.
# The grid is split per solver because 'lbfgs' does not support the 'l1'
# penalty: a flat grid would pair them, making those candidate fits fail and
# filling the search results with FitFailedWarning / NaN scores.
c_values = [0.001, 0.01, 0.1, 1, 10, 100]  # Inverse regularization strength
param_grid = [
    {'C': c_values, 'penalty': ['l1', 'l2'], 'solver': ['liblinear']},
    {'C': c_values, 'penalty': ['l2'], 'solver': ['lbfgs']},
]

# 5-fold cross-validated search scored on accuracy, using all CPU cores.
# max_iter matches the final model below so the candidates are evaluated
# under the same iteration budget as the model that is actually served.
grid_search = GridSearchCV(
    estimator=LogisticRegression(max_iter=1000),
    param_grid=param_grid,
    cv=5,
    scoring='accuracy',
    verbose=1,
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_

# Fit the model used by the app on the training split with the best
# hyper-parameters found by the search.
model_lgr = LogisticRegression(max_iter=1000, **best_params)
model_lgr.fit(X_train, y_train)

# Predicted probabilities (last predict_proba column) for the first five
# held-out rows, kept as a quick sanity check of the fitted model.
y_pred = model_lgr.predict_proba(X_test)[:5, -1]
# Prediction callback used by the Gradio interface
def predicta(Polyuria, Polydipsia, sudden_weight_loss, weakness, Polyphagia, visual_blurring, Irritability, partial_paresis):
    """Return the model's predicted diabetes probability for one set of symptoms.

    Each argument is a binary symptom indicator: 1 for Yes, 0 for No.

    Returns:
        The probability of the class the model encodes as 1 (the second
        ``predict_proba`` column), rounded to two decimals, as a float.
        NOTE(review): presumably class 1 is the positive-diabetes label
        produced by the label encoding at training time -- confirm.

    Raises:
        ValueError: If any indicator is missing (``None``, e.g. an empty
            form field) or is not 0 or 1.
    """
    # Keys and their order must match the feature columns the model was
    # trained on.
    features = {
        'Polyuria': Polyuria,
        'Polydipsia': Polydipsia,
        'sudden_weight_loss': sudden_weight_loss,
        'weakness': weakness,
        'Polyphagia': Polyphagia,
        'visual_blurring': visual_blurring,
        'Irritability': Irritability,
        'partial_paresis': partial_paresis,
    }

    # Fail early with a clear message instead of passing NaN or out-of-domain
    # values to a model that was trained on 0/1 indicators only.
    for name, value in features.items():
        if value is None:
            raise ValueError(f"{name} is required: enter 1 for Yes or 0 for No.")
        if value not in (0, 1):
            raise ValueError(f"{name} must be 1 (Yes) or 0 (No), got {value!r}.")

    # Single-row frame; a distinct local name avoids shadowing the
    # module-level training DataFrame.
    features_df = pd.DataFrame(
        {name: [int(value)] for name, value in features.items()}
    )

    # Probability of class 1 for the single input row
    probability = model_lgr.predict_proba(features_df)[:, 1]
    rounded_probability = np.round(probability, decimals=2)

    # Return the diabetes probability as a plain Python float
    return float(rounded_probability[0])
# Build one numeric input per symptom, listed in the same order as the
# parameters of predicta.
symptom_names = [
    "Polyuria",
    "Polydipsia",
    "Sudden Weight Loss",
    "Weakness",
    "Polyphagia",
    "Visual Blurring",
    "Irritability",
    "Partial Paresis",
]
inputs = [
    gr.Number(label=f"{symptom} (1 for Yes, 0 for No)")
    for symptom in symptom_names
]

# Single numeric output showing the predicted probability
output = gr.Number(label="Diabetes Probability")

# Wire the prediction function to the input and output components
app = gr.Interface(
    fn=predicta,
    inputs=inputs,
    outputs=output,
    title="Diabetes Prediction App",
)

# Start the web app and request a public share link
app.launch(share=True)