import gradio as gr
import numpy as np
from joblib import load
from tensorflow.keras.models import load_model
import tensorflow as tf
import pickle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Load dataset
df = pd.read_csv('processed_data.csv')  # Replace with the correct path to your dataset

# Load the LabelEncoder and set up the ColumnTransformer before any prediction
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Column positions of the categorical features (0: Gender, 1: Race (Reported),
# 9: Cyp2C9 genotypes, 10: VKORC1 genotype); update if the column order changes
categorical_features = [0, 1, 9, 10]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(sparse_output=False, drop="first"), categorical_features)],
    remainder="passthrough"
)

# Fit the transformer on the same feature columns, in the same order, used during
# training so that inference-time inputs are encoded consistently
ct.fit(df[['Gender', 'Race (Reported)', 'Age', 'Height (cm)', 'Weight (kg)',
           'Diabetes', 'Simvastatin (Zocor)', 'Amiodarone (Cordarone)',
           'INR on Reported Therapeutic Dose of Warfarin', 'Cyp2C9 genotypes',
           'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T']])
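# A more robust alternative (sketch only, assuming the fitted preprocessor was
# saved during training, e.g. with joblib.dump(ct, 'column_transformer.joblib')):
#     ct = load('column_transformer.joblib')
# Loading the fitted transformer guarantees exactly the same encoding as
# training, instead of re-fitting on the CSV at app startup as done above.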
# UI Components for user input
input_Gender = gr.Radio(["male", "female"], label="Gender")
input_Race = gr.Dropdown(df['Race (Reported)'].value_counts().index.tolist(), label="Race")
input_Age = gr.Dropdown(df['Age'].value_counts().index.tolist(), label='Age')
input_Height = gr.Number(label='Height (cm)')
input_Weight = gr.Number(label='Weight (kg)')
input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')
input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
input_Cyp2C9_genotypes = gr.Dropdown(df['Cyp2C9 genotypes'].value_counts().index.tolist(), label='Cyp2C9 genotypes')
input_VKORC1_genotypes = gr.Radio(df['VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'].value_counts().index.tolist(), label='VKORC1 genotypes')
input_model = gr.Dropdown(['Decision Tree Regression', 'Support Vector Regression', 'Random Forest Regression', 'Deep Learning'], label='Model Selection')
# Output textbox to display predicted dose
output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')
# Prediction function; argument order matches the Gradio inputs below
def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin,
                   amiodarone, inr, cyp2c9, vkorc1, selected_model):
    # Optional debug helper to inspect data before prediction
    def print_input_debug(transformed_input, final_array):
        print("Transformed input shape:", transformed_input.shape)
        print("Final input shape:", final_array.shape)
        print("Input data type:", final_array.dtype)
    try:
        # Load the selected model
        if selected_model == 'Deep Learning':
            model = load_model('best_DeepLearning_model (2).h5')
        elif selected_model == 'Support Vector Regression':
            model = load('SVR_optimized.joblib')
        elif selected_model == 'Random Forest Regression':
            model = load('RandomForestRegressor_optimized.joblib')
        else:
            model = load("DecisionTreeRegressor_optimized.joblib")
        # Handle labels the encoder has not seen by falling back to a known label
        def safe_transform_label(encoder, label, default=None):
            try:
                return encoder.transform([label])[0]
            except ValueError:
                # Unseen label: return the default, or the encoding of the first known class
                return default if default is not None else encoder.transform([encoder.classes_[0]])[0]

        # Encode Age with the LabelEncoder, guarding against unseen labels
        encoded_age = safe_transform_label(label_encoder, age)

        # Ensure numerical inputs are valid floats
        height = float(height) if height is not None else 0.0
        weight = float(weight) if weight is not None else 0.0
        inr = float(inr) if inr is not None else 0.0

        # Assemble the raw feature row in the same column order used to fit ct
        raw_inputs = [
            str(gender),
            str(race),
            str(age),
            height,
            weight,
            float(diabetes),
            float(simvastatin),
            float(amiodarone),
            inr,
            str(cyp2c9),
            str(vkorc1)
        ]

        # Apply the preprocessing pipeline fitted at startup
        transformed_input = ct.transform([raw_inputs])
        # Age is the first passthrough column (7 columns from the end); replace it
        # with the label-encoded value expected by the models
        transformed_input[0][-7] = encoded_age

        # Convert to a NumPy array for model input
        input_array = np.array(transformed_input, dtype=np.float32)
        print_input_debug(transformed_input, input_array)

        # Predict using the appropriate model type
        if selected_model == 'Deep Learning':
            tensor_input = tf.convert_to_tensor(input_array)
            prediction = model.predict(tensor_input, verbose=0)
            return float(prediction[0][0])
        else:
            prediction = model.predict(input_array)
            return float(prediction[0])
    except Exception as e:
        print(f"Error in prediction: {str(e)}")
        return f"Error in prediction: {str(e)}"
# Launch Gradio app
gr.Interface(
fn=predict_dosage,
inputs=[input_Gender, input_Race, input_Age, input_Height, input_Weight,
input_Diabetes, input_Simvastatin, input_Amiodarone,
input_INR_reported, input_Cyp2C9_genotypes, input_VKORC1_genotypes, input_model],
outputs=[output_warfarin_dosage]
).launch(debug=True)