import gradio as gr
import numpy as np
from joblib import load
from tensorflow.keras.models import load_model
import tensorflow as tf
import pickle
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
# Load dataset
df = pd.read_csv('processed_data.csv')  # Replace with the correct path to your dataset

# Load the LabelEncoder and set up the ColumnTransformer before any prediction
with open('label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Column positions of the categorical features (0: Gender, 1: Race (Reported),
# 9: Cyp2C9 genotypes, 10: VKORC1 genotype); update if the column order changes
categorical_features = [0, 1, 9, 10]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(sparse_output=False, drop="first"), categorical_features)],
    remainder="passthrough"
)

# Fit the transformer on the same feature columns, in the same order, used during
# training so that inference-time inputs are encoded consistently
ct.fit(df[['Gender', 'Race (Reported)', 'Age', 'Height (cm)', 'Weight (kg)',
           'Diabetes', 'Simvastatin (Zocor)', 'Amiodarone (Cordarone)',
           'INR on Reported Therapeutic Dose of Warfarin', 'Cyp2C9 genotypes',
           'VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T']])
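# A more robust alternative (sketch only, assuming the fitted preprocessor was
# saved during training, e.g. with joblib.dump(ct, 'column_transformer.joblib')):
#     ct = load('column_transformer.joblib')
# Loading the fitted transformer guarantees exactly the same encoding as
# training, instead of re-fitting on the CSV at app startup as done above.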
# UI Components for user input
input_Gender = gr.Radio(["male", "female"], label="Gender")
input_Race = gr.Dropdown(df['Race (Reported)'].value_counts().index.tolist(), label="Race")
input_Age = gr.Dropdown(df['Age'].value_counts().index.tolist(), label='Age')
input_Height = gr.Number(label='Height (cm)')
input_Weight = gr.Number(label='Weight (kg)')
input_Diabetes = gr.Radio([0.0, 1.0], label='Diabetes')
input_Simvastatin = gr.Radio([0.0, 1.0], label='Simvastatin (Zocor)')
input_Amiodarone = gr.Radio([0.0, 1.0], label='Amiodarone (Cordarone)')
input_INR_reported = gr.Number(label='INR on Reported Therapeutic Dose of Warfarin')
input_Cyp2C9_genotypes = gr.Dropdown(df['Cyp2C9 genotypes'].value_counts().index.tolist(), label='Cyp2C9 genotypes')
input_VKORC1_genotypes = gr.Radio(df['VKORC1 genotype: -1639 G>A (3673); chr16:31015190; rs9923231; C/T'].value_counts().index.tolist(), label='VKORC1 genotypes')
input_model = gr.Dropdown(['Decision Tree Regression', 'Support Vector Regression', 'Random Forest Regression', 'Deep Learning'], label='Model Selection')
# Output textbox to display predicted dose
output_warfarin_dosage = gr.Textbox(label='Therapeutic Dose of Warfarin')
# Prediction function; argument order matches the Gradio inputs below
def predict_dosage(gender, race, age, height, weight, diabetes, simvastatin,
                   amiodarone, inr, cyp2c9, vkorc1, selected_model):
    # Optional debug helper to inspect data before prediction
    def print_input_debug(transformed_input, final_array):
        print("Transformed input shape:", transformed_input.shape)
        print("Final input shape:", final_array.shape)
        print("Input data type:", final_array.dtype)
    try:
        # Load the selected model
        if selected_model == 'Deep Learning':
            model = load_model('best_DeepLearning_model (2).h5')
        elif selected_model == 'Support Vector Regression':
            model = load('SVR_optimized.joblib')
        elif selected_model == 'Random Forest Regression':
            model = load('RandomForestRegressor_optimized.joblib')
        else:
            model = load("DecisionTreeRegressor_optimized.joblib")
        # Handle labels the encoder has not seen by falling back to a known label
        def safe_transform_label(encoder, label, default=None):
            try:
                return encoder.transform([label])[0]
            except ValueError:
                # Unseen label: return the default, or the encoding of the first known class
                return default if default is not None else encoder.transform([encoder.classes_[0]])[0]

        # Encode Age with the LabelEncoder, guarding against unseen labels
        encoded_age = safe_transform_label(label_encoder, age)

        # Ensure numerical inputs are valid floats
        height = float(height) if height is not None else 0.0
        weight = float(weight) if weight is not None else 0.0
        inr = float(inr) if inr is not None else 0.0

        # Assemble the raw feature row in the same column order used to fit ct
        raw_inputs = [
            str(gender),
            str(race),
            str(age),
            height,
            weight,
            float(diabetes),
            float(simvastatin),
            float(amiodarone),
            inr,
            str(cyp2c9),
            str(vkorc1)
        ]

        # Apply the preprocessing pipeline fitted at startup
        transformed_input = ct.transform([raw_inputs])
        # Age is the first passthrough column (7 columns from the end); replace it
        # with the label-encoded value expected by the models
        transformed_input[0][-7] = encoded_age

        # Convert to a NumPy array for model input
        input_array = np.array(transformed_input, dtype=np.float32)
        print_input_debug(transformed_input, input_array)

        # Predict using the appropriate model type
        if selected_model == 'Deep Learning':
            tensor_input = tf.convert_to_tensor(input_array)
            prediction = model.predict(tensor_input, verbose=0)
            return float(prediction[0][0])
        else:
            prediction = model.predict(input_array)
            return float(prediction[0])
    except Exception as e:
        print(f"Error in prediction: {str(e)}")
        return f"Error in prediction: {str(e)}"
# Launch Gradio app
gr.Interface(
fn=predict_dosage,
inputs=[input_Gender, input_Race, input_Age, input_Height, input_Weight,
input_Diabetes, input_Simvastatin, input_Amiodarone,
input_INR_reported, input_Cyp2C9_genotypes, input_VKORC1_genotypes, input_model],
outputs=[output_warfarin_dosage]
).launch(debug=True)