Spaces:

eaglelandsonce
/

BreastCancerModel

Sleeping

App Files Files Community

eaglelandsonce committed on Nov 23, 2024

Commit

f93ed07

verified ·

1 Parent(s): 98a7aa8

Update app.py

Browse files

Files changed (1) hide show

app.py +173 -24

app.py CHANGED Viewed

@@ -1,7 +1,24 @@
 DEFAULT_PREDICT_FILE = "synthetic_breast_cancer_data_withColumn.csv"
 def main():
-    global feature_columns
     st.title("Patient Treatment Prediction App")
     st.write("Upload patient data to train a model and predict treatments based on input data.")
@@ -10,10 +27,20 @@ def main():
     uploaded_file = st.file_uploader("Upload a CSV file for training", type="csv")
     if uploaded_file is None:
         st.write("Using default training data.")
-        data = pd.read_csv(DEFAULT_TRAIN_FILE)
     else:
-        data = pd.read_csv(uploaded_file)
-    st.write("Training Dataset Preview:", data.head())
     # Check for Treatment column in training data
     if 'Treatment' not in data.columns:
@@ -21,7 +48,11 @@ def main():
         return
     # Prepare Data
-    X, y, input_dim, num_classes, feature_columns = preprocess_training_data(data)
     # Model Parameters
     hidden_dim = st.slider("Hidden Layer Dimension", 10, 100, 50)
@@ -30,37 +61,155 @@ def main():
     # Model training
     if st.button("Train Model"):
-        model, loss_curve = train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs)
-        plot_loss_curve(loss_curve)
     # Upload data for prediction
     st.write("Upload new data for prediction (ensure 'Treatment' column is removed if present).")
     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
     if new_data_file is None:
         st.write("Using default prediction data.")
-        new_data = pd.read_csv(DEFAULT_PREDICT_FILE)
     else:
-        new_data = pd.read_csv(new_data_file)
     # Drop 'Treatment' column if it exists
     if 'Treatment' in new_data.columns:
         st.warning("The 'Treatment' column is present in the prediction data and will be removed.")
         new_data = new_data.drop(columns=['Treatment'])
-    st.write("Prediction Dataset Preview:", new_data.head())
-    if 'model' in locals() and feature_columns is not None:
-        # Align columns to match training data
-        new_data_aligned = align_columns(new_data, feature_columns)
-        if new_data_aligned is not None:
-            predictions = predict_treatment(new_data_aligned, model)
-            # Display Predictions in an Output Box
-            st.subheader("Predicted Treatment Outcomes")
-            prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
-            st.text_area("Prediction Results", prediction_output, height=200)
-        else:
-            st.error("Unable to align prediction data to the training feature columns.")
     else:
         st.warning("Please train the model first before predicting on new data.")

+import streamlit as st
+import pandas as pd
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+import numpy as np
+# Global scaler and label encoder for consistent preprocessing
+scaler = StandardScaler()
+label_encoder = LabelEncoder()
+feature_columns = None  # To store feature columns from the training data
+model = None  # Declare the model globally for predictions
+# Preload default files
+DEFAULT_TRAIN_FILE = "patientdata.csv"
 DEFAULT_PREDICT_FILE = "synthetic_breast_cancer_data_withColumn.csv"
 def main():
+    global feature_columns, model
     st.title("Patient Treatment Prediction App")
     st.write("Upload patient data to train a model and predict treatments based on input data.")
     uploaded_file = st.file_uploader("Upload a CSV file for training", type="csv")
     if uploaded_file is None:
         st.write("Using default training data.")
+        try:
+            data = pd.read_csv(DEFAULT_TRAIN_FILE)
+        except Exception as e:
+            st.error(f"Error loading default training file: {e}")
+            return
     else:
+        try:
+            data = pd.read_csv(uploaded_file)
+        except Exception as e:
+            st.error(f"Error loading uploaded file: {e}")
+            return
+    st.write("Training Dataset Preview:")
+    st.dataframe(data.head())  # Use st.dataframe for better visibility
     # Check for Treatment column in training data
     if 'Treatment' not in data.columns:
         return
     # Prepare Data
+    try:
+        X, y, input_dim, num_classes, feature_columns = preprocess_training_data(data)
+    except Exception as e:
+        st.error(f"Error during data preprocessing: {e}")
+        return
     # Model Parameters
     hidden_dim = st.slider("Hidden Layer Dimension", 10, 100, 50)
     # Model training
     if st.button("Train Model"):
+        try:
+            model, loss_curve = train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs)
+            plot_loss_curve(loss_curve)
+            st.success("Model trained successfully!")
+        except Exception as e:
+            st.error(f"Error during model training: {e}")
+            return
     # Upload data for prediction
     st.write("Upload new data for prediction (ensure 'Treatment' column is removed if present).")
     new_data_file = st.file_uploader("Upload new CSV file for prediction", type="csv")
     if new_data_file is None:
         st.write("Using default prediction data.")
+        try:
+            new_data = pd.read_csv(DEFAULT_PREDICT_FILE)
+        except Exception as e:
+            st.error(f"Error loading default prediction file: {e}")
+            return
     else:
+        try:
+            new_data = pd.read_csv(new_data_file)
+        except Exception as e:
+            st.error(f"Error loading uploaded prediction file: {e}")
+            return
     # Drop 'Treatment' column if it exists
     if 'Treatment' in new_data.columns:
         st.warning("The 'Treatment' column is present in the prediction data and will be removed.")
         new_data = new_data.drop(columns=['Treatment'])
+    st.write("Prediction Dataset Preview:")
+    st.dataframe(new_data.head())  # Display new data
+    if model is not None and feature_columns is not None:
+        try:
+            # Align columns to match training data
+            new_data_aligned = align_columns(new_data, feature_columns)
+            if new_data_aligned is not None:
+                predictions = predict_treatment(new_data_aligned, model)
+                # Display Predictions in an Output Box
+                st.subheader("Predicted Treatment Outcomes")
+                prediction_output = "\n".join([f"Patient {i+1}: {pred}" for i, pred in enumerate(predictions)])
+                st.text_area("Prediction Results", prediction_output, height=200)
+            else:
+                st.error("Unable to align prediction data to the training feature columns.")
+        except Exception as e:
+            st.error(f"Error during prediction: {e}")
     else:
         st.warning("Please train the model first before predicting on new data.")
def preprocess_training_data(data):
    """Turn a raw training frame into tensors ready for the classifier.

    The 'Treatment' column is label-encoded with the module-level
    ``label_encoder`` and used as the target. Every remaining column is a
    feature: object-dtype columns are label-encoded, then all features are
    standardized with the module-level ``scaler`` (both are re-fitted here).

    The input frame is not modified.

    Args:
        data: Training data containing a 'Treatment' column
            (assumed to be a pandas DataFrame).

    Returns:
        A 5-tuple ``(X, y, input_dim, num_classes, feature_columns)``:
        float32 feature tensor, int64 target tensor, number of feature
        columns, number of distinct target values, and the feature column
        labels in training order.
    """
    # Encode the target into a separate array instead of assigning it back
    # into `data`, so the caller's frame keeps its original labels.
    y = label_encoder.fit_transform(data['Treatment'])

    # `drop` returns a new frame, so the column assignments below are local.
    X = data.drop('Treatment', axis=1)
    feature_columns = X.columns  # Store feature columns for later alignment

    # NOTE(review): each categorical column gets a throwaway encoder that is
    # not retained, so the category -> integer mapping learned here cannot be
    # re-applied to other data later.
    for col in X.select_dtypes(include=['object']).columns:
        X[col] = LabelEncoder().fit_transform(X[col])

    # Standardize features; from here on X is a NumPy array.
    X = scaler.fit_transform(X)

    return (
        torch.tensor(X, dtype=torch.float32),
        torch.tensor(y, dtype=torch.long),
        X.shape[1],
        len(np.unique(y)),
        feature_columns,
    )
def align_columns(new_data, feature_columns):
    """Reshape prediction data to the training feature layout and scale it.

    Columns not present in ``feature_columns`` are dropped, columns that are
    missing are added filled with 0, and the result is reordered to match
    ``feature_columns``. Object-dtype columns are then label-encoded with a
    freshly fitted encoder and the frame is passed through the module-level
    ``scaler``.

    Args:
        new_data: Data to predict on (assumed to be a pandas DataFrame).
        feature_columns: Feature column labels captured at training time.

    Returns:
        A float32 tensor of the scaled features, or ``None`` if any step
        raised; in that case the error is reported through ``st.error``.
    """
    try:
        incoming = set(new_data.columns)
        expected = set(feature_columns)

        # Discard columns the model never saw during training
        aligned = new_data.drop(columns=list(incoming - expected))

        # Columns absent from the new data are filled with a default of 0
        for absent in expected - incoming:
            aligned[absent] = 0

        # Same column order as the training data
        aligned = aligned[feature_columns]

        # Encode text columns (encoder is fitted on the new data itself)
        text_columns = aligned.select_dtypes(include=['object']).columns
        for name in text_columns:
            aligned[name] = LabelEncoder().fit_transform(aligned[name])

        # Apply the already-fitted scaler
        scaled = scaler.transform(aligned)
        return torch.tensor(scaled, dtype=torch.float32)
    except Exception as e:
        st.error(f"Error aligning columns: {e}")
        return None
def train_model(X, y, input_dim, hidden_dim, num_classes, learning_rate, epochs):
    """Fit a one-hidden-layer classifier with full-batch Adam updates.

    Args:
        X: Feature tensor fed to the network in a single batch.
        y: Target class indices passed to ``CrossEntropyLoss``.
        input_dim: Number of input features.
        hidden_dim: Width of the hidden layer.
        num_classes: Number of output logits.
        learning_rate: Learning rate for the Adam optimizer.
        epochs: Number of optimization steps to run.

    Returns:
        ``(model, loss_curve)`` where ``loss_curve`` holds one Python float
        loss value per epoch.
    """

    class SimpleNN(nn.Module):
        """Linear -> ReLU -> Linear network producing raw class logits."""

        def __init__(self, input_dim, hidden_dim, num_classes):
            super().__init__()
            self.fc1 = nn.Linear(input_dim, hidden_dim)
            self.relu = nn.ReLU()
            self.fc2 = nn.Linear(hidden_dim, num_classes)

        def forward(self, x):
            return self.fc2(self.relu(self.fc1(x)))

    net = SimpleNN(input_dim, hidden_dim, num_classes)
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(net.parameters(), lr=learning_rate)

    history = []
    for _ in range(epochs):
        # One full-batch step: clear grads, forward, backward, update
        adam.zero_grad()
        step_loss = loss_fn(net(X), y)
        step_loss.backward()
        adam.step()
        history.append(step_loss.item())

    return net, history
def plot_loss_curve(loss_curve):
    """Render the training loss curve in the Streamlit page.

    Args:
        loss_curve: Sequence of per-epoch loss values to plot.
    """
    # Draw on an explicit Figure/Axes rather than pyplot's implicit global
    # figure, so the object handed to Streamlit is unambiguous.
    fig, ax = plt.subplots()
    ax.plot(loss_curve, label="Training Loss")
    ax.set_xlabel("Epochs")
    ax.set_ylabel("Loss")
    ax.set_title("Loss Curve")
    ax.legend()
    fig.tight_layout()  # Ensure layout is tight for Streamlit
    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this a new figure would pile up on each call.
        plt.close(fig)
def predict_treatment(new_data, model, batch_size=32):
    """Predict treatment labels for prepared feature rows.

    The model is switched to evaluation mode and run without gradient
    tracking over consecutive slices of ``new_data``. The highest-scoring
    class index of each row is mapped back to its original label through
    the module-level ``label_encoder``.

    Args:
        new_data: Feature tensor, sliced along its first dimension
            (assumed to be laid out as rows x features).
        model: Trained network returning per-class scores for each row.
        batch_size: Number of rows evaluated per forward pass.

    Returns:
        The decoded labels produced by ``label_encoder.inverse_transform``.
    """
    model.eval()
    class_ids = []
    row_count = new_data.size(0)

    with torch.no_grad():
        for start in range(0, row_count, batch_size):
            scores = model(new_data[start:start + batch_size])
            # Index of the largest score along the class dimension
            winners = torch.max(scores, 1).indices
            class_ids.extend(winners.numpy())

    return label_encoder.inverse_transform(class_ids)
+if __name__ == "__main__":
+    main()