|
import streamlit as st |
|
import pandas as pd |
|
import numpy as np |
|
import matplotlib.pyplot as plt |
|
import os |
|
import joblib |
|
from sklearn.metrics import root_mean_squared_error |
|
|
|
from scripts.make_dataset import create_features |
|
from scripts.naive_approach import get_column_specs, prepare_data, zeroshot_eval, simple_diagonal_averaging |
|
from scripts.ml_approach import format_dataset |
|
|
|
# Window sizes shared by the models below: CONTEXT_LENGTH is handed to the
# dataset-formatting and prediction-averaging helpers and is also the number
# of leading rows skipped when scoring; PREDICTION_LENGTH is handed to the
# dataset-formatting helper.
CONTEXT_LENGTH = 52
PREDICTION_LENGTH = 6

# Browser-tab title/icon and full-width layout for the page.
_page_settings = {
    "page_title": "Glucose Level Prediction App",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**_page_settings)
|
|
|
|
|
# Custom theme: green (#58A618) accent on a white background, plus tab styling.
# The text is injected verbatim as raw HTML, so it is kept in one constant.
_THEME_CSS = """
<style>
/* Primary accent color */
.stButton button, .stSelectbox, .stMultiselect, .stSlider, .stNumberInput {
border-color: #58A618 !important;
}
.stProgress .st-bo {
background-color: #58A618 !important;
}
.st-bq {
color: #58A618 !important;
}
/* Header styling */
h1, h2, h3 {
color: #58A618 !important;
}
.stTabs [data-baseweb="tab-list"] button [data-testid="stMarkdownContainer"] p {
color: #58A618 !important;
}
/* Success messages */
.element-container .stAlert.st-ae.st-af {
border-color: #58A618 !important;
color: #58A618 !important;
}
/* Link color */
a {
color: #58A618 !important;
}
/* Button color */
.stButton>button {
background-color: #58A618 !important;
color: white !important;
}
/* Make background white */
.stApp {
background-color: white !important;
}
/* Tab styling */
.stTabs [data-baseweb="tab-list"] {
gap: 10px;
}
.stTabs [data-baseweb="tab"] {
background-color: #f0f0f0;
border-radius: 4px 4px 0 0;
padding: 10px 16px;
border: 1px solid #ccc;
}
.stTabs [data-baseweb="tab"][aria-selected="true"] {
background-color: white;
border-bottom: 3px solid #58A618;
}
</style>
"""

# unsafe_allow_html is required for the <style> element to be emitted as HTML.
st.markdown(_THEME_CSS, unsafe_allow_html=True)
|
|
|
|
|
# Seed the per-session keys exactly once; values already stored in the session
# (e.g. a previously processed dataset) are left untouched on reruns.
_session_defaults = {
    'combined_data': None,
    'files_uploaded': False,
    'data_processed': False,
}
for _state_key, _initial_value in _session_defaults.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
|
|
|
|
|
# Page heading followed by a short description of what the app does.
_intro_text = """
This app allows you to upload glucose measurements, food logs, and accelerometer data
to analyze patterns and predict glucose levels.
"""
st.title("Glucose Level Prediction App")
st.markdown(_intro_text)
|
|
|
|
|
# Let the user pick between uploading their own files and a bundled sample.
st.subheader("Choose Data Source")
_source_choices = ("Upload files", "Sample A", "Sample B")
data_option = st.selectbox(
    "Select how you'd like to provide input data:",
    _source_choices,
)

# Nothing is loaded at this point; the branch for the chosen data source
# fills these in and decides whether the model tabs are shown.
glucose_data = food_data = accel_data = combined_data = None
show_tabs = False
|
|
|
# Option labels handled below; they must match the choices offered by the
# data-source selectbox.
_UPLOAD_OPTION = "Upload files"
_SAMPLE_PATHS = {
    "Sample A": 'data/processed/samples/sample_A.csv',
    "Sample B": 'data/processed/samples/sample_B.csv',
}


def _read_uploaded_csv(uploaded_file, label):
    """Parse one uploaded CSV and report the outcome in the UI.

    Args:
        uploaded_file: Object returned by ``st.file_uploader`` (``None`` when
            nothing has been uploaded yet).
        label: Lower-case description used in the messages, e.g.
            ``"glucose data"``.

    Returns:
        The parsed DataFrame, or ``None`` when no file was uploaded or the
        file could not be parsed (an error message is shown in that case).
    """
    if uploaded_file is None:
        return None
    try:
        frame = pd.read_csv(uploaded_file)
    except Exception as e:
        # UI boundary: surface any parsing problem instead of crashing the app.
        st.error(f"Error loading {label}: {e}")
        return None
    st.success(f"{label.capitalize()} loaded successfully!")
    return frame


if data_option == _UPLOAD_OPTION:
    st.subheader("Upload Your Data Files")

    glucose_file = st.file_uploader("Upload Glucose Levels CSV", type=["csv"], key="glucose")
    food_file = st.file_uploader("Upload Food Logs CSV", type=["csv"], key="food")
    accel_file = st.file_uploader("Upload Accelerometer Data CSV", type=["csv"], key="accel")

    st.subheader("Patient Demographics")
    gender = st.selectbox("Select Patient Gender", options=["Female", "Male", "Other"], index=0)
    hba1c = st.number_input("Enter HbA1c (%)", min_value=3.0, max_value=15.0, step=0.1)

    all_files_uploaded = all(
        uploaded is not None for uploaded in (glucose_file, food_file, accel_file)
    )

    glucose_data = _read_uploaded_csv(glucose_file, "glucose data")
    food_data = _read_uploaded_csv(food_file, "food logs")
    accel_data = _read_uploaded_csv(accel_file, "accelerometer data")

    st.session_state.files_uploaded = all_files_uploaded

    # A file can be uploaded yet fail to parse; processing needs all three
    # DataFrames, so the button stays disabled until every file has parsed.
    all_files_parsed = all(
        frame is not None for frame in (glucose_data, food_data, accel_data)
    )

    if not all_files_uploaded:
        st.warning("Please upload all three data files to enable data processing.")

    col1, col2, col3 = st.columns([1, 1, 1])

    with col2:
        if st.button('Process Data', key='process_data_button', disabled=not all_files_parsed):
            try:
                combined_data = create_features(
                    bg_df=glucose_data,
                    food_df=food_data,
                    acc_df=accel_data,
                    gender=gender,
                    hba1c=hba1c,
                    add_patient_id=True
                )
            except Exception as e:
                st.error(f"Error processing data: {e}")
                st.session_state.data_processed = False
                show_tabs = False
            else:
                st.session_state.combined_data = combined_data
                st.session_state.data_processed = True
                # Remember where the stored dataset came from (see check below).
                st.session_state.data_source = _UPLOAD_OPTION
                st.success("Data processed successfully!")
                show_tabs = True

    st.subheader("Expected File Formats:")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.markdown("""
        **Glucose Levels CSV:**
        - Timestamp column
        - Glucose measurement values
        """)

    with col2:
        st.markdown("""
        **Food Logs CSV:**
        - Timestamp column
        - Carbohydrates
        - Sugar
        - Calories
        """)

    with col3:
        st.markdown("""
        **Accelerometer Data CSV:**
        - Timestamp column
        - Activity measurements
        """)

    # Restore a previously processed upload on reruns (e.g. after a
    # "Make prediction" click). The source check prevents a sample dataset
    # left in the session from being presented as the user's uploaded data.
    if (
        st.session_state.data_processed
        and st.session_state.combined_data is not None
        and st.session_state.get("data_source") == _UPLOAD_OPTION
    ):
        combined_data = st.session_state.combined_data
        show_tabs = True

elif data_option in _SAMPLE_PATHS:
    combined_data_path = _SAMPLE_PATHS[data_option]
    try:
        combined_data = pd.read_csv(combined_data_path)
    except (OSError, ValueError) as e:
        # Missing/unreadable file (OSError) or malformed CSV (pandas parser
        # errors derive from ValueError): report it and keep the tabs hidden.
        st.error(f"Error loading {data_option}: {e}")
        st.session_state.combined_data = None
        st.session_state.data_processed = False
    else:
        st.session_state.combined_data = combined_data
        st.session_state.data_processed = True
        st.session_state.data_source = data_option
        st.success(f"{data_option} loaded successfully!")
        show_tabs = True
|
|
|
|
|
# Two empty writes act as vertical spacing before the model section.
for _ in range(2):
    st.write("")
|
|
|
|
|
_TRAIN_DATASET_PATH = 'data/processed/train_dataset.csv'
_XGB_MODEL_PATH = "models/xgb_model.pkl"


def _zeroshot_results(data, model_path=None):
    """Run the zero-shot pipeline on *data* and return the averaged results.

    Args:
        data: Combined feature DataFrame selected or processed by the user.
        model_path: Optional model identifier forwarded to ``zeroshot_eval``;
            when ``None`` the argument is omitted so that function uses its
            own default.

    Returns:
        Whatever ``simple_diagonal_averaging`` returns; the callers below read
        its ``Timestamp``, ``Glucose`` and ``averaged_prediction`` columns.
    """
    column_specs = get_column_specs()
    timestamp_column = column_specs["timestamp_column"]
    prepared_data = prepare_data(data, timestamp_column)
    train_data = prepare_data(pd.read_csv(_TRAIN_DATASET_PATH), timestamp_column)

    extra_args = {} if model_path is None else {"model_path": model_path}
    predictions = zeroshot_eval(
        train_df=train_data,
        test_df=prepared_data,
        batch_size=8,
        **extra_args,
    )

    predictions_df = predictions["predictions_df"]
    step_columns = [col for col in predictions_df.columns if col.startswith("Glucose_step_")]
    return simple_diagonal_averaging(
        predictions_df,
        prepared_data,
        CONTEXT_LENGTH,
        step_columns,
    )


def _xgboost_results(data):
    """Predict with the saved XGBoost model and return the averaged results."""
    X_test, _ = format_dataset(data, CONTEXT_LENGTH, PREDICTION_LENGTH)

    # NOTE: joblib.load unpickles the file; only load models from a trusted path.
    xgb_model = joblib.load(_XGB_MODEL_PATH)
    predicted = pd.DataFrame(xgb_model.predict(X_test))

    return simple_diagonal_averaging(
        predicted,
        data,
        CONTEXT_LENGTH,
        predicted.columns,
    )


def _plot_results(final_results, *, nan_gaps=False):
    """Draw predicted vs. actual glucose and hand the figure to Streamlit.

    Rows whose ``averaged_prediction`` is exactly 0 are not drawn as
    predictions (presumably they mark rows without a forecast — confirm
    against ``simple_diagonal_averaging``).

    Args:
        final_results: Frame with ``Timestamp``, ``Glucose`` and
            ``averaged_prediction`` columns.
        nan_gaps: When ``True`` the ground truth is drawn first and zero
            predictions become NaN so the line breaks at those rows; when
            ``False`` zero rows are dropped and the prediction is drawn first.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        if nan_gaps:
            ax.plot(final_results['Timestamp'], final_results['Glucose'],
                    label='Ground truth', alpha=0.7, color='#1f77b4')
            plot_predictions = final_results['averaged_prediction'].replace(0, float('nan'))
            ax.plot(final_results['Timestamp'], plot_predictions,
                    label='Predicted', alpha=0.7, color='#58A618')
        else:
            filtered_results = final_results[final_results['averaged_prediction'] != 0]
            ax.plot(filtered_results['Timestamp'], filtered_results['averaged_prediction'],
                    label='Predicted', alpha=0.7, color='#58A618')
            ax.plot(final_results['Timestamp'], final_results['Glucose'],
                    label='Ground truth', alpha=0.7, color='#1f77b4')

        ax.set_title('Glucose Predictions vs Actual Values')
        ax.set_xlabel('Time')
        ax.set_ylabel('Glucose Level')
        ax.legend()

        st.pyplot(fig)
    finally:
        # pyplot keeps every figure alive until closed; without this each
        # rerun of the script would leak one figure.
        plt.close(fig)


def _show_rmse(final_results):
    """Display the RMSE over rows after the context window with a prediction."""
    y_true = final_results['Glucose'][CONTEXT_LENGTH:].reset_index(drop=True)
    y_pred = final_results['averaged_prediction'][CONTEXT_LENGTH:].reset_index(drop=True)

    # Score only rows that actually carry a (non-zero) prediction.
    has_prediction = y_pred != 0

    st.subheader("Performance Metrics")
    if has_prediction.any():
        # root_mean_squared_error already returns the RMSE; taking another
        # square root (as the previous code did) reports a wrong value.
        rmse = root_mean_squared_error(y_true[has_prediction], y_pred[has_prediction])
        st.metric("AVERAGE RMSE", f"{rmse:.4f}")
    else:
        st.metric("AVERAGE RMSE", "N/A")


def _render_model_tab(title, button_key, model_label, predict, data, *, nan_gaps=False):
    """Render one model tab: heading, button, and on click the plot and metric.

    Args:
        title: Subheader text of the tab.
        button_key: Unique Streamlit key for the tab's button.
        model_label: Lower-case model name used in the error message.
        predict: Callable taking the dataset and returning the averaged
            results frame.
        data: Dataset to predict on, or ``None`` when unavailable.
        nan_gaps: Plot style switch, see ``_plot_results``.
    """
    st.subheader(title)

    if not st.button('Make prediction', key=button_key):
        return
    if data is None:
        st.error("Data not available. Please try again.")
        return

    try:
        final_results = predict(data)
        _plot_results(final_results, nan_gaps=nan_gaps)
        _show_rmse(final_results)
    except Exception as e:
        # UI boundary: show the failure in the tab instead of a stack trace.
        st.error(f"Error in {model_label} prediction: {e}")


if show_tabs:
    tab1, tab2, tab3 = st.tabs(["Naive Model", "Machine Learning Model", "Deep Learning Model"])

    with tab1:
        _render_model_tab(
            "Naive Model", 'naive_button', "naive model",
            _zeroshot_results, combined_data,
        )

    with tab2:
        _render_model_tab(
            "Machine Learning Model", 'ml_button', "machine learning model",
            _xgboost_results, combined_data, nan_gaps=True,
        )

    with tab3:
        _render_model_tab(
            "Deep Learning Model", 'dl_button', "deep learning model",
            lambda data: _zeroshot_results(data, model_path="iaravagni/ttm-finetuned-model"),
            combined_data,
        )
else:
    st.info("Upload and process data or select a sample dataset to view prediction models.")
|
|
|
|
|
# Vertical spacing, then the "About" section with the medical disclaimer.
for _ in range(2):
    st.write("")

_about_text = """
---
### About this App

This application is designed to help analyze and predict glucose levels based on glucose measurements,
food logs, and physical activity data. The app merges these datasets based on timestamps to identify
patterns and make predictions.

Please note that this is a demonstration tool and should not be used for medical decisions without
consultation with healthcare professionals.

"""
st.markdown(_about_text)
|
|
|
|
|
# Footer pinned to the bottom of the viewport; the styling and the element
# are injected together as raw HTML.
_FOOTER_HTML = """
<style>
.footer {
position: fixed;
left: 0;
bottom: 0;
width: 100%;
background-color: white;
color: #58A618;
text-align: center;
padding: 10px;
border-top: 2px solid #58A618;
}
</style>
<div class="footer">
<p>Glucose Prediction Application © 2025</p>
</div>
"""
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)