Spaces:

pseudolab
/

MiniMed_EHR_Analyst

Sleeping

App Files Files Community

Solshine commited on Nov 5, 2023

Commit

fc5ee8e

•

1 Parent(s): a069f68

Create app.py

Browse files

Files changed (1) hide show

app.py +53 -0

app.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import streamlit as st
+import pandas as pd
+from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
+#Note this should be used always in compliance with applicable laws and regulations if used with real patient data.
+# Load the tokenizer and model: pseudolab/K23_MiniMed by Tonic (Note: This is a large model and will take a while to download)
+# Config issues persist with this model, unfortunately.  It may not be ready for use.
+tokenizer = AutoTokenizer.from_pretrained("pseudolab/K23_MiniMed")
+model = AutoModelForCausalLM.from_pretrained("pseudolab/K23_MiniMed")
+#Upload Patient Data
+uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
+# Prepare the context
+def prepare_context(data):
+    # Format the data as a string
+    data_str = data.to_string(index=False, header=False)
+    # Tokenize the data
+    input_ids = tokenizer.encode(data_str, return_tensors="pt")
+    # Truncate the input if it's too long for the model
+    max_length = tokenizer.model_max_length
+    if input_ids.shape[1] > max_length:
+        input_ids = input_ids[:, :max_length]
+    return input_ids
+if uploaded_file is not None:
+    data = pd.read_csv(uploaded_file)
+    st.write(data)
+    # Generate text based on the context
+    context = prepare_context(data)
+    generated_text = pipeline('text-generation', model=model)(context)[0]['generated_text']
+    st.write(generated_text)
+    # Internally prompt the model to data analyze the EHR patient data
+    prompt = "You are an Electronic Health Records analyst with nursing school training. Please analyze patient data that you are provided here. Give an organized, step-by-step, formatted health records analysis. You will always be truthful and if you do nont know the answer say you do not know."
+    if prompt:
+        # Tokenize the prompt
+        input_ids = tokenizer.encode(prompt, return_tensors="pt")
+        # Generate text based on the prompt
+        generated_text = pipeline('text-generation', model=model)(input_ids=input_ids)[0]['generated_text']
+        st.write(generated_text)
+    else:
+        st.write("Please enter patient data")
+else:
+    st.write("No file uploaded")