# Page metadata captured alongside the source (not program code):
# Spaces: Sleeping
# File size: 2,391 Bytes
# fc5ee8e 130318f fc5ee8e 130318f ba57ab6 130318f bcb7f51 130318f ba57ab6 fc5ee8e
import streamlit as st
import pandas as pd
from transformers import pipeline, AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForCausalLM, MistralForCausalLM
from peft import PeftModel, PeftConfig
# NOTE: When used with real patient data, this app must always be used in compliance with applicable laws and regulations.
# Hub identifiers for the tokenizer and for the PEFT adapter.
BASE_TOKENIZER_ID = "mistralai/Mistral-7B-v0.1"
ADAPTER_ID = "pseudolab/K23_MiniMed"


@st.cache_resource
def _load_tokenizer():
    """Load the tokenizer once per server process.

    Streamlit re-executes the whole script on every interaction; caching
    avoids re-instantiating the tokenizer on each rerun.
    """
    loaded = AutoTokenizer.from_pretrained(
        BASE_TOKENIZER_ID, trust_remote_code=True, padding_side="left"
    )
    # The tokenizer is used without a dedicated pad token; reuse EOS for padding.
    loaded.pad_token = loaded.eos_token
    return loaded


@st.cache_resource
def _load_peft_model():
    """Load the base model and attach the PEFT adapter once per server process.

    Returns:
        A ``(peft_config, peft_model)`` tuple.
    """
    config = PeftConfig.from_pretrained(ADAPTER_ID)
    # The adapter config records which base checkpoint it was trained on;
    # load that checkpoint as the base instead of the adapter repo itself.
    base_model = MistralForCausalLM.from_pretrained(
        config.base_model_name_or_path, trust_remote_code=True
    )
    return config, PeftModel.from_pretrained(base_model, ADAPTER_ID)


# Instantiate the Tokenizer
tokenizer = _load_tokenizer()
# Load the PEFT model
peft_config, peft_model = _load_peft_model()
# Upload Patient Data
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
# Prepare the context
def prepare_context(data):
    """Encode the patient table as token ids for the model.

    The table is rendered as plain text without its index or header row,
    encoded with the module-level ``tokenizer`` (``return_tensors="pt"``),
    and cut along dimension 1 to ``tokenizer.model_max_length`` entries when
    it is longer than that.

    Args:
        data: Table object exposing ``to_string`` (the caller passes the
            result of ``pd.read_csv``).

    Returns:
        The encoded token ids, truncated if necessary.
    """
    rendered = data.to_string(index=False, header=False)
    token_ids = tokenizer.encode(rendered, return_tensors="pt")
    limit = tokenizer.model_max_length
    # Short enough already: hand the encoding back untouched.
    if token_ids.shape[1] <= limit:
        return token_ids
    return token_ids[:, :limit]
# Upper bound on the number of tokens generated per request.
MAX_NEW_TOKENS = 256


def _generate_text(input_ids):
    """Generate a continuation for already-tokenized input and decode it.

    Args:
        input_ids: Token ids as produced by ``tokenizer.encode(...,
            return_tensors="pt")`` or ``prepare_context``.

    Returns:
        The decoded text of the first returned sequence (prompt plus
        continuation), with special tokens removed.
    """
    # Call the loaded PEFT model directly: the inputs here are token ids,
    # whereas a text-generation pipeline expects raw strings.
    output_ids = peft_model.generate(
        input_ids=input_ids,
        max_new_tokens=MAX_NEW_TOKENS,
        pad_token_id=tokenizer.pad_token_id,
    )
    return tokenizer.decode(output_ids[0], skip_special_tokens=True)


if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        # An empty or malformed upload should produce a message, not a traceback.
        st.error(f"Could not read the uploaded CSV: {exc}")
    else:
        st.write(data)
        # Generate text based on the context
        context = prepare_context(data)
        generated_text = _generate_text(context)
        st.write(generated_text)
        # Internally prompt the model to analyze the EHR patient data.
        # NOTE(review): the prompt is generated on its own and does not include
        # the uploaded data; confirm whether the two inputs should be combined.
        prompt = "You are an Electronic Health Records analyst with nursing school training. Please analyze patient data that you are provided here. Give an organized, step-by-step, formatted health records analysis. You will always be truthful and if you do nont know the answer say you do not know."
        # Tokenize the prompt
        input_ids = tokenizer.encode(prompt, return_tensors="pt")
        # Generate text based on the prompt
        generated_text = _generate_text(input_ids)
        st.write(generated_text)
else:
    st.write("No file uploaded")