import streamlit as st
import pandas as pd
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel, PeftConfig
# Note: if used with real patient data, this app must always comply with applicable laws and regulations.
# Instantiate the tokenizer with left padding, which decoder-only models expect for generation
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1", trust_remote_code=True, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token
# Load the base model and attach the PEFT adapter (from_pretrained takes a repo id, not a URL)
peft_config = PeftConfig.from_pretrained("pseudolab/K23_MiniMed")
base_model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/zephyr-7b-beta", trust_remote_code=True)
peft_model = PeftModel.from_pretrained(base_model, "pseudolab/K23_MiniMed")
# Build a single text-generation pipeline around the adapted model, reused for all prompts below
generator = pipeline("text-generation", model=peft_model, tokenizer=tokenizer)
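# Note (assumption): the adapter records the checkpoint it was trained on in
# peft_config.base_model_name_or_path; if that differs from zephyr-7b-beta,
# loading the recorded checkpoint is the safer choice. zephyr-7b-beta is used
# here on the assumption that the adapter targets a Mistral-family model.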
# Upload patient data
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
# Prepare the context
def prepare_context(data):
    # Flatten the dataframe into a plain-text string
    data_str = data.to_string(index=False, header=False)
    # Tokenize the data
    input_ids = tokenizer.encode(data_str, return_tensors="pt")
    # Truncate the input if it is too long for the model
    max_length = tokenizer.model_max_length
    if input_ids.shape[1] > max_length:
        input_ids = input_ids[:, :max_length]
    # Decode back to text, since the text-generation pipeline expects a string
    return tokenizer.decode(input_ids[0], skip_special_tokens=True)
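# Caveat (assumption): some tokenizer configs report a sentinel model_max_length
# (a huge placeholder integer) rather than the real context window; if so, a
# fixed cap such as min(tokenizer.model_max_length, 4096) is a safer bound.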
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    st.write(data)

    # Generate text based on the patient-data context
    context = prepare_context(data)
    generated_text = generator(context)[0]["generated_text"]
    st.write(generated_text)
    # Internally prompt the model to analyze the uploaded EHR patient data
    prompt = (
        "You are an Electronic Health Records analyst with nursing school training. "
        "Please analyze the patient data that you are provided here. Give an organized, "
        "step-by-step, formatted health records analysis. You will always be truthful, "
        "and if you do not know the answer, say you do not know."
    )
    generated_text = generator(prompt)[0]["generated_text"]
    st.write(generated_text)
else:
    st.write("No file uploaded")