"""Streamlit app that asks the K23 MiniMed model to analyse an uploaded patient CSV.

Note: this should always be used in compliance with applicable laws and
regulations if used with real patient data.
"""

import pandas as pd
import streamlit as st
from peft import PeftConfig, PeftModel
from streamlit_theme import theme
from transformers import (
    AutoConfig,
    AutoModelForCausalLM,
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    MistralForCausalLM,
    pipeline,
)

BASE_TOKENIZER_ID = "mistralai/Mistral-7B-v0.1"
ADAPTER_ID = "pseudolab/K23_MiniMed"
# Upper bound on generated tokens per analysis; tune for latency vs. detail.
MAX_NEW_TOKENS = 256

# NOTE(review): `st.set_theme` is not part of the documented Streamlit API.
# Confirm whether `streamlit_theme` is expected to provide it; until then the
# call is guarded so a missing attribute does not crash the whole app.
if hasattr(st, "set_theme"):
    st.set_theme("pseudolab/huggingface-korea-theme")


@st.cache_resource
def _load_resources():
    """Load the tokenizer, PEFT config/model and generation pipeline once.

    Streamlit re-executes the script on every interaction; caching keeps the
    model from being reloaded on each rerun.

    Returns:
        A ``(tokenizer, peft_config, peft_model, generator)`` tuple.
    """
    tok = AutoTokenizer.from_pretrained(
        BASE_TOKENIZER_ID, trust_remote_code=True, padding_side="left"
    )
    # The tokenizer is given an explicit pad token: reuse EOS for padding.
    tok.pad_token = tok.eos_token
    tok.padding_side = "left"

    config = PeftConfig.from_pretrained(ADAPTER_ID)
    # NOTE(review): the base weights are loaded from the adapter repository,
    # as in the original script. If that repo only holds adapter weights,
    # `config.base_model_name_or_path` is the usual source - confirm.
    base_model = MistralForCausalLM.from_pretrained(
        ADAPTER_ID, trust_remote_code=True
    )
    model = PeftModel.from_pretrained(base_model, ADAPTER_ID)

    # Build the pipeline once; the tokenizer must be passed explicitly when
    # the model is supplied as an object rather than a hub id.
    text_generator = pipeline("text-generation", model=model, tokenizer=tok)
    return tok, config, model, text_generator


tokenizer, peft_config, peft_model, generator = _load_resources()

# Upload patient data
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")


def prepare_context(data: pd.DataFrame):
    """Tokenize a DataFrame for the model, truncating to the tokenizer limit.

    Args:
        data: Patient records; rendered as text without index or header.

    Returns:
        The ``input_ids`` tensor produced by ``tokenizer.encode(...,
        return_tensors="pt")``, cut to at most ``tokenizer.model_max_length``
        tokens along dimension 1.
    """
    data_str = data.to_string(index=False, header=False)
    input_ids = tokenizer.encode(data_str, return_tensors="pt")

    # Truncate the input if it is too long for the model.
    max_length = tokenizer.model_max_length
    if input_ids.shape[1] > max_length:
        input_ids = input_ids[:, :max_length]
    return input_ids


# Instruction that is prepended internally to the patient data.
prompt = (
    "You are an Electronic Health Records analyst with nursing school training. "
    "Please analyze patient data that you are provided here. "
    "Give an organized, step-by-step, formatted health records analysis. \n"
    "You will always be truthful and if you do not know the answer say you do not know."
)

if uploaded_file is not None:
    try:
        data = pd.read_csv(uploaded_file)
    except pd.errors.EmptyDataError:
        # A zero-byte upload has nothing to parse; treat it as "no data".
        data = pd.DataFrame()

    if data.empty:
        st.write("Please enter patient data")
    else:
        st.write(data)

        # Truncate via token ids, then decode back to text because the
        # text-generation pipeline takes strings, not tensors.
        context = prepare_context(data)
        data_text = tokenizer.decode(context[0], skip_special_tokens=True)

        # Send the instruction together with the data so the analysis is
        # actually about the uploaded records; only show the completion.
        outputs = generator(
            f"{prompt}\n\n{data_text}",
            max_new_tokens=MAX_NEW_TOKENS,
            return_full_text=False,
        )
        generated_text = outputs[0]["generated_text"]
        st.write(generated_text)
else:
    st.write("No file uploaded")