"""Streamlit demo: GPT Clinical Notes Summarizer.

Loads a fine-tuned GPT-Neo model from the Hugging Face hub and summarizes
clinical notes selected from a small example CSV or pasted by the user.
"""

import re

import pandas as pd
import streamlit as st
from PIL import Image
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# from accelerate import infer_auto_device_map
# device = "cuda:0" if torch.cuda.is_available() else "cpu"
# device_str = f"""Device being used: {device}"""
# st.write(device_str)
# device_map = infer_auto_device_map(model, dtype="float16")
# st.write(device_map)


@st.cache(allow_output_mutation=True)
def load_model():
    """Load the GPT-Neo notes model and tokenizer once; return a cached text-generation pipeline.

    NOTE(review): `st.cache` is deprecated in newer Streamlit releases in
    favor of `st.cache_resource` — kept as-is for compatibility with the
    Streamlit version this demo was built against.
    """
    model = AutoModelForCausalLM.from_pretrained(
        "bryanmildort/gpt_neo_notes", low_cpu_mem_usage=True
    )
    # model = model.to(device)  # GPU placement intentionally disabled in this demo
    tokenizer = AutoTokenizer.from_pretrained("bryanmildort/gpt_neo_notes")
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


def summarize_function(notes):
    """Generate a summary for *notes* using the module-level `pipe`.

    *notes* is expected to already be wrapped in the
    ``[Notes]: ... [Summary]:`` prompt template; the prompt prefix is
    sliced off the generated text before post-processing.
    """
    # Budget roughly 0.43 new tokens per prompt word. generate() requires an
    # int here — the original passed a float, which transformers rejects.
    token_budget = int(len(notes.split(' ')) * 2 * 0.215)
    gen_text = pipe(
        notes,
        max_new_tokens=token_budget,
        temperature=0.8,
        num_return_sequences=1,
        top_p=0.2,
    )[0]['generated_text'][len(notes):]
    # The model sometimes hallucinates a fresh "[Notes]:" section after the
    # summary. Truncate at the first such marker — same net effect as the
    # original backwards character scan, but O(n) and readable.
    marker = gen_text.find('[Notes]:')
    if marker != -1:
        gen_text = gen_text[:marker]
    st.write('Summary: ')
    return gen_text


notes_df = pd.read_csv('notes_small.csv')

# One selectbox entry per example note: "Patient 1", "Patient 2", ...
examples_tuple = tuple(f"Patient {i + 1}" for i in range(len(notes_df)))
example = st.sidebar.selectbox('Example', examples_tuple, index=0)

st.markdown("\n\nGPT Clinical Notes Summarizer\n\n", unsafe_allow_html=True)
st.markdown("\nby Bryan Mildort\n", unsafe_allow_html=True)
st.sidebar.markdown(
    "\n\nGPT Clinical Notes Summarizer 0.1v\n\n", unsafe_allow_html=True
)
st.sidebar.markdown(
    "\nThe model for this application was created with the generous support "
    "of the Google TPU Research Cloud (TRC). This demo is for investigative "
    "research purposes only. The model is assumed to have several limitations "
    "and biases, so please oversee responses with human moderation. It is not "
    "intended for production ready enterprises and is displayed to illustrate "
    "the capabilities of Large Language Models for health care research.\n",
    unsafe_allow_html=True,
)

tower = Image.open('howard_social.png')
seal = Image.open('Howard_University_seal.svg.png')
st.sidebar.image(tower)
# st.sidebar.image(seal)

pipe = load_model()

# Parse the patient number out of the selection. The original sliced only the
# last character (example[-1:]), which breaks for "Patient 10" and beyond.
patient_idx = int(example.split(' ')[-1]) - 1
prompt = notes_df.iloc[patient_idx].PARSED
input_text = st.text_area("Notes:", prompt)

if st.button('Summarize'):
    # Collapse whitespace-only blank lines, then squeeze newline runs to one.
    parsed_input = re.sub(r'\n\s*\n', '\n\n', input_text)
    parsed_input = re.sub(r'\n+', '\n', parsed_input)
    final_input = f"""[Notes]:\n{parsed_input}\n[Summary]:\n"""
    st.write(summarize_function(final_input))