from openai import OpenAI import streamlit as st import streamlit.components.v1 as components import datetime, time from dataclasses import dataclass import math import base64 ## Firestore ?? import os # import sys # import inspect # currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) # parentdir = os.path.dirname(currentdir) # sys.path.append(parentdir) import openai from langchain_openai import ChatOpenAI, OpenAI, OpenAIEmbeddings import tiktoken from langchain.prompts.few_shot import FewShotPromptTemplate from langchain.prompts.prompt import PromptTemplate from operator import itemgetter from langchain.schema import StrOutputParser from langchain_core.output_parsers import StrOutputParser from langchain_core.runnables import RunnablePassthrough, RunnableLambda, RunnableParallel from langchain_core.runnables import chain import langchain_community.embeddings.huggingface from langchain_community.embeddings.huggingface import HuggingFaceBgeEmbeddings, HuggingFaceEmbeddings from langchain_community.vectorstores import FAISS from langchain.chains import LLMChain from langchain.chains.conversation.memory import ConversationBufferWindowMemory #, ConversationBufferMemory, ConversationSummaryMemory, ConversationSummaryBufferMemory import os, dotenv from dotenv import load_dotenv load_dotenv() import firebase_admin, json from firebase_admin import credentials, storage, firestore import plotly.express as px import plotly.graph_objects as go import pandas as pd import networkx as nx if not os.path.isdir("./.streamlit"): os.mkdir("./.streamlit") print('made streamlit folder') if not os.path.isfile("./.streamlit/secrets.toml"): with open("./.streamlit/secrets.toml", "w") as f: f.write(os.environ.get("STREAMLIT_SECRETS")) print('made new file') import os, dotenv from dotenv import load_dotenv load_dotenv() if not os.path.isdir("./.streamlit"): os.mkdir("./.streamlit") print('made streamlit folder') if not os.path.isfile("./.streamlit/secrets.toml"): with open("./.streamlit/secrets.toml", "w") as f: f.write(os.environ.get("STREAMLIT_SECRETS")) print('made new file') import db_firestore as db ## Load from streamlit!! os.environ["HF_TOKEN"] = os.environ.get("HF_TOKEN") or st.secrets["HF_TOKEN"] os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY") or st.secrets["OPENAI_API_KEY"] os.environ["FIREBASE_CREDENTIAL"] = os.environ.get("FIREBASE_CREDENTIAL") or st.secrets["FIREBASE_CREDENTIAL"] if "openai_model" not in st.session_state: st.session_state["openai_model"] = "gpt-3.5-turbo-1106" ## Hardcode indexes for now ## TODO: Move indexes to firebase indexes = """Bleeding ChestPain Dysphagia Headache ShortnessOfBreath Vomiting Weakness Weakness2""".split("\n") model_name = "BAAI/bge-large-en-v1.5" model_kwargs = {"device": "cpu"} encode_kwargs = {"normalize_embeddings": True} if "embeddings" not in st.session_state: st.session_state.embeddings = HuggingFaceBgeEmbeddings( model_name=model_name, model_kwargs = model_kwargs, encode_kwargs = encode_kwargs) embeddings = st.session_state.embeddings if "llm" not in st.session_state: st.session_state.llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0) llm = st.session_state.llm if "llm_i" not in st.session_state: st.session_state.llm_i = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) llm_i = st.session_state.llm_i if "llm_gpt4" not in st.session_state: st.session_state.llm_gpt4 = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0) llm_gpt4 = st.session_state.llm_gpt4 if "TEMPLATE" not in st.session_state: with open('templates/patient.txt', 'r') as file: TEMPLATE = file.read() st.session_state.TEMPLATE = TEMPLATE TEMPLATE = st.session_state.TEMPLATE prompt = PromptTemplate( input_variables = ["question", "context"], template = st.session_state.TEMPLATE ) def format_docs(docs): return "\n--------------------\n".join(doc.page_content for doc in docs) sp_mapper = {"human":"student","ai":"patient", "user":"student","assistant":"patient"} if "TEMPLATE2" not in st.session_state: with open('templates/grader.txt', 'r') as file: TEMPLATE2 = file.read() st.session_state.TEMPLATE2 = TEMPLATE2 TEMPLATE2 = st.session_state.TEMPLATE2 prompt2 = PromptTemplate( input_variables = ["question", "context", "history"], template = st.session_state.TEMPLATE2 ) @chain def get_patient_chat_history(_): return st.session_state.get("patient_chat_history") if not st.session_state.get("scenario_list", None): st.session_state.scenario_list = indexes def init_patient_llm(): index_name = f"indexes/{st.session_state.scenario_list[st.session_state.selected_scenario]}/QA" if "store" not in st.session_state: st.session_state.store = db.get_store(index_name, embeddings=embeddings) if "retriever" not in st.session_state: st.session_state.retriever = st.session_state.store.as_retriever(search_type="similarity", search_kwargs={"k":2}) if "memory" not in st.session_state: st.session_state.memory = ConversationBufferWindowMemory( llm=llm, memory_key="chat_history", input_key="question", k=5, human_prefix="student", ai_prefix="patient",) if ("chain" not in st.session_state or st.session_state.TEMPLATE != TEMPLATE): st.session_state.chain = ( RunnableParallel({ "context": st.session_state.retriever | format_docs, "question": RunnablePassthrough() }) | LLMChain(llm=llm, prompt=prompt, memory=st.session_state.memory, verbose=False) ) # def init_grader_llm(): login_info = { "bob":"builder", "student1": "password", "admin":"admin" } def set_username(x): st.session_state.username = x def validate_username(username, password): if login_info.get(username) == password: set_username(username) else: st.warning("Wrong username or password") return None if not st.session_state.get("username"): ## ask to login st.title("Login") username = st.text_input("Username:") password = st.text_input("Password:", type="password") login_button = st.button("Login", on_click=validate_username, args=[username, password]) ll, rr = st.columns(2) ## TODO: Sync login info usernames to firebase, and remove this portion ll.header("Admin Login") ll.write("Username: admin") ll.write("Password: admin") rr.header("Student Login") rr.write("Username: student1") rr.write("Password: password") else: if True: ## Says hello and logout col_1, col_2 = st.columns([1,3]) col_2.title(f"Hello there, {st.session_state.username}") # Display logout button if col_1.button('Logout'): # Remove username from session state del st.session_state.username # Rerun the app to go back to the login view st.rerun() scenario_tab, dashboard_tab, generate_tab = st.tabs(["Training", "Dashboard", "Generate Scenario"]) class ScenarioTabIndex: SELECT_SCENARIO = 0 PATIENT_LLM = 1 GRADER_LLM = 2 def set_scenario_tab_index(x): st.session_state.scenario_tab_index=x return None def go_to_patient_llm(): selected_scenario = st.session_state.get('selected_scenario') if selected_scenario is None or selected_scenario < 0: st.warning("Please select a scenario!") else: st.session_state.start_time = datetime.datetime.utcnow() states = ["store", "store2","retriever","retriever2","chain","chain2"] for state_to_del in states: if state_to_del in st.session_state: del st.session_state[state_to_del] init_patient_llm() set_scenario_tab_index(ScenarioTabIndex.PATIENT_LLM) if not st.session_state.get("scenario_tab_index"): set_scenario_tab_index(ScenarioTabIndex.SELECT_SCENARIO) with scenario_tab: ## if True: ## Check in select scenario if st.session_state.scenario_tab_index == ScenarioTabIndex.SELECT_SCENARIO: def change_scenario(scenario_index): st.session_state.selected_scenario = scenario_index if st.session_state.get("selected_scenario", None) is None: st.session_state.selected_scenario = -1 total_cols = 3 rows = list() # for _ in range(0, number_of_indexes, total_cols): # rows.extend(st.columns(total_cols)) st.header(f"Selected Scenario: {st.session_state.scenario_list[st.session_state.selected_scenario] if st.session_state.selected_scenario>=0 else 'None'}") #st.button("Generate a new scenario") for i, scenario in enumerate(st.session_state.scenario_list): if i % total_cols == 0: rows.extend(st.columns(total_cols)) curr_col = rows[(-total_cols + i % total_cols)] tile = curr_col.container(height=120) ## TODO: Implement highlight box if index is selected # if st.session_state.selected_scenario == i: # tile.markdown("", unsafe_allow_html=True) tile.write(":balloon:") tile.button(label=scenario, on_click=change_scenario, args=[i]) select_scenario_btn = st.button("Select Scenario", on_click=go_to_patient_llm, args=[]) elif st.session_state.scenario_tab_index == ScenarioTabIndex.PATIENT_LLM: st.header("Patient info") ## TODO: Put the patient's info here, from SCENARIO # st.write("Pull the info here!!!") col1, col2, col3 = st.columns([1,3,1]) with col1: back_to_scenario_btn = st.button("Back to selection", on_click=set_scenario_tab_index, args=[ScenarioTabIndex.SELECT_SCENARIO]) # with col3: # start_timer_button = st.button("START") with col2: TIME_LIMIT = 60*10 ## to change to 10 minutes time.sleep(1) # if start_timer_button: # st.session_state.start_time = datetime.datetime.now() # st.session_state.time = -1 if not st.session_state.get('time') else st.session_state.get('time') st.session_state.start_time = False if not st.session_state.get('start_time') else st.session_state.start_time from streamlit.components.v1 import html html(f"""

Time left

""", ) with open("./public/chars/Female_talk.gif", "rb") as f: contents = f.read() student_url = base64.b64encode(contents).decode("utf-8") with open("./public/chars/Male_talk.gif", "rb") as f: contents = f.read() patient_url = base64.b64encode(contents).decode("utf-8") interactive_container = st.container() user_input_col ,r = st.columns([4,1]) def to_grader_llm(): if "chain2" in st.session_state: del st.session_state.chain2 """ init_grader_llm() """ st.session_state["patient_chat_history"] = "History\n" + '\n'.join([(sp_mapper.get(i.type, i.type) + ": "+ i.content) for i in st.session_state.memory.chat_memory.messages]) ## Grader index_name = f"indexes/{st.session_state.scenario_list[st.session_state.selected_scenario]}/Rubric" ## Reset time st.session_state.start_time = False if "store2" not in st.session_state: st.session_state.store2 = db.get_store(index_name, embeddings=embeddings) if "retriever2" not in st.session_state: st.session_state.retriever2 = st.session_state.store2.as_retriever(search_type="similarity", search_kwargs={"k":2}) ## Re-init history st.session_state["patient_chat_history"] = "History\n" + '\n'.join([(sp_mapper.get(i.type, i.type) + ": "+ i.content) for i in st.session_state.memory.chat_memory.messages]) if ("chain2" not in st.session_state or st.session_state.TEMPLATE2 != TEMPLATE2): st.session_state.chain2 = ( RunnableParallel({ "context": st.session_state.retriever2 | format_docs, # "history": RunnableLambda(lambda _: "History\n" + '\n'.join([(sp_mapper.get(i.type, i.type) + ": "+ i.content) for i in st.session_state.memory.chat_memory.messages])), "history": (get_patient_chat_history), "question": RunnablePassthrough(), }) | # LLMChain(llm=llm_i, prompt=prompt2, verbose=False ) #| LLMChain(llm=llm_gpt4, prompt=prompt2, verbose=False ) #| | { "json": itemgetter("text"), "text": ( LLMChain( llm=llm, prompt=PromptTemplate( input_variables=["text"], template="Interpret the following JSON of the student's grades, and do a write-up for each section.\n\n```json\n{text}\n```"), verbose=False) ) } ) set_scenario_tab_index(ScenarioTabIndex.GRADER_LLM) with r: to_grader_btn = st.button("To Grader", on_click=to_grader_llm) with user_input_col: user_inputs = st.text_input("", placeholder="Chat with the patient here!", key="user_inputs") if user_inputs: response = st.session_state.chain.invoke(user_inputs).get("text") st.session_state.patient_response = response with interactive_container: html(f"""
Doctor
Patient
You: {st.session_state.get('user_inputs') or ''}
{'Patient: '+st.session_state.get('patient_response') if st.session_state.get('patient_response') else '...'}
""", height=500) elif st.session_state.scenario_tab_index == ScenarioTabIndex.GRADER_LLM: st.session_state.grader_output = "" if not st.session_state.get("grader_output") else st.session_state.grader_output def get_grades(): st.session_state["patient_chat_history"] = "History\n" + '\n'.join([(sp_mapper.get(i.type, i.type) + ": "+ i.content) for i in st.session_state.get("memory").chat_memory.messages]) txt = f""" {st.session_state.diagnosis} {st.session_state.differential_1} {st.session_state.differential_2} {st.session_state.differential_3} """ response = st.session_state.chain2.invoke(txt) st.session_state.grader_output = response st.session_state.has_llm_output = bool(st.session_state.get("grader_output")) ## TODO: False for now, need check llm output! with st.expander("Your Diagnosis and Differentials", expanded=not st.session_state.has_llm_output): st.session_state.diagnosis = st.text_area("Input your case summary and **main** diagnosis:", placeholder="This is a young gentleman with significant family history of stroke, and medical history of poorly-controlled hypertension. He presents with acute onset of bitemporal headache associated with dysarthria and meningism symptoms. Important negatives include the absence of focal neurological deficits, ataxia, and recent trauma.") st.divider() st.session_state.differential_1 = st.text_input("Differential 1") st.session_state.differential_2 = st.text_input("Differential 2") st.session_state.differential_3 = st.text_input("Differential 3") with st.columns(6)[5]: send_for_grading = st.button("Get grades!", on_click=get_grades) with st.expander("Your grade", expanded=st.session_state.has_llm_output): if st.session_state.grader_output: st.write(st.session_state.grader_output.get("text").get("text")) # back_btn = st.button("back to LLM?", on_click=set_scenario_tab_index, args=[ScenarioTabIndex.PATIENT_LLM]) back_btn = st.button("New Scenario?", on_click=set_scenario_tab_index, args=[ScenarioTabIndex.SELECT_SCENARIO]) else: pass with dashboard_tab: cred = db.cred # cred = credentials.Certificate(json.loads(os.environ.get("FIREBASE_CREDENTIAL"))) # Initialize Firebase (if not already initialized) if not firebase_admin._apps: firebase_admin.initialize_app(cred, {'storageBucket': 'healthhack-store.appspot.com'}) #firebase_admin.initialize_app(cred,{'storageBucket': 'healthhack-store.appspot.com'}) # connecting to firebase db_client = firestore.client() docs = db_client.collection("clinical_scores").stream() # Create a list of dictionaries from the documents data = [] for doc in docs: doc_dict = doc.to_dict() doc_dict['document_id'] = doc.id # In case you need the document ID later data.append(doc_dict) # Create a DataFrame df = pd.DataFrame(data) username = st.session_state.get("username") st.title("Dashboard") # Convert date from string to datetime if it's not already in datetime format df['date'] = pd.to_datetime(df['date'], errors='coerce') # Streamlit page configuration #st.set_page_config(page_title="Interactive Data Dashboard", layout="wide") # Use df_selection for filtering data based on authenticated user if username != 'admin': df_selection = df[df['name'] == username] else: df_selection = df # Admin sees all data # Chart Title: Student Performance Dashboard st.title(":bar_chart: Student Performance Dashboard") st.markdown("##") # Chart 1: Total attempts if df_selection.empty: st.error("No data available to display.") else: # Total attempts by name (filtered) total_attempts_by_name = df_selection.groupby("name")['date'].count().reset_index() total_attempts_by_name.columns = ['name', 'total_attempts'] # For a single point or multiple points, use a scatter plot fig_total_attempts = px.scatter( total_attempts_by_name, x="name", y="total_attempts", title="Total Attempts", size='total_attempts', # Adjust the size of points color_discrete_sequence=["#0083B8"] * len(total_attempts_by_name), template="plotly_white", text='total_attempts' # Display total_attempts as text labels ) # Add text annotation for each point for line in range(0, total_attempts_by_name.shape[0]): fig_total_attempts.add_annotation( text=str(total_attempts_by_name['total_attempts'].iloc[line]), x=total_attempts_by_name['name'].iloc[line], y=total_attempts_by_name['total_attempts'].iloc[line], showarrow=True, font=dict(family="Courier New, monospace", size=18, color="#ffffff"), align="center", arrowhead=2, arrowsize=1, arrowwidth=2, arrowcolor="#636363", ax=20, ay=-30, bordercolor="#c7c7c7", borderwidth=2, borderpad=4, bgcolor="#ff7f0e", opacity=0.8 ) # Update traces for styling fig_total_attempts.update_traces(marker=dict(size=12), selector=dict(mode='markers+text')) # Display the scatter plot in Streamlit st.plotly_chart(fig_total_attempts, use_container_width=True) # Chart 2 (students only): Personal scores over time if username != 'admin': # Sort the DataFrame by 'date' in chronological order df_selection = df_selection.sort_values(by='date') #fig = px.bar(df_selection, x='date', y='global_score', title='Your scores!') if len(df_selection) > 1: # # If more than one point, use a bar chart # fig = px.bar(df_selection, x='date', y='global_score', title='Global Score Over Time') # # fig.update_yaxes( # # tickmode='array', # # tickvals=[1, 2, 3, 4, 5], # Reverse the order of tickvals # # ticktext=['A', 'B','C','D','E'] # Reverse the order of ticktext # # ) # Mapping dictionary grade_to_score = {'A': 100, 'B': 80, 'C': 60, 'D': 40, 'E': 20} # Apply mapping to convert letter grades to numerical scores df_selection['numeric_score'] = df_selection['global_score'].map(grade_to_score) # Sort the DataFrame by 'date' in chronological order df_selection = df_selection.sort_values(by='date') # Check if there's more than one point in the DataFrame if len(df_selection) > 1: # Create a bar chart using Plotly Express fig = px.bar(df_selection, x='date', y='numeric_score', title='Your scores over time') else: # Create a bar chart with just one point fig = px.bar(df_selection, x='date', y='numeric_score', title='Global Score') # Manually set the y-axis ticks and labels fig.update_yaxes( tickmode='array', tickvals=list(grade_to_score.values()), # Positions for the ticks ticktext=list(grade_to_score.keys()), # Text labels for the ticks range=[0, 120] # Extend the range a bit beyond 100 to accommodate 'A' ) # # Use st.plotly_chart to display the chart in Streamlit # st.plotly_chart(fig, use_container_width=True) else: # For a single point, use a scatter plot fig = px.scatter(df_selection, x='date', y='global_score', title='Global Score', text='global_score', size_max=60) # Add text annotation for line in range(0,df_selection.shape[0]): fig.add_annotation(text=df_selection['global_score'].iloc[line], x=df_selection['date'].iloc[line], y=df_selection['global_score'].iloc[line], showarrow=True, font=dict(family="Courier New, monospace", size=18, color="#ffffff"), align="center", arrowhead=2, arrowsize=1, arrowwidth=2, arrowcolor="#636363", ax=20, ay=-30, bordercolor="#c7c7c7", borderwidth=2, borderpad=4, bgcolor="#ff7f0e", opacity=0.8) fig.update_traces(marker=dict(size=12), selector=dict(mode='markers+text')) # Display the chart in Streamlit st.plotly_chart(fig, use_container_width=True) # Show students their scores over time st.dataframe(df_selection[['date', 'global_score', 'name']]) # Chart 3 (admin only): Global score chart # Define the order of categories explicitly order_of_categories = ['A', 'B', 'C', 'D', 'E'] # Convert global_score to a categorical type with the specified order df_selection['global_score'] = pd.Categorical(df_selection['global_score'], categories=order_of_categories, ordered=True) # Plot the histogram fig_score_distribution = px.histogram( df_selection, x="global_score", title="Global Score Distribution", color_discrete_sequence=["#33CFA5"], category_orders={"global_score": ["A", "B", "C", "D", "E"]} ) if username == 'admin': st.plotly_chart(fig_score_distribution, use_container_width=True) # Chart 4 (admin only): Students with <5 attempts (filtered) if username == 'admin': students_with_less_than_5_attempts = total_attempts_by_name[total_attempts_by_name['total_attempts'] < 5] fig_less_than_5_attempts = px.bar( students_with_less_than_5_attempts, x="name", y="total_attempts", title="Students with <5 Attempts", color_discrete_sequence=["#D62728"] * len(students_with_less_than_5_attempts), template="plotly_white", ) if username == 'admin': st.plotly_chart(fig_less_than_5_attempts, use_container_width=True) # Selection of a student for detailed view (<5 attempts) - based on filtered data if username == 'admin': selected_student_less_than_5 = st.selectbox("Select a student with less than 5 attempts to view details:", students_with_less_than_5_attempts['name']) if selected_student_less_than_5: st.write(df_selection[df_selection['name'] == selected_student_less_than_5]) # Chart 5 (admin only): Students with at least one global score of 'C', 'D', 'E' (filtered) if username == 'admin': students_with_cde = df_selection[df_selection['global_score'].isin(['C', 'D', 'E'])].groupby("name")['date'].count().reset_index() students_with_cde.columns = ['name', 'total_attempts'] fig_students_with_cde = px.bar( students_with_cde, x="name", y="total_attempts", title="Students with at least one global score of 'C', 'D', 'E'", color_discrete_sequence=["#FF7F0E"] * len(students_with_cde), template="plotly_white", ) st.plotly_chart(fig_students_with_cde, use_container_width=True) # Selection of a student for detailed view (score of 'C', 'D', 'E') - based on filtered data if username == 'admin': selected_student_cde = st.selectbox("Select a student with at least one score of 'C', 'D', 'E' to view details:", students_with_cde['name']) if selected_student_cde: st.write(df_selection[df_selection['name'] == selected_student_cde]) # Chart 7 (all): Radar Chart # Mapping grades to numeric values grade_to_numeric = {'A': 90, 'B': 70, 'C': 50, 'D': 30, 'E': 10} df.replace(grade_to_numeric, inplace=True) # Calculate average numeric scores for each category average_scores = df.groupby('name')[['hx_PC_score', 'hx_AS_score', 'hx_others_score', 'differentials_score']].mean().reset_index() if username == 'admin': st.title('Average Scores Radar Chart') else: st.title('Performance in each segment as compared to your friends!') # Categories for the radar chart categories = ['Presenting complaint', 'Associated symptoms', '(Others)', 'Differentials'] st.markdown(""" ### Double click on the names in the legend to include/exclude them from the plot. """) # Custom colors for better contrast colors = ['gold', 'cyan', 'magenta', 'green'] # Plotly Radar Chart fig = go.Figure() for index, row in average_scores.iterrows(): fig.add_trace(go.Scatterpolar( r=[row['hx_PC_score'], row['hx_AS_score'], row['hx_others_score'], row['differentials_score']], theta=categories, fill='toself', name=row['name'], line=dict(color=colors[index % len(colors)]) )) fig.update_layout( polar=dict( radialaxis=dict( visible=True, range=[0, 100], # Numeric range tickvals=[10, 30, 50, 70, 90], # Positions for the grade labels ticktext=['E', 'D', 'C', 'B', 'A'] # Grade labels )), showlegend=True, height=600, # Set the height of the figure width=600 # Set the width of the figure ) # Display the figure in Streamlit st.plotly_chart(fig, use_container_width=True) with generate_tab: st.title("Medical Scenario Generator (for Admins)") ## Hardcode scenarios for now, indexes_gen = """ aortic dissection anemia cystitis pneumonia """.split("\n") if "selected_index_gen" not in st.session_state: st.session_state.selected_index_gen = 0 if "search_selectbox_gen" not in st.session_state: st.session_state.search_selectbox_gen = " " # st.session_state.index_selectbox_gen = "Headache" if "search_freetext" not in st.session_state: st.session_state.search_freetext = " " # st.session_state.index_selectbox = "Headache" #index_selectbox = st_tags( # label='What medical condition would you like to generate a scenario for?', # text='Input here ...', # suggestions=indexes_gen, # value = ' ', # maxtags = 1, # key='0') st.write('What medical condition would you like to generate a scenario for?') search_freetext = st.text_input("Type your own", value = " ") if search_freetext != st.session_state.search_freetext: st.session_state.search_freetext = search_freetext #hard0, free0 = st.columns(2) #search_selectbox = hard0.selectbox( # 'Choose one OR Type on the right', # indexes, index=0) #search_freetext = free0.text_input("Type your own") # #if search_selectbox != indexes[st.session_state.selected_index]: # st.session_state.selected_index = indexes.index(search_selectbox) # st.session_state.search_selectbox = search_selectbox if "openai_model_gen" not in st.session_state: st.session_state["openai_model_gen"] = "gpt-3.5-turbo" model_name = "pritamdeka/S-PubMedBert-MS-MARCO" model_kwargs = {"device": "cpu"} # model_kwargs = {"device": "cuda"} encode_kwargs = {"normalize_embeddings": True} if "embeddings_gen" not in st.session_state: st.session_state.embeddings_gen = HuggingFaceEmbeddings( model_name=model_name, model_kwargs = model_kwargs, encode_kwargs = encode_kwargs) embeddings_gen = st.session_state.embeddings_gen if "llm_gen" not in st.session_state: st.session_state.llm_gen = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0) #if "llm" not in st.session_state: # st.session_state.llm = OpenAI(model_name="gpt-3.5-turbo-instruct", temperature=0) #llm = st.session_state.llm #if "llm" not in st.session_state: # st.session_state.llm = ChatOpenAI(model_name="gpt-4-1106-preview", temperature=0) llm_gen = st.session_state.llm_gen ## ------------------------------------------------------------------------------------------------ ## Generator part index_name_gen = f"indexes/faiss_index_large_v2" if "store_gen" not in st.session_state: #st.session_state.store_gen = FAISS.load_local(index_name_gen, embeddings_gen) st.session_state.store_gen = db.get_store(index_name_gen, embeddings=embeddings_gen) store_gen = st.session_state.store_gen def topk(searchKW): search_r = st.session_state.store_gen.similarity_search(searchKW, k=5) return [x.page_content for x in search_r] if 'searchbtn_clicked' not in st.session_state: st.session_state['searchbtn_clicked'] = False if 'selected_option' not in st.session_state: st.session_state['selected_option'] = "" def search_callback(): st.session_state['searchbtn_clicked'] = True if st.button('search', on_click=search_callback) or st.session_state['searchbtn_clicked'] or st.session_state.search_freetext != ' ': def searchInner(searchOptions): if len(searchOptions)>0: st.markdown('---') col1, col2 = st.columns(2) selected_options = col1.multiselect( 'Choose the most relevant condition:', searchOptions, max_selections = 1) if len(selected_options)>0: col2.write(selected_options[0]) st.session_state['selected_option'] = selected_options[0] else: col2.write('') else: st.markdown('---') st.write("No results found. Perhaps try another condition? Some examples that work: "+', '.join(indexes_gen)) if search_freetext != " ": options = topk(search_freetext) searchInner(options) else: options = topk(indexes_gen[st.session_state.selected_index]) searchInner(options) st.write(st.session_state['selected_option']) ## ------------------------------------------------------------------------------------------------ ## LLM part kg_name = f"kgstore" if 'infostorekg' not in st.session_state: st.session_state.infostorekg = "" if "dfdisease" not in st.session_state: st.session_state.dfdisease = db.get_csv(kg_name, isDiseases = True) if "dffull" not in st.session_state: st.session_state.dffull = db.get_csv(kg_name, isDiseases = False) if "datanet" not in st.session_state: st.session_state.datanet = nx.from_pandas_edgelist(st.session_state.dffull , 'x_id', 'y_id', ['relation']) datanet = st.session_state.datanet kgD = st.session_state.dfdisease[['group_id_bert','group_name_bert', 'mondo_definition', 'umls_description','orphanet_definition']].astype(str).values.tolist() kgD2 = [' '.join([x[1]+'.']+list(set([y for y in x[2:] if y != 'nan']))) for x in kgD] if 'genbtn_clicked' not in st.session_state: st.session_state['genbtn_clicked'] = False if "TEMPLATE_gen" not in st.session_state: with open('templates/kgen.txt', 'r') as file: TEMPLATE_gen = file.read() st.session_state.TEMPLATE_gen = TEMPLATE_gen ### ------------------------------------------------------------------------------------------------ ### DEBUGGING CODE #with st.expander("Patient Prompt"): # TEMPLATE = st.text_area("Patient Prompt", value=st.session_state.TEMPLATE) # st.session_state.TEMPLATE= TEMPLATE ### ------------------------------------------------------------------------------------------------ prompt_gen = PromptTemplate( input_variables = ["infostorekg"], template = st.session_state.TEMPLATE_gen ) if 'formautofill' not in st.session_state: st.session_state['formautofill'] = "" def gen_callback(): st.session_state['genbtn_clicked'] = True def kgMatch(nodeName): newidx = kgD[kgD2.index(nodeName)][0] df_disease = st.session_state.dfdisease df_full = st.session_state.dffull desG = nx.single_source_dijkstra(datanet, newidx, cutoff = 1) diseaseName = df_disease[df_disease.group_id_bert == newidx]['group_name_bert'].unique().tolist()[0] phenotypeFilter = df_full[(df_full['x_id'] == newidx)| (df_full['y_id'] == newidx)] phenotypeList = [x for x in list(set(phenotypeFilter.y_name.unique().tolist()+ phenotypeFilter.x_name.unique().tolist())) if diseaseName not in x ] return (diseaseName, phenotypeList) def passState(dummy): if "infostorekg" in st.session_state: return str(st.session_state.infostorekg) else: return dummy if st.button('Generate scenario', on_click=gen_callback) or st.session_state['genbtn_clicked']: if len(st.session_state.selected_option)>0: infoPrompt = kgMatch(st.session_state.selected_option) st.session_state.infostorekg = str(infoPrompt) if ("chain_gen" not in st.session_state or st.session_state.TEMPLATE_gen != TEMPLATE): #st.session_state.chain = ( #{ # "infostorekg": passState, # } | #LLMChain(llm=llm_gen, prompt=prompt, verbose=False) st.session_state.chain_gen = LLMChain(llm=llm_gen, prompt=prompt_gen, verbose = False) chain = st.session_state.chain_gen st.session_state['formautofill'] = chain.invoke({"infostorekg": st.session_state.infostorekg}).get("text") else: st.warning('Please search and select a condition first!') ## ------------------------------------------------------------------------------------------------ ## Forms part conDict = { } rubDict = {'complaints': """Grade A: Elicits all of the above points in detail Grade B: Explores both presenting complaints (fill in) and (others) in almost full detail and rules out red flags Grade C: Explores both presenting complaints (fill in) incompletely and looks out for red flags Grade D: Explores both presenting complaints incompletely (fill in) but does not rule out any red flags/ explores one complaint and rules out at least one red flag Grade E: Only explores one of the two presenting complaints (fill in)""", 'syms': """Grade A: Explores at least (5) differentials in detail including (fill in) and elicits all * (6) points Grade B: Explores most (4) of the above systems including (fill in) and elicits all (6) * points Grade C: Explores most (4) of the above systems and elicits most (4-6) * points Grade D: Explores more than half (3) of the above systems and elicits most (4-6) * points Grade E: Explores only 1-2 of the above systems or asks less than half (1-3) * points""", 'others': """Grade A: Elicits all (4) of the * points and past medical Hx of (fill in) Grade B: Elicits all (4) of the * points and past medical Hx of (fill in), but did not go into important details Grade C: Elicits most (2-3) of the * points and past medical Hx of (fill in) in adequate detail Grade D: Elicits most (2-3) of the * points and past medical Hx of (fill in) but not in detail Grade E: Elicits 0-1 of the * points or did not take past medical Hx of (fill in)(not taking a (specific history: fill in ) history will give the candidate this score for the domain)""", 'findings': """Grade A: Presents all (4) of the * points, has (fill in) as top differentials with justification, and at least one other differentials with adequate justification Grade B: Presents most (2-3) of the * points, has (fill in) as top differentials but inadequate justification Grade C: Presents most (2-3) of the * points, has either (fill in) as top differential with at least one other differential Grade D: Presents most (2-3) of the *points OR only able to have 1 diagnosis without differential diagnosis Grade E: Presents few (0-1) of * points OR unable to have any diagnosis or differentials""" } ### ------------------------------------------------------------------------------------------------ ### DEBUGGING CODE #with st.expander("GPTOUTPUT"): # out = st.text_area(" ", value=st.session_state['formautofill']) ### ------------------------------------------------------------------------------------------------ def splitReply(): gendata = json.loads(st.session_state['formautofill'], strict = False) conditionsGen = [] def curseDict(possibleDict, defDict): if type(defDict[possibleDict]) == str: return '\n' + possibleDict + ': '+ defDict[possibleDict] elif type(defDict[possibleDict]) == list: if all(isinstance(item, str) for item in defDict[possibleDict]): return '\n' + possibleDict + ': '+ '\n '.join(defDict[possibleDict]) else: returnList = [str(x) for x in defDict[possibleDict]] return '\n' + possibleDict + ': '+ '\n '.join(returnList) elif type(defDict[possibleDict]) == dict: out = possibleDict for m in defDict[possibleDict]: out += curseDict(m, defDict[possibleDict]) return out else: return possibleDict+'\n'+ str(defDict[possibleDict]) for x in gendata: if 'patient' in x.lower(): conditionsGen.append(x) for y in gendata[x]: conditionsGen[-1] += curseDict(y, gendata[x]) conDict['patients'] = conditionsGen[-1] elif 'complain' in x.lower() or 'present' in x.lower(): conditionsGen.append(x) for y in gendata[x]: conditionsGen[-1] += curseDict(y, gendata[x]) conDict['complaints'] = conditionsGen[-1] elif 'symptom' in x.lower() or 'associate' in x.lower(): conditionsGen.append(x) for y in gendata[x]: conditionsGen[-1] += curseDict(y, gendata[x]) conDict['syms'] = conditionsGen[-1] elif 'other' in x.lower(): conditionsGen.append(x) for y in gendata[x]: conditionsGen[-1] += curseDict(y, gendata[x]) conDict['others'] = conditionsGen[-1] if 'diagnosis' in x.lower() or 'differential' in x.lower(): conditionsGen.append(x) for y in gendata[x]: conditionsGen[-1] += curseDict(y, gendata[x]) conDict['findings'] = conditionsGen[-1] if len(st.session_state['formautofill'])>0: with st.form("filled_form"): st.write("Generated Autofill") splitReply() with st.expander("Patient Scenario: Provided to students at the start of the exam"): patient_val_filled = st.text_area(" ", conDict['patients'], height=400, key="patientscenario") st.write("Rubrics: Details students are expected to ask about and rubrics details for grading") with st.expander("History Taking: Presenting Complaints"): patient_val_filled = st.text_area(" ", conDict['complaints'], height=400, key="complaints1") complaints_val_filled = st.text_area("Rubrics: Complaints", rubDict['complaints'], height=400, key="complaints2") with st.expander("History Taking: Associated Symptoms"): syms_val_filled = st.text_area(" ", conDict['syms'], height=400, key="syms") syms_rubrics_filled = st.text_area("Rubrics: Symptoms", rubDict['syms'], height=400, key="syms2") with st.expander("History Taking: Others"): others_val_filled = st.text_area(" ", conDict['others'], height=400, key="others") others_rubrics_filled = st.text_area("Rubrics: Others", rubDict['others'], height=400, key="others2") with st.expander("Presentation of Findings, Diagnosis, and Differentials"): findings_val_filled = st.text_area(" ", conDict['findings'], height=400, key="findings") findings_rubrics_filled = st.text_area("Rubrics: Findings and Diagnosis",rubDict['findings'], height=400, key="findings2") # Every form must have a submit button. submitted = st.form_submit_button("Submit") if submitted: #conDict.send(to firebase, with key) # retrieve from key st.write("check out your new scenario here! (not implemented yet)") #loadScenario = st.button("Go to patient simulator (currently not implemented)") else: with st.form("empty_form"): st.write("Blank Form") with st.expander("Patient Scenario: Provided to students at the start of the exam"): patient_val_filled = st.text_area(" ", height=400, key="patientscenario_empty") st.write("Rubrics: Details students are expected to ask about and rubrics details for grading") with st.expander("History Taking: Presenting Complaints"): col1_com, col2_com= st.columns(2) patient_val_filled = col1_com.text_area(" ", height=400, key="complaints_empty") complaints_val_filled = col2_com.text_area("Rubrics: Complaints", rubDict['complaints'], height=400, key="complaints2_empty") with st.expander("History Taking: Associated Symptoms"): syms_val_filled = st.text_area(" ", height=400, key="syms_empty") syms_rubrics_filled = st.text_area("Rubrics: Symptoms", rubDict['syms'], height=400, key="syms2_empty") with st.expander("History Taking: Others"): others_val_filled = st.text_area(" ", height=400, key="others_empty") others_rubrics_filled = st.text_area("Rubrics: Others", rubDict['others'], height=400, key="others2_empty") with st.expander("Presentation of Findings, Diagnosis, and Differentials"): findings_val_filled = st.text_area(" ", height=400, key="findings_empty") findings_rubrics_filled = st.text_area("Rubrics: Findings and Diagnosis",rubDict['findings'], height=400, key="findings2_empty") # Every form must have a submit button. submitted_empty = st.form_submit_button("Submit") if submitted_empty: #conDict.send(to firebase, with key) # retrieve from key st.write("check out your new scenario here! (not implemented yet)") #loadScenario = st.button("Go to patient simulator (currently not implemented)")