Upload 4 files
- app.py +151 -0
- document_store.pkl +3 -0
- recommendations.csv +0 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,151 @@
import os
import pickle
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Document, Pipeline
from haystack_integrations.components.embedders.cohere import CohereDocumentEmbedder, CohereTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.dataclasses import ChatMessage
import streamlit as st

# API keys
os.environ['COHERE_API_KEY'] = os.getenv("COHEREAPIKEY")
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAIAPIKEY")


def load_data():
    """
    Loads an existing document store from 'document_store.pkl' if present;
    otherwise creates the store from 'recommendations.csv', embeds the documents, and saves it.

    Returns:
        In-memory document store.
    """
    if os.path.exists('document_store.pkl'):
        with open('document_store.pkl', 'rb') as f:
            document_store = pickle.load(f)
    else:
        import pandas as pd
        from haystack.components.writers import DocumentWriter

        document_store = InMemoryDocumentStore()

        data = pd.read_csv('recommendations.csv')
        documents = []
        for _, row in data[['Recommendation', 'Source']].iterrows():
            documents.append(
                Document(content=row['Recommendation'], meta={'source': row['Source']}))

        indexing_pipeline = Pipeline()
        indexing_pipeline.add_component("embedder", CohereDocumentEmbedder(
            model="embed-multilingual-v3.0", input_type="search_document"))
        indexing_pipeline.add_component(
            "writer", DocumentWriter(document_store=document_store))
        indexing_pipeline.connect("embedder", "writer")
        indexing_pipeline.run({"embedder": {"documents": documents}})

        with open('document_store.pkl', 'wb') as f:
            pickle.dump(document_store, f)

    return document_store


def load_pipeline(text_embedder, retriever, prompt_builder, llm):
    """
    Creates and connects components to form a complete pipeline.

    Args:
        text_embedder (Embedding): Text embedder.
        retriever (Retriever): Retriever.
        prompt_builder (PromptBuilder): Prompt builder.
        llm (LanguageModel): Language model.

    Returns:
        Connected pipeline.
    """
    pipeline = Pipeline()
    pipeline.add_component("text_embedder", text_embedder)
    pipeline.add_component("retriever", retriever)
    pipeline.add_component("prompt_builder", prompt_builder)
    pipeline.add_component("llm", llm)

    pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    pipeline.connect("retriever.documents", "prompt_builder.documents")
    pipeline.connect("prompt_builder.prompt", "llm.messages")

    return pipeline


def run_streamlit(pipeline, messages):
    """
    Displays the Streamlit application using a provided pipeline and messages.

    Generates a summary of geriatric trauma care recommendations based on user input.
    Warns against entering personal information or patient data.

    Args:
        pipeline (Pipeline): The pipeline to use.
        messages (List[ChatMessage]): Initial messages.

    Returns:
        None.
    """
    st.title('ChatCPG-demo')
    st.write(
        """
        This application takes a user query (e.g., a clinical question) and generates a summary of evidence-based recommendations on geriatric trauma care relevant to this query. \n
        DO NOT ENTER PERSONAL INFORMATION OR PATIENT DATA! \n
        <u>Publication:</u> Kocar et al. 2024, submitted <br>
        <u>GitHub:</u> https://github.com/IfGF-UUlm/CPG-summarization <br>
        <u>Contact:</u> thomas.kocar@uni-ulm.de
        """,
        unsafe_allow_html=True
    )
    query = st.text_input(r'$\textsf{\Large Enter your query here:}$', '')
    if query:
        with st.spinner('Generating summary...'):
            res = pipeline.run(
                data={"text": query, "prompt_source": messages, "query": query})
        st.write(res['llm']['replies'][0].content)
    return None


if __name__ == "__main__":

    # Load Models and Data
    text_embedder = CohereTextEmbedder(
        model="embed-multilingual-v3.0", input_type="search_document")
    retriever = InMemoryEmbeddingRetriever(document_store=load_data())
    prompt_builder = DynamicChatPromptBuilder(
        runtime_variables=["query", "documents"])
    llm = OpenAIChatGenerator(model="gpt-4-turbo-2024-04-09")

    # Load Pipeline
    pipeline = load_pipeline(text_embedder, retriever, prompt_builder, llm)

    # Load Prompt
    messages = [
        ChatMessage.from_system(
            "Act as an experienced geriatrician who works as a consultant for surgeons in geriatric trauma care."
        ),
        ChatMessage.from_user(
            """
            Clinical practice guideline recommendations:
            {% for document in documents %}
            {{ document.content }}
            Source: {{ document.meta['source'] }}
            {% endfor %}

            Summarize the clinical practice guideline recommendations in no more than 150 words in the context of the query: “{{query}}”
            Pay attention to whether the query relates to the preoperative, intraoperative, or postoperative phase or is generally applicable.
            Try to structure the summary as an ordered list, ranking the interventions according to relevance and complexity, starting with the most relevant and least complex ones.
            Try to structure the summary into pharmacological and non-pharmacological interventions, separating them as two ordered lists.
            If the query (“{{query}}”) cannot be answered with the recommendations of the clinical practice guidelines, do not reveal any information about the guidelines or their recommendations, but explain in 1 sentence that you are unable to provide a summary.
            If you can answer the query, return the sources word by word in an unordered list under the heading "References". The references should be the last part of your response.
            """
        )
    ]

    # Run Streamlit
    run_streamlit(pipeline, messages)
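
For reference, a minimal headless sketch of querying the same pipeline without the Streamlit UI, assuming app.py is importable from the working directory and COHEREAPIKEY / OPENAIAPIKEY are set as the Space expects. The example query and the abbreviated prompt below are illustrative placeholders, not part of this commit; the full prompt template is the one defined in app.py.

# headless_query.py - hypothetical sketch, not part of the Space.
# Reuses load_data() and load_pipeline() from app.py; assumes the
# COHEREAPIKEY and OPENAIAPIKEY environment variables are set.
from haystack_integrations.components.embedders.cohere import CohereTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.dataclasses import ChatMessage

from app import load_data, load_pipeline

# Wire the same components as app.py's __main__ block.
pipeline = load_pipeline(
    CohereTextEmbedder(model="embed-multilingual-v3.0", input_type="search_document"),
    InMemoryEmbeddingRetriever(document_store=load_data()),
    DynamicChatPromptBuilder(runtime_variables=["query", "documents"]),
    OpenAIChatGenerator(model="gpt-4-turbo-2024-04-09"),
)

# Abbreviated prompt for illustration only; app.py defines the full template.
messages = [
    ChatMessage.from_system("Act as an experienced geriatrician."),
    ChatMessage.from_user(
        "Clinical practice guideline recommendations:\n"
        "{% for document in documents %}{{ document.content }}\n{% endfor %}\n"
        "Summarize these recommendations in the context of the query: {{query}}"
    ),
]

query = "How should delirium be prevented after hip fracture surgery?"  # example query
result = pipeline.run(data={"text": query, "prompt_source": messages, "query": query})
print(result["llm"]["replies"][0].content)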
document_store.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cdfbf47f79e153f6868a33171c6e2171df63a50158d82a092ce7d66533f157d8
size 7863559
recommendations.csv
ADDED
The diff for this file is too large to render.
requirements.txt
ADDED
@@ -0,0 +1,4 @@
pandas
haystack-ai
cohere-haystack
streamlit
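
Assuming the dependencies above are installed (for example via pip install -r requirements.txt, a step not shown in this commit), a quick sketch to confirm that the imports app.py relies on resolve:

# Smoke test: confirm the packages listed above are importable.
import pandas       # used to parse recommendations.csv
import streamlit    # UI framework
from haystack import Pipeline                                    # haystack-ai
from haystack_integrations.components.embedders.cohere import (  # cohere-haystack
    CohereDocumentEmbedder,
    CohereTextEmbedder,
)

print("All dependencies for app.py are importable.")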