Jafar874 committed on
Commit
b068cbe
β€’
1 Parent(s): 10a9447

Upload 4 files

Browse files
Files changed (4) hide show
  1. app.py +151 -0
  2. document_store.pkl +3 -0
  3. recommendations.csv +0 -0
  4. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from haystack.document_stores.in_memory import InMemoryDocumentStore
4
+ from haystack import Document, Pipeline
5
+ from haystack_integrations.components.embedders.cohere import CohereDocumentEmbedder, CohereTextEmbedder
6
+ from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
7
+ from haystack.components.generators.chat import OpenAIChatGenerator
8
+ from haystack.components.builders import DynamicChatPromptBuilder
9
+ from haystack.dataclasses import ChatMessage
10
+ import streamlit as st
11
+
12
# API keys: the deployment secrets use unprefixed names (COHEREAPIKEY /
# OPENAIAPIKEY); copy them into the variable names the SDKs expect.
def _require_secret(name: str) -> str:
    """Return the value of environment variable *name*.

    Raises:
        RuntimeError: if the variable is unset. (Assigning ``os.getenv``'s
            ``None`` straight into ``os.environ`` would otherwise fail later
            with an opaque ``TypeError: str expected``.)
    """
    value = os.getenv(name)
    if value is None:
        raise RuntimeError(f"Required environment variable '{name}' is not set")
    return value


os.environ['COHERE_API_KEY'] = _require_secret("COHEREAPIKEY")
os.environ['OPENAI_API_KEY'] = _require_secret("OPENAIAPIKEY")
15
+
16
+
17
def load_data():
    """
    Load the recommendation document store.

    Reuses the cached 'document_store.pkl' when it exists; otherwise builds
    the store from 'recommendations.csv', embeds every recommendation with
    Cohere, and caches the populated store for future launches.

    Returns:
        In-memory document store.
    """
    cache_path = 'document_store.pkl'

    # Fast path: a previously embedded store was cached to disk.
    # NOTE(review): pickle.load fully trusts the cache file — acceptable for
    # a file this app wrote itself, unsafe for anything user-supplied.
    if os.path.exists(cache_path):
        with open(cache_path, 'rb') as handle:
            return pickle.load(handle)

    # Slow path: build from scratch. Imports are local so the cached fast
    # path never pays for them.
    import pandas as pd
    from haystack.components.writers import DocumentWriter

    store = InMemoryDocumentStore()

    # One Document per CSV row: recommendation text plus its source citation.
    frame = pd.read_csv('recommendations.csv')
    docs = [
        Document(content=row['Recommendation'], meta={'source': row['Source']})
        for _, row in frame[['Recommendation', 'Source']].iterrows()
    ]

    # Embed the documents with Cohere and write them into the store.
    indexer = Pipeline()
    indexer.add_component("embedder", CohereDocumentEmbedder(
        model="embed-multilingual-v3.0", input_type="search_document"))
    indexer.add_component(
        "writer", DocumentWriter(document_store=store))
    indexer.connect("embedder", "writer")
    indexer.run({"embedder": {"documents": docs}})

    # Cache the embedded store so later runs skip the API-costly embedding.
    with open(cache_path, 'wb') as handle:
        pickle.dump(store, handle)

    return store
52
+
53
+
54
def load_pipeline(text_embedder, retriever, prompt_builder, llm):
    """
    Assemble the query pipeline from its four components.

    Args:
        text_embedder (Embedding): Text embedder.
        retriever (Retriever): Retriever.
        prompt_builder (PromptBuilder): Prompt builder.
        llm (LanguageModel): Language model.

    Returns:
        Connected pipeline.
    """
    # Registration order matches the data flow below.
    components = {
        "text_embedder": text_embedder,
        "retriever": retriever,
        "prompt_builder": prompt_builder,
        "llm": llm,
    }
    # Flow: embed query -> retrieve by embedding -> build prompt -> generate.
    connections = [
        ("text_embedder.embedding", "retriever.query_embedding"),
        ("retriever.documents", "prompt_builder.documents"),
        ("prompt_builder.prompt", "llm.messages"),
    ]

    pipeline = Pipeline()
    for name, component in components.items():
        pipeline.add_component(name, component)
    for sender, receiver in connections:
        pipeline.connect(sender, receiver)

    return pipeline
78
+
79
+
80
def run_streamlit(pipeline, messages):
    """
    Render the Streamlit UI and answer user queries with *pipeline*.

    Shows the app description, reads a query from a text box, runs the
    pipeline on it, and writes the first LLM reply to the page. Warns users
    not to enter personal information or patient data.

    Args:
        pipeline (Pipeline): The AI pipeline to use.
        messages (List[Message]): Initial messages.

    Returns:
        None.
    """
    st.title('ChatCPG-demo')
    st.write(
        """
        This application takes a user query (e. g., clinical question) and generates a summary of evidence-based recommendations on geriatric trauma care, relevant to this query. \n
        DO NOT ENTER PERSONAL INFORMATION OR PATIENT DATA! \n
        <u>Publication:</u> Kocar et al. 2024, submitted <br>
        <u>GitHub:</u> https://github.com/IfGF-UUlm/CPG-summarization <br>
        <u>Contact:</u> thomas.kocar@uni-ulm.de
        """,
        unsafe_allow_html=True
    )

    query = st.text_input(r'$\textsf{\Large Enter your query here:}$', '')
    # Nothing to do until the user has submitted a query.
    if not query:
        return None

    with st.spinner('Generating summary...'):
        result = pipeline.run(
            data={"text": query, "prompt_source": messages, "query": query})
    st.write(result['llm']['replies'][0].content)
    return None
112
+
113
+
114
if __name__ == "__main__":

    # Load Models and Data.
    # Queries must be embedded with input_type="search_query" so they land in
    # the same vector space as the documents, which load_data() embeds with
    # input_type="search_document". Using "search_document" on the query side
    # degrades retrieval quality (see Cohere Embed v3 API docs).
    text_embedder = CohereTextEmbedder(
        model="embed-multilingual-v3.0", input_type="search_query")
    retriever = InMemoryEmbeddingRetriever(document_store=load_data())
    prompt_builder = DynamicChatPromptBuilder(
        runtime_variables=["query", "documents"])
    llm = OpenAIChatGenerator(model="gpt-4-turbo-2024-04-09")

    # Load Pipeline: embed query -> retrieve -> build prompt -> generate.
    pipeline = load_pipeline(text_embedder, retriever, prompt_builder, llm)

    # Load Prompt: a system role plus a Jinja user template that receives the
    # retrieved documents and the runtime query.
    messages = [
        ChatMessage.from_system(
            "Act as an experienced geriatrician who works as a consultant for surgeons in geriatric trauma care."
        ),
        ChatMessage.from_user(
            """
            Clinical practice guideline recommendations:
            {% for document in documents %}
            {{ document.content }}
            Source: {{ document.meta['source']}}
            {% endfor %}

            Summarize the clinical practice guideline recommendations in no more than 150 words in the context of the query: “{{query}}”
            Pay attention to whether the query relates to the preoperative, intraoperative, or postoperative phase or is generally applicable.
            Try to structure the summary as an ordered list, ranking the interventions according to relevance and complexity, starting with the most relevant and least complex ones.
            Try to structure the summary in pharmacological and non-pharmacological interventions, separating them as two ordered lists.
            If the query (“{{query}}”) cannot be answered with the recommendations of the clinical practice guidelines, do not reveal any information about the guidelines or their recommendations, but explain in 1 sentence that you are unable to provide a summary.
            If you can answer the query, return the sources word by word in an unordered list under the heading "References". The references should be the last part of your response.
            """
        )
    ]

    # Run Streamlit
    run_streamlit(pipeline, messages)
document_store.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdfbf47f79e153f6868a33171c6e2171df63a50158d82a092ce7d66533f157d8
3
+ size 7863559
recommendations.csv ADDED
The diff for this file is too large to render. See raw diff
 
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ pandas
2
+ haystack-ai
3
+ cohere-haystack
4
+ streamlit