Upload 4 files
- app.py +151 -0
- document_store.pkl +3 -0
- recommendations.csv +0 -0
- requirements.txt +4 -0
app.py
ADDED
@@ -0,0 +1,151 @@
import os
import pickle
from haystack.document_stores.in_memory import InMemoryDocumentStore
from haystack import Document, Pipeline
from haystack_integrations.components.embedders.cohere import CohereDocumentEmbedder, CohereTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.dataclasses import ChatMessage
import streamlit as st

# API keys
os.environ['COHERE_API_KEY'] = os.getenv("COHEREAPIKEY")
os.environ['OPENAI_API_KEY'] = os.getenv("OPENAIAPIKEY")


def load_data():
    """
    Loads an existing document store from 'document_store.pkl' if present;
    otherwise creates the store from 'recommendations.csv', embeds the documents, and saves it.

    Returns:
        In-memory document store.
    """
    if os.path.exists('document_store.pkl'):
        with open('document_store.pkl', 'rb') as f:
            document_store = pickle.load(f)
    else:
        import pandas as pd
        from haystack.components.writers import DocumentWriter

        document_store = InMemoryDocumentStore()

        data = pd.read_csv('recommendations.csv')
        documents = []
        for _, row in data[['Recommendation', 'Source']].iterrows():
            documents.append(
                Document(content=row['Recommendation'], meta={'source': row['Source']}))

        indexing_pipeline = Pipeline()
        indexing_pipeline.add_component("embedder", CohereDocumentEmbedder(
            model="embed-multilingual-v3.0", input_type="search_document"))
        indexing_pipeline.add_component(
            "writer", DocumentWriter(document_store=document_store))
        indexing_pipeline.connect("embedder", "writer")
        indexing_pipeline.run({"embedder": {"documents": documents}})

        with open('document_store.pkl', 'wb') as f:
            pickle.dump(document_store, f)

    return document_store


def load_pipeline(text_embedder, retriever, prompt_builder, llm):
    """
    Creates and connects components to form a complete pipeline.

    Args:
        text_embedder (Embedding): Text embedder.
        retriever (Retriever): Retriever.
        prompt_builder (PromptBuilder): Prompt builder.
        llm (LanguageModel): Language model.

    Returns:
        Connected pipeline.
    """
    pipeline = Pipeline()
    pipeline.add_component("text_embedder", text_embedder)
    pipeline.add_component("retriever", retriever)
    pipeline.add_component("prompt_builder", prompt_builder)
    pipeline.add_component("llm", llm)

    pipeline.connect("text_embedder.embedding", "retriever.query_embedding")
    pipeline.connect("retriever.documents", "prompt_builder.documents")
    pipeline.connect("prompt_builder.prompt", "llm.messages")

    return pipeline


def run_streamlit(pipeline, messages):
    """
    Displays the Streamlit application using a provided pipeline and messages.

    Generates a summary of geriatric trauma care recommendations based on user input.
    Warns against entering personal information or patient data.

    Args:
        pipeline (Pipeline): The pipeline to use.
        messages (List[ChatMessage]): Initial messages.

    Returns:
        None.
    """
    st.title('ChatCPG-demo')
    st.write(
        """
        This application takes a user query (e.g., a clinical question) and generates a summary of evidence-based recommendations on geriatric trauma care relevant to this query. \n
        DO NOT ENTER PERSONAL INFORMATION OR PATIENT DATA! \n
        <u>Publication:</u> Kocar et al. 2024, submitted <br>
        <u>GitHub:</u> https://github.com/IfGF-UUlm/CPG-summarization <br>
        <u>Contact:</u> thomas.kocar@uni-ulm.de
        """,
        unsafe_allow_html=True
    )
    query = st.text_input(r'$\textsf{\Large Enter your query here:}$', '')
    if query:
        with st.spinner('Generating summary...'):
            res = pipeline.run(
                data={"text": query, "prompt_source": messages, "query": query})
        st.write(res['llm']['replies'][0].content)
    return None


if __name__ == "__main__":

    # Load Models and Data
    text_embedder = CohereTextEmbedder(
        model="embed-multilingual-v3.0", input_type="search_document")
    retriever = InMemoryEmbeddingRetriever(document_store=load_data())
    prompt_builder = DynamicChatPromptBuilder(
        runtime_variables=["query", "documents"])
    llm = OpenAIChatGenerator(model="gpt-4-turbo-2024-04-09")

    # Load Pipeline
    pipeline = load_pipeline(text_embedder, retriever, prompt_builder, llm)

    # Load Prompt
    messages = [
        ChatMessage.from_system(
            "Act as an experienced geriatrician who works as a consultant for surgeons in geriatric trauma care."
        ),
        ChatMessage.from_user(
            """
            Clinical practice guideline recommendations:
            {% for document in documents %}
            {{ document.content }}
            Source: {{ document.meta['source'] }}
            {% endfor %}

            Summarize the clinical practice guideline recommendations in no more than 150 words in the context of the query: “{{query}}”
            Pay attention to whether the query relates to the preoperative, intraoperative, or postoperative phase or is generally applicable.
            Try to structure the summary as an ordered list, ranking the interventions according to relevance and complexity, starting with the most relevant and least complex ones.
            Try to structure the summary into pharmacological and non-pharmacological interventions, separating them as two ordered lists.
            If the query (“{{query}}”) cannot be answered with the recommendations of the clinical practice guidelines, do not reveal any information about the guidelines or their recommendations, but explain in 1 sentence that you are unable to provide a summary.
            If you can answer the query, return the sources word by word in an unordered list under the heading "References". The references should be the last part of your response.
            """
        )
    ]

    # Run Streamlit
    run_streamlit(pipeline, messages)
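
For reference, a minimal headless sketch of querying the same pipeline without the Streamlit UI, assuming app.py is importable from the working directory and COHEREAPIKEY / OPENAIAPIKEY are set as the Space expects. The example query and the abbreviated prompt below are illustrative placeholders, not part of this commit; the full prompt template is the one defined in app.py.

# headless_query.py - hypothetical sketch, not part of the Space.
# Reuses load_data() and load_pipeline() from app.py; assumes the
# COHEREAPIKEY and OPENAIAPIKEY environment variables are set.
from haystack_integrations.components.embedders.cohere import CohereTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.builders import DynamicChatPromptBuilder
from haystack.dataclasses import ChatMessage

from app import load_data, load_pipeline

# Wire the same components as app.py's __main__ block.
pipeline = load_pipeline(
    CohereTextEmbedder(model="embed-multilingual-v3.0", input_type="search_document"),
    InMemoryEmbeddingRetriever(document_store=load_data()),
    DynamicChatPromptBuilder(runtime_variables=["query", "documents"]),
    OpenAIChatGenerator(model="gpt-4-turbo-2024-04-09"),
)

# Abbreviated prompt for illustration only; app.py defines the full template.
messages = [
    ChatMessage.from_system("Act as an experienced geriatrician."),
    ChatMessage.from_user(
        "Clinical practice guideline recommendations:\n"
        "{% for document in documents %}{{ document.content }}\n{% endfor %}\n"
        "Summarize these recommendations in the context of the query: {{query}}"
    ),
]

query = "How should delirium be prevented after hip fracture surgery?"  # example query
result = pipeline.run(data={"text": query, "prompt_source": messages, "query": query})
print(result["llm"]["replies"][0].content)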
document_store.pkl
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:cdfbf47f79e153f6868a33171c6e2171df63a50158d82a092ce7d66533f157d8
size 7863559
recommendations.csv
ADDED
The diff for this file is too large to render.
requirements.txt
ADDED
@@ -0,0 +1,4 @@
pandas
haystack-ai
cohere-haystack
streamlit
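
Assuming the dependencies above are installed (for example via pip install -r requirements.txt, a step not shown in this commit), a quick sketch to confirm that the imports app.py relies on resolve:

# Smoke test: confirm the packages listed above are importable.
import pandas       # used to parse recommendations.csv
import streamlit    # UI framework
from haystack import Pipeline                                    # haystack-ai
from haystack_integrations.components.embedders.cohere import (  # cohere-haystack
    CohereDocumentEmbedder,
    CohereTextEmbedder,
)

print("All dependencies for app.py are importable.")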