tsadoq committed on
Commit
40072a5
1 Parent(s): 181fff1

Upload 10 files

Browse files

first real commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ service/assets/bot.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/Dockerfile
2
+
3
+ FROM python:3.10-slim-bookworm
4
+
5
+ WORKDIR /app
6
+
7
+ COPY ./requirements.txt /app/requirements.txt
8
+
9
+ RUN apt-get update && apt-get install -y \
10
+ build-essential \
11
+ curl \
12
+ software-properties-common \
13
+ git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ RUN pip3 install --no-cache-dir -r /app/requirements.txt
17
+
18
+ # User
19
+ RUN useradd -m -u 1000 user
20
+ USER user
21
+ ENV HOME /home/user
22
+ ENV PATH $HOME/.local/bin:$PATH
23
+
24
+ WORKDIR $HOME
25
+ RUN mkdir app
26
+ WORKDIR $HOME/app
27
+ COPY . $HOME/app
28
+
29
+ EXPOSE 7860
30
+ CMD streamlit run app.py \
31
+ --server.headless true \
32
+ --server.enableCORS false \
33
+ --server.enableXsrfProtection false \
34
+ --server.fileWatcherType none
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import streamlit as st
4
+
5
+ from service.haystack_documentation_pipeline import return_haystack_documentation_agent
6
+
7
+ st.title('Haystack Documentation Chatbot')
8
+
9
+ if 'image_bytes' not in st.session_state:
10
+ st.session_state.image_bytes = open('service/assets/bot.png', 'rb').read()
11
+
12
+ if 'messages' not in st.session_state:
13
+ st.session_state.messages = [{'role': 'assistant', 'content': 'Hello there!'}]
14
+
15
+ with st.sidebar:
16
+ st.image('service/assets/bot.png')
17
+ st.markdown(
18
+ """
19
+ # Haystack Documentation Chatbot
20
+
21
+ This chatbot can answer questions about the Haystack documentation.
22
+
23
+ ## How to use
24
+
25
+ 1. Type your question in the chat input box.
26
+ 2. Press enter.
27
+ 3. Wait for the chatbot to respond (since it works as an agent responses may take a while).
28
+ 4. enjoy!
29
+ """
30
+ )
31
+
32
+ for message in st.session_state.messages:
33
+ with st.chat_message(
34
+ message['role'],
35
+ avatar=st.session_state.image_bytes if message['role'] == 'assistant' else None,
36
+ ):
37
+ st.markdown(message['content'])
38
+
39
+ if 'agent' not in st.session_state:
40
+ st.session_state.agent = return_haystack_documentation_agent(openai_key=os.environ['OPENAI_KEY'])
41
+
42
+ if prompt := st.chat_input('What is up?"'):
43
+ st.chat_message('user').markdown(prompt)
44
+ st.session_state.messages.append({'role': 'user', 'content': prompt})
45
+ chat_message = st.chat_message(name='assistant', avatar=st.session_state.image_bytes)
46
+ with chat_message:
47
+ with st.spinner('Thinking...'):
48
+ response = st.session_state.agent.run(query=prompt)
49
+ answer = response['answers'][0].answer
50
+ chat_message.markdown(answer)
51
+
52
+ st.session_state.messages.append({'role': 'assistant', 'content': answer})
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ farm-haystack[inference,preprocessing]==1.23.0
2
+ streamlit==1.29.0
service/__init__.py ADDED
File without changes
service/assets/bot.png ADDED

Git LFS Details

  • SHA256: c53f4790d58e7b3780b794ace6dda8607f5f3b3f7c7afdf5f9eeed3bc3e6cccb
  • Pointer size: 132 Bytes
  • Size of remote file: 2.73 MB
service/haystack_documentation_pipeline.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Callable
2
+
3
+ from haystack import Pipeline
4
+ from haystack.agents.base import ToolsManager
5
+ from haystack.nodes import PromptNode, SentenceTransformersRanker
6
+ from haystack.agents import Agent, Tool
7
+
8
+ from service.utils.memory_node import return_memory_node
9
+ from service.utils.prompts import agent_prompt
10
+ from service.utils.retriever import return_retriever
11
+
12
+
13
+ def resolver_function(
14
+ query: str,
15
+ agent: Agent,
16
+ agent_step: Callable,
17
+ ) -> Dict[str, Any]:
18
+ """
19
+ This function is used to resolve the parameters of the prompt template.
20
+ :param query: the query
21
+ :param agent: the agent
22
+ :param agent_step: the agent step
23
+ :return: a dictionary of parameters
24
+ """
25
+ return {
26
+ 'query': query,
27
+ 'tool_names_with_descriptions': agent.tm.get_tool_names_with_descriptions(),
28
+ 'transcript': agent_step.transcript,
29
+ 'memory': agent.memory.load(),
30
+ }
31
+
32
+
33
+ def define_haystack_doc_searcher_tool() -> Tool:
34
+ """
35
+ Defines the tool for searching the Haystack documentation.
36
+ :return: the Haystack documentation searcher tool
37
+ """
38
+ ranker = SentenceTransformersRanker(model_name_or_path='cross-encoder/ms-marco-MiniLM-L-12-v2', top_k=5)
39
+ retriever = return_retriever()
40
+ haystack_docs = Pipeline()
41
+ haystack_docs.add_node(component=retriever, name='retriever', inputs=['Query'])
42
+ haystack_docs.add_node(component=ranker, name='ranker', inputs=['retriever'])
43
+
44
+ return Tool(
45
+ name='haystack_documentation_search_tool',
46
+ pipeline_or_node=haystack_docs,
47
+ description='Searches the Haystack documentation for information.',
48
+ output_variable='documents',
49
+ )
50
+
51
+
52
+ def return_haystack_documentation_agent(openai_key: str) -> Agent:
53
+ """
54
+ Returns an agent that can answer questions about the Haystack documentation.
55
+ :param openai_key: the OpenAI key
56
+ :return: the agent
57
+ """
58
+
59
+ agent_prompt_node = PromptNode(
60
+ 'gpt-3.5-turbo-16k',
61
+ api_key=openai_key,
62
+ stop_words=['Observation:'],
63
+ model_kwargs={'temperature': 0.05},
64
+ max_length=10000,
65
+ )
66
+
67
+ agent = Agent(
68
+ agent_prompt_node,
69
+ prompt_template=agent_prompt,
70
+ prompt_parameters_resolver=resolver_function,
71
+ memory=return_memory_node(openai_key),
72
+ tools_manager=ToolsManager([define_haystack_doc_searcher_tool()]),
73
+ final_answer_pattern=r"(?s)Final Answer\s*:\s*(.*)",
74
+ )
75
+
76
+ return agent
service/utils/__init__.py ADDED
File without changes
service/utils/memory_node.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack.agents.memory import ConversationSummaryMemory
2
+ from haystack.nodes import PromptNode
3
+
4
+
5
+ def return_memory_node(openai_key: str) -> ConversationSummaryMemory:
6
+ """
7
+ Returns the memory node.
8
+ :param openai_key: the OpenAI key
9
+ :return: the memory node
10
+ """
11
+ memory_prompt_node = PromptNode('gpt-3.5-turbo-16k', api_key=openai_key, max_length=1024)
12
+ return ConversationSummaryMemory(memory_prompt_node)
service/utils/prompts.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ agent_prompt = """
3
+ In the following conversation, a human user interacts with an AI Agent. This agent is specialises in answering questions about the Haystack documentation.
4
+ The human user poses questions, and the AI Agent goes through several steps to provide well-informed answers. The good outcome of this conversation is of paramount importance for the wellbeing of the human.
5
+ The AI Agent must use the available tools to find the up-to-date information. The final answer to the question should be truthfully based solely on the output of the tools.
6
+ The AI Agent should always be focused on answering the human user's question avoiding any irrelevant information. The agent must provide as many code examples as possible.
7
+ The AI Agent should be sure that the information and examples provided address the user's question, if it is not
8
+ the case, the agent should use it's tools to find the correct information.
9
+ The AI Agent should ignore its knowledge when answering the questions.
10
+ The AI Agent has access to these tools:
11
+ {tool_names_with_descriptions}
12
+
13
+ The following is the previous conversation between a human and The AI Agent:
14
+ {memory}
15
+
16
+ AI Agent responses must start with one of the following:
17
+
18
+ Thought: [the AI Agent's reasoning process]
19
+ Tool: [tool names] (on a new line) Tool Input: [input as a question for the selected tool WITHOUT quotation marks and on a new line] (These must always be provided together and on separate lines.)
20
+ Observation: [tool's result]
21
+ Final Answer: [final answer to the human user's question]
22
+
23
+ When selecting a tool, the AI Agent must provide both the "Tool:" and "Tool Input:" pair in the same response, but on separate lines.
24
+
25
+ The AI Agent should not ask the human user for additional information, clarification, or context.
26
+ If the AI Agent cannot find a specific answer after exhausting available tools and approaches, it answers with Final Answer: inconclusive
27
+
28
+ Question: {query}
29
+ Thought:
30
+ {transcript}
31
+ """
service/utils/retriever.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from haystack.nodes import PreProcessor, WebRetriever
4
+
5
+
6
+ def return_retriever():
7
+ """
8
+ Returns the retriever.
9
+ :return: the retriever
10
+ """
11
+ preprocessor = PreProcessor(
12
+ split_by='word',
13
+ split_length=4096,
14
+ split_respect_sentence_boundary=True,
15
+ split_overlap=40,
16
+ )
17
+
18
+ return WebRetriever(
19
+ api_key=os.environ['SERPERDEV_API_KEY'],
20
+ allowed_domains=['docs.haystack.deepset.ai'],
21
+ mode='preprocessed_documents',
22
+ preprocessor=preprocessor,
23
+ top_search_results=40,
24
+ top_k=20,
25
+ )