tsadoq committed on
Commit
40072a5
1 Parent(s): 181fff1

Upload 10 files

Browse files

first real commit

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ service/assets/bot.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/Dockerfile
2
+
3
+ FROM python:3.10-slim-bookworm
4
+
5
+ WORKDIR /app
6
+
7
+ COPY ./requirements.txt /app/requirements.txt
8
+
9
+ RUN apt-get update && apt-get install -y \
10
+ build-essential \
11
+ curl \
12
+ software-properties-common \
13
+ git \
14
+ && rm -rf /var/lib/apt/lists/*
15
+
16
+ RUN pip3 install --no-cache-dir -r /app/requirements.txt
17
+
18
+ # User
19
+ RUN useradd -m -u 1000 user
20
+ USER user
21
+ ENV HOME /home/user
22
+ ENV PATH $HOME/.local/bin:$PATH
23
+
24
+ WORKDIR $HOME
25
+ RUN mkdir app
26
+ WORKDIR $HOME/app
27
+ COPY . $HOME/app
28
+
29
+ EXPOSE 7860
30
+ CMD streamlit run app.py \
31
+ --server.headless true \
32
+ --server.enableCORS false \
33
+ --server.enableXsrfProtection false \
34
+ --server.fileWatcherType none
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ import streamlit as st
4
+
5
+ from service.haystack_documentation_pipeline import return_haystack_documentation_agent
6
+
7
+ st.title('Haystack Documentation Chatbot')
8
+
9
+ if 'image_bytes' not in st.session_state:
10
+ st.session_state.image_bytes = open('service/assets/bot.png', 'rb').read()
11
+
12
+ if 'messages' not in st.session_state:
13
+ st.session_state.messages = [{'role': 'assistant', 'content': 'Hello there!'}]
14
+
15
+ with st.sidebar:
16
+ st.image('service/assets/bot.png')
17
+ st.markdown(
18
+ """
19
+ # Haystack Documentation Chatbot
20
+
21
+ This chatbot can answer questions about the Haystack documentation.
22
+
23
+ ## How to use
24
+
25
+ 1. Type your question in the chat input box.
26
+ 2. Press enter.
27
+ 3. Wait for the chatbot to respond (since it works as an agent responses may take a while).
28
+ 4. enjoy!
29
+ """
30
+ )
31
+
32
+ for message in st.session_state.messages:
33
+ with st.chat_message(
34
+ message['role'],
35
+ avatar=st.session_state.image_bytes if message['role'] == 'assistant' else None,
36
+ ):
37
+ st.markdown(message['content'])
38
+
39
+ if 'agent' not in st.session_state:
40
+ st.session_state.agent = return_haystack_documentation_agent(openai_key=os.environ['OPENAI_KEY'])
41
+
42
+ if prompt := st.chat_input('What is up?"'):
43
+ st.chat_message('user').markdown(prompt)
44
+ st.session_state.messages.append({'role': 'user', 'content': prompt})
45
+ chat_message = st.chat_message(name='assistant', avatar=st.session_state.image_bytes)
46
+ with chat_message:
47
+ with st.spinner('Thinking...'):
48
+ response = st.session_state.agent.run(query=prompt)
49
+ answer = response['answers'][0].answer
50
+ chat_message.markdown(answer)
51
+
52
+ st.session_state.messages.append({'role': 'assistant', 'content': answer})
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ farm-haystack[inference,preprocessing]==1.23.0
2
+ streamlit==1.29.0
service/__init__.py ADDED
File without changes
service/assets/bot.png ADDED

Git LFS Details

  • SHA256: c53f4790d58e7b3780b794ace6dda8607f5f3b3f7c7afdf5f9eeed3bc3e6cccb
  • Pointer size: 132 Bytes
  • Size of remote file: 2.73 MB
service/haystack_documentation_pipeline.py ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Dict, Any, Callable
2
+
3
+ from haystack import Pipeline
4
+ from haystack.agents.base import ToolsManager
5
+ from haystack.nodes import PromptNode, SentenceTransformersRanker
6
+ from haystack.agents import Agent, Tool
7
+
8
+ from service.utils.memory_node import return_memory_node
9
+ from service.utils.prompts import agent_prompt
10
+ from service.utils.retriever import return_retriever
11
+
12
+
13
+ def resolver_function(
14
+ query: str,
15
+ agent: Agent,
16
+ agent_step: Callable,
17
+ ) -> Dict[str, Any]:
18
+ """
19
+ This function is used to resolve the parameters of the prompt template.
20
+ :param query: the query
21
+ :param agent: the agent
22
+ :param agent_step: the agent step
23
+ :return: a dictionary of parameters
24
+ """
25
+ return {
26
+ 'query': query,
27
+ 'tool_names_with_descriptions': agent.tm.get_tool_names_with_descriptions(),
28
+ 'transcript': agent_step.transcript,
29
+ 'memory': agent.memory.load(),
30
+ }
31
+
32
+
33
+ def define_haystack_doc_searcher_tool() -> Tool:
34
+ """
35
+ Defines the tool for searching the Haystack documentation.
36
+ :return: the Haystack documentation searcher tool
37
+ """
38
+ ranker = SentenceTransformersRanker(model_name_or_path='cross-encoder/ms-marco-MiniLM-L-12-v2', top_k=5)
39
+ retriever = return_retriever()
40
+ haystack_docs = Pipeline()
41
+ haystack_docs.add_node(component=retriever, name='retriever', inputs=['Query'])
42
+ haystack_docs.add_node(component=ranker, name='ranker', inputs=['retriever'])
43
+
44
+ return Tool(
45
+ name='haystack_documentation_search_tool',
46
+ pipeline_or_node=haystack_docs,
47
+ description='Searches the Haystack documentation for information.',
48
+ output_variable='documents',
49
+ )
50
+
51
+
52
+ def return_haystack_documentation_agent(openai_key: str) -> Agent:
53
+ """
54
+ Returns an agent that can answer questions about the Haystack documentation.
55
+ :param openai_key: the OpenAI key
56
+ :return: the agent
57
+ """
58
+
59
+ agent_prompt_node = PromptNode(
60
+ 'gpt-3.5-turbo-16k',
61
+ api_key=openai_key,
62
+ stop_words=['Observation:'],
63
+ model_kwargs={'temperature': 0.05},
64
+ max_length=10000,
65
+ )
66
+
67
+ agent = Agent(
68
+ agent_prompt_node,
69
+ prompt_template=agent_prompt,
70
+ prompt_parameters_resolver=resolver_function,
71
+ memory=return_memory_node(openai_key),
72
+ tools_manager=ToolsManager([define_haystack_doc_searcher_tool()]),
73
+ final_answer_pattern=r"(?s)Final Answer\s*:\s*(.*)",
74
+ )
75
+
76
+ return agent
service/utils/__init__.py ADDED
File without changes
service/utils/memory_node.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from haystack.agents.memory import ConversationSummaryMemory
2
+ from haystack.nodes import PromptNode
3
+
4
+
5
+ def return_memory_node(openai_key: str) -> ConversationSummaryMemory:
6
+ """
7
+ Returns the memory node.
8
+ :param openai_key: the OpenAI key
9
+ :return: the memory node
10
+ """
11
+ memory_prompt_node = PromptNode('gpt-3.5-turbo-16k', api_key=openai_key, max_length=1024)
12
+ return ConversationSummaryMemory(memory_prompt_node)
service/utils/prompts.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ agent_prompt = """
3
+ In the following conversation, a human user interacts with an AI Agent. This agent is specialises in answering questions about the Haystack documentation.
4
+ The human user poses questions, and the AI Agent goes through several steps to provide well-informed answers. The good outcome of this conversation is of paramount importance for the wellbeing of the human.
5
+ The AI Agent must use the available tools to find the up-to-date information. The final answer to the question should be truthfully based solely on the output of the tools.
6
+ The AI Agent should always be focused on answering the human user's question avoiding any irrelevant information. The agent must provide as many code examples as possible.
7
+ The AI Agent should be sure that the information and examples provided address the user's question, if it is not
8
+ the case, the agent should use it's tools to find the correct information.
9
+ The AI Agent should ignore its knowledge when answering the questions.
10
+ The AI Agent has access to these tools:
11
+ {tool_names_with_descriptions}
12
+
13
+ The following is the previous conversation between a human and The AI Agent:
14
+ {memory}
15
+
16
+ AI Agent responses must start with one of the following:
17
+
18
+ Thought: [the AI Agent's reasoning process]
19
+ Tool: [tool names] (on a new line) Tool Input: [input as a question for the selected tool WITHOUT quotation marks and on a new line] (These must always be provided together and on separate lines.)
20
+ Observation: [tool's result]
21
+ Final Answer: [final answer to the human user's question]
22
+
23
+ When selecting a tool, the AI Agent must provide both the "Tool:" and "Tool Input:" pair in the same response, but on separate lines.
24
+
25
+ The AI Agent should not ask the human user for additional information, clarification, or context.
26
+ If the AI Agent cannot find a specific answer after exhausting available tools and approaches, it answers with Final Answer: inconclusive
27
+
28
+ Question: {query}
29
+ Thought:
30
+ {transcript}
31
+ """
service/utils/retriever.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+
3
+ from haystack.nodes import PreProcessor, WebRetriever
4
+
5
+
6
+ def return_retriever():
7
+ """
8
+ Returns the retriever.
9
+ :return: the retriever
10
+ """
11
+ preprocessor = PreProcessor(
12
+ split_by='word',
13
+ split_length=4096,
14
+ split_respect_sentence_boundary=True,
15
+ split_overlap=40,
16
+ )
17
+
18
+ return WebRetriever(
19
+ api_key=os.environ['SERPERDEV_API_KEY'],
20
+ allowed_domains=['docs.haystack.deepset.ai'],
21
+ mode='preprocessed_documents',
22
+ preprocessor=preprocessor,
23
+ top_search_results=40,
24
+ top_k=20,
25
+ )