Spaces:
Runtime error
Runtime error
Upload salessupport_chatbot_test2_afterembbeding_partb.py
Browse files
salessupport_chatbot_test2_afterembbeding_partb.py
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# -*- coding: utf-8 -*-
"""SalesSupport-Chatbot-test2-AfterEmbbeding-PartB.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1MkbsRTVFogRG6BqsQFzcV_VZX7e7WvZ6

Sales supporting Chatbot using

* Haystack: document embedding & RAG pipeline
* OpenAI API: message generator, text embedder
* Gradio : chat bot UI and temporary hosting

Retrieval source:

1) BBR publicly accsessible English brochures only
2) BBR website

20240514
Xiaomeng

#0. Setting Environment
"""

# Commented out IPython magic to ensure Python compatibility.
# %%bash
#
# pip install haystack-ai
# pip install "sentence-transformers>=2.2.0"
# pip install "datasets>=2.6.1"
# pip install markdown-it-py mdit_plain pypdf
# pip install accelerate
# pip install gradio

import os
from getpass import getpass
from haystack import Pipeline, PredefinedPipeline
import urllib.request
from haystack.components.generators import OpenAIGenerator

# OpenAI API key.
# SECURITY FIX: the key was previously hard-coded here, which both leaks the
# secret to anyone who can read this file and makes rotation impossible.
# (The previously committed key must be considered compromised — revoke it.)
# Read the key from the environment, falling back to an interactive prompt.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")
"""# 1. Document store - dataset"""
|
48 |
+
|
49 |
+
embedded_document_path="document_store.pkl"
|
50 |
+
|
51 |
+
# Load the document store and retriever from .pkl files
|
52 |
+
import pickle
|
53 |
+
with open(embedded_document_path, "rb") as f:
|
54 |
+
document_store = pickle.load(f)
|
55 |
+
|
56 |
+
"""#2. Define the RAG pipeline based on given documents, and build it as a tool function"""
|
57 |
+
|
58 |
+
# define the RAG pipeline
|
59 |
+
from haystack.components.embedders import SentenceTransformersTextEmbedder
|
60 |
+
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
|
61 |
+
from haystack.components.builders import PromptBuilder
|
62 |
+
from haystack.components.generators import OpenAIGenerator
|
63 |
+
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder
|
64 |
+
|
65 |
+
#define the template
|
66 |
+
template = """
|
67 |
+
Answer the questions based on the given context.
|
68 |
+
|
69 |
+
Context:
|
70 |
+
{% for document in documents %}
|
71 |
+
{{ document.content }}
|
72 |
+
{% endfor %}
|
73 |
+
Question: {{ question }}
|
74 |
+
Answer:
|
75 |
+
"""
|
76 |
+
|
77 |
+
# define each component of the pipeline and connect them
|
78 |
+
rag_pipe = Pipeline()
|
79 |
+
#rag_pipe.add_component("embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"))
|
80 |
+
rag_pipe.add_component("embedder", OpenAITextEmbedder())
|
81 |
+
|
82 |
+
|
83 |
+
rag_pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
|
84 |
+
rag_pipe.add_component("prompt_builder", PromptBuilder(template=template))
|
85 |
+
rag_pipe.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo"))
|
86 |
+
|
87 |
+
rag_pipe.connect("embedder.embedding", "retriever.query_embedding")
|
88 |
+
rag_pipe.connect("retriever", "prompt_builder.documents")
|
89 |
+
rag_pipe.connect("prompt_builder", "llm")
|
90 |
+
|
91 |
+
# define the funcitoin
|
92 |
+
def rag_pipeline_func(query: str):
|
93 |
+
result = rag_pipe.run({"embedder": {"text": query}, "prompt_builder": {"question": query}})
|
94 |
+
return {"reply": result["llm"]["replies"][0]}
|
95 |
+
|
96 |
+
string_text = "What is the maximum size of the CMG system?"
|
97 |
+
rag_pipeline_func(string_text)
|
98 |
+
|
99 |
+
"""## 2.2 Websearch pipeline"""
|
100 |
+
|
101 |
+
pipeline_webserch = Pipeline.from_template(PredefinedPipeline.CHAT_WITH_WEBSITE)
|
102 |
+
result = pipeline_webserch.run({
|
103 |
+
"fetcher": {"urls": ["https://www.bbrnetwork.com"]},
|
104 |
+
"prompt": {"query": "How many network members are there in the BBR network?"}}
|
105 |
+
)
|
106 |
+
print(result["llm"]["replies"][0])
|
107 |
+
|
108 |
+
# define the funcitoin
|
109 |
+
def pipeline_websearch_func(query: str):
|
110 |
+
result = pipeline_webserch.run({
|
111 |
+
"fetcher": {"urls": ["https://www.bbrnetwork.com"]},
|
112 |
+
"prompt": {"query": query}}
|
113 |
+
)
|
114 |
+
return {"reply": result["llm"]["replies"][0]}
|
115 |
+
|
116 |
+
string_text = "How many network members are there in the BBR network?"
|
117 |
+
pipeline_websearch_func(string_text)
|
118 |
+
|
119 |
+
"""# 3. Create tool list following OpenAI schema"""
|
120 |
+
|
121 |
+
#Create tool list following OpenAI schema
|
122 |
+
# following OpenAPI tool schema for the function
|
123 |
+
tools = [
|
124 |
+
{
|
125 |
+
"type": "function",
|
126 |
+
"function": {
|
127 |
+
"name": "rag_pipeline_func",
|
128 |
+
#"description": "Get information about products for geotecnical applicaions, especially CMG system",
|
129 |
+
"description": "Get information about BBR products",
|
130 |
+
"parameters": {
|
131 |
+
"type": "object",
|
132 |
+
"properties": {
|
133 |
+
"query": {
|
134 |
+
"type": "string",
|
135 |
+
"description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
|
136 |
+
}
|
137 |
+
},
|
138 |
+
"required": ["query"],
|
139 |
+
},
|
140 |
+
},
|
141 |
+
},
|
142 |
+
{
|
143 |
+
"type": "function",
|
144 |
+
"function": {
|
145 |
+
"name": "pipeline_websearch_func",
|
146 |
+
"description": "Get the information about BBR and BBR networks general information",
|
147 |
+
"parameters": {
|
148 |
+
"type": "object",
|
149 |
+
"properties": {
|
150 |
+
"query": {
|
151 |
+
"type": "string",
|
152 |
+
"description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
|
153 |
+
}
|
154 |
+
},
|
155 |
+
"required": ["query"],
|
156 |
+
},
|
157 |
+
},
|
158 |
+
},
|
159 |
+
]
|
160 |
+
|
161 |
+
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk

import json

# Single-turn demo: send one user question and let the model decide which
# tool (if any) to call.
messages = [
    ChatMessage.from_user("What is the special features of CMG system?"),
]

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo", streaming_callback=print_streaming_chunk)
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

## Parse function calling information
# The reply content is a JSON-encoded list of tool calls; take the first one.
function_call = json.loads(response["replies"][0].content)[0]
function_name = function_call["function"]["name"]
function_args = json.loads(function_call["function"]["arguments"])
print("Function Name:", function_name)
print("Function Arguments:", function_args)


## Dispatch table mapping tool names (as advertised in `tools`) to the
## actual Python callables, then invoke the requested one.
available_functions = {"rag_pipeline_func": rag_pipeline_func, "pipeline_websearch_func": pipeline_websearch_func}
function_to_call = available_functions[function_name]
function_response = function_to_call(**function_args)
print("Function Response:", function_response)
192 |
+
|
193 |
+
"""# 4. build chatbot by Gradio"""
|
194 |
+
|
195 |
+
chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo")
|
196 |
+
response = None
|
197 |
+
messages = [
|
198 |
+
ChatMessage.from_system(
|
199 |
+
"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
|
200 |
+
)
|
201 |
+
]
|
202 |
+
|
203 |
+
import gradio as gr
|
204 |
+
|
205 |
+
|
206 |
+
def chatbot_with_fc(message, history):
|
207 |
+
messages.append(ChatMessage.from_user(message))
|
208 |
+
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})
|
209 |
+
|
210 |
+
while True:
|
211 |
+
# if OpenAI response is a tool call
|
212 |
+
if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
|
213 |
+
function_calls = json.loads(response["replies"][0].content)
|
214 |
+
print(response["replies"][0])
|
215 |
+
for function_call in function_calls:
|
216 |
+
## Parse function calling information
|
217 |
+
function_name = function_call["function"]["name"]
|
218 |
+
function_args = json.loads(function_call["function"]["arguments"])
|
219 |
+
|
220 |
+
## Find the correspoding function and call it with the given arguments
|
221 |
+
function_to_call = available_functions[function_name]
|
222 |
+
function_response = function_to_call(**function_args)
|
223 |
+
|
224 |
+
## Append function response to the messages list using `ChatMessage.from_function`
|
225 |
+
messages.append(ChatMessage.from_function(content=json.dumps(function_response), name=function_name))
|
226 |
+
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})
|
227 |
+
|
228 |
+
# Regular Conversation
|
229 |
+
else:
|
230 |
+
messages.append(response["replies"][0])
|
231 |
+
break
|
232 |
+
return response["replies"][0].content
|
233 |
+
|
234 |
+
|
235 |
+
demo = gr.ChatInterface(
|
236 |
+
fn=chatbot_with_fc,
|
237 |
+
examples=[
|
238 |
+
"What are BBR's main products?",
|
239 |
+
"What is BBR Network?",
|
240 |
+
"Is there a BBR member in New Zealand?",
|
241 |
+
"What's the difference between BBR CMI A5 type and A1 type anchor head?",
|
242 |
+
],
|
243 |
+
title="BBR Digital Assistante Demo - ",
|
244 |
+
)
|
245 |
+
|
246 |
+
demo.launch()
|