wxmxavier commited on
Commit
4d7aa61
1 Parent(s): 84a0b32

Upload salessupport_chatbot_test2_afterembbeding_partb.py

Browse files
salessupport_chatbot_test2_afterembbeding_partb.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# -*- coding: utf-8 -*-
"""SalesSupport-Chatbot-test2-AfterEmbbeding-PartB.ipynb

Automatically generated by Colab.

Original file is located at
https://colab.research.google.com/drive/1MkbsRTVFogRG6BqsQFzcV_VZX7e7WvZ6

Sales supporting Chatbot using

* Haystack: document embedding & RAG pipeline
* OpenAI API: message generator, text embedder
* Gradio : chat bot UI and temporary hosting


Retrieval source:

1) BBR publicly accessible English brochures only
2) BBR website

20240514
Xiaomeng

#0. Setting Environment
"""

# Commented out IPython magic to ensure Python compatibility.
# %%bash
#
# pip install haystack-ai
# pip install "sentence-transformers>=2.2.0"
# pip install "datasets>=2.6.1"
# pip install markdown-it-py mdit_plain pypdf
# pip install accelerate
# pip install gradio

import os
from getpass import getpass
from haystack import Pipeline, PredefinedPipeline
import urllib.request
from haystack.components.generators import OpenAIGenerator

# OpenAI API key.
# SECURITY FIX: an actual API key was previously hard-coded here and
# committed to a public repository — that key is compromised and must be
# revoked. Read the key from the environment instead, falling back to an
# interactive prompt so the notebook still works in Colab.
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")
"""# 1. Document store - dataset"""

import pickle

# Path to the pre-embedded Haystack document store produced in Part A.
embedded_document_path = "document_store.pkl"

# Load the document store from the .pkl file.
# NOTE(review): pickle.load executes arbitrary code embedded in the file —
# only load document_store.pkl from a trusted source.
with open(embedded_document_path, "rb") as f:
    document_store = pickle.load(f)
"""#2. Define the RAG pipeline based on given documents, and build it as a tool function"""

# define the RAG pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder

# Prompt template (Jinja2, rendered by PromptBuilder): the retrieved
# documents are injected as context, followed by the user's question.
template = """
Answer the questions based on the given context.

Context:
{% for document in documents %}
{{ document.content }}
{% endfor %}
Question: {{ question }}
Answer:
"""

# Define each component of the pipeline and connect them.
rag_pipe = Pipeline()
#rag_pipe.add_component("embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"))
rag_pipe.add_component("embedder", OpenAITextEmbedder())

rag_pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
rag_pipe.add_component("prompt_builder", PromptBuilder(template=template))
rag_pipe.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo"))

# Wiring: query embedding -> retriever -> prompt -> LLM.
rag_pipe.connect("embedder.embedding", "retriever.query_embedding")
rag_pipe.connect("retriever", "prompt_builder.documents")
rag_pipe.connect("prompt_builder", "llm")


def rag_pipeline_func(query: str):
    """Run the RAG pipeline for *query* and return the first LLM reply.

    Returns a dict ``{"reply": str}`` so the result can be serialized and
    passed back to the chat model as a tool response.
    """
    result = rag_pipe.run({"embedder": {"text": query}, "prompt_builder": {"question": query}})
    return {"reply": result["llm"]["replies"][0]}


# Smoke test of the RAG tool (runs at import time).
string_text = "What is the maximum size of the CMG system?"
rag_pipeline_func(string_text)
"""## 2.2 Websearch pipeline"""

# Prebuilt Haystack "chat with website" pipeline: fetches the given URLs and
# answers the query from the fetched page content.
# (Renamed from the misspelled `pipeline_webserch`; the name is only used
# within this section.)
pipeline_websearch = Pipeline.from_template(PredefinedPipeline.CHAT_WITH_WEBSITE)

# Smoke test — NOTE(review): this performs a live network fetch of
# bbrnetwork.com at import time.
result = pipeline_websearch.run({
    "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
    "prompt": {"query": "How many network members are there in the BBR network?"}}
)
print(result["llm"]["replies"][0])


def pipeline_websearch_func(query: str):
    """Answer *query* from the BBR website and return ``{"reply": str}``."""
    result = pipeline_websearch.run({
        "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
        "prompt": {"query": query}}
    )
    return {"reply": result["llm"]["replies"][0]}


string_text = "How many network members are there in the BBR network?"
pipeline_websearch_func(string_text)
"""# 3. Create tool list following OpenAI schema"""

# Tool list following the OpenAI function-calling schema. Both tools expose
# a single required free-form "query" string, inferred from the user's
# message by the model.

def _make_tool(name: str, description: str) -> dict:
    """Build one OpenAI function-tool entry with a required `query` parameter."""
    return {
        "type": "function",
        "function": {
            "name": name,
            "description": description,
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                    }
                },
                "required": ["query"],
            },
        },
    }


tools = [
    _make_tool("rag_pipeline_func", "Get information about BBR products"),
    _make_tool("pipeline_websearch_func", "Get the information about BBR and BBR networks general information"),
]
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk


# Single-turn demo of OpenAI function calling: send one user message along
# with the predefined tool list and let the model pick a tool to invoke.
messages = [
    #ChatMessage.from_system(
    #"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    #),
    ChatMessage.from_user("What is the special features of CMG system?"),
]

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo", streaming_callback=print_streaming_chunk)
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

import json

# Parse the first tool call out of the model's reply.
first_call = json.loads(response["replies"][0].content)[0]
function_name = first_call["function"]["name"]
function_args = json.loads(first_call["function"]["arguments"])
print("Function Name:", function_name)
print("Function Arguments:", function_args)

# Dispatch table mapping tool names to their local Python implementations;
# also reused later by the Gradio chatbot loop.
available_functions = {
    "rag_pipeline_func": rag_pipeline_func,
    "pipeline_websearch_func": pipeline_websearch_func,
}
function_to_call = available_functions[function_name]
function_response = function_to_call(**function_args)
print("Function Response:", function_response)
"""# 4. build chatbot by Gradio"""

import gradio as gr

# Non-streaming chat generator for the Gradio UI.
chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo")
response = None
# NOTE(review): this history list is module-level, so it is shared across
# every Gradio session/user and grows without bound — consider per-session
# state for anything beyond a demo.
messages = [
    ChatMessage.from_system(
        "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    )
]


def chatbot_with_fc(message, history):
    """Gradio chat handler with OpenAI function calling.

    Appends *message* to the shared conversation, queries the model with the
    tool list, and — while the model keeps returning tool calls — executes
    each requested tool, feeds its JSON result back, and re-queries the
    model. Returns the final assistant reply text.

    *history* (Gradio's own transcript) is unused; the module-level
    `messages` list carries the conversation instead.
    """
    messages.append(ChatMessage.from_user(message))
    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

    while True:
        # If the OpenAI response is a tool call, run the requested tool(s).
        if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
            function_calls = json.loads(response["replies"][0].content)
            print(response["replies"][0])
            for function_call in function_calls:
                ## Parse function calling information
                function_name = function_call["function"]["name"]
                function_args = json.loads(function_call["function"]["arguments"])

                ## Find the corresponding function and call it with the given arguments
                function_to_call = available_functions[function_name]
                function_response = function_to_call(**function_args)

                ## Append function response to the messages list using `ChatMessage.from_function`
                messages.append(ChatMessage.from_function(content=json.dumps(function_response), name=function_name))
            # Ask the model again, now that it can see the tool results.
            response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

        # Regular conversation: record the reply and stop looping.
        else:
            messages.append(response["replies"][0])
            break
    return response["replies"][0].content


demo = gr.ChatInterface(
    fn=chatbot_with_fc,
    examples=[
        "What are BBR's main products?",
        "What is BBR Network?",
        "Is there a BBR member in New Zealand?",
        "What's the difference between BBR CMI A5 type and A1 type anchor head?",
    ],
    # Typo fix: was "BBR Digital Assistante Demo - ".
    title="BBR Digital Assistant Demo",
)

demo.launch()