# -*- coding: utf-8 -*-
"""SalesSupport-Chatbot-test2-AfterEmbbeding-PartB.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1MkbsRTVFogRG6BqsQFzcV_VZX7e7WvZ6

Sales support chatbot using

*   Haystack: document embedding & RAG pipeline
*   OpenAI API: message generator, text embedder
*   Gradio: chatbot UI and temporary hosting


Retrieval sources:

1) BBR publicly accessible English brochures only
2) BBR website

20240514
Xiaomeng

# 0. Setting up the environment
"""

# Commented out IPython magic to ensure Python compatibility.
# %%bash
# 
# pip install haystack-ai
# pip install "sentence-transformers>=2.2.0"
# pip install "datasets>=2.6.1"
# pip install markdown-it-py mdit_plain pypdf
# pip install accelerate
# pip install gradio
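
# NOTE: this notebook relies on APIs from early haystack-ai 2.x releases
# (e.g. PredefinedPipeline, ChatMessage.from_function); pinning haystack-ai
# to a compatible 2.x version may be needed to run it unchanged.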

import os
from getpass import getpass
from haystack import Pipeline, PredefinedPipeline
from haystack.components.generators import OpenAIGenerator

# OpenAI API key: prompt at runtime rather than hardcoding a secret in the notebook
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")


"""# 1. Document store - dataset"""

embedded_document_path = "document_store.pkl"

# Load the embedded document store from the .pkl file (prepared in Part A)
import pickle
with open(embedded_document_path, "rb") as f:
    document_store = pickle.load(f)
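
# Sanity check: InMemoryDocumentStore in Haystack 2.x exposes count_documents();
# adjust if a different store type was pickled in Part A.
print(f"Loaded {document_store.count_documents()} embedded documents")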

"""#2. Define the RAG pipeline based on given documents, and build it as a tool function"""

# define the RAG pipeline
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder

# define the prompt template
template = """
Answer the questions based on the given context.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}
Question: {{ question }}
Answer:
"""

# define each component of the pipeline and connect them
rag_pipe = Pipeline()
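# NOTE: the query embedder must match the embedder used to create the document
# embeddings in Part A; OpenAITextEmbedder below assumes the documents were
# embedded with OpenAIDocumentEmbedder (the SentenceTransformers line is kept
# for reference).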
#rag_pipe.add_component("embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"))
rag_pipe.add_component("embedder", OpenAITextEmbedder())


rag_pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
rag_pipe.add_component("prompt_builder", PromptBuilder(template=template))
rag_pipe.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo"))

rag_pipe.connect("embedder.embedding", "retriever.query_embedding")
rag_pipe.connect("retriever", "prompt_builder.documents")
rag_pipe.connect("prompt_builder", "llm")

# wrap the pipeline as a tool function
def rag_pipeline_func(query: str):
    result = rag_pipe.run({"embedder": {"text": query}, "prompt_builder": {"question": query}})
    return {"reply": result["llm"]["replies"][0]}

string_text = "What is the maximum size of the CMG system?"
rag_pipeline_func(string_text)

"""## 2.2 Websearch pipeline"""

pipeline_websearch = Pipeline.from_template(PredefinedPipeline.CHAT_WITH_WEBSITE)
result = pipeline_websearch.run({
    "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
    "prompt": {"query": "How many network members are there in the BBR network?"}}
)
print(result["llm"]["replies"][0])

# wrap the pipeline as a tool function
def pipeline_websearch_func(query: str):
    result = pipeline_websearch.run({
        "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
        "prompt": {"query": query},
    })
    return {"reply": result["llm"]["replies"][0]}

string_text = "How many network members are there in the BBR network?"
pipeline_websearch_func(string_text)

"""# 3. Create tool list following OpenAI schema"""

# Create the tool list following the OpenAI function-calling schema
tools = [
    {
        "type": "function",
        "function": {
            "name": "rag_pipeline_func",
            #"description": "Get information about products for geotecnical applicaions, especially CMG system",
            "description": "Get information about BBR products",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                    }
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "pipeline_websearch_func",
            "description": "Get the information about BBR and BBR networks general information",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                    }
                },
                "required": ["query"],
            },
        },
    },
]

from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk


# define the user message for the chat with the predefined tools list (system message kept for reference)
messages = [
    #ChatMessage.from_system(
        #"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    #),
    ChatMessage.from_user("What is the special features of CMG system?"),
]

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo", streaming_callback=print_streaming_chunk)
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

import json

## Parse function calling information
function_call = json.loads(response["replies"][0].content)[0]
function_name = function_call["function"]["name"]
function_args = json.loads(function_call["function"]["arguments"])
print("Function Name:", function_name)
print("Function Arguments:", function_args)


## Find the corresponding function and call it with the given arguments
available_functions = {"rag_pipeline_func": rag_pipeline_func, "pipeline_websearch_func": pipeline_websearch_func}
function_to_call = available_functions[function_name]
function_response = function_to_call(**function_args)
print("Function Response:", function_response)

"""# 4. build chatbot by Gradio"""

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo")
response = None
messages = [
    ChatMessage.from_system(
        "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    )
]

import gradio as gr


def chatbot_with_fc(message, history):
    messages.append(ChatMessage.from_user(message))
    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

    while True:
        # if OpenAI response is a tool call
        if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
            function_calls = json.loads(response["replies"][0].content)
            print(response["replies"][0])  # debug: inspect the raw tool-call reply
            for function_call in function_calls:
                ## Parse function calling information
                function_name = function_call["function"]["name"]
                function_args = json.loads(function_call["function"]["arguments"])

                ## Find the corresponding function and call it with the given arguments
                function_to_call = available_functions[function_name]
                function_response = function_to_call(**function_args)

                ## Append function response to the messages list using `ChatMessage.from_function`
                messages.append(ChatMessage.from_function(content=json.dumps(function_response), name=function_name))
                response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

        # Regular Conversation
        else:
            messages.append(response["replies"][0])
            break
    return response["replies"][0].content
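
# CAVEAT: `messages` is module-level, so all Gradio sessions share a single
# conversation history and the `history` argument goes unused; fine for a demo,
# but per-session state would be needed for concurrent users.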


demo = gr.ChatInterface(
    fn=chatbot_with_fc,
    examples=[
        "What are BBR's main products?",
        "What is BBR Network?",
        "Is there a BBR member in New Zealand?",
        "What's the difference between BBR CMI A5 type and A1 type anchor head?",
    ],
    title="BBR Digital Assistante Demo - ",
)

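# In Colab, demo.launch(share=True) is typically needed to expose a temporary
# public URL (the "temporary hosting" mentioned in the header).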
demo.launch()