# -*- coding: utf-8 -*-
"""SalesSupport-Chatbot-test2-AfterEmbbeding-PartB.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1MkbsRTVFogRG6BqsQFzcV_VZX7e7WvZ6

Sales-support chatbot using
* Haystack: document embedding & RAG pipeline
* OpenAI API: message generation, text embedding
* Gradio: chatbot UI and temporary hosting

Retrieval sources:
1) BBR publicly accessible English brochures only
2) BBR website

20240514 Xiaomeng

# 0. Setting Environment
"""

# Commented out IPython magic to ensure Python compatibility.
# %%bash
#
# pip install haystack-ai
# pip install "sentence-transformers>=2.2.0"
# pip install "datasets>=2.6.1"
# pip install markdown-it-py mdit_plain pypdf
# pip install accelerate
# pip install gradio

import os
from getpass import getpass

from haystack import Pipeline, PredefinedPipeline

# OpenAI API key -- prompt for it at runtime instead of hard-coding a secret in the notebook
if "OPENAI_API_KEY" not in os.environ:
    os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

"""# 1. Document store - dataset"""

embedded_document_path = "document_store.pkl"

# Load the embedded document store (produced in Part A) from the .pkl file
import pickle

with open(embedded_document_path, "rb") as f:
    document_store = pickle.load(f)
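"""Part B assumes the documents were already parsed and embedded in Part A. The
sketch below is a hypothetical reconstruction of that step, shown only for
reference (the function name `build_document_store` and the brochure loading
are assumptions, not the actual Part A code): it embeds `Document` objects with
`OpenAIDocumentEmbedder`, writes them into an `InMemoryDocumentStore`, and
pickles the populated store to the file loaded above."""

def build_document_store(contents, path=embedded_document_path):
    # Hypothetical sketch of Part A -- defined for reference, not executed here.
    from haystack import Document
    from haystack.document_stores.in_memory import InMemoryDocumentStore
    from haystack.components.embedders import OpenAIDocumentEmbedder

    docs = [Document(content=text) for text in contents]  # parsed brochure / website text
    embedder = OpenAIDocumentEmbedder()  # must match the query embedder used in section 2
    docs_with_embeddings = embedder.run(documents=docs)["documents"]  # attaches a vector to each Document
    store = InMemoryDocumentStore()
    store.write_documents(docs_with_embeddings)
    with open(path, "wb") as f:  # same file that section 1 loads
        pickle.dump(store, f)
    return store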
"""# 2. Define the RAG pipeline over the given documents, and wrap it as a tool function"""

from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever
from haystack.components.builders import PromptBuilder
from haystack.components.generators import OpenAIGenerator
from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder

# define the prompt template for answering from the retrieved context
template = """
Answer the questions based on the given context.

Context:
{% for document in documents %}
    {{ document.content }}
{% endfor %}

Question: {{ question }}

Answer:
"""

# define each component of the pipeline and connect them
rag_pipe = Pipeline()
# rag_pipe.add_component("embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2"))
rag_pipe.add_component("embedder", OpenAITextEmbedder())
rag_pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store))
rag_pipe.add_component("prompt_builder", PromptBuilder(template=template))
rag_pipe.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo"))
rag_pipe.connect("embedder.embedding", "retriever.query_embedding")
rag_pipe.connect("retriever", "prompt_builder.documents")
rag_pipe.connect("prompt_builder", "llm")

# wrap the pipeline in a plain function so it can be exposed as an OpenAI tool
def rag_pipeline_func(query: str):
    result = rag_pipe.run({"embedder": {"text": query}, "prompt_builder": {"question": query}})
    return {"reply": result["llm"]["replies"][0]}

# quick test
string_text = "What is the maximum size of the CMG system?"
rag_pipeline_func(string_text)

"""## 2.2 Websearch pipeline"""

pipeline_websearch = Pipeline.from_template(PredefinedPipeline.CHAT_WITH_WEBSITE)

# quick test
result = pipeline_websearch.run(
    {
        "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
        "prompt": {"query": "How many network members are there in the BBR network?"},
    }
)
print(result["llm"]["replies"][0])

# wrap the pipeline in a plain function so it can be exposed as an OpenAI tool
def pipeline_websearch_func(query: str):
    result = pipeline_websearch.run(
        {
            "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
            "prompt": {"query": query},
        }
    )
    return {"reply": result["llm"]["replies"][0]}

string_text = "How many network members are there in the BBR network?"
pipeline_websearch_func(string_text)
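"""`PredefinedPipeline.CHAT_WITH_WEBSITE` hides the wiring of the pipeline it
builds. The sketch below is an approximate hand-wired expansion based on the
Haystack 2.x template (the exact prompt text is an assumption): a fetcher
downloads the pages, a converter turns the HTML into Documents, and a prompt
builder feeds them to the LLM. The component names `fetcher` and `prompt`
match the keys used in the `run()` calls above."""

def build_website_pipeline():
    # Approximate equivalent of the CHAT_WITH_WEBSITE template -- for reference only.
    from haystack.components.fetchers import LinkContentFetcher
    from haystack.components.converters import HTMLToDocument

    prompt_template = """
    Answer the query based on the content of these web pages.

    {% for document in documents %}
        {{ document.content }}
    {% endfor %}

    Query: {{ query }}
    """
    pipe = Pipeline()
    pipe.add_component("fetcher", LinkContentFetcher())
    pipe.add_component("converter", HTMLToDocument())
    pipe.add_component("prompt", PromptBuilder(template=prompt_template))
    pipe.add_component("llm", OpenAIGenerator())
    pipe.connect("fetcher.streams", "converter.sources")  # downloaded bytes -> HTML conversion
    pipe.connect("converter.documents", "prompt.documents")
    pipe.connect("prompt", "llm")
    return pipe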
"""# 3. Create the tool list following the OpenAI schema"""

# describe both pipeline functions using the OpenAI tool schema
tools = [
    {
        "type": "function",
        "function": {
            "name": "rag_pipeline_func",
            # "description": "Get information about products for geotechnical applications, especially the CMG system",
            "description": "Get information about BBR products",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                    }
                },
                "required": ["query"],
            },
        },
    },
    {
        "type": "function",
        "function": {
            "name": "pipeline_websearch_func",
            "description": "Get general information about BBR and the BBR Network",
            "parameters": {
                "type": "object",
                "properties": {
                    "query": {
                        "type": "string",
                        "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement",
                    }
                },
                "required": ["query"],
            },
        },
    },
]

from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk

# define the system and user messages for a chat that uses the predefined tools list
messages = [
    # ChatMessage.from_system(
    #     "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    # ),
    ChatMessage.from_user("What are the special features of the CMG system?"),
]

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo", streaming_callback=print_streaming_chunk)
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

import json

## Parse the function-calling information
function_call = json.loads(response["replies"][0].content)[0]
function_name = function_call["function"]["name"]
function_args = json.loads(function_call["function"]["arguments"])
print("Function Name:", function_name)
print("Function Arguments:", function_args)

## Find the corresponding function and call it with the given arguments
available_functions = {"rag_pipeline_func": rag_pipeline_func, "pipeline_websearch_func": pipeline_websearch_func}
function_to_call = available_functions[function_name]
function_response = function_to_call(**function_args)
print("Function Response:", function_response)

"""# 4. Build the chatbot with Gradio"""

chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo")
response = None
messages = [
    ChatMessage.from_system(
        "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    )
]

import gradio as gr

def chatbot_with_fc(message, history):
    messages.append(ChatMessage.from_user(message))
    response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

    while True:
        # if the OpenAI response is a tool call
        if response and response["replies"][0].meta["finish_reason"] == "tool_calls":
            function_calls = json.loads(response["replies"][0].content)
            print(response["replies"][0])  # debug: show the raw tool-call reply
            for function_call in function_calls:
                ## Parse the function-calling information
                function_name = function_call["function"]["name"]
                function_args = json.loads(function_call["function"]["arguments"])

                ## Find the corresponding function and call it with the given arguments
                function_to_call = available_functions[function_name]
                function_response = function_to_call(**function_args)

                ## Append the function response to the messages list using `ChatMessage.from_function`
                messages.append(ChatMessage.from_function(content=json.dumps(function_response), name=function_name))
            response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

        # regular conversation
        else:
            messages.append(response["replies"][0])
            break

    return response["replies"][0].content

demo = gr.ChatInterface(
    fn=chatbot_with_fc,
    examples=[
        "What are BBR's main products?",
        "What is BBR Network?",
        "Is there a BBR member in New Zealand?",
        "What's the difference between BBR CMI A5 type and A1 type anchor head?",
    ],
    title="BBR Digital Assistant Demo",
)

demo.launch()
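"""Note on state: `messages` lives at module level, so every Gradio session
appends to the same conversation and the `history` argument of
`chatbot_with_fc` is never used. The variant below is a hypothetical sketch
(assuming Gradio's default (user, bot) pair history format; it is not wired
into the demo above) that rebuilds the chat context per session instead:"""

def chatbot_per_session(message, history):
    # Rebuild the chat context from Gradio's own per-session history.
    session_messages = [
        ChatMessage.from_system(
            "Don't make assumptions about what values to plug into functions. "
            "Ask for clarification if a user request is ambiguous."
        )
    ]
    for user_msg, bot_msg in history:  # prior turns arrive as (user, bot) pairs
        session_messages.append(ChatMessage.from_user(user_msg))
        session_messages.append(ChatMessage.from_assistant(bot_msg))
    session_messages.append(ChatMessage.from_user(message))

    # Same tool-calling loop as chatbot_with_fc, but on session_messages.
    response = chat_generator.run(messages=session_messages, generation_kwargs={"tools": tools})
    while response["replies"][0].meta["finish_reason"] == "tool_calls":
        for function_call in json.loads(response["replies"][0].content):
            function_name = function_call["function"]["name"]
            function_args = json.loads(function_call["function"]["arguments"])
            function_response = available_functions[function_name](**function_args)
            session_messages.append(
                ChatMessage.from_function(content=json.dumps(function_response), name=function_name)
            )
        response = chat_generator.run(messages=session_messages, generation_kwargs={"tools": tools})
    return response["replies"][0].content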