# (Hosting residue from the notebook export — kept as comments, not code:)
# Spaces:
# Runtime error
# Runtime error
| # -*- coding: utf-8 -*- | |
| """SalesSupport-Chatbot-test2-AfterEmbbeding-PartB.ipynb | |
| Automatically generated by Colab. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1MkbsRTVFogRG6BqsQFzcV_VZX7e7WvZ6 | |
| Sales supporting Chatbot using | |
| * Haystack: document embedding & RAG pipeline | |
| * OpenAI API: message generator, text embedder | |
| * Gradio : chat bot UI and temporary hosting | |
| Retrieval source: | |
1) BBR publicly accessible English brochures only
| 2) BBR website | |
| 20240514 | |
| Xiaomeng | |
| #0. Setting Environment | |
| """ | |
| # Commented out IPython magic to ensure Python compatibility. | |
| # %%bash | |
| # | |
| # pip install haystack-ai | |
| # pip install "sentence-transformers>=2.2.0" | |
| # pip install "datasets>=2.6.1" | |
| # pip install markdown-it-py mdit_plain pypdf | |
| # pip install accelerate | |
| # pip install gradio | |
# 0. Environment setup: stdlib + Haystack imports and OpenAI credentials.
import os
from getpass import getpass
from haystack import Pipeline, PredefinedPipeline
import urllib.request
from haystack.components.generators import OpenAIGenerator

# SECURITY FIX: the previous revision hard-coded a live OpenAI API key in
# source (it is now public and must be revoked on the OpenAI dashboard).
# Read the key from the environment instead, prompting interactively only
# when it is not already set.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("Enter OpenAI API key: ")
| """# 1. Document store - dataset""" | |
| embedded_document_path="document_store.pkl" | |
| # Load the document store and retriever from .pkl files | |
| import pickle | |
| with open(embedded_document_path, "rb") as f: | |
| document_store = pickle.load(f) | |
| """#2. Define the RAG pipeline based on given documents, and build it as a tool function""" | |
| # define the RAG pipeline | |
| from haystack.components.embedders import SentenceTransformersTextEmbedder | |
| from haystack.components.retrievers.in_memory import InMemoryEmbeddingRetriever | |
| from haystack.components.builders import PromptBuilder | |
| from haystack.components.generators import OpenAIGenerator | |
| from haystack.components.embedders import OpenAITextEmbedder, OpenAIDocumentEmbedder | |
| #define the template | |
| template = """ | |
| Answer the questions based on the given context. | |
| Context: | |
| {% for document in documents %} | |
| {{ document.content }} | |
| {% endfor %} | |
| Question: {{ question }} | |
| Answer: | |
| """ | |
| # define each component of the pipeline and connect them | |
| rag_pipe = Pipeline() | |
| #rag_pipe.add_component("embedder", SentenceTransformersTextEmbedder(model="sentence-transformers/all-MiniLM-L6-v2")) | |
| rag_pipe.add_component("embedder", OpenAITextEmbedder()) | |
| rag_pipe.add_component("retriever", InMemoryEmbeddingRetriever(document_store=document_store)) | |
| rag_pipe.add_component("prompt_builder", PromptBuilder(template=template)) | |
| rag_pipe.add_component("llm", OpenAIGenerator(model="gpt-3.5-turbo")) | |
| rag_pipe.connect("embedder.embedding", "retriever.query_embedding") | |
| rag_pipe.connect("retriever", "prompt_builder.documents") | |
| rag_pipe.connect("prompt_builder", "llm") | |
# Wrap the RAG pipeline as a callable tool: takes a query string and returns
# {"reply": <generated answer>} so it can be exposed via OpenAI function calling.
def rag_pipeline_func(query: str):
    pipeline_output = rag_pipe.run(
        {"embedder": {"text": query}, "prompt_builder": {"question": query}}
    )
    replies = pipeline_output["llm"]["replies"]
    return {"reply": replies[0]}

# Smoke-test the tool with a sample product question.
string_text = "What is the maximum size of the CMG system?"
rag_pipeline_func(string_text)
| """## 2.2 Websearch pipeline""" | |
| pipeline_webserch = Pipeline.from_template(PredefinedPipeline.CHAT_WITH_WEBSITE) | |
| result = pipeline_webserch.run({ | |
| "fetcher": {"urls": ["https://www.bbrnetwork.com"]}, | |
| "prompt": {"query": "How many network members are there in the BBR network?"}} | |
| ) | |
| print(result["llm"]["replies"][0]) | |
def pipeline_websearch_func(query: str):
    """Tool wrapper around the website-chat pipeline.

    Fetches the BBR homepage and answers `query` from its content,
    returning {"reply": <answer>}.
    """
    run_inputs = {
        "fetcher": {"urls": ["https://www.bbrnetwork.com"]},
        "prompt": {"query": query},
    }
    answer = pipeline_webserch.run(run_inputs)["llm"]["replies"][0]
    return {"reply": answer}

# Smoke-test the websearch tool.
string_text = "How many network members are there in the BBR network?"
pipeline_websearch_func(string_text)
| """# 3. Create tool list following OpenAI schema""" | |
| #Create tool list following OpenAI schema | |
| # following OpenAPI tool schema for the function | |
| tools = [ | |
| { | |
| "type": "function", | |
| "function": { | |
| "name": "rag_pipeline_func", | |
| #"description": "Get information about products for geotecnical applicaions, especially CMG system", | |
| "description": "Get information about BBR products", | |
| "parameters": { | |
| "type": "object", | |
| "properties": { | |
| "query": { | |
| "type": "string", | |
| "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement", | |
| } | |
| }, | |
| "required": ["query"], | |
| }, | |
| }, | |
| }, | |
| { | |
| "type": "function", | |
| "function": { | |
| "name": "pipeline_websearch_func", | |
| "description": "Get the information about BBR and BBR networks general information", | |
| "parameters": { | |
| "type": "object", | |
| "properties": { | |
| "query": { | |
| "type": "string", | |
| "description": "The query to use in the search. Infer this from the user's message. It should be a question or a statement", | |
| } | |
| }, | |
| "required": ["query"], | |
| }, | |
| }, | |
| }, | |
| ] | |
from haystack.dataclasses import ChatMessage
from haystack.components.generators.chat import OpenAIChatGenerator
from haystack.components.generators.utils import print_streaming_chunk

# One-off demonstration of the function-calling round trip: send a user
# message with the predefined tools list, parse the tool call the model
# returns, then execute the matching local function.
messages = [
    #ChatMessage.from_system(
    #"Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous."
    #),
    ChatMessage.from_user("What is the special features of CMG system?"),
]
chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo", streaming_callback=print_streaming_chunk)
# NOTE(review): live OpenAI call at import time.
response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools})

import json

## Parse function-calling information.
# The assistant reply's content is a JSON list of tool calls; only the first
# one is handled here (the Gradio loop below handles several).
function_call = json.loads(response["replies"][0].content)[0]
function_name = function_call["function"]["name"]
function_args = json.loads(function_call["function"]["arguments"])
print("Function Name:", function_name)
print("Function Arguments:", function_args)

## Find the corresponding function and call it with the given arguments.
available_functions = {"rag_pipeline_func": rag_pipeline_func, "pipeline_websearch_func": pipeline_websearch_func}
function_to_call = available_functions[function_name]
function_response = function_to_call(**function_args)
print("Function Response:", function_response)
| """# 4. build chatbot by Gradio""" | |
| chat_generator = OpenAIChatGenerator(model="gpt-3.5-turbo") | |
| response = None | |
| messages = [ | |
| ChatMessage.from_system( | |
| "Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous." | |
| ) | |
| ] | |
| import gradio as gr | |
| def chatbot_with_fc(message, history): | |
| messages.append(ChatMessage.from_user(message)) | |
| response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools}) | |
| while True: | |
| # if OpenAI response is a tool call | |
| if response and response["replies"][0].meta["finish_reason"] == "tool_calls": | |
| function_calls = json.loads(response["replies"][0].content) | |
| print(response["replies"][0]) | |
| for function_call in function_calls: | |
| ## Parse function calling information | |
| function_name = function_call["function"]["name"] | |
| function_args = json.loads(function_call["function"]["arguments"]) | |
| ## Find the correspoding function and call it with the given arguments | |
| function_to_call = available_functions[function_name] | |
| function_response = function_to_call(**function_args) | |
| ## Append function response to the messages list using `ChatMessage.from_function` | |
| messages.append(ChatMessage.from_function(content=json.dumps(function_response), name=function_name)) | |
| response = chat_generator.run(messages=messages, generation_kwargs={"tools": tools}) | |
| # Regular Conversation | |
| else: | |
| messages.append(response["replies"][0]) | |
| break | |
| return response["replies"][0].content | |
# Launch the Gradio chat UI, with example prompts that exercise both tools
# (product RAG and website search).
demo = gr.ChatInterface(
    fn=chatbot_with_fc,
    examples=[
        "What are BBR's main products?",
        "What is BBR Network?",
        "Is there a BBR member in New Zealand?",
        "What's the difference between BBR CMI A5 type and A1 type anchor head?",
    ],
    # FIX: corrected user-facing title typo ("Assistante" -> "Assistant").
    title="BBR Digital Assistant Demo - ",
)
demo.launch()