# Spaces: Runtime error
import os | |
from pathlib import Path | |
import numpy as np | |
import pandas as pd | |
from buster.busterbot import Buster, BusterConfig, Response | |
from buster.completers.base import Completer, Completion | |
from buster.retriever import Retriever | |
from buster.utils import get_retriever_from_extension | |
# Directory named "data" that sits next to this test module.
TEST_DATA_DIR = Path(__file__).resolve().parent / "data"
# Path of the document-embeddings archive inside TEST_DATA_DIR, kept as a plain
# string (same value as before, built with pathlib instead of os.path.join).
DOCUMENTS_FILE = str(TEST_DATA_DIR / "document_embeddings_huggingface_subset.tar.gz")
def get_fake_embedding(length=1536, seed=None):
    """Return a random embedding vector for use as fake test data.

    Args:
        length: Number of components in the vector (defaults to 1536).
        seed: Optional seed forwarded to ``np.random.default_rng``. With the
            default ``None`` a fresh, unpredictable generator is used on every
            call; pass a value to get a reproducible vector.

    Returns:
        A ``list`` of ``length`` ``np.float32`` values drawn uniformly from
        the half-open interval [0, 1).
    """
    rng = np.random.default_rng(seed)
    return list(rng.random(length, dtype=np.float32))
class MockCompleter(Completer):
    """Completer stand-in that always answers with one preconfigured string."""

    def __init__(self, expected_answer):
        # Only the canned answer is stored; the base-class initializer is not invoked.
        self.expected_answer = expected_answer

    def complete(self):
        """Do nothing and return ``None``."""
        return None

    def generate_response(self, user_input, system_prompt) -> Completion:
        """Ignore both arguments and wrap the stored answer in a ``Completion``."""
        canned = Completion(self.expected_answer)
        return canned
class MockRetriever(Retriever):
    """Retriever stand-in backed by an in-memory table of 100 identical fake rows."""

    def __init__(self, filepath):
        self.filepath = filepath

        row_count = 100
        # A single embedding is generated once; every row references that same list.
        shared_embedding = get_fake_embedding()
        cell_values = {
            "title": "test",
            "url": "http://url.com",
            "content": "cool text",
            "embedding": shared_embedding,
            "n_tokens": 10,
            "source": "fake source",
        }
        # Repeat each cell value row_count times to form the columns.
        self.documents = pd.DataFrame.from_dict(
            {column: [value] * row_count for column, value in cell_values.items()}
        )

    def get_documents(self, source):
        """Return the full fake table; ``source`` is not used for filtering."""
        return self.documents

    def get_source_display_name(self, source):
        """Return ``source`` unchanged."""
        return source
import logging

# Configure the root logger at import time with level INFO
# (basicConfig does nothing if the root logger already has handlers).
logging.basicConfig(level=logging.INFO)
def test_chatbot_mock_data(tmp_path, monkeypatch):
    """Exercise ``Buster.process_input`` with embeddings, completer and retriever faked.

    ``Buster.get_embedding`` and ``buster.busterbot.completer_factory`` are
    monkeypatched and the documents come from ``MockRetriever``; the test then
    checks that the completion text is a string starting with the canned answer.
    """
    canned_answer = "this is GPT answer"

    def fake_get_embedding(self, prompt, engine):
        # Stand-in for Buster.get_embedding: ignores its arguments, returns a random vector.
        return get_fake_embedding()

    def fake_completer_factory(x):
        # Stand-in for completer_factory: ignores its argument, returns the mock completer.
        return MockCompleter(expected_answer=canned_answer)

    monkeypatch.setattr(Buster, "get_embedding", fake_get_embedding)
    monkeypatch.setattr("buster.busterbot.completer_factory", fake_completer_factory)

    retriever_settings = {
        "top_k": 3,
        "thresh": 0.7,
    }
    completion_settings = {
        "name": "ChatGPT",
        "completion_kwargs": {
            "engine": "gpt-3.5-turbo",
            "max_tokens": 200,
            "temperature": None,
            "top_p": None,
            "frequency_penalty": 1,
            "presence_penalty": 1,
        },
    }
    prompt_settings = {
        "max_words": 2000,
        "text_before_documents": "",
        "text_before_prompt": (
            """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
            """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
            """Do not include any links to urls or hyperlinks in your answers.\n\n"""
            """Now answer the following question:\n"""
        ),
    }
    hf_transformers_cfg = BusterConfig(
        unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",
        embedding_model="text-embedding-ada-002",
        retriever_cfg=retriever_settings,
        document_source="fake source",
        completion_cfg=completion_settings,
        prompt_cfg=prompt_settings,
    )

    # The path is only stored by MockRetriever; no file is created or read here.
    retriever = MockRetriever(tmp_path / "not_a_real_file.tar.gz")
    buster = Buster(cfg=hf_transformers_cfg, retriever=retriever)

    response = buster.process_input("What is a transformer?")
    answer_text = response.completion.text
    assert isinstance(answer_text, str)
    assert answer_text.startswith(canned_answer)
def test_chatbot_real_data__chatGPT():
    """Ask an on-topic question against the ``DOCUMENTS_FILE`` retriever with a ChatGPT config.

    Nothing is mocked in this test; it only checks that the completion text
    is a string.
    """
    completion_settings = {
        "name": "ChatGPT",
        "completion_kwargs": {
            "model": "gpt-3.5-turbo",
        },
    }
    prompt_settings = {
        "max_words": 2000,
        "text_before_documents": "",
        "text_before_prompt": (
            """You are a slack chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python.\n"""
            """Make sure to format your answers in Markdown format, including code block and snippets.\n"""
            """Do not include any links to urls or hyperlinks in your answers.\n\n"""
            """Now answer the following question:\n"""
        ),
    }
    hf_transformers_cfg = BusterConfig(
        unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        embedding_model="text-embedding-ada-002",
        completion_cfg=completion_settings,
        prompt_cfg=prompt_settings,
    )

    # The helper picks a retriever constructor from the file name; call it with the same path.
    make_retriever = get_retriever_from_extension(DOCUMENTS_FILE)
    retriever = make_retriever(DOCUMENTS_FILE)
    buster = Buster(cfg=hf_transformers_cfg, retriever=retriever)

    response = buster.process_input("What is a transformer?")
    assert isinstance(response.completion.text, str)
def test_chatbot_real_data__chatGPT_OOD():
    """Ask an out-of-domain question and expect the response to be flagged as not relevant.

    Nothing is mocked in this test: the retriever is built from
    ``DOCUMENTS_FILE`` and the completer is configured as ChatGPT. The test
    checks that the completion text is a string and that
    ``response.is_relevant`` is falsy.
    """
    buster_cfg = BusterConfig(
        unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        embedding_model="text-embedding-ada-002",
        completion_cfg={
            "name": "ChatGPT",
            "completion_kwargs": {
                "model": "gpt-3.5-turbo",
            },
        },
        retriever_cfg={
            "top_k": 3,
            "thresh": 0.7,
        },
        prompt_cfg={
            "max_words": 3000,
            "text_before_prompt": (
                """You are a chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
                """Make sure to format your answers in Markdown format, including code block and snippets. """
                """Do not include any links to urls or hyperlinks in your answers. """
                """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. """
                """Use this response: """
                """'I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'\n"""
                """For example:\n"""
                """What is the meaning of life for huggingface?\n"""
                # Trailing newline keeps the example answer from being fused with the
                # next sentence when the adjacent literals are concatenated.
                """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?\n"""
                """Now answer the following question:\n"""
            ),
            "text_before_documents": "Only use these documents as reference:\n",
        },
    )
    # The helper picks a retriever constructor from the file name; call it with the same path.
    retriever = get_retriever_from_extension(DOCUMENTS_FILE)(DOCUMENTS_FILE)
    buster = Buster(cfg=buster_cfg, retriever=retriever)
    response = buster.process_input("What is a good recipe for brocolli soup?")
    assert isinstance(response.completion.text, str)
    # Truthiness check rather than `== False` (PEP 8).
    assert not response.is_relevant
def test_chatbot_real_data__GPT():
    """Ask an on-topic question and expect the response to be flagged as relevant.

    Nothing is mocked in this test: the retriever is built from
    ``DOCUMENTS_FILE`` and the completer is configured as ChatGPT. The test
    checks that the completion text is a string and that
    ``response.is_relevant`` is truthy.
    """
    buster_cfg = BusterConfig(
        unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
        embedding_model="text-embedding-ada-002",
        completion_cfg={
            "name": "ChatGPT",
            "completion_kwargs": {
                "model": "gpt-3.5-turbo",
            },
        },
        retriever_cfg={
            "top_k": 3,
            "thresh": 0.7,
        },
        prompt_cfg={
            "max_words": 3000,
            "text_before_prompt": (
                """You are a chatbot assistant answering technical questions about huggingface transformers, a library to train transformers in python. """
                """Make sure to format your answers in Markdown format, including code block and snippets. """
                """Do not include any links to urls or hyperlinks in your answers. """
                """If you do not know the answer to a question, or if it is completely irrelevant to the library usage, let the user know you cannot answer. """
                """Use this response: """
                """'I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?'\n"""
                """For example:\n"""
                """What is the meaning of life for huggingface?\n"""
                # Trailing newline keeps the example answer from being fused with the
                # next sentence when the adjacent literals are concatenated.
                """I'm sorry, but I am an AI language model trained to assist with questions related to the huggingface transformers library. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?\n"""
                """Now answer the following question:\n"""
            ),
            "text_before_documents": "Only use these documents as reference:\n",
        },
    )
    # The helper picks a retriever constructor from the file name; call it with the same path.
    retriever = get_retriever_from_extension(DOCUMENTS_FILE)(DOCUMENTS_FILE)
    buster = Buster(cfg=buster_cfg, retriever=retriever)
    response = buster.process_input("What is a transformer?")
    assert isinstance(response.completion.text, str)
    # Truthiness check rather than `== True` (PEP 8).
    assert response.is_relevant