jerpint committed
Commit: d16a006
Parent: 25a0d11

compartmentalize buster config

buster/busterbot.py CHANGED
@@ -6,9 +6,10 @@ import numpy as np
 import pandas as pd
 from openai.embeddings_utils import cosine_similarity, get_embedding
 
-from buster.completers import get_completer
+from buster.completers import completer_factory
 from buster.completers.base import Completion
-from buster.formatters.prompts import SystemPromptFormatter
+from buster.formatters.prompts import SystemPromptFormatter, prompt_formatter_factory
+from buster.retriever import Retriever
 
 logger = logging.getLogger(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -23,36 +24,30 @@ class Response:
 
 @dataclass
 class BusterConfig:
-    """Configuration object for a chatbot.
-
-    documents_csv: Path to the csv file containing the documents and their embeddings.
-    embedding_model: OpenAI model to use to get embeddings.
-    top_k: Max number of documents to retrieve, ordered by cosine similarity
-    thresh: threshold for cosine similarity to be considered
-    max_words: maximum number of words the retrieved documents can be. Will truncate otherwise.
-    completion_kwargs: kwargs for the OpenAI.Completion() method
-    separator: the separator to use, can be either "\n" or <p> depending on rendering.
-    response_format: the type of format to render links with, e.g. slack or markdown
-    unknown_prompt: Prompt to use to generate the "I don't know" embedding to compare to.
-    text_before_prompt: Text to prompt GPT with before the user prompt, but after the documentation.
-    reponse_footnote: Generic response to add the the chatbot's reply.
-    source: the source of the document to consider
-    """
-
-    documents_file: str = ""
+    """Configuration object for a chatbot."""
+
     embedding_model: str = "text-embedding-ada-002"
-    top_k: int = 3
-    thresh: float = 0.7
-    max_words: int = 3000
-    unknown_threshold: float = 0.9  # set to 0 to deactivate
-    completer_cfg: dict = field(
-        # TODO: Put all this in its own config with sane defaults?
+    unknown_threshold: float = 0.9
+    unknown_prompt: str = "I Don't know how to answer your question."
+    document_source: str = ""
+    retriever_cfg: dict = field(
         default_factory=lambda: {
-            "name": "GPT3",
+            "top_k": 3,
+            "thresh": 0.7,
+        }
+    )
+    prompt_cfg: dict = field(
+        default_factory=lambda: {
+            "max_words": 3000,
             "text_before_documents": "You are a chatbot answering questions.\n",
             "text_before_prompt": "Answer the following question:\n",
+        }
+    )
+    completion_cfg: dict = field(
+        default_factory=lambda: {
+            "name": "ChatGPT",
             "completion_kwargs": {
-                "engine": "text-davinci-003",
+                "engine": "gpt-3.5-turbo",
                 "max_tokens": 200,
                 "temperature": None,
                 "top_p": None,
@@ -61,18 +56,11 @@ class BusterConfig:
             },
         }
     )
-    unknown_prompt: str = "I Don't know how to answer your question."
-    response_format: str = "slack"
-    source: str = ""
-
-
-from buster.retriever import Retriever
 
 
 class Buster:
     def __init__(self, cfg: BusterConfig, retriever: Retriever):
         self._unk_embedding = None
-        self.cfg = cfg
         self.update_cfg(cfg)
 
         self.retriever = retriever
@@ -89,16 +77,23 @@ class Buster:
 
     def update_cfg(self, cfg: BusterConfig):
         """Every time we set a new config, we update the things that need to be updated."""
-        logger.info(f"Updating config to {cfg.source}:\n{cfg}")
-        self.cfg = cfg
-        self.completer = get_completer(cfg.completer_cfg)
-        self.unk_embedding = self.get_embedding(self.cfg.unknown_prompt, engine=self.cfg.embedding_model)
-
-        self.prompt_formatter = SystemPromptFormatter(
-            text_before_docs=self.cfg.completer_cfg["text_before_documents"],
-            text_after_docs=self.cfg.completer_cfg["text_before_prompt"],
-            max_words=self.cfg.max_words,
-        )
+        logger.info(f"Updating config to {cfg.document_source}:\n{cfg}")
+        self._cfg = cfg
+        self.embedding_model = cfg.embedding_model
+        self.unknown_threshold = cfg.unknown_threshold
+        self.unknown_prompt = cfg.unknown_prompt
+        self.document_source = cfg.document_source
+
+        self.retriever_cfg = cfg.retriever_cfg
+        self.completion_cfg = cfg.completion_cfg
+        self.prompt_cfg = cfg.prompt_cfg
+
+        # set the unk. embedding
+        self.unk_embedding = self.get_embedding(self.unknown_prompt, engine=self.embedding_model)
+
+        # update completer and formatter cfg
+        self.completer = completer_factory(self.completion_cfg)
+        self.prompt_formatter = prompt_formatter_factory(self.prompt_cfg)
 
         logger.info(f"Config Updated.")
 
@@ -129,9 +124,8 @@ class Buster:
         logger.info(f"matched documents before thresh: {matched_documents}")
 
         # filter out matched_documents using a threshold
-        if thresh:
-            matched_documents = matched_documents[matched_documents.similarity > thresh]
-            logger.info(f"matched documents after thresh: {matched_documents}")
+        matched_documents = matched_documents[matched_documents.similarity > thresh]
+        logger.info(f"matched documents after thresh: {matched_documents}")
 
         return matched_documents
 
@@ -168,10 +162,10 @@ class Buster:
 
         matched_documents = self.rank_documents(
             query=user_input,
-            top_k=self.cfg.top_k,
-            thresh=self.cfg.thresh,
-            engine=self.cfg.embedding_model,
-            source=self.cfg.source,
+            top_k=self.retriever_cfg["top_k"],
+            thresh=self.retriever_cfg["thresh"],
+            engine=self.embedding_model,
+            source=self.document_source,
         )
 
         if len(matched_documents) == 0:
@@ -189,15 +183,15 @@ class Buster:
         # check for relevance
         is_relevant = self.check_response_relevance(
             completion_text=completion.text,
-            engine=self.cfg.embedding_model,
+            engine=self.embedding_model,
             unk_embedding=self.unk_embedding,
-            unk_threshold=self.cfg.unknown_threshold,
+            unk_threshold=self.unknown_threshold,
         )
         if not is_relevant:
             matched_documents = pd.DataFrame(columns=matched_documents.columns)
             # answer generated was the chatbot saying it doesn't know how to answer
             # uncomment override completion with unknown prompt
-            # completion = Completion(text=self.cfg.unknown_prompt)
+            # completion = Completion(text=self.unknown_prompt)
 
         response = Response(completion=completion, matched_documents=matched_documents, is_relevant=is_relevant)
         return response
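
With this change, update_cfg just stores the three sub-config dicts and hands them to the factories, so a Buster instance can be repointed at a different configuration without touching its internals. A minimal usage sketch under the new layout (my_retriever stands in for any object implementing the Retriever interface; it is not part of this commit):

from buster.busterbot import Buster, BusterConfig

# Default sub-configs come from the dataclass field factories shown above.
default_cfg = BusterConfig()

# Override only the retrieval behaviour; prompt_cfg and completion_cfg keep their defaults.
strict_cfg = BusterConfig(
    retriever_cfg={"top_k": 1, "thresh": 0.85},
    document_source="stackoverflow",
)

buster = Buster(cfg=default_cfg, retriever=my_retriever)  # my_retriever: assumed Retriever instance

# update_cfg rebuilds the completer, the prompt formatter and the "unknown" embedding from the new config.
buster.update_cfg(strict_cfg)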
buster/completers/__init__.py CHANGED
@@ -1,7 +1,7 @@
-from .base import ChatGPTCompleter, GPT3Completer, get_completer
+from .base import ChatGPTCompleter, GPT3Completer, completer_factory
 
 __all__ = [
-    get_completer,
+    completer_factory,
     GPT3Completer,
     ChatGPTCompleter,
 ]
buster/completers/base.py CHANGED
@@ -91,7 +91,7 @@ class ChatGPTCompleter(Completer):
         return response["choices"][0]["message"]["content"]
 
 
-def get_completer(completer_cfg):
+def completer_factory(completer_cfg):
     name = completer_cfg["name"]
     completers = {
         "GPT3": GPT3Completer,
buster/examples/cfg.py CHANGED
@@ -2,13 +2,20 @@ from buster.busterbot import BusterConfig
 
 documents_filepath = "./documents.db"
 buster_cfg = BusterConfig(
-    unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
     embedding_model="text-embedding-ada-002",
-    top_k=3,
-    thresh=0.7,
-    max_words=3000,
-    completer_cfg={
+    unknown_prompt="I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?",
+    retriever_cfg={
+        "top_k": 3,
+        "thresh": 0.7,
+    },
+    completion_cfg={
         "name": "ChatGPT",
+        "completion_kwargs": {
+            "model": "gpt-3.5-turbo",
+        },
+    },
+    prompt_cfg={
+        "max_words": 3000,
         "text_before_documents": (
             "You are a chatbot assistant answering technical questions about artificial intelligence (AI)."
             "You can only respond to a question if the content necessary to answer the question is contained in the following provided documentation. "
@@ -34,10 +41,6 @@ buster_cfg = BusterConfig(
             "I'm sorry, but I am an AI language model trained to assist with questions related to AI. I cannot answer that question as it is not relevant to the library or its usage. Is there anything else I can assist you with?"
             "Now answer the following question:\n"
         ),
-        "completion_kwargs": {
-            "model": "gpt-3.5-turbo",
-        },
     },
-    response_format="gradio",
-    source="stackoverflow",
+    document_source="stackoverflow",
 )
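
The example config is consumed the same way as before; only the grouping of the fields changed. A hedged sketch of wiring it up (the retriever construction and the name of the question-answering entry point, process_input, are assumptions, not shown in this diff):

from buster.busterbot import Buster

# `retriever` is assumed to be a Retriever built from documents_filepath elsewhere.
buster = Buster(cfg=buster_cfg, retriever=retriever)

# Assumed entry point; returns a Response carrying a Completion and the matched documents.
response = buster.process_input("What is a transformer?")
print(response.completion.text)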
buster/formatters/prompts.py CHANGED
@@ -40,3 +40,11 @@ class SystemPromptFormatter:
         documents = self.format_documents(matched_documents, max_words=self.max_words)
         system_prompt = self.text_before_docs + documents + self.text_after_docs
         return system_prompt
+
+
+def prompt_formatter_factory(prompt_cfg):
+    return SystemPromptFormatter(
+        text_before_docs=prompt_cfg["text_before_documents"],
+        text_after_docs=prompt_cfg["text_before_prompt"],
+        max_words=prompt_cfg["max_words"],
+    )
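
prompt_formatter_factory simply maps the prompt_cfg keys onto SystemPromptFormatter's constructor. A small usage sketch with illustrative values matching the defaults above:

from buster.formatters.prompts import prompt_formatter_factory

prompt_formatter = prompt_formatter_factory(
    {
        "max_words": 3000,
        "text_before_documents": "You are a chatbot answering questions.\n",
        "text_before_prompt": "Answer the following question:\n",
    }
)
# The formatter assembles: text_before_documents + (truncated) documents + text_before_prompt.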
tests/test_chatbot.py CHANGED
@@ -60,7 +60,9 @@ logging.basicConfig(level=logging.INFO)
 def test_chatbot_mock_data(tmp_path, monkeypatch):
     gpt_expected_answer = "this is GPT answer"
     monkeypatch.setattr(Buster, "get_embedding", lambda self, prompt, engine: get_fake_embedding())
-    monkeypatch.setattr("buster.busterbot.get_completer", lambda x: MockCompleter(expected_answer=gpt_expected_answer))
+    monkeypatch.setattr(
+        "buster.busterbot.completer_factory", lambda x: MockCompleter(expected_answer=gpt_expected_answer)
+    )
 
     hf_transformers_cfg = BusterConfig(
         unknown_prompt="This doesn't seem to be related to the huggingface library. I am not sure how to answer.",