Spaces:

jerpint
/

buster

Running

App Files Community

jerpint committed on Oct 16, 2023

Commit

da03d6f

•

2 Parent(s): 3555153 487c38f

Merge pull request #1 from jerpint/update-buster

Browse files

Files changed (4) hide show

.github/workflows/deploy_hf.yaml +21 -0
app.py +4 -3
cfg.py +3 -5
requirements.txt +2 -1

.github/workflows/deploy_hf.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+name: Sync to Hugging Face hub
+on:
+  push:
+    branches: [main]
+  # to run this workflow manually from the Actions tab
+  workflow_dispatch:
+jobs:
+  sync-to-hub:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+          lfs: true
+      - name: Push to hub
+        env:
+          HF_TOKEN: ${{ secrets.HF_TOKEN }}
+          HF_USERNAME: ${{ secrets.HF_USERNAME }}
+        run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/towardsai-buster/buster main

app.py CHANGED Viewed

@@ -67,9 +67,10 @@ with block:
     )
     gr.Markdown(
         """
-    #### This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
-    #### It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
-    #### Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.
     #### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
     """

     )
     gr.Markdown(
         """
+    ## Welcome to Buster!
+    This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
+    It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
+    Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.
     #### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
     """

cfg.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from buster.busterbot import Buster, BusterConfig
 from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
-from buster.formatters.documents import DocumentsFormatter
 from buster.formatters.prompts import PromptFormatter
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
@@ -76,7 +76,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
     },
     documents_formatter_cfg={
         "max_tokens": 3500,
-        "formatter": "{content}",
     },
     prompt_formatter_cfg={
         "max_tokens": 3500,
@@ -87,10 +87,8 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
             "If it isn't, simply reply that you cannot answer the question. "
             "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
             "Here is the documentation: "
-            "<DOCUMENTS> "
         ),
         "text_after_docs": (
-            "<\DOCUMENTS>\n"
             "REMEMBER:\n"
             "You are an chatbot answering technical questions on the huggingface transformers library. "
             "Here are the rules you must follow:\n"
@@ -115,7 +113,7 @@ def setup_buster(buster_cfg: BusterConfig):
     tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
     document_answerer: DocumentAnswerer = DocumentAnswerer(
         completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
-        documents_formatter=DocumentsFormatter(
             tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
         ),
         prompt_formatter=PromptFormatter(

 from buster.busterbot import Buster, BusterConfig
 from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
+from buster.formatters.documents import DocumentsFormatterJSON
 from buster.formatters.prompts import PromptFormatter
 from buster.retriever import DeepLakeRetriever, Retriever
 from buster.tokenizers import GPTTokenizer
     },
     documents_formatter_cfg={
         "max_tokens": 3500,
+        "columns": ["content", "source", "title"],
     },
     prompt_formatter_cfg={
         "max_tokens": 3500,
             "If it isn't, simply reply that you cannot answer the question. "
             "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
             "Here is the documentation: "
         ),
         "text_after_docs": (
             "REMEMBER:\n"
             "You are an chatbot answering technical questions on the huggingface transformers library. "
             "Here are the rules you must follow:\n"
     tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
     document_answerer: DocumentAnswerer = DocumentAnswerer(
         completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
+        documents_formatter=DocumentsFormatterJSON(
             tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
         ),
         prompt_formatter=PromptFormatter(

requirements.txt CHANGED Viewed

@@ -1,3 +1,4 @@
-git+https://github.com/jerpint/buster
 huggingface-hub
 gradio

+buster-doctalk==1.0.19
 huggingface-hub
 gradio
+promptlayer