jerpint committed on
Commit
da03d6f
2 Parent(s): 3555153 487c38f

Merge pull request #1 from jerpint/update-buster

Browse files
Files changed (4) hide show
  1. .github/workflows/deploy_hf.yaml +21 -0
  2. app.py +4 -3
  3. cfg.py +3 -5
  4. requirements.txt +2 -1
.github/workflows/deploy_hf.yaml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Sync to Hugging Face hub
2
+ on:
3
+ push:
4
+ branches: [main]
5
+
6
+ # to run this workflow manually from the Actions tab
7
+ workflow_dispatch:
8
+
9
+ jobs:
10
+ sync-to-hub:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ with:
15
+ fetch-depth: 0
16
+ lfs: true
17
+ - name: Push to hub
18
+ env:
19
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
20
+ HF_USERNAME: ${{ secrets.HF_USERNAME }}
21
+ run: git push --force https://$HF_USERNAME:$HF_TOKEN@huggingface.co/spaces/towardsai-buster/buster main
app.py CHANGED
@@ -67,9 +67,10 @@ with block:
67
  )
68
  gr.Markdown(
69
  """
70
- #### This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
71
- #### It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
72
- #### Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.
 
73
 
74
  #### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
75
  """
 
67
  )
68
  gr.Markdown(
69
  """
70
+ ## Welcome to Buster!
71
+ This chatbot is designed to answer any questions related to the [huggingface transformers](https://huggingface.co/docs/transformers/index) library.
72
+ It uses ChatGPT + embeddings to search the docs for relevant sections and uses them to answer questions. It can then cite its sources back to you to verify the information.
73
+ Note that LLMs are prone to hallucination, so all outputs should always be vetted by users.
74
 
75
  #### The Code is open-sourced and available on [Github](www.github.com/jerpint/buster)
76
  """
cfg.py CHANGED
@@ -1,6 +1,6 @@
1
  from buster.busterbot import Buster, BusterConfig
2
  from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
3
- from buster.formatters.documents import DocumentsFormatter
4
  from buster.formatters.prompts import PromptFormatter
5
  from buster.retriever import DeepLakeRetriever, Retriever
6
  from buster.tokenizers import GPTTokenizer
@@ -76,7 +76,7 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
76
  },
77
  documents_formatter_cfg={
78
  "max_tokens": 3500,
79
- "formatter": "{content}",
80
  },
81
  prompt_formatter_cfg={
82
  "max_tokens": 3500,
@@ -87,10 +87,8 @@ A user will submit a question. Respond 'true' if it is valid, respond 'false' if
87
  "If it isn't, simply reply that you cannot answer the question. "
88
  "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
89
  "Here is the documentation: "
90
- "<DOCUMENTS> "
91
  ),
92
  "text_after_docs": (
93
- "<\DOCUMENTS>\n"
94
  "REMEMBER:\n"
95
  "You are an chatbot answering technical questions on the huggingface transformers library. "
96
  "Here are the rules you must follow:\n"
@@ -115,7 +113,7 @@ def setup_buster(buster_cfg: BusterConfig):
115
  tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
116
  document_answerer: DocumentAnswerer = DocumentAnswerer(
117
  completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
118
- documents_formatter=DocumentsFormatter(
119
  tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
120
  ),
121
  prompt_formatter=PromptFormatter(
 
1
  from buster.busterbot import Buster, BusterConfig
2
  from buster.completers import ChatGPTCompleter, Completer, DocumentAnswerer
3
+ from buster.formatters.documents import DocumentsFormatterJSON
4
  from buster.formatters.prompts import PromptFormatter
5
  from buster.retriever import DeepLakeRetriever, Retriever
6
  from buster.tokenizers import GPTTokenizer
 
76
  },
77
  documents_formatter_cfg={
78
  "max_tokens": 3500,
79
+ "columns": ["content", "source", "title"],
80
  },
81
  prompt_formatter_cfg={
82
  "max_tokens": 3500,
 
87
  "If it isn't, simply reply that you cannot answer the question. "
88
  "Do not refer to the documentation directly, but use the instructions provided within it to answer questions. "
89
  "Here is the documentation: "
 
90
  ),
91
  "text_after_docs": (
 
92
  "REMEMBER:\n"
93
  "You are an chatbot answering technical questions on the huggingface transformers library. "
94
  "Here are the rules you must follow:\n"
 
113
  tokenizer = GPTTokenizer(**buster_cfg.tokenizer_cfg)
114
  document_answerer: DocumentAnswerer = DocumentAnswerer(
115
  completer=ChatGPTCompleter(**buster_cfg.completion_cfg),
116
+ documents_formatter=DocumentsFormatterJSON(
117
  tokenizer=tokenizer, **buster_cfg.documents_formatter_cfg
118
  ),
119
  prompt_formatter=PromptFormatter(
requirements.txt CHANGED
@@ -1,3 +1,4 @@
1
- git+https://github.com/jerpint/buster
2
  huggingface-hub
3
  gradio
 
 
1
+ buster-doctalk==1.0.19
2
  huggingface-hub
3
  gradio
4
+ promptlayer