jerpint committed on
Commit
1c29b1a
1 Parent(s): db19951
Files changed (3) hide show
  1. app.py +18 -8
  2. cfg.py +2 -29
  3. rtd_scraper/tutorial/spiders/docs_spider.py +0 -1
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import logging
2
  from typing import Optional, Tuple
3
 
4
  import gradio as gr
@@ -6,17 +6,26 @@ import pandas as pd
6
  from buster.completers import Completion
7
  from buster.utils import extract_zip
8
 
 
9
  import cfg
10
  from cfg import setup_buster
11
 
12
- # Create a handler to control where log messages go (e.g., console, file)
13
- handler = (
14
- logging.StreamHandler()
15
- ) # Console output, you can change it to a file handler if needed
16
 
17
- # Set the handler's level to INFO
18
- handler.setLevel(logging.INFO)
19
- logging.basicConfig(level=logging.INFO)
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
  # Typehint for chatbot history
22
  ChatHistory = list[list[Optional[str], Optional[str]]]
@@ -108,6 +117,7 @@ with demo:
108
  examples=[
109
  "How can I install the library?",
110
  "What dependencies are required?",
 
111
  ],
112
  inputs=question,
113
  )
 
1
+ import os
2
  from typing import Optional, Tuple
3
 
4
  import gradio as gr
 
6
  from buster.completers import Completion
7
  from buster.utils import extract_zip
8
 
9
+ from rtd_scraper.scrape_rtd import scrape_rtd
10
  import cfg
11
  from cfg import setup_buster
12
 
 
 
 
 
13
 
14
+ # Check if an openai key is set as an env. variable
15
+ if os.getenv("OPENAI_API_KEY") is None:
16
+ print(
17
+ "Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
18
+ )
19
+
20
+
21
+ homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
22
+ target_version = os.getenv("RTD_VERSION", "en/stable")
23
+
24
+ # scrape and embed content from readthedocs website
25
+ scrape_rtd(
26
+ homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
27
+ )
28
+
29
 
30
  # Typehint for chatbot history
31
  ChatHistory = list[list[Optional[str], Optional[str]]]
 
117
  examples=[
118
  "How can I install the library?",
119
  "What dependencies are required?",
120
+ "Give a brief overview of the library."
121
  ],
122
  inputs=question,
123
  )
cfg.py CHANGED
@@ -1,6 +1,3 @@
1
- import os
2
- import logging
3
-
4
  from buster.busterbot import Buster, BusterConfig
5
  from buster.completers import ChatGPTCompleter, DocumentAnswerer
6
  from buster.formatters.documents import DocumentsFormatterJSON
@@ -11,29 +8,6 @@ from buster.validators import QuestionAnswerValidator, Validator
11
 
12
  from rtd_scraper.scrape_rtd import scrape_rtd
13
 
14
- # Set the root logger's level to INFO
15
- logging.basicConfig(level=logging.INFO)
16
-
17
- # Check if an openai key is set as an env. variable
18
- if os.getenv("OPENAI_API_KEY") is None:
19
- print(
20
- "Warning: No openai key detected. You can set it with 'export OPENAI_API_KEY=sk-...'."
21
- )
22
-
23
- homepage_url = os.getenv("RTD_URL", "https://orion.readthedocs.io/")
24
- target_version = os.getenv("RTD_VERSION", "en/stable")
25
-
26
- # scrape and embed content from readthedocs website
27
- scrape_rtd(
28
- homepage_url=homepage_url, save_directory="outputs/", target_version=target_version
29
- )
30
-
31
- # Disable logging for third-party libraries at DEBUG level
32
- for name in logging.root.manager.loggerDict:
33
- logger = logging.getLogger(name)
34
- logger.setLevel(logging.INFO)
35
-
36
-
37
  buster_cfg = BusterConfig(
38
  validator_cfg={
39
  "unknown_response_templates": [
@@ -43,15 +17,14 @@ buster_cfg = BusterConfig(
43
  "embedding_model": "text-embedding-ada-002",
44
  "use_reranking": True,
45
  "invalid_question_response": "This question does not seem relevant to my current knowledge.",
46
- "check_question_prompt": """You are an chatbot answering questions on artificial intelligence.
47
 
48
  Your job is to determine wether or not a question is valid, and should be answered.
49
- More general questions are not considered valid, even if you might know the response.
50
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
51
 
52
  For example:
53
 
54
- Q: What is backpropagation?
55
  true
56
 
57
  Q: What is the meaning of life?
 
 
 
 
1
  from buster.busterbot import Buster, BusterConfig
2
  from buster.completers import ChatGPTCompleter, DocumentAnswerer
3
  from buster.formatters.documents import DocumentsFormatterJSON
 
8
 
9
  from rtd_scraper.scrape_rtd import scrape_rtd
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  buster_cfg = BusterConfig(
12
  validator_cfg={
13
  "unknown_response_templates": [
 
17
  "embedding_model": "text-embedding-ada-002",
18
  "use_reranking": True,
19
  "invalid_question_response": "This question does not seem relevant to my current knowledge.",
20
+ "check_question_prompt": """You are an chatbot answering questions on python libraries.
21
 
22
  Your job is to determine wether or not a question is valid, and should be answered.
 
23
  A user will submit a question. Respond 'true' if it is valid, respond 'false' if it is invalid.
24
 
25
  For example:
26
 
27
+ Q: How can I install the library?
28
  true
29
 
30
  Q: What is the meaning of life?
rtd_scraper/tutorial/spiders/docs_spider.py CHANGED
@@ -62,7 +62,6 @@ class DocsSpider(scrapy.Spider):
62
  filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
63
  filepath.parent.mkdir(parents=True, exist_ok=True)
64
 
65
- print(f"{filepath=}")
66
  with open(filepath, "wb") as f:
67
  f.write(response.body)
68
 
 
62
  filepath = self.base_dir / parsed_uri.netloc / parsed_uri.path.strip("/")
63
  filepath.parent.mkdir(parents=True, exist_ok=True)
64
 
 
65
  with open(filepath, "wb") as f:
66
  f.write(response.body)
67