faq-website

Runtime error

App Files Files Community

Peter Vandenabeele committed on Apr 1, 2023

Commit

5f6deb9

•

1 Parent(s): 87866dc

Black it !

Browse files

Files changed (3) hide show

app.py +15 -4
requirements.txt +1 -0
scrape_website.py +14 -9

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ from peft import PeftModel
 import transformers
 import gradio as gr
 from scrape_website import process_webpages
 assert (
     "LlamaTokenizer" in transformers._import_structure["models.llama"]
 ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -71,6 +72,7 @@ def generate_prompt(instruction, input=None):
 {instruction}
 ### Response:"""
 if device != "cpu":
     model.half()
 model.eval()
@@ -122,7 +124,11 @@ g = gr.Interface(
         gr.components.Textbox(
             lines=2, label="FAQ", placeholder="Ask me anything about this website?"
         ),
-        gr.components.Textbox(lines=2, label="Website URLs", placeholder="https://www.example.org/ https://www.example.com/"),
         gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
@@ -139,9 +145,14 @@ g = gr.Interface(
     ],
     title="FAQ A Website",
     examples=[
-        ["Which actions can we take to reduce climate change?", "https://www.un.org/en/actnow/"],
-        ["Which actions can we take to reduce climate change?",
-         "https://support.worldwildlife.org/site/SPageNavigator/ActionsToFightClimateChange.html"]
     ]
     # description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 )

 import transformers
 import gradio as gr
 from scrape_website import process_webpages
 assert (
     "LlamaTokenizer" in transformers._import_structure["models.llama"]
 ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
 {instruction}
 ### Response:"""
 if device != "cpu":
     model.half()
 model.eval()
         gr.components.Textbox(
             lines=2, label="FAQ", placeholder="Ask me anything about this website?"
         ),
+        gr.components.Textbox(
+            lines=2,
+            label="Website URLs",
+            placeholder="https://www.example.org/ https://www.example.com/",
+        ),
         gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
     ],
     title="FAQ A Website",
     examples=[
+        [
+            "Which actions can we take to reduce climate change?",
+            "https://www.un.org/en/actnow/",
+        ],
+        [
+            "Which actions can we take to reduce climate change?",
+            "https://support.worldwildlife.org/site/SPageNavigator/ActionsToFightClimateChange.html",
+        ],
     ]
     # description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 )

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ accelerate
 bitsandbytes
 git+https://github.com/huggingface/peft.git
 gradio

 bitsandbytes
 git+https://github.com/huggingface/peft.git
 gradio
+black

scrape_website.py CHANGED Viewed

@@ -6,15 +6,15 @@ CHARACTER_CUT_OFF = 20000
 def remove_tags(soup: BeautifulSoup) -> str:
-    for data in soup(['style', 'script']):
         # Remove tags
         data.decompose()
     # return data by retrieving the tag content
-    return ' '.join(soup.stripped_strings)
-def read_webpage(url:str) -> str:
     print(f"Getting the response from url : {url})")
     response = requests.get(url)
     html_content = response.content
@@ -32,7 +32,8 @@ def read_webpage(url:str) -> str:
     print(text_content)
     return text_content
-def process_webpages(urls:List[str]):
     # A set to keep track of visited pages
     visited_pages = set()
     aggregated_text = ""
@@ -44,8 +45,12 @@ def process_webpages(urls:List[str]):
     return aggregated_text[:CHARACTER_CUT_OFF]
-if __name__ == '__main__':
-    print(process_webpages(urls=[
-        "https://www.example.org",
-        "https://www.example.com",
-        ]))

 def remove_tags(soup: BeautifulSoup) -> str:
+    for data in soup(["style", "script"]):
         # Remove tags
         data.decompose()
     # return data by retrieving the tag content
+    return " ".join(soup.stripped_strings)
+def read_webpage(url: str) -> str:
     print(f"Getting the response from url : {url})")
     response = requests.get(url)
     html_content = response.content
     print(text_content)
     return text_content
+def process_webpages(urls: List[str]):
     # A set to keep track of visited pages
     visited_pages = set()
     aggregated_text = ""
     return aggregated_text[:CHARACTER_CUT_OFF]
+if __name__ == "__main__":
+    print(
+        process_webpages(
+            urls=[
+                "https://www.example.org",
+                "https://www.example.com",
+            ]
+        )
+    )