Peter Vandenabeele committed
Commit
5f6deb9
1 Parent(s): 87866dc

Black it !

Files changed (3)
  1. app.py +15 -4
  2. requirements.txt +1 -0
  3. scrape_website.py +14 -9
app.py CHANGED
@@ -3,6 +3,7 @@ from peft import PeftModel
 import transformers
 import gradio as gr
 from scrape_website import process_webpages
+
 assert (
     "LlamaTokenizer" in transformers._import_structure["models.llama"]
 ), "LLaMA is now in HuggingFace's main branch.\nPlease reinstall it: pip uninstall transformers && pip install git+https://github.com/huggingface/transformers.git"
@@ -71,6 +72,7 @@ def generate_prompt(instruction, input=None):
 {instruction}
 ### Response:"""
 
+
 if device != "cpu":
     model.half()
 model.eval()
@@ -122,7 +124,11 @@ g = gr.Interface(
         gr.components.Textbox(
             lines=2, label="FAQ", placeholder="Ask me anything about this website?"
         ),
-        gr.components.Textbox(lines=2, label="Website URLs", placeholder="https://www.example.org/ https://www.example.com/"),
+        gr.components.Textbox(
+            lines=2,
+            label="Website URLs",
+            placeholder="https://www.example.org/ https://www.example.com/",
+        ),
         gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
         # gr.components.Slider(minimum=0, maximum=1, value=0.75, label="Top p"),
         # gr.components.Slider(minimum=0, maximum=100, step=1, value=40, label="Top k"),
@@ -139,9 +145,14 @@ g = gr.Interface(
     ],
     title="FAQ A Website",
     examples=[
-        ["Which actions can we take to reduce climate change?", "https://www.un.org/en/actnow/"],
-        ["Which actions can we take to reduce climate change?",
-         "https://support.worldwildlife.org/site/SPageNavigator/ActionsToFightClimateChange.html"]
+        [
+            "Which actions can we take to reduce climate change?",
+            "https://www.un.org/en/actnow/",
+        ],
+        [
+            "Which actions can we take to reduce climate change?",
+            "https://support.worldwildlife.org/site/SPageNavigator/ActionsToFightClimateChange.html",
+        ],
     ]
     # description="Alpaca-LoRA is a 7B-parameter LLaMA model finetuned to follow instructions. It is trained on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset and makes use of the Huggingface LLaMA implementation. For more information, please visit [the project's website](https://github.com/tloen/alpaca-lora).",
 )
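For context: the new "Website URLs" textbox holds space-separated URLs (per its placeholder), while process_webpages takes a list of strings. The Gradio callback itself is not shown in this diff; a hypothetical glue helper between the two might look like:

from scrape_website import process_webpages


def webpage_context(urls_text: str) -> str:
    # Hypothetical helper, not part of this commit: split the space-separated
    # textbox value into the list of URLs that process_webpages expects.
    urls = urls_text.split()
    return process_webpages(urls=urls)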
requirements.txt CHANGED
@@ -8,3 +8,4 @@ accelerate
 bitsandbytes
 git+https://github.com/huggingface/peft.git
 gradio
+black
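Adding black here makes the formatter available in the Space's environment. The reformatting in this commit can presumably be reproduced with:

pip install -r requirements.txt
black app.py scrape_website.py

Black is not version-pinned, so the exact output may drift between Black releases.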
scrape_website.py CHANGED
@@ -6,15 +6,15 @@ CHARACTER_CUT_OFF = 20000
 
 
 def remove_tags(soup: BeautifulSoup) -> str:
-    for data in soup(['style', 'script']):
+    for data in soup(["style", "script"]):
         # Remove tags
         data.decompose()
 
     # return data by retrieving the tag content
-    return ' '.join(soup.stripped_strings)
+    return " ".join(soup.stripped_strings)
 
 
-def read_webpage(url:str) -> str:
+def read_webpage(url: str) -> str:
     print(f"Getting the response from url : {url})")
     response = requests.get(url)
     html_content = response.content
@@ -32,7 +32,8 @@ def read_webpage(url:str) -> str:
     print(text_content)
     return text_content
 
-def process_webpages(urls:List[str]):
+
+def process_webpages(urls: List[str]):
     # A set to keep track of visited pages
     visited_pages = set()
     aggregated_text = ""
@@ -44,8 +45,12 @@ def process_webpages(urls:List[str]):
     return aggregated_text[:CHARACTER_CUT_OFF]
 
 
-if __name__ == '__main__':
-    print(process_webpages(urls=[
-        "https://www.example.org",
-        "https://www.example.com",
-    ]))
+if __name__ == "__main__":
+    print(
+        process_webpages(
+            urls=[
+                "https://www.example.org",
+                "https://www.example.com",
+            ]
+        )
+    )
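remove_tags is the only function whose body is fully visible in this diff. A self-contained sketch of the quote-normalized version (the bs4 import is an assumption inferred from the type hint, and the demo HTML is made up) shows what it does:

from bs4 import BeautifulSoup


def remove_tags(soup: BeautifulSoup) -> str:
    # Drop <style> and <script> subtrees entirely ...
    for data in soup(["style", "script"]):
        data.decompose()
    # ... then join the remaining visible text nodes with single spaces.
    return " ".join(soup.stripped_strings)


if __name__ == "__main__":
    html = "<html><style>p {color: red}</style><p>Hello</p><p>world</p></html>"
    print(remove_tags(BeautifulSoup(html, "html.parser")))  # -> Hello world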