Thai Tran committed on
Commit
d0bb947
•
1 Parent(s): 455fcb3

Removed next_thought logic. Moved helper functions from tools.py to util.py.

Files changed (3)
  1. app.py +1 -6
  2. tools.py +9 -98
  3. util.py +88 -0
app.py CHANGED
@@ -122,7 +122,7 @@ def generate(new_user_message, history):
         tool = matches.group(2).strip()
         params = matches.group(3).strip()
 
-        (result, next_thought) = llm_tools.run_tool(tool, params)
+        result = llm_tools.run_tool(tool, params)
 
         prompt = f"Question: {new_user_message}\n\n"
         prompt += f"{full_response}\n\n"
@@ -150,11 +150,6 @@ def generate(new_user_message, history):
 
         prompt += f"Result: {result}\n\n"
 
-        # This is required to give more guidance on the next
-        # step for LLMs like LLaMA 2
-        if next_thought:
-            prompt += f"Thought: {next_thought}\n\n"
-
        break
 
    # Stop when we either see the Conclusion or we cannot find an
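For reference, a minimal caller sketch of the simplified run_tool contract after this commit. This is not part of the commit itself: it assumes Tools() takes no constructor arguments, that bs4/requests/selenium are installed, and the exact result formatting depends on the Calculate tool's implementation.

# Hypothetical usage sketch, not from the repository
from tools import Tools

llm_tools = Tools()

# run_tool now returns just the result; callers no longer unpack a
# (result, next_thought) tuple.
result = llm_tools.run_tool("Calculate", "sqrt(16) + 2")
print(result)  # 6.0, modulo the Calculate tool's formatting

# Unknown tool names come back as a plain error string
print(llm_tools.run_tool("FooBar", "anything"))  # FooBar[] is not a valid tool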
tools.py CHANGED
@@ -1,10 +1,10 @@
 import inspect
 import re
 import urllib.parse
-from bs4 import BeautifulSoup, Comment
+from bs4 import BeautifulSoup
 import requests
 from selenium import webdriver
-from math import *
+from util import safe_eval, distill_html
 
 class Tools:
     """
@@ -21,28 +21,22 @@ class Tools:
         self.add_tool(
             self.calculate,
             "Calculate",
-            "Evaluate a mathematical expression using Python. Expression should only contain numbers, operators (+ - * / **), or math module functions.",
-            None)
+            "Evaluate a mathematical expression using Python. Expression should only contain numbers, operators (+ - * / **), or math module functions.")
 
         self.add_tool(
             self.google_search,
             "GoogleSearch",
-            "Use Google to search the web for the topic.",
-            "I will scan the Google search results and determine whether it shows me the answer or whether I need to fetch one of the web pages for more information.")
+            "Use Google to search the web for the topic.")
 
         self.add_tool(
             self.get_web_page,
             "GetWebPage",
-            "Get the contents of a web page. Only call this with a valid URL.",
-            "I will scan the web page to determine whether it has the answer. If not, then I will do another Google search and try loading a different web page.")
+            "Get the contents of a web page. Only call this with a valid URL.")
 
-    def add_tool(self, func, name, desc, next_thought=None):
+    def add_tool(self, func, name, desc):
         """
         Adds a Python function as an available tool for the LLM.
         The tool name and desc will be included in the LLM system message.
-        The optional next_thought is intented to be appenending to the prompt after
-        the results from the tool. This is necessary to give more explicit
-        guidance to LLMs like LLaMA 2.
         """
         params = inspect.signature(func).parameters
         tool_params = list(params.keys())
@@ -50,8 +44,7 @@ class Tools:
         self.tools[name] = {
             "params": tool_params,
             "desc": desc,
-            "func": func,
-            "next_thought": next_thought
+            "func": func
         }
 
     def get_tool_list_for_prompt(self):
@@ -72,8 +65,7 @@ class Tools:
 
     def run_tool(self, name, params):
         """
-        Runs a tool. Returns the result of the tool and (optionally) a
-        Thought to be appened to the LLM prompt after the Results.
+        Runs a tool and returns the result.
         """
         if not name in self.tools:
             return f"{name}[] is not a valid tool"
@@ -85,7 +77,7 @@ class Tools:
 
         result = tool["func"](params)
 
-        return (result, tool["next_thought"])
+        return result
 
     def set_browser(self, browser):
         """
@@ -213,84 +205,3 @@ class Tools:
         except:
             return "Error retrieving web page"
 
-
-def safe_eval(expression):
-    """
-    A version of eval() that only allows a limited set of math functions.
-    """
-
-    safe_list = [
-        'abs', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh',
-        'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot',
-        'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh',
-        'sqrt', 'tan', 'tanh'
-    ]
-
-    safe_dict = dict([ (k, locals().get(k, None)) for k in safe_list ])
-
-    return eval(expression, { "__builtins__": None }, safe_dict)
-
-def distill_html(raw_html, remove_links=False):
-    """
-    Reduce HTML to the minimal tags necessary to understand the content.
-    Set remove_links=True to also replace <a> tags with their inner content.
-    """
-    soup = BeautifulSoup(raw_html, 'html.parser')
-
-    # Tags (with inner content) that should be completely removed from the HTML
-    # Note: We want to keep <g-section-with-header> as it shows Top Stories
-    remove_tags = [
-        'aside', 'br', 'button', 'cite', 'cnx', 'fieldset', 'figcaption',
-        'figure', 'footer', 'form', 'g-dropdown-button',
-        'g-dropdown-menu-button', 'g-fab', 'g-img', 'g-inner-card',
-        'g-left-button', 'g-link', 'g-loading-icon', 'g-more-linkg-menu-item',
-        'g-popup', 'g-radio-button-group', 'g-right-button',
-        'g-scrolling-carousel', 'g-snackbar', 'g-white-loading-icon',
-        'google-read-aloud-player', 'head', 'hr', 'iframe', 'img', 'input',
-        'label', 'link', 'nav', 'next-route-announcer', 'noscript',
-        'option', 'promo-throttler', 'script', 'select', 'style', 'svg'
-    ]
-    valid_attrs = ['href']
-
-    # Remove all unwanted tags
-    for tag in soup(remove_tags):
-        tag.decompose()
-
-    # Remove all unwanted attributes
-    for tag in soup():
-        attrs = dict(tag.attrs)
-        for attr in attrs:
-            if attr not in valid_attrs:
-                del tag[attr]
-
-    # Replace every <span> and <p> with it's inner contents
-    for span in soup.find_all(['span', 'p']):
-        span.replace_with(" " + span.text + " ")
-
-    # Replace links with plain text
-    if remove_links:
-        for link in soup.find_all('a'):
-            link.replace_with(" " + link.text + " ")
-
-    # Remove comments
-    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
-        comment.extract()
-
-    # Remove empty divs (e.g. <div> </div>)
-    for div in soup.find_all("div"):
-        if (div.text is None) or (div.text.strip() == ""):
-            div.decompose()
-
-    # Compress nested divs. For example:
-    # <div><div><div>Content</div></div></div> -> <div>Content>/div>)
-    for div in soup.find_all("div"):
-        children = div.findChildren(recursive=False)
-        if len(children) == 1 and children[0].name == 'div':
-            div.replace_with(children[0])
-
-    html = str(soup)
-
-    # Compress whitespace
-    html = re.sub(r'(\s|\n)+', ' ', html)
-
-    return html
util.py ADDED
@@ -0,0 +1,88 @@
+import re
+import math
+from bs4 import BeautifulSoup, Comment
+
+def safe_eval(expression):
+    """
+    A version of eval() that only allows a limited set of math functions.
+    """
+
+    safe_list = [
+        'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh',
+        'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot',
+        'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh',
+        'sqrt', 'tan', 'tanh'
+    ]
+
+    safe_dict = {k: getattr(math, k) for k in safe_list}
+    safe_dict['abs'] = abs
+
+    try:
+        return eval(expression, {"__builtins__": None}, safe_dict)
+    except Exception as e:
+        raise ValueError(f'Error evaluating expression: {e}')
+
+def distill_html(raw_html, remove_links=False):
+    """
+    Reduce HTML to the minimal tags necessary to understand the content.
+    Set remove_links=True to also replace <a> tags with their inner content.
+    """
+    soup = BeautifulSoup(raw_html, 'html.parser')
+
+    # Tags (with inner content) that should be completely removed from the HTML
+    # Note: We want to keep <g-section-with-header> as it shows Top Stories
+    remove_tags = [
+        'aside', 'br', 'button', 'cite', 'cnx', 'fieldset', 'figcaption',
+        'figure', 'footer', 'form', 'g-dropdown-button',
+        'g-dropdown-menu-button', 'g-fab', 'g-img', 'g-inner-card',
+        'g-left-button', 'g-link', 'g-loading-icon', 'g-more-linkg-menu-item',
+        'g-popup', 'g-radio-button-group', 'g-right-button',
+        'g-scrolling-carousel', 'g-snackbar', 'g-white-loading-icon',
+        'google-read-aloud-player', 'head', 'hr', 'iframe', 'img', 'input',
+        'label', 'link', 'nav', 'next-route-announcer', 'noscript',
+        'option', 'promo-throttler', 'script', 'select', 'style', 'svg'
+    ]
+    valid_attrs = ['href']
+
+    # Remove all unwanted tags
+    for tag in soup(remove_tags):
+        tag.decompose()
+
+    # Remove all unwanted attributes
+    for tag in soup():
+        attrs = dict(tag.attrs)
+        for attr in attrs:
+            if attr not in valid_attrs:
+                del tag[attr]
+
+    # Replace every <span> and <p> with it's inner contents
+    for span in soup.find_all(['span', 'p']):
+        span.replace_with(" " + span.text + " ")
+
+    # Replace links with plain text
+    if remove_links:
+        for link in soup.find_all('a'):
+            link.replace_with(" " + link.text + " ")
+
+    # Remove comments
+    for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
+        comment.extract()
+
+    # Remove empty divs (e.g. <div> </div>)
+    for div in soup.find_all("div"):
+        if (div.text is None) or (div.text.strip() == ""):
+            div.decompose()
+
+    # Compress nested divs. For example:
+    # <div><div><div>Content</div></div></div> -> <div>Content>/div>)
+    for div in soup.find_all("div"):
+        children = div.findChildren(recursive=False)
+        if len(children) == 1 and children[0].name == 'div':
+            div.replace_with(children[0])
+
+    html = str(soup)
+
+    # Compress whitespace
+    html = re.sub(r'(\s|\n)+', ' ', html)
+
+    return html
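To sanity-check the relocated helpers, a short hypothetical exercise (not part of this commit), assuming util.py is importable from the working directory and beautifulsoup4 is installed:

# Hypothetical usage sketch, not from the repository
from util import safe_eval, distill_html

# Only the whitelisted math names plus abs() are visible to the expression
print(safe_eval("sqrt(2) * pi"))         # about 4.44288
print(safe_eval("abs(-3) + pow(2, 5)"))  # 35.0

try:
    safe_eval("__import__('os')")        # blocked: __builtins__ is None
except ValueError as err:
    print(err)

# distill_html strips unwanted tags and attributes, prunes empty divs, and
# collapses redundant nesting; remove_links=True also flattens <a> tags
raw = '<div><div>Hello <a href="https://example.com">world</a><div> </div></div></div>'
print(distill_html(raw))                     # nested divs collapsed, link kept
print(distill_html(raw, remove_links=True))  # link replaced by its inner text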