Thai Tran
committed on
Commit
β’
d0bb947
1
Parent(s):
455fcb3
Removed next_thought logic. Moved helper functions from tools.py to util.py.
Browse files
app.py
CHANGED
@@ -122,7 +122,7 @@ def generate(new_user_message, history):
|
|
122 |
tool = matches.group(2).strip()
|
123 |
params = matches.group(3).strip()
|
124 |
|
125 |
-
|
126 |
|
127 |
prompt = f"Question: {new_user_message}\n\n"
|
128 |
prompt += f"{full_response}\n\n"
|
@@ -150,11 +150,6 @@ def generate(new_user_message, history):
|
|
150 |
|
151 |
prompt += f"Result: {result}\n\n"
|
152 |
|
153 |
-
# This is required to give more guidance on the next
|
154 |
-
# step for LLMs like LLaMA 2
|
155 |
-
if next_thought:
|
156 |
-
prompt += f"Thought: {next_thought}\n\n"
|
157 |
-
|
158 |
break
|
159 |
|
160 |
# Stop when we either see the Conclusion or we cannot find an
|
|
|
122 |
tool = matches.group(2).strip()
|
123 |
params = matches.group(3).strip()
|
124 |
|
125 |
+
result = llm_tools.run_tool(tool, params)
|
126 |
|
127 |
prompt = f"Question: {new_user_message}\n\n"
|
128 |
prompt += f"{full_response}\n\n"
|
|
|
150 |
|
151 |
prompt += f"Result: {result}\n\n"
|
152 |
|
|
|
|
|
|
|
|
|
|
|
153 |
break
|
154 |
|
155 |
# Stop when we either see the Conclusion or we cannot find an
|
tools.py
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
import inspect
|
2 |
import re
|
3 |
import urllib.parse
|
4 |
-
from bs4 import BeautifulSoup
|
5 |
import requests
|
6 |
from selenium import webdriver
|
7 |
-
from
|
8 |
|
9 |
class Tools:
|
10 |
"""
|
@@ -21,28 +21,22 @@ class Tools:
|
|
21 |
self.add_tool(
|
22 |
self.calculate,
|
23 |
"Calculate",
|
24 |
-
"Evaluate a mathematical expression using Python. Expression should only contain numbers, operators (+ - * / **), or math module functions."
|
25 |
-
None)
|
26 |
|
27 |
self.add_tool(
|
28 |
self.google_search,
|
29 |
"GoogleSearch",
|
30 |
-
"Use Google to search the web for the topic."
|
31 |
-
"I will scan the Google search results and determine whether it shows me the answer or whether I need to fetch one of the web pages for more information.")
|
32 |
|
33 |
self.add_tool(
|
34 |
self.get_web_page,
|
35 |
"GetWebPage",
|
36 |
-
"Get the contents of a web page. Only call this with a valid URL."
|
37 |
-
"I will scan the web page to determine whether it has the answer. If not, then I will do another Google search and try loading a different web page.")
|
38 |
|
39 |
-
def add_tool(self, func, name, desc
|
40 |
"""
|
41 |
Adds a Python function as an available tool for the LLM.
|
42 |
The tool name and desc will be included in the LLM system message.
|
43 |
-
The optional next_thought is intented to be appenending to the prompt after
|
44 |
-
the results from the tool. This is necessary to give more explicit
|
45 |
-
guidance to LLMs like LLaMA 2.
|
46 |
"""
|
47 |
params = inspect.signature(func).parameters
|
48 |
tool_params = list(params.keys())
|
@@ -50,8 +44,7 @@ class Tools:
|
|
50 |
self.tools[name] = {
|
51 |
"params": tool_params,
|
52 |
"desc": desc,
|
53 |
-
"func": func
|
54 |
-
"next_thought": next_thought
|
55 |
}
|
56 |
|
57 |
def get_tool_list_for_prompt(self):
|
@@ -72,8 +65,7 @@ class Tools:
|
|
72 |
|
73 |
def run_tool(self, name, params):
|
74 |
"""
|
75 |
-
Runs a tool
|
76 |
-
Thought to be appened to the LLM prompt after the Results.
|
77 |
"""
|
78 |
if not name in self.tools:
|
79 |
return f"{name}[] is not a valid tool"
|
@@ -85,7 +77,7 @@ class Tools:
|
|
85 |
|
86 |
result = tool["func"](params)
|
87 |
|
88 |
-
return
|
89 |
|
90 |
def set_browser(self, browser):
|
91 |
"""
|
@@ -213,84 +205,3 @@ class Tools:
|
|
213 |
except:
|
214 |
return "Error retrieving web page"
|
215 |
|
216 |
-
|
217 |
-
def safe_eval(expression):
|
218 |
-
"""
|
219 |
-
A version of eval() that only allows a limited set of math functions.
|
220 |
-
"""
|
221 |
-
|
222 |
-
safe_list = [
|
223 |
-
'abs', 'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh',
|
224 |
-
'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot',
|
225 |
-
'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh',
|
226 |
-
'sqrt', 'tan', 'tanh'
|
227 |
-
]
|
228 |
-
|
229 |
-
safe_dict = dict([ (k, locals().get(k, None)) for k in safe_list ])
|
230 |
-
|
231 |
-
return eval(expression, { "__builtins__": None }, safe_dict)
|
232 |
-
|
233 |
-
def distill_html(raw_html, remove_links=False):
|
234 |
-
"""
|
235 |
-
Reduce HTML to the minimal tags necessary to understand the content.
|
236 |
-
Set remove_links=True to also replace <a> tags with their inner content.
|
237 |
-
"""
|
238 |
-
soup = BeautifulSoup(raw_html, 'html.parser')
|
239 |
-
|
240 |
-
# Tags (with inner content) that should be completely removed from the HTML
|
241 |
-
# Note: We want to keep <g-section-with-header> as it shows Top Stories
|
242 |
-
remove_tags = [
|
243 |
-
'aside', 'br', 'button', 'cite', 'cnx', 'fieldset', 'figcaption',
|
244 |
-
'figure', 'footer', 'form', 'g-dropdown-button',
|
245 |
-
'g-dropdown-menu-button', 'g-fab', 'g-img', 'g-inner-card',
|
246 |
-
'g-left-button', 'g-link', 'g-loading-icon', 'g-more-linkg-menu-item',
|
247 |
-
'g-popup', 'g-radio-button-group', 'g-right-button',
|
248 |
-
'g-scrolling-carousel', 'g-snackbar', 'g-white-loading-icon',
|
249 |
-
'google-read-aloud-player', 'head', 'hr', 'iframe', 'img', 'input',
|
250 |
-
'label', 'link', 'nav', 'next-route-announcer', 'noscript',
|
251 |
-
'option', 'promo-throttler', 'script', 'select', 'style', 'svg'
|
252 |
-
]
|
253 |
-
valid_attrs = ['href']
|
254 |
-
|
255 |
-
# Remove all unwanted tags
|
256 |
-
for tag in soup(remove_tags):
|
257 |
-
tag.decompose()
|
258 |
-
|
259 |
-
# Remove all unwanted attributes
|
260 |
-
for tag in soup():
|
261 |
-
attrs = dict(tag.attrs)
|
262 |
-
for attr in attrs:
|
263 |
-
if attr not in valid_attrs:
|
264 |
-
del tag[attr]
|
265 |
-
|
266 |
-
# Replace every <span> and <p> with it's inner contents
|
267 |
-
for span in soup.find_all(['span', 'p']):
|
268 |
-
span.replace_with(" " + span.text + " ")
|
269 |
-
|
270 |
-
# Replace links with plain text
|
271 |
-
if remove_links:
|
272 |
-
for link in soup.find_all('a'):
|
273 |
-
link.replace_with(" " + link.text + " ")
|
274 |
-
|
275 |
-
# Remove comments
|
276 |
-
for comment in soup.findAll(text=lambda text: isinstance(text, Comment)):
|
277 |
-
comment.extract()
|
278 |
-
|
279 |
-
# Remove empty divs (e.g. <div> </div>)
|
280 |
-
for div in soup.find_all("div"):
|
281 |
-
if (div.text is None) or (div.text.strip() == ""):
|
282 |
-
div.decompose()
|
283 |
-
|
284 |
-
# Compress nested divs. For example:
|
285 |
-
# <div><div><div>Content</div></div></div> -> <div>Content>/div>)
|
286 |
-
for div in soup.find_all("div"):
|
287 |
-
children = div.findChildren(recursive=False)
|
288 |
-
if len(children) == 1 and children[0].name == 'div':
|
289 |
-
div.replace_with(children[0])
|
290 |
-
|
291 |
-
html = str(soup)
|
292 |
-
|
293 |
-
# Compress whitespace
|
294 |
-
html = re.sub(r'(\s|\n)+', ' ', html)
|
295 |
-
|
296 |
-
return html
|
|
|
1 |
import inspect
|
2 |
import re
|
3 |
import urllib.parse
|
4 |
+
from bs4 import BeautifulSoup
|
5 |
import requests
|
6 |
from selenium import webdriver
|
7 |
+
from util import safe_eval, distill_html
|
8 |
|
9 |
class Tools:
|
10 |
"""
|
|
|
21 |
self.add_tool(
|
22 |
self.calculate,
|
23 |
"Calculate",
|
24 |
+
"Evaluate a mathematical expression using Python. Expression should only contain numbers, operators (+ - * / **), or math module functions.")
|
|
|
25 |
|
26 |
self.add_tool(
|
27 |
self.google_search,
|
28 |
"GoogleSearch",
|
29 |
+
"Use Google to search the web for the topic.")
|
|
|
30 |
|
31 |
self.add_tool(
|
32 |
self.get_web_page,
|
33 |
"GetWebPage",
|
34 |
+
"Get the contents of a web page. Only call this with a valid URL.")
|
|
|
35 |
|
36 |
+
def add_tool(self, func, name, desc):
|
37 |
"""
|
38 |
Adds a Python function as an available tool for the LLM.
|
39 |
The tool name and desc will be included in the LLM system message.
|
|
|
|
|
|
|
40 |
"""
|
41 |
params = inspect.signature(func).parameters
|
42 |
tool_params = list(params.keys())
|
|
|
44 |
self.tools[name] = {
|
45 |
"params": tool_params,
|
46 |
"desc": desc,
|
47 |
+
"func": func
|
|
|
48 |
}
|
49 |
|
50 |
def get_tool_list_for_prompt(self):
|
|
|
65 |
|
66 |
def run_tool(self, name, params):
|
67 |
"""
|
68 |
+
Runs a tool and returns the result.
|
|
|
69 |
"""
|
70 |
if not name in self.tools:
|
71 |
return f"{name}[] is not a valid tool"
|
|
|
77 |
|
78 |
result = tool["func"](params)
|
79 |
|
80 |
+
return result
|
81 |
|
82 |
def set_browser(self, browser):
|
83 |
"""
|
|
|
205 |
except:
|
206 |
return "Error retrieving web page"
|
207 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
util.py
ADDED
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import re
|
2 |
+
import math
|
3 |
+
from bs4 import BeautifulSoup, Comment
|
4 |
+
|
5 |
+
def safe_eval(expression):
    """
    Evaluate a mathematical expression using a restricted eval().

    Only the math-module functions/constants in ``safe_list`` (plus the
    ``abs`` builtin) are exposed to the expression, and ``__builtins__``
    is stripped from the globals so ordinary builtins are unreachable.

    NOTE(security): eval() on untrusted input is never fully safe, even
    with __builtins__ removed — attribute chains off literals can still
    reach object internals. Input here comes from an LLM tool call, so
    treat it as semi-trusted at best.

    Args:
        expression: Expression string, e.g. ``"sqrt(2) * pi"``.

    Returns:
        The numeric result of evaluating the expression.

    Raises:
        ValueError: If the expression fails to parse or evaluate.
    """

    safe_list = [
        'acos', 'asin', 'atan', 'atan2', 'ceil', 'cos', 'cosh',
        'degrees', 'e', 'exp', 'fabs', 'floor', 'fmod', 'frexp', 'hypot',
        'ldexp', 'log', 'log10', 'modf', 'pi', 'pow', 'radians', 'sin', 'sinh',
        'sqrt', 'tan', 'tanh'
    ]

    # Resolve each allowed name against the math module so the expression
    # sees real callables (the older locals().get() approach mapped them
    # all to None).
    safe_dict = {k: getattr(math, k) for k in safe_list}
    safe_dict['abs'] = abs

    try:
        return eval(expression, {"__builtins__": None}, safe_dict)
    except Exception as e:
        # Chain the original exception so the root cause stays visible
        # in tracebacks instead of being flattened into a bare ValueError.
        raise ValueError(f'Error evaluating expression: {e}') from e
|
24 |
+
|
25 |
+
def distill_html(raw_html, remove_links=False):
    """
    Reduce HTML to the minimal tags necessary to understand the content.

    Strips layout/chrome tags, drops all attributes except ``href``,
    unwraps <span>/<p>, removes comments and empty <div>s, flattens
    singly-nested <div>s, and compresses whitespace.

    Args:
        raw_html: Raw HTML string to distill.
        remove_links: Set True to also replace <a> tags with their inner text.

    Returns:
        A compact HTML string suitable for feeding to a language model.
    """
    soup = BeautifulSoup(raw_html, 'html.parser')

    # Tags (with inner content) that should be completely removed from the HTML
    # Note: We want to keep <g-section-with-header> as it shows Top Stories
    remove_tags = [
        'aside', 'br', 'button', 'cite', 'cnx', 'fieldset', 'figcaption',
        'figure', 'footer', 'form', 'g-dropdown-button',
        'g-dropdown-menu-button', 'g-fab', 'g-img', 'g-inner-card',
        'g-left-button', 'g-link', 'g-loading-icon', 'g-more-linkg-menu-item',
        'g-popup', 'g-radio-button-group', 'g-right-button',
        'g-scrolling-carousel', 'g-snackbar', 'g-white-loading-icon',
        'google-read-aloud-player', 'head', 'hr', 'iframe', 'img', 'input',
        'label', 'link', 'nav', 'next-route-announcer', 'noscript',
        'option', 'promo-throttler', 'script', 'select', 'style', 'svg'
    ]
    valid_attrs = ['href']

    # Remove all unwanted tags
    for tag in soup(remove_tags):
        tag.decompose()

    # Remove all unwanted attributes
    for tag in soup():
        # Copy attrs before mutating so deletion doesn't affect iteration.
        attrs = dict(tag.attrs)
        for attr in attrs:
            if attr not in valid_attrs:
                del tag[attr]

    # Replace every <span> and <p> with its inner contents
    for span in soup.find_all(['span', 'p']):
        span.replace_with(" " + span.text + " ")

    # Replace links with plain text
    if remove_links:
        for link in soup.find_all('a'):
            link.replace_with(" " + link.text + " ")

    # Remove comments. find_all(string=...) replaces the deprecated
    # findAll(text=...) spelling in modern BeautifulSoup.
    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
        comment.extract()

    # Remove empty divs (e.g. <div> </div>)
    for div in soup.find_all("div"):
        if (div.text is None) or (div.text.strip() == ""):
            div.decompose()

    # Compress singly-nested divs. For example:
    # <div><div><div>Content</div></div></div> -> <div>Content</div>
    # (each pass unwraps one level per matching div)
    for div in soup.find_all("div"):
        children = div.findChildren(recursive=False)
        if len(children) == 1 and children[0].name == 'div':
            div.replace_with(children[0])

    html = str(soup)

    # Compress whitespace
    html = re.sub(r'(\s|\n)+', ' ', html)

    return html
|