Commit 730ca01 • John Yang committed
Parent(s): 69177fb

Code clean up
Files changed:
- .gitignore +1 -0
- app.py +9 -10
- predict_help.py +17 -13
.gitignore CHANGED
@@ -1,3 +1,4 @@
 *.pyc
+*.ipynb
 
 .DS_Store
app.py CHANGED
@@ -119,7 +119,7 @@ def run_episode(goal, env, verbose=True):
     search_results_cache = {}
     visited_asins, clicked_options = set(), set()
     sub_page_type, page_type, page_num = None, None, None
-    search_terms, prod_title, asin
+    search_terms, prod_title, asin = None, None, None
     options = {}
 
     for i in range(100):
@@ -228,7 +228,6 @@ def run_episode(goal, env, verbose=True):
             print(f"Parsing search results took {end-begin} seconds")
 
             search_results_cache[search_terms] = data
-            num_prods = len(data)
             for d in data:
                 title_to_asin_map[d['Title']] = d['asin']
         elif page_type == Page.ITEM_PAGE or page_type == Page.SUB_PAGE:
@@ -268,7 +267,7 @@ def run_episode(goal, env, verbose=True):
         # Dict of Info -> Valid Action State (Info)
         begin = time.time()
         prod_arg = product_map if page_type == Page.ITEM_PAGE else data
-        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num
+        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num)
         end = time.time()
         if verbose:
             print("Extracting available actions took", end-begin, "seconds")
@@ -294,19 +293,19 @@ def run_episode(goal, env, verbose=True):
     return_value['Selected Options'] = ', '.join(list(clicked_options))
     return return_value
 
-gr.Interface(fn=run_episode
+gr.Interface(fn=run_episode,
     inputs=[
         gr.inputs.Textbox(lines=7, label="Input Text"),
         gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment')
-    ]
-    outputs="text"
+    ],
+    outputs="text",
     examples=[
         ["I want to find a gold floor lamp with a glass shade and a nickel finish that i can use for my living room, and price lower than 270.00 dollars", "Amazon"],
         ["I need some cute heart-shaped glittery cupcake picks as a gift to bring to a baby shower", "Amazon"],
         ["I'm trying to find white bluetooth speakers that are not only water resistant but also come with stereo sound", "eBay"],
         ["find me the soy free 3.5 ounce 4-pack of dang thai rice chips, and make sure they are the aged cheddar flavor. i also need the ones in the resealable bags", "eBay"]
-    ]
-    title="WebShop"
-    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>"
-    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>"
+    ],
+    title="WebShop",
+    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>",
+    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>",
 ).launch(inline=False)
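For reference, the repaired launch block is the standard keyword-argument form of the pre-3.x Gradio API that app.py targets. Below is a minimal sketch with run_episode stubbed out (the real function above drives the agent for up to 100 steps) and the examples, article, and description arguments dropped for brevity:

import gradio as gr

def run_episode(goal, env, verbose=True):
    # Stand-in for the real episode loop in app.py.
    return f"Would run the agent on {env} for goal: {goal}"

gr.Interface(
    fn=run_episode,
    inputs=[
        gr.inputs.Textbox(lines=7, label="Input Text"),
        gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment'),
    ],
    outputs="text",
    title="WebShop",
).launch(inline=False)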
predict_help.py CHANGED
@@ -22,12 +22,6 @@ NUM_PROD_LIMIT = 10
 WEBSHOP_URL = "http://3.83.245.205:3000"
 WEBSHOP_SESSION = "abc"
 
-API = '85956985fae328bfe5a759a2984448d2'
-def get_url(url):
-    payload = {'api_key': API, 'url': url , 'country_code': 'us'}
-    proxy_url = 'http://api.scraperapi.com/?' + urlencode(payload)
-    return proxy_url
-
 def parse_results_ebay(query, page_num=None, verbose=True):
     query_string = '+'.join(query.split())
     page_num = 1 if page_num is None else page_num
@@ -64,6 +58,7 @@ def parse_results_ebay(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ebay(asin, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -188,6 +183,7 @@ def parse_results_ws(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ws(asin, query, page_num, options, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -199,7 +195,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{asin}/{query_string}/{page_num}/{options_string}'
     )
     if verbose:
-        print("Item Page URL: "
+        print(f"Item Page URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
 
@@ -240,6 +236,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Description/{options_string}'
     )
+    if verbose:
+        print(f"Item Description URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     product_dict["Description"] = soup.find(name="p", attrs={'class': 'product-info'}).text.strip()
@@ -249,6 +247,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Features/{options_string}'
     )
+    if verbose:
+        print(f"Item Features URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     bullets = soup.find(name="ul").findAll(name="li")
@@ -256,6 +256,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
 
     return product_dict
 
+
 # Query -> Search Result ASINs
 def parse_results_amz(query, page_num=None, verbose=True):
     url = 'https://www.amazon.com/s?k=' + query.replace(" ", "+")
@@ -289,6 +290,7 @@ def parse_results_amz(query, page_num=None, verbose=True):
         print("Scraped", len(results), "products")
     return results
 
+
 # Scrape information of each product
 def parse_item_page_amz(asin, verbose=True):
     product_dict = {}
@@ -385,7 +387,9 @@ def parse_item_page_amz(asin, verbose=True):
     product_dict["options"], product_dict["option_to_image"] = options, options_to_image
     return product_dict
 
+
 # Get text observation from html
+# TODO[john-b-yang]: Similar to web_agent_site/envs/...text_env.py func def, merge?
 def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins=None):
     def tag_visible(element):
         ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
@@ -419,18 +423,18 @@ def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins
         observation += processed_t + '\n'
     return observation
 
-
-
+
+# Get action from dict of values retrieved from html
+def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
     info = {"valid": []}
     if page_type == Page.RESULTS:
         info["valid"] = ['click[back to search]']
-        if products is None or page_num is None
+        if products is None or page_num is None:
             print(page_num)
-            print(num_prods)
             print(products)
-            raise Exception('Provide `products`, `
+            raise Exception('Provide `products`, `page_num` to get `results` valid actions')
         # Decide whether to add `next >` as clickable based on # of search results
-        if
+        if len(products) > 10:
             info["valid"].append('click[next >]')
         # Add `< prev` as clickable if not first page of search results
         if page_num > 1:
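Pieced together from the added lines above, the RESULTS branch of convert_dict_to_actions now validates its inputs before listing clickable actions. The sketch below assumes a stand-in Page enum, an assumed 'click[< prev]' action string (the shown hunk ends at the page_num check), and a plain return of info; the ITEM_PAGE and SUB_PAGE branches are omitted:

from enum import Enum

class Page(Enum):
    # Hypothetical stand-in for the Page enum that predict_help.py actually defines.
    RESULTS = 'results'

def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
    info = {"valid": []}
    if page_type == Page.RESULTS:
        info["valid"] = ['click[back to search]']
        if products is None or page_num is None:
            raise Exception('Provide `products`, `page_num` to get `results` valid actions')
        # More than one page of results -> `next >` becomes clickable
        if len(products) > 10:
            info["valid"].append('click[next >]')
        # Past the first results page -> `< prev` becomes clickable (assumed action string)
        if page_num > 1:
            info["valid"].append('click[< prev]')
    return info

Called with, say, a 12-item results list and page_num=2, this returns {'valid': ['click[back to search]', 'click[next >]', 'click[< prev]']}.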