John Yang committed
Commit 730ca01
Parent: 69177fb

Code clean up

Files changed (3)
  1. .gitignore +1 -0
  2. app.py +9 -10
  3. predict_help.py +17 -13
.gitignore CHANGED
@@ -1,3 +1,4 @@
 *.pyc
+*.ipynb
 
 .DS_Store
app.py CHANGED
@@ -119,7 +119,7 @@ def run_episode(goal, env, verbose=True):
     search_results_cache = {}
     visited_asins, clicked_options = set(), set()
     sub_page_type, page_type, page_num = None, None, None
-    search_terms, prod_title, asin, num_prods, = None, None, None, None
+    search_terms, prod_title, asin = None, None, None
     options = {}
 
     for i in range(100):
@@ -228,7 +228,6 @@ def run_episode(goal, env, verbose=True):
                 print(f"Parsing search results took {end-begin} seconds")
 
             search_results_cache[search_terms] = data
-            num_prods = len(data)
             for d in data:
                 title_to_asin_map[d['Title']] = d['asin']
         elif page_type == Page.ITEM_PAGE or page_type == Page.SUB_PAGE:
@@ -268,7 +267,7 @@ def run_episode(goal, env, verbose=True):
         # Dict of Info -> Valid Action State (Info)
         begin = time.time()
         prod_arg = product_map if page_type == Page.ITEM_PAGE else data
-        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num, num_prods)
+        info = convert_dict_to_actions(page_type, prod_arg, asin, page_num)
         end = time.time()
         if verbose:
             print("Extracting available actions took", end-begin, "seconds")
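Note: the three hunks above are one logical change. `num_prods` was only ever `len(data)` for the current results page, so the caller stops computing and threading it through, and the callee derives the count itself; the matching hunk in predict_help.py below swaps `num_prods > 10` for `len(products) > 10`. A minimal, hypothetical illustration (not the repo's code) of why the redundant parameter is worth dropping:

# Passing a collection plus its precomputed length invites the two drifting apart.
def has_next_page_risky(products, num_prods):
    return num_prods > 10          # num_prods may be stale or simply wrong

# One source of truth: derive the count where it is used.
def has_next_page(products):
    return len(products) > 10

print(has_next_page([{} for _ in range(12)]))   # True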
@@ -294,19 +293,19 @@ def run_episode(goal, env, verbose=True):
     return_value['Selected Options'] = ', '.join(list(clicked_options))
     return return_value
 
-gr.Interface(fn=run_episode,\
+gr.Interface(fn=run_episode,
     inputs=[
         gr.inputs.Textbox(lines=7, label="Input Text"),
         gr.inputs.Radio(['Amazon', 'eBay'], type="value", default="Amazon", label='Environment')
-    ],\
-    outputs="text",\
+    ],
+    outputs="text",
     examples=[
         ["I want to find a gold floor lamp with a glass shade and a nickel finish that i can use for my living room, and price lower than 270.00 dollars", "Amazon"],
         ["I need some cute heart-shaped glittery cupcake picks as a gift to bring to a baby shower", "Amazon"],
         ["I'm trying to find white bluetooth speakers that are not only water resistant but also come with stereo sound", "eBay"],
         ["find me the soy free 3.5 ounce 4-pack of dang thai rice chips, and make sure they are the aged cheddar flavor. i also need the ones in the resealable bags", "eBay"]
-    ],\
-    title="WebShop",\
-    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>",\
-    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>",\
+    ],
+    title="WebShop",
+    article="<p style='padding-top:15px;text-align:center;'>To learn more about this project, check out the <a href='https://webshop-pnlp.github.io/' target='_blank'>project page</a>!</p>",
+    description="<p style='text-align:center;'>Sim-to-real transfer of agent trained on WebShop to search a desired product on Amazon from any natural language query!</p>",
 ).launch(inline=False)
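Note: the dropped backslashes are cosmetic, not behavioral. Python continues a statement implicitly inside any unclosed (), [], or {}, so explicit `\` continuations inside a call are redundant, and they are fragile: a single trailing space after the backslash is a SyntaxError. A small standalone illustration:

# Inside an open call, lines continue with no backslashes needed.
interface_args = dict(
    fn=print,
    inputs=["text"],
    outputs="text",
)

# A backslash is only required when no bracket is open:
total = 1 + \
        2
print(interface_args["outputs"], total)   # text 3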
 
predict_help.py CHANGED
@@ -22,12 +22,6 @@ NUM_PROD_LIMIT = 10
 WEBSHOP_URL = "http://3.83.245.205:3000"
 WEBSHOP_SESSION = "abc"
 
-API = '85956985fae328bfe5a759a2984448d2'
-def get_url(url):
-    payload = {'api_key': API, 'url': url , 'country_code': 'us'}
-    proxy_url = 'http://api.scraperapi.com/?' + urlencode(payload)
-    return proxy_url
-
 def parse_results_ebay(query, page_num=None, verbose=True):
     query_string = '+'.join(query.split())
     page_num = 1 if page_num is None else page_num
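Note: this hunk deletes an unused ScraperAPI proxy helper and, more importantly, an API key committed in plain text; a credential that has appeared in a repository's history should be treated as leaked and rotated. If the proxy were ever reintroduced, the conventional pattern is to read the key from the environment. A hypothetical sketch (the helper and variable names are assumptions, not part of this commit):

import os
from urllib.parse import urlencode

def get_proxy_url(url):
    # SCRAPERAPI_KEY is an assumed environment variable name, set outside the repo.
    api_key = os.environ["SCRAPERAPI_KEY"]
    payload = {'api_key': api_key, 'url': url, 'country_code': 'us'}
    return 'http://api.scraperapi.com/?' + urlencode(payload)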
@@ -64,6 +58,7 @@ def parse_results_ebay(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ebay(asin, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -188,6 +183,7 @@ def parse_results_ws(query, page_num=None, verbose=True):
         print(f"Scraped {len(results)} products")
     return results
 
+
 def parse_item_page_ws(asin, query, page_num, options, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
@@ -199,7 +195,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{asin}/{query_string}/{page_num}/{options_string}'
     )
     if verbose:
-        print("Item Page URL: ", url)
+        print(f"Item Page URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
 
@@ -240,6 +236,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Description/{options_string}'
     )
+    if verbose:
+        print(f"Item Description URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     product_dict["Description"] = soup.find(name="p", attrs={'class': 'product-info'}).text.strip()
@@ -249,6 +247,8 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
         f'{WEBSHOP_URL}/item_sub_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/Features/{options_string}'
     )
+    if verbose:
+        print(f"Item Features URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
     bullets = soup.find(name="ul").findAll(name="li")
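Note: the two new `if verbose:` blocks mirror the item-page one above, so every WebShop fetch now reports the URL it hits, and the older print moves to an f-string for consistency. The log-request-parse sequence now appears three times in parse_item_page_ws; a hypothetical follow-up (not in this commit) could factor it out:

import requests
from bs4 import BeautifulSoup

HEADER_ = 'Mozilla/5.0'   # stand-in; the module defines its own user-agent string

def fetch_soup(url, label, verbose=True):
    # One home for the repeated verbose-log + GET + parse pattern.
    if verbose:
        print(f"{label} URL: {url}")
    webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
    return BeautifulSoup(webpage.content, 'html.parser')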
@@ -256,6 +256,7 @@ def parse_item_page_ws(asin, query, page_num, options, verbose=True):
 
     return product_dict
 
+
 # Query -> Search Result ASINs
 def parse_results_amz(query, page_num=None, verbose=True):
     url = 'https://www.amazon.com/s?k=' + query.replace(" ", "+")
@@ -289,6 +290,7 @@ def parse_results_amz(query, page_num=None, verbose=True):
         print("Scraped", len(results), "products")
     return results
 
+
 # Scrape information of each product
 def parse_item_page_amz(asin, verbose=True):
     product_dict = {}
@@ -385,7 +387,9 @@ def parse_item_page_amz(asin, verbose=True):
     product_dict["options"], product_dict["option_to_image"] = options, options_to_image
     return product_dict
 
+
 # Get text observation from html
+# TODO[john-b-yang]: Similar to web_agent_site/envs/...text_env.py func def, merge?
 def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins=None):
     def tag_visible(element):
         ignore = {'style', 'script', 'head', 'title', 'meta', '[document]'}
@@ -419,18 +423,18 @@ def convert_html_to_text(html, simple=False, clicked_options=None, visited_asins
         observation += processed_t + '\n'
     return observation
 
-# Get action from dict
-def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None, num_prods=None) -> dict:
+
+# Get action from dict of values retrieved from html
+def convert_dict_to_actions(page_type, products=None, asin=None, page_num=None) -> dict:
     info = {"valid": []}
     if page_type == Page.RESULTS:
         info["valid"] = ['click[back to search]']
-        if products is None or page_num is None or num_prods is None:
+        if products is None or page_num is None:
             print(page_num)
-            print(num_prods)
             print(products)
-            raise Exception('Provide `products`, `num_prods`, `page_num` to get `results` valid actions')
+            raise Exception('Provide `products`, `page_num` to get `results` valid actions')
         # Decide whether to add `next >` as clickable based on # of search results
-        if num_prods > 10:
+        if len(products) > 10:
             info["valid"].append('click[next >]')
         # Add `< prev` as clickable if not first page of search results
         if page_num > 1:
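Note: with `num_prods` gone, the pagination decision reads straight off the product list. The literal 10 appears to correspond to the NUM_PROD_LIMIT = 10 visible in this file's first hunk header; using the constant instead of the literal would be a natural further cleanup, though this commit does not make it. The rule in isolation, as a hedged sketch:

NUM_PROD_LIMIT = 10   # page size, per the constant at the top of predict_help.py

def pagination_actions(products, page_num):
    # Sketch of the results-page branch above; action strings follow
    # the click[...] convention used throughout the file.
    actions = []
    if len(products) > NUM_PROD_LIMIT:   # more results than one page shows
        actions.append('click[next >]')
    if page_num > 1:                     # not on the first results page
        actions.append('click[< prev]')
    return actions

print(pagination_actions([{}] * 12, page_num=2))
# ['click[next >]', 'click[< prev]']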
 