John Yang committed
Commit 69177fb
1 Parent(s): 631c491

Refactoring `verbose` arg

Files changed (2):
  1. app.py +33 -32
  2. predict_help.py +17 -18
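
In short: the commit replaces the module-level `VERBOSE` constant in predict_help.py with a per-call `verbose` keyword argument that `run_episode` forwards to every scraper helper, and it turns the end-of-episode return value from a concatenated string into a dict. A minimal sketch of the pattern (signatures from the diff, bodies elided):

# Before: one module-level switch governed logging in every helper
VERBOSE = True

def parse_results_amz(query, page_num=None):
    if VERBOSE:
        print("Search Results URL: ...")

# After: each helper takes its own flag, and run_episode(goal, env, verbose=...)
# forwards it on every call
def parse_results_amz(query, page_num=None, verbose=True):
    if verbose:
        print("Search Results URL: ...")
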
app.py CHANGED
@@ -172,13 +172,11 @@ def run_episode(goal, env, verbose=True):
         page_type = Page.SEARCH
 
     elif action == 'click[buy now]':
-        return_value = None
+        return_value = {}
         if env == 'amazon':
-            asin_url = f"https://www.amazon.com/dp/{asin}"
-            return_value = "Product URL: " + asin_url
+            return_value['Product URL'] = f"https://www.amazon.com/dp/{asin}"
             if len(clicked_options) > 0:
-                options_str = ', '.join(list(clicked_options))
-                return_value += "\nSelected Options: " + options_str
+                return_value['Selected Options'] = ', '.join(list(clicked_options))
         if env == 'webshop':
             query_str = "+".join(search_terms.split())
             options_str = json.dumps(options)
@@ -186,13 +184,12 @@ def run_episode(goal, env, verbose=True):
                 f'{WEBSHOP_URL}/item_page/{WEBSHOP_SESSION}/'
                 f'{asin}/{query_str}/{page_num}/{options_str}'
             )
-            return_value = "Product URL: " + asin_url
+            return_value['Product URL'] = asin_url
         if env == 'ebay':
-            asin_url = f"https:///www.ebay.com/itm/{asin}"
-            return_value = "Product URL: " + asin_url
+            asin_url = f"https://www.ebay.com/itm/{asin}"
+            return_value['Product URL'] = asin_url
             if len(clicked_options) > 0:
-                options_str = ', '.join(list(clicked_options))
-                return_value += "\nSelected Options: " + options_str
+                return_value['Selected Options'] = ', '.join(list(clicked_options))
         return return_value
 
     elif prev_page_type == Page.ITEM_PAGE:
@@ -216,16 +213,19 @@ def run_episode(goal, env, verbose=True):
     if page_type == Page.RESULTS:
         if search_terms in search_results_cache:
             data = search_results_cache[search_terms]
+            if verbose:
+                print(f"Loading cached results page for \"{search_terms}\"")
         else:
             begin = time.time()
             if env == 'amazon':
-                data = parse_results_amz(search_terms, page_num)
+                data = parse_results_amz(search_terms, page_num, verbose)
             if env == 'webshop':
-                data = parse_results_ws(search_terms, page_num)
+                data = parse_results_ws(search_terms, page_num, verbose)
             if env == 'ebay':
-                data = parse_results_ebay(search_terms, page_num)
+                data = parse_results_ebay(search_terms, page_num, verbose)
             end = time.time()
-            print("Parsing search results took", end-begin, "seconds")
+            if verbose:
+                print(f"Parsing search results took {end-begin} seconds")
 
         search_results_cache[search_terms] = data
         num_prods = len(data)
@@ -233,18 +233,20 @@ def run_episode(goal, env, verbose=True):
         title_to_asin_map[d['Title']] = d['asin']
     elif page_type == Page.ITEM_PAGE or page_type == Page.SUB_PAGE:
         if asin in product_map:
-            print("Loading cached item page for", asin)
+            if verbose:
+                print("Loading cached item page for", asin)
             data = product_map[asin]
         else:
             begin = time.time()
             if env == 'amazon':
-                data = parse_item_page_amz(asin)
+                data = parse_item_page_amz(asin, verbose)
             if env == 'webshop':
-                data = parse_item_page_ws(asin, search_terms, page_num, options)
+                data = parse_item_page_ws(asin, search_terms, page_num, options, verbose)
             if env == 'ebay':
-                data = parse_item_page_ebay(asin)
+                data = parse_item_page_ebay(asin, verbose)
             end = time.time()
-            print("Parsing item page took", end-begin, "seconds")
+            if verbose:
+                print("Parsing item page took", end-begin, "seconds")
         product_map[asin] = data
     elif page_type == Page.SEARCH:
         if verbose:
@@ -260,23 +262,23 @@ def run_episode(goal, env, verbose=True):
     html_str = dict_to_fake_html(data, page_type, asin, sub_page_type, options, product_map, goal)
     obs = convert_html_to_text(html_str, simple=False, clicked_options=clicked_options, visited_asins=visited_asins)
     end = time.time()
-    print("[Page Info -> WebShop HTML -> Observation] took", end-begin, "seconds")
+    if verbose:
+        print("[Page Info -> WebShop HTML -> Observation] took", end-begin, "seconds")
 
     # Dict of Info -> Valid Action State (Info)
     begin = time.time()
     prod_arg = product_map if page_type == Page.ITEM_PAGE else data
     info = convert_dict_to_actions(page_type, prod_arg, asin, page_num, num_prods)
     end = time.time()
-    print("Extracting available actions took", end-begin, "seconds")
+    if verbose:
+        print("Extracting available actions took", end-begin, "seconds")
 
-    if i == 99:
-        return_value = None
+    if i == 50:
+        return_value = {}
         if env == 'amazon':
-            asin_url = f"https://www.amazon.com/dp/{asin}"
-            return_value = "Product URL: " + asin_url
+            return_value['Product URL'] = f"https://www.amazon.com/dp/{asin}"
             if len(clicked_options) > 0:
-                options_str = ', '.join(list(clicked_options))
-                return_value += "\nSelected Options: " + options_str
+                return_value['Selected Options'] = ', '.join(list(clicked_options))
         if env == 'webshop':
             query_str = "+".join(search_terms.split())
             options_str = json.dumps(options)
@@ -284,13 +286,12 @@ def run_episode(goal, env, verbose=True):
                 f'{WEBSHOP_URL}/item_page/{WEBSHOP_SESSION}/'
                 f'{asin}/{query_str}/{page_num}/{options_str}'
             )
-            return_value = "Product URL: " + asin_url
+            return_value['Product URL'] = asin_url
         if env == 'ebay':
-            asin_url = f"https:///www.ebay.com/itm/{asin}"
-            return_value = "Product URL: " + asin_url
+            asin_url = f"https://www.ebay.com/itm/{asin}"
+            return_value['Product URL'] = asin_url
             if len(clicked_options) > 0:
-                options_str = ', '.join(list(clicked_options))
-                return_value += "\nSelected Options: " + options_str
+                return_value['Selected Options'] = ', '.join(list(clicked_options))
         return return_value
 
 gr.Interface(fn=run_episode,\
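
Since `return_value` is now a dict keyed by 'Product URL' and 'Selected Options' rather than a newline-joined string, a caller can read the fields directly. A hypothetical consumer sketch (the goal string is illustrative):

# Keys are the ones set in the diff above; run_episode may also
# return None if the episode ends without a buy-now action.
result = run_episode("buy a wireless mouse", env='amazon', verbose=False)
if result is not None:
    print(result.get('Product URL'))
    print(result.get('Selected Options', '(none selected)'))
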
predict_help.py CHANGED
@@ -17,7 +17,6 @@ class Page(Enum):
 
 HEADER_ = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/101.0.4951.64 Safari/537.36'
 DEBUG_HTML = "temp.html"
-VERBOSE = True
 NUM_PROD_LIMIT = 10
 
 WEBSHOP_URL = "http://3.83.245.205:3000"
@@ -29,11 +28,11 @@ def get_url(url):
     proxy_url = 'http://api.scraperapi.com/?' + urlencode(payload)
     return proxy_url
 
-def parse_results_ebay(query, page_num=None):
+def parse_results_ebay(query, page_num=None, verbose=True):
     query_string = '+'.join(query.split())
     page_num = 1 if page_num is None else page_num
     url = f'https://www.ebay.com/sch/i.html?_nkw={query_string}&_pgn={page_num}'
-    if VERBOSE:
+    if verbose:
         print(f"Search Results URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.text, 'html.parser')
@@ -61,21 +60,21 @@ def parse_results_ebay(query, page_num=None):
             "Title": title,
             "Price": price
         })
-    if VERBOSE:
+    if verbose:
         print(f"Scraped {len(results)} products")
     return results
 
-def parse_item_page_ebay(asin):
+def parse_item_page_ebay(asin, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
 
     url = f"https://www.ebay.com/itm/{asin}"
-    if VERBOSE:
+    if verbose:
         print(f"Item Page URL: {url}")
     begin = time.time()
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     end = time.time()
-    if VERBOSE:
+    if verbose:
         print(f"Item page scraping took {end-begin} seconds")
     soup = BeautifulSoup(webpage.content, "html.parser")
 
@@ -148,14 +147,14 @@ def parse_item_page_ebay(asin):
     return product_dict
 
 
-def parse_results_ws(query, page_num=None):
+def parse_results_ws(query, page_num=None, verbose=True):
     query_string = '+'.join(query.split())
     page_num = 1 if page_num is None else page_num
     url = (
         f'{WEBSHOP_URL}/search_results/{WEBSHOP_SESSION}/'
         f'{query_string}/{page_num}'
     )
-    if VERBOSE:
+    if verbose:
         print(f"Search Results URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
@@ -185,11 +184,11 @@ def parse_results_ws(query, page_num=None):
             "Price": price
         })
 
-    if VERBOSE:
+    if verbose:
         print(f"Scraped {len(results)} products")
     return results
 
-def parse_item_page_ws(asin, query, page_num, options):
+def parse_item_page_ws(asin, query, page_num, options, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
 
@@ -199,7 +198,7 @@ def parse_item_page_ws(asin, query, page_num, options):
         f'{WEBSHOP_URL}/item_page/{WEBSHOP_SESSION}/'
         f'{asin}/{query_string}/{page_num}/{options_string}'
     )
-    if VERBOSE:
+    if verbose:
         print("Item Page URL: ", url)
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
@@ -258,11 +257,11 @@ def parse_item_page_ws(asin, query, page_num, options):
     return product_dict
 
 # Query -> Search Result ASINs
-def parse_results_amz(query, page_num=None):
+def parse_results_amz(query, page_num=None, verbose=True):
     url = 'https://www.amazon.com/s?k=' + query.replace(" ", "+")
     if page_num is not None:
         url += "&page=" + str(page_num)
-    if VERBOSE:
+    if verbose:
         print(f"Search Results URL: {url}")
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     soup = BeautifulSoup(webpage.content, 'html.parser')
@@ -286,22 +285,22 @@ def parse_results_amz(query, page_num=None):
             'Price': price.text.strip().strip("$")
         }
         results.append(result)
-    if VERBOSE:
+    if verbose:
         print("Scraped", len(results), "products")
     return results
 
 # Scrape information of each product
-def parse_item_page_amz(asin):
+def parse_item_page_amz(asin, verbose=True):
     product_dict = {}
     product_dict["asin"] = asin
 
     url = f"https://www.amazon.com/dp/{asin}"
-    if VERBOSE:
+    if verbose:
         print("Item Page URL:", url)
     begin = time.time()
     webpage = requests.get(url, headers={'User-Agent': HEADER_, 'Accept-Language': 'en-US, en;q=0.5'})
     end = time.time()
-    if VERBOSE:
+    if verbose:
         print(f"Item page scraping took {end-begin} seconds")
     soup = BeautifulSoup(webpage.content, "html.parser")
 
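
With the `VERBOSE` constant gone, callers opt into scraper logging per call instead of editing the module. A minimal usage sketch (the query string and index are illustrative):

from predict_help import parse_results_amz, parse_item_page_amz

# Quiet search, verbose item-page fetch; both flags default to True
results = parse_results_amz("wireless mouse", page_num=1, verbose=False)
if results:
    item = parse_item_page_amz(results[0]['asin'], verbose=True)
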