iarbel committed on
Commit
1896c1d
1 Parent(s): 17d7e7d

add error handling

Browse files
Files changed (2) hide show
  1. app.py +4 -0
  2. src/scrape.py +7 -2
app.py CHANGED
@@ -17,6 +17,10 @@ def asin_to_pdp(asin_or_url: str) -> dict:
17
 
18
  html = scrape.zyte_call(asin_url)
19
  asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
 
 
 
 
20
  return asin_pdp
21
 
22
 
 
17
 
18
  html = scrape.zyte_call(asin_url)
19
  asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
20
+ if not asin_pdp:
21
+ raise gr.Error('Input URL not found (404)')
22
+ elif not asin_pdp.get('title') or not asin_pdp.get('tech_data'):
23
+ raise gr.Error("Couldn't fetch title or technical details from input URL")
24
  return asin_pdp
25
 
26
 
src/scrape.py CHANGED
@@ -3,9 +3,10 @@ import os
3
  import requests
4
  from base64 import b64decode
5
  from bs4 import BeautifulSoup
6
- from typing import Dict
7
 
8
  Z_KEY = os.environ.get('ZYTE_KEY')
 
9
 
10
 
11
  def zyte_call(url: str) -> bytes:
@@ -22,7 +23,11 @@ def zyte_call(url: str) -> bytes:
22
  return http_response_body
23
 
24
 
25
- def get_asin_pdp(soup: BeautifulSoup) -> Dict[str, str]:
 
 
 
 
26
  # Get ASIN
27
  try:
28
  asin = soup.find('link', rel='canonical')['href'].split('/')[-1]
 
3
  import requests
4
  from base64 import b64decode
5
  from bs4 import BeautifulSoup
6
+ from typing import Dict, Optional
7
 
8
  Z_KEY = os.environ.get('ZYTE_KEY')
9
+ PAGE_NOT_FOUND_STR = 'page not found'
10
 
11
 
12
  def zyte_call(url: str) -> bytes:
 
23
  return http_response_body
24
 
25
 
26
+ def get_asin_pdp(soup: BeautifulSoup) -> Optional[Dict[str, str]]:
27
+ # Check if 404
28
+ if PAGE_NOT_FOUND_STR in soup.find('title').text.lower():
29
+ return None
30
+
31
  # Get ASIN
32
  try:
33
  asin = soup.find('link', rel='canonical')['href'].split('/')[-1]