Spaces:
Sleeping
Sleeping
add error handling
Browse files
- app.py +4 -0
- src/scrape.py +7 -2
app.py
CHANGED
@@ -17,6 +17,10 @@ def asin_to_pdp(asin_or_url: str) -> dict:
|
|
17 |
|
18 |
html = scrape.zyte_call(asin_url)
|
19 |
asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
|
|
|
|
|
|
|
|
|
20 |
return asin_pdp
|
21 |
|
22 |
|
|
|
17 |
|
18 |
html = scrape.zyte_call(asin_url)
|
19 |
asin_pdp = scrape.get_asin_pdp(BeautifulSoup(html, 'html.parser'))
|
20 |
+
if not asin_pdp:
|
21 |
+
raise gr.Error('Input URL not found (404)')
|
22 |
+
elif not asin_pdp.get('title') or not asin_pdp.get('tech_data'):
|
23 |
+
raise gr.Error("Couldn't fetch title or technical details from input URL")
|
24 |
return asin_pdp
|
25 |
|
26 |
|
src/scrape.py
CHANGED
@@ -3,9 +3,10 @@ import os
|
|
3 |
import requests
|
4 |
from base64 import b64decode
|
5 |
from bs4 import BeautifulSoup
|
6 |
-
from typing import Dict
|
7 |
|
8 |
Z_KEY = os.environ.get('ZYTE_KEY')
|
|
|
9 |
|
10 |
|
11 |
def zyte_call(url: str) -> bytes:
|
@@ -22,7 +23,11 @@ def zyte_call(url: str) -> bytes:
|
|
22 |
return http_response_body
|
23 |
|
24 |
|
25 |
-
def get_asin_pdp(soup: BeautifulSoup) -> Dict[str, str]:
|
|
|
|
|
|
|
|
|
26 |
# Get ASIN
|
27 |
try:
|
28 |
asin = soup.find('link', rel='canonical')['href'].split('/')[-1]
|
|
|
3 |
import requests
|
4 |
from base64 import b64decode
|
5 |
from bs4 import BeautifulSoup
|
6 |
+
from typing import Dict, Optional
|
7 |
|
8 |
Z_KEY = os.environ.get('ZYTE_KEY')
|
9 |
+
PAGE_NOT_FOUND_STR = 'page not found'
|
10 |
|
11 |
|
12 |
def zyte_call(url: str) -> bytes:
|
|
|
23 |
return http_response_body
|
24 |
|
25 |
|
26 |
+
def get_asin_pdp(soup: BeautifulSoup) -> Optional[Dict[str, str]]:
|
27 |
+
# Check if 404
|
28 |
+
if PAGE_NOT_FOUND_STR in soup.find('title').text.lower():
|
29 |
+
return None
|
30 |
+
|
31 |
# Get ASIN
|
32 |
try:
|
33 |
asin = soup.find('link', rel='canonical')['href'].split('/')[-1]
|