Spaces:
Runtime error
Runtime error
import os | |
import warnings | |
from typing import Iterator, Tuple, Union | |
import requests | |
from PIL import UnidentifiedImageError, Image | |
from PIL.Image import DecompressionBombError | |
from hbutils.system import urlsplit, TemporaryDirectory | |
from .base import RootDataSource | |
from ..model import ImageItem | |
from ..utils import get_requests_session, download_file | |
class NoURL(Exception): | |
pass | |
class WebDataSource(RootDataSource): | |
def __init__(self, group_name: str, session: requests.Session = None, download_silent: bool = True): | |
self.download_silent = download_silent | |
self.session = session or get_requests_session() | |
self.group_name = group_name | |
def _iter_data(self) -> Iterator[Tuple[Union[str, int], str, dict]]: | |
raise NotImplementedError # pragma: no cover | |
def _iter(self) -> Iterator[ImageItem]: | |
for id_, url, meta in self._iter_data(): | |
_, ext_name = os.path.splitext(urlsplit(url).filename) | |
if ext_name.lower() == '.gif': | |
warnings.warn(f'{self.group_name.capitalize()} resource {id_} is a GIF image, skipped.') | |
continue | |
filename = f'{self.group_name}_{id_}{ext_name}' | |
with TemporaryDirectory(ignore_cleanup_errors=True) as td: | |
td_file = os.path.join(td, filename) | |
try: | |
download_file( | |
url, td_file, desc=filename, | |
session=self.session, silent=self.download_silent | |
) | |
image = Image.open(td_file) | |
image.load() | |
except UnidentifiedImageError: | |
warnings.warn(f'{self.group_name.capitalize()} resource {id_} unidentified as image, skipped.') | |
continue | |
except (IOError, DecompressionBombError) as err: | |
warnings.warn(f'Skipped due to error: {err!r}') | |
continue | |
meta = {**meta, 'url': url} | |
yield ImageItem(image, meta) | |