|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Visual Genome dataset.""" |
|
|
|
import json |
|
import os |
|
import re |
|
from collections import defaultdict |
|
from typing import Any, Callable, Dict, Optional |
|
from urllib.parse import urlparse |
|
|
|
import datasets |
|
import dotenv |
|
|
|
|
|
logger = datasets.logging.get_logger(__name__) |
|
|
|
_CITATION = """\ |
|
@inproceedings{krishnavisualgenome, |
|
title={Visual Genome: Connecting Language and Vision Using Crowdsourced Dense Image Annotations}, |
|
author={Krishna, Ranjay and Zhu, Yuke and Groth, Oliver and Johnson, Justin and Hata, Kenji and Kravitz, Joshua and Chen, Stephanie and Kalantidis, Yannis and Li, Li-Jia and Shamma, David A and Bernstein, Michael and Fei-Fei, Li}, |
|
year = {2016}, |
|
url = {https://arxiv.org/abs/1602.07332}, |
|
} |
|
""" |
|
|
|
_DESCRIPTION = """\ |
|
Visual Genome enable to model objects and relationships between objects. |
|
They collect dense annotations of objects, attributes, and relationships within each image. |
|
Specifically, the dataset contains over 108K images where each image has an average of 35 objects, 26 attributes, and 21 pairwise relationships between objects. |
|
""" |
|
|
|
_HOMEPAGE = "https://homes.cs.washington.edu/~ranjay/visualgenome/" |
|
|
|
_LICENSE = "Creative Commons Attribution 4.0 International License" |
|
|
|
_BASE_IMAGE_URLS = { |
|
"https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip": "VG_100K", |
|
"https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip": "VG_100K_2", |
|
} |
|
|
|
_LATEST_VERSIONS = { |
|
"mask_region_descriptions": "0.0.1", |
|
"region_descriptions": "1.2.0", |
|
"objects": "1.4.0", |
|
"attributes": "1.2.0", |
|
"relationships": "1.4.0", |
|
"question_answers": "1.2.0", |
|
"image_metadata": "1.2.0", |
|
} |
|
|
|
|
|
|
|
|
|
_BASE_IMAGE_METADATA_FEATURES = { |
|
"image_id": datasets.Value("int32"), |
|
"coco_url": datasets.Value("string"), |
|
"file_name": datasets.Value("string"), |
|
"width": datasets.Value("int32"), |
|
"height": datasets.Value("int32"), |
|
|
|
|
|
"task_type": datasets.Value("string"), |
|
} |
|
|
|
_BASE_SYNTET_FEATURES = { |
|
"synset_name": datasets.Value("string"), |
|
"entity_name": datasets.Value("string"), |
|
"entity_idx_start": datasets.Value("int32"), |
|
"entity_idx_end": datasets.Value("int32"), |
|
} |
|
|
|
_BASE_OBJECT_FEATURES = { |
|
"object_id": datasets.Value("int32"), |
|
"x": datasets.Value("int32"), |
|
"y": datasets.Value("int32"), |
|
"w": datasets.Value("int32"), |
|
"h": datasets.Value("int32"), |
|
"names": [datasets.Value("string")], |
|
"synsets": [datasets.Value("string")], |
|
} |
|
|
|
_BASE_QA_OBJECT_FEATURES = { |
|
"object_id": datasets.Value("int32"), |
|
"x": datasets.Value("int32"), |
|
"y": datasets.Value("int32"), |
|
"w": datasets.Value("int32"), |
|
"h": datasets.Value("int32"), |
|
"names": [datasets.Value("string")], |
|
"synsets": [datasets.Value("string")], |
|
} |
|
|
|
_BASE_QA_OBJECT = { |
|
"qa_id": datasets.Value("int32"), |
|
"image_id": datasets.Value("int32"), |
|
"question": datasets.Value("string"), |
|
"answer": datasets.Value("string"), |
|
"a_objects": [_BASE_QA_OBJECT_FEATURES], |
|
"q_objects": [_BASE_QA_OBJECT_FEATURES], |
|
} |
|
|
|
_BASE_REGION_FEATURES = { |
|
"region_id": datasets.Value("int64"), |
|
"image_id": datasets.Value("int32"), |
|
"phrases": [datasets.Value("string")], |
|
"x": datasets.Value("int32"), |
|
"y": datasets.Value("int32"), |
|
"width": datasets.Value("int32"), |
|
"height": datasets.Value("int32"), |
|
} |
|
|
|
_BASE_MASK_FEATURES = { |
|
"size": [datasets.Value("int32")], |
|
"counts": datasets.Value("string"), |
|
} |
|
|
|
_BASE_MASK_REGION_FEATURES = { |
|
"region_id": datasets.Value("int64"), |
|
"image_id": datasets.Value("int32"), |
|
"phrases": [datasets.Value("string")], |
|
"x": datasets.Value("int32"), |
|
"y": datasets.Value("int32"), |
|
"width": datasets.Value("int32"), |
|
"height": datasets.Value("int32"), |
|
"mask": _BASE_MASK_FEATURES, |
|
} |
|
|
|
_BASE_RELATIONSHIP_FEATURES = { |
|
"relationship_id": datasets.Value("int32"), |
|
"predicate": datasets.Value("string"), |
|
"synsets": datasets.Value("string"), |
|
"subject": _BASE_OBJECT_FEATURES, |
|
"object": _BASE_OBJECT_FEATURES, |
|
} |
|
|
|
_NAME_VERSION_TO_ANNOTATION_FEATURES = { |
|
"mask_region_descriptions": { |
|
"0.0.1": {"regions": [_BASE_MASK_REGION_FEATURES]}, |
|
}, |
|
"region_descriptions": { |
|
"1.2.0": {"regions": [_BASE_REGION_FEATURES]}, |
|
"1.0.0": {"regions": [_BASE_REGION_FEATURES]}, |
|
}, |
|
"objects": { |
|
"1.4.0": { |
|
"objects": [ |
|
{ |
|
**_BASE_OBJECT_FEATURES, |
|
"merged_object_ids": [datasets.Value("int32")], |
|
} |
|
] |
|
}, |
|
"1.2.0": {"objects": [_BASE_OBJECT_FEATURES]}, |
|
"1.0.0": {"objects": [_BASE_OBJECT_FEATURES]}, |
|
}, |
|
"attributes": { |
|
"1.2.0": {"attributes": [{**_BASE_OBJECT_FEATURES, "attributes": [datasets.Value("string")]}]}, |
|
"1.0.0": {"attributes": [{**_BASE_OBJECT_FEATURES, "attributes": [datasets.Value("string")]}]}, |
|
}, |
|
"relationships": { |
|
"1.4.0": { |
|
"relationships": [ |
|
{ |
|
**_BASE_RELATIONSHIP_FEATURES, |
|
"subject": { |
|
**_BASE_OBJECT_FEATURES, |
|
"merged_object_ids": [datasets.Value("int32")], |
|
}, |
|
"object": { |
|
**_BASE_OBJECT_FEATURES, |
|
"merged_object_ids": [datasets.Value("int32")], |
|
}, |
|
} |
|
] |
|
}, |
|
"1.2.0": {"relationships": [_BASE_RELATIONSHIP_FEATURES]}, |
|
"1.0.0": {"relationships": [_BASE_RELATIONSHIP_FEATURES]}, |
|
}, |
|
"question_answers": { |
|
"1.2.0": {"qas": [_BASE_QA_OBJECT]}, |
|
"1.0.0": {"qas": [_BASE_QA_OBJECT]}, |
|
}, |
|
} |
|
|
|
|
|
|
|
|
|
def _get_decompressed_filename_from_url(url: str) -> str: |
|
parsed_url = urlparse(url) |
|
compressed_filename = os.path.basename(parsed_url.path) |
|
|
|
|
|
assert compressed_filename.endswith(".zip") |
|
uncompressed_filename = compressed_filename[:-4] |
|
|
|
|
|
unversioned_uncompressed_filename = re.sub(r"_v[0-9]+(?:_[0-9]+)?\.json$", ".json", uncompressed_filename) |
|
|
|
return unversioned_uncompressed_filename |
|
|
|
|
|
def _get_local_image_path(img_url: str, folder_local_paths: Dict[str, str]) -> str: |
|
"""Obtain image folder given an image url. |
|
|
|
For example: |
|
Given `https://cs.stanford.edu/people/rak248/VG_100K_2/1.jpg` as an image url, this method returns the local path for that image. |
|
""" |
|
matches = re.fullmatch( |
|
r"^https://cs.stanford.edu/people/rak248/(VG_100K(?:_2)?)/([0-9]+\.jpg)$", |
|
img_url, |
|
) |
|
assert matches is not None, f"Got img_url: {img_url}, matched: {matches}" |
|
folder, filename = matches.group(1), matches.group(2) |
|
return os.path.join(folder_local_paths[folder], filename) |
|
|
|
|
|
def _get_local_image_suffix_path(img_url: str) -> str: |
|
"""Obtain image folder given an image url. |
|
|
|
For example: |
|
Given `https://cs.stanford.edu/people/rak248/VG_100K_2/1.jpg` as an image url, this method returns the local path for that image. |
|
""" |
|
matches = re.fullmatch( |
|
r"^https://cs.stanford.edu/people/rak248/(VG_100K(?:_2)?)/([0-9]+\.jpg)$", |
|
img_url, |
|
) |
|
assert matches is not None, f"Got img_url: {img_url}, matched: {matches}" |
|
folder, filename = matches.group(1), matches.group(2) |
|
return os.path.join(folder, filename) |
|
|
|
|
|
|
|
|
|
_BASE_ANNOTATION_URL = "https://homes.cs.washington.edu/~ranjay/visualgenome/data/dataset" |
|
|
|
|
|
def _normalize_region_description_annotation_(annotation: Dict[str, Any]) -> Dict[str, Any]: |
|
"""Normalizes region descriptions annotation in-place.""" |
|
|
|
for region in annotation["regions"]: |
|
|
|
if "id" in region: |
|
region["region_id"] = region["id"] |
|
del region["id"] |
|
|
|
|
|
if "image" in region: |
|
region["image_id"] = region["image"] |
|
del region["image"] |
|
|
|
|
|
if "phrase" in region: |
|
region["phrases"] = [region["phrase"]] if isinstance(region["phrase"], str) else region["phrase"] |
|
del region["phrase"] |
|
|
|
return annotation |
|
|
|
|
|
def _normalize_object_annotation_(annotation: Dict[str, Any]) -> Dict[str, Any]: |
|
"""Normalizes object annotation in-place.""" |
|
|
|
for object_ in annotation["objects"]: |
|
|
|
if "id" in object_: |
|
object_["object_id"] = object_["id"] |
|
del object_["id"] |
|
|
|
|
|
if "synsets" not in object_: |
|
object_["synsets"] = None |
|
|
|
return annotation |
|
|
|
|
|
def _normalize_attribute_annotation_(annotation: Dict[str, Any]) -> Dict[str, Any]: |
|
"""Normalizes attributes annotation in-place.""" |
|
|
|
for attribute in annotation["attributes"]: |
|
|
|
if "id" in attribute: |
|
attribute["object_id"] = attribute["id"] |
|
del attribute["id"] |
|
|
|
|
|
if "object_names" in attribute: |
|
attribute["names"] = attribute["object_names"] |
|
del attribute["object_names"] |
|
|
|
|
|
if "synsets" not in attribute: |
|
attribute["synsets"] = None |
|
|
|
|
|
if "attributes" not in attribute: |
|
attribute["attributes"] = None |
|
|
|
return annotation |
|
|
|
|
|
def _normalize_relationship_annotation_(annotation: Dict[str, Any]) -> Dict[str, Any]: |
|
"""Normalizes relationship annotation in-place.""" |
|
|
|
for relationship in annotation["relationships"]: |
|
|
|
if "id" in relationship: |
|
relationship["relationship_id"] = relationship["id"] |
|
del relationship["id"] |
|
|
|
if "synsets" not in relationship: |
|
relationship["synsets"] = None |
|
|
|
subject = relationship["subject"] |
|
object_ = relationship["object"] |
|
|
|
for obj in [subject, object_]: |
|
|
|
if "id" in obj: |
|
obj["object_id"] = obj["id"] |
|
del obj["id"] |
|
|
|
if "name" in obj: |
|
obj["names"] = [obj["name"]] |
|
del obj["name"] |
|
|
|
if "synsets" not in obj: |
|
obj["synsets"] = None |
|
|
|
return annotation |
|
|
|
|
|
def _normalize_image_metadata_(image_metadata: Dict[str, Any]) -> Dict[str, Any]: |
|
"""Normalizes image metadata in-place.""" |
|
if "id" in image_metadata: |
|
image_metadata["image_id"] = image_metadata["id"] |
|
del image_metadata["id"] |
|
return image_metadata |
|
|
|
|
|
_ANNOTATION_NORMALIZER = defaultdict(lambda: lambda x: x) |
|
_ANNOTATION_NORMALIZER.update( |
|
{ |
|
"region_descriptions": _normalize_region_description_annotation_, |
|
"objects": _normalize_object_annotation_, |
|
"attributes": _normalize_attribute_annotation_, |
|
"relationships": _normalize_relationship_annotation_, |
|
} |
|
) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class VisualGenomeConfig(datasets.BuilderConfig): |
|
"""BuilderConfig for Visual Genome.""" |
|
|
|
def __init__( |
|
self, |
|
name: str, |
|
version: Optional[str] = None, |
|
with_image: bool = True, |
|
base_image_url: Optional[str] = None, |
|
base_annotation_url: Optional[str] = None, |
|
sas_key: Optional[str] = None, |
|
use_densecap_splits: bool = False, |
|
task_type: str = "caption", |
|
**kwargs, |
|
): |
|
_version = _LATEST_VERSIONS[name] if version is None else version |
|
_name = f"{name}_v{_version}" |
|
super().__init__(version=datasets.Version(_version), name=_name, **kwargs) |
|
self._name_without_version = name |
|
self.annotations_features = _NAME_VERSION_TO_ANNOTATION_FEATURES[self._name_without_version][ |
|
self.version.version_str |
|
] |
|
self.with_image = with_image |
|
|
|
|
|
self.base_annotation_url = base_annotation_url |
|
self.base_image_url = base_image_url |
|
self.sas_key = sas_key |
|
self.use_densecap_splits = use_densecap_splits |
|
self.task_type = task_type |
|
|
|
@property |
|
def image_zip_paths(self): |
|
if self.base_image_url is None: |
|
logger.warning("base_url is None. Using default base urls. Maybe unable to download images.") |
|
_image_zip_paths = _BASE_IMAGE_URLS |
|
else: |
|
if self.sas_key is None: |
|
sas_key = "" |
|
else: |
|
sas_key = self.sas_key |
|
_image_zip_paths = { |
|
f"{self.base_image_url}/images.zip{sas_key}": "VG_100K", |
|
f"{self.base_image_url}/images2.zip{sas_key}": "VG_100K_2", |
|
} |
|
|
|
logger.info(f"image_zip_paths: {_image_zip_paths}") |
|
return _image_zip_paths |
|
|
|
@property |
|
def annotations_url(self): |
|
if self.base_annotation_url is None: |
|
logger.warning("base_url is None. Using default base urls. Maybe unable to download annotations.") |
|
base_annotation_url = _BASE_ANNOTATION_URL |
|
sas_key = "" |
|
else: |
|
base_annotation_url = self.base_annotation_url |
|
if self.sas_key is None: |
|
sas_key = "" |
|
else: |
|
sas_key = self.sas_key |
|
|
|
major, minor = self.version.major, self.version.minor |
|
if self.version == _LATEST_VERSIONS[self._name_without_version]: |
|
_annotations_url = f"{base_annotation_url}/{self._name_without_version}.json.zip{sas_key}" |
|
elif minor == 0: |
|
_annotations_url = f"{base_annotation_url}/{self._name_without_version}_v{major}.json.zip{sas_key}" |
|
else: |
|
_annotations_url = f"{base_annotation_url}/{self._name_without_version}_v{major}_{minor}.json.zip{sas_key}" |
|
|
|
logger.info(f"annotations_url: {_annotations_url}") |
|
return _annotations_url |
|
|
|
@property |
|
def image_metadata_url(self): |
|
if self.base_annotation_url is None: |
|
logger.warning("base_url is None. Using default base urls. Maybe unable to download annotations.") |
|
base_annotation_url = _BASE_ANNOTATION_URL |
|
sas_key = "" |
|
else: |
|
base_annotation_url = self.base_annotation_url |
|
if self.sas_key is None: |
|
sas_key = "" |
|
else: |
|
sas_key = self.sas_key |
|
|
|
if not self.version == _LATEST_VERSIONS["image_metadata"]: |
|
logger.warning( |
|
f"Latest image metadata version is {_LATEST_VERSIONS['image_metadata']}. Trying to generate a dataset of version: {self.version}. Please double check that image data are unchanged between the two versions." |
|
) |
|
_image_metadata_url = f"{base_annotation_url}/image_data.json.zip{sas_key}" |
|
|
|
logger.info(f"image_metadata_url: {_image_metadata_url}") |
|
return _image_metadata_url |
|
|
|
@property |
|
def features(self): |
|
return datasets.Features( |
|
{ |
|
**({"image": datasets.Image()} if self.with_image else {}), |
|
**_BASE_IMAGE_METADATA_FEATURES, |
|
**self.annotations_features, |
|
} |
|
) |
|
|
|
@property |
|
def densecap_splits_json_url(self): |
|
|
|
|
|
if self.base_annotation_url is None: |
|
logger.warning("base_url is None. Using default base urls. Maybe unable to download annotations.") |
|
base_annotation_url = _BASE_ANNOTATION_URL |
|
sas_key = "" |
|
else: |
|
base_annotation_url = self.base_annotation_url |
|
if self.sas_key is None: |
|
sas_key = "" |
|
else: |
|
sas_key = self.sas_key |
|
return f"{base_annotation_url}/densecap_splits.json{sas_key}" |
|
|
|
|
|
class VisualGenome(datasets.GeneratorBasedBuilder): |
|
"""Visual Genome dataset.""" |
|
|
|
BUILDER_CONFIG_CLASS = VisualGenomeConfig |
|
BUILDER_CONFIGS = [ |
|
*[VisualGenomeConfig(name="mask_region_descriptions", version=version) for version in ["0.0.1"]], |
|
*[VisualGenomeConfig(name="region_descriptions", version=version) for version in ["1.0.0", "1.2.0"]], |
|
*[VisualGenomeConfig(name="question_answers", version=version) for version in ["1.0.0", "1.2.0"]], |
|
*[ |
|
VisualGenomeConfig(name="objects", version=version) |
|
|
|
for version in ["1.0.0", "1.2.0"] |
|
], |
|
*[VisualGenomeConfig(name="attributes", version=version) for version in ["1.0.0", "1.2.0"]], |
|
*[ |
|
VisualGenomeConfig(name="relationships", version=version) |
|
|
|
for version in ["1.0.0", "1.2.0"] |
|
], |
|
] |
|
|
|
def _info(self): |
|
return datasets.DatasetInfo( |
|
description=_DESCRIPTION, |
|
features=self.config.features, |
|
homepage=_HOMEPAGE, |
|
license=_LICENSE, |
|
citation=_CITATION, |
|
version=self.config.version, |
|
) |
|
|
|
_SPLIT_NAME_MAP = { |
|
"train": "TRAIN", |
|
"val": "VALIDATION", |
|
"test": "TEST", |
|
} |
|
|
|
def _split_generators(self, dl_manager): |
|
self.config: VisualGenomeConfig |
|
|
|
|
|
if self.config.sas_key is None: |
|
|
|
logger.info(f"Try to load sas_key from .env file: {dotenv.load_dotenv('.env')}.") |
|
self.config.sas_key = os.getenv("VISUAL_GENOME_SAS_KEY") |
|
if self.config.sas_key is not None: |
|
logger.info(f"Using sas_key: {self.config.sas_key}") |
|
|
|
|
|
if dl_manager.is_streaming is True: |
|
raise ValueError( |
|
"dl_manager.is_streaming is True, which is very slow due to the random access inside zip files with streaming loading." |
|
) |
|
|
|
image_metadatas_dir = dl_manager.download_and_extract(self.config.image_metadata_url) |
|
image_metadatas_file = os.path.join( |
|
image_metadatas_dir, |
|
_get_decompressed_filename_from_url(self.config.image_metadata_url), |
|
) |
|
|
|
|
|
annotations_dir = dl_manager.download_and_extract(self.config.annotations_url) |
|
annotations_file = os.path.join( |
|
annotations_dir, |
|
_get_decompressed_filename_from_url(self.config.annotations_url), |
|
) |
|
|
|
logger.info(f"annotations_file: {annotations_file}") |
|
logger.info(f"image_metadatas_file: {image_metadatas_file}") |
|
logger.info(f"annotations_dir: {annotations_dir}") |
|
logger.info(f"image_metadatas_dir: {image_metadatas_dir}") |
|
|
|
if self.config.use_densecap_splits: |
|
splits_path = dl_manager.download_and_extract(self.config.densecap_splits_json_url) |
|
logger.info(f"densecap splits_path: {splits_path}") |
|
with open(splits_path, encoding="utf-8") as fi: |
|
_splits = json.load(fi) |
|
splits = {name: [] for name in _splits.keys()} |
|
with open(image_metadatas_file, encoding="utf-8") as fi: |
|
image_metadatas = json.load(fi) |
|
|
|
image_idx_to_sample_idx = { |
|
image_metadata["image_id"]: sample_idx for sample_idx, image_metadata in enumerate(image_metadatas) |
|
} |
|
|
|
splits = {} |
|
for name, image_id_list in _splits.items(): |
|
splits[name] = [image_idx_to_sample_idx[image_id] for image_id in image_id_list] |
|
else: |
|
splits = dict(train=None) |
|
|
|
|
|
if self.config.with_image: |
|
image_folder_keys = list(self.config.image_zip_paths.keys()) |
|
image_dirs = dl_manager.download_and_extract(image_folder_keys) |
|
image_folder_local_paths = { |
|
self.config.image_zip_paths[key]: os.path.join(dir_, self.config.image_zip_paths[key]) |
|
for key, dir_ in zip(image_folder_keys, image_dirs) |
|
} |
|
else: |
|
image_folder_local_paths = None |
|
|
|
return [ |
|
datasets.SplitGenerator( |
|
name=getattr(datasets.Split, self._SPLIT_NAME_MAP[split]), |
|
gen_kwargs={ |
|
"image_folder_local_paths": image_folder_local_paths, |
|
"image_metadatas_file": image_metadatas_file, |
|
"annotations_file": annotations_file, |
|
"annotation_normalizer_": _ANNOTATION_NORMALIZER[self.config._name_without_version], |
|
"split_sample_idx_list": splits[split], |
|
}, |
|
) |
|
for split in splits |
|
] |
|
|
|
def _generate_examples( |
|
self, |
|
image_folder_local_paths: Optional[Dict[str, str]], |
|
image_metadatas_file: str, |
|
annotations_file: str, |
|
annotation_normalizer_: Callable[[Dict[str, Any]], Dict[str, Any]], |
|
split_sample_idx_list: Optional[list] = None, |
|
): |
|
with open(annotations_file, encoding="utf-8") as fi: |
|
annotations = json.load(fi) |
|
|
|
with open(image_metadatas_file, encoding="utf-8") as fi: |
|
image_metadatas = json.load(fi) |
|
|
|
|
|
logger.info(f"len(image_metadatas): {len(image_metadatas)}") |
|
logger.info(f"len(annotations): {len(annotations)}") |
|
|
|
assert len(image_metadatas) == len(annotations) |
|
|
|
if split_sample_idx_list is None: |
|
split_sample_idx_list = range(len(image_metadatas)) |
|
|
|
for idx, split_idx in enumerate(split_sample_idx_list): |
|
image_metadata, annotation = ( |
|
image_metadatas[split_idx], |
|
annotations[split_idx], |
|
) |
|
|
|
_normalize_image_metadata_(image_metadata) |
|
|
|
|
|
if "id" in annotation: |
|
|
|
assert ( |
|
image_metadata["image_id"] == annotation["id"] |
|
), f"Annotations doesn't match with image metadataset. Got image_metadata['image_id']: {image_metadata['image_id']} and annotations['id']: {annotation['id']}" |
|
del annotation["id"] |
|
else: |
|
assert "image_id" in annotation |
|
assert ( |
|
image_metadata["image_id"] == annotation["image_id"] |
|
), f"Annotations doesn't match with image metadataset. Got image_metadata['image_id']: {image_metadata['image_id']} and annotations['image_id']: {annotation['image_id']}" |
|
|
|
|
|
if "image_url" in annotation: |
|
|
|
assert ( |
|
image_metadata["url"] == annotation["image_url"] |
|
), f"Annotations doesn't match with image metadataset. Got image_metadata['url']: {image_metadata['url']} and annotations['image_url']: {annotation['image_url']}" |
|
del annotation["image_url"] |
|
elif "url" in annotation: |
|
|
|
assert ( |
|
image_metadata["url"] == annotation["url"] |
|
), f"Annotations doesn't match with image metadataset. Got image_metadata['url']: {image_metadata['url']} and annotations['url']: {annotation['url']}" |
|
|
|
|
|
annotation_normalizer_(annotation) |
|
|
|
|
|
if image_folder_local_paths is not None: |
|
filepath = _get_local_image_path(image_metadata["url"], image_folder_local_paths) |
|
image_dict = {"image": filepath} |
|
else: |
|
image_dict = {} |
|
|
|
|
|
image_metadata["file_name"] = _get_local_image_suffix_path(image_metadata["url"]) |
|
image_metadata["coco_url"] = image_metadata.pop("url") |
|
image_metadata.pop("flickr_id") |
|
image_metadata.pop("coco_id") |
|
|
|
yield idx, {**image_dict, **image_metadata, **annotation, "task_type": self.config.task_type} |
|
|