| |
|
|
| import random |
| from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple |
| import torch |
| from torch import nn |
|
|
# Placeholder type names used purely to label annotations in this module;
# both are plain `Any`, so they impose no structure on the values.
# `SampledData` labels the items produced by a data sampler.
SampledData = Any
# `ModelOutput` labels what the data filter and data sampler receive.
ModelOutput = Any
|
|
|
|
| def _grouper(iterable: Iterable[Any], n: int, fillvalue=None) -> Iterator[Tuple[Any]]: |
| """ |
| Group elements of an iterable by chunks of size `n`, e.g. |
| grouper(range(9), 4) -> |
| (0, 1, 2, 3), (4, 5, 6, 7), (8, None, None, None) |
| """ |
| it = iter(iterable) |
| while True: |
| values = [] |
| for _ in range(n): |
| try: |
| value = next(it) |
| except StopIteration: |
| if values: |
| values.extend([fillvalue] * (n - len(values))) |
| yield tuple(values) |
| return |
| values.append(value) |
| yield tuple(values) |
|
|
|
|
class ScoreBasedFilter:
    """
    Score threshold filter for model outputs.

    For every output entry, keeps only the instances whose score is greater
    than or equal to the configured minimum.
    """

    def __init__(self, min_score: float = 0.8):
        """
        Args:
            min_score (float): lowest score an instance may have to be kept
        """
        self.min_score = min_score

    def __call__(self, model_output: ModelOutput) -> ModelOutput:
        """
        Apply the threshold to each entry of `model_output`.

        Entries are updated in place: their "instances" value is replaced by
        the selection that passes the threshold. Entries whose instances have
        no "scores" field are left untouched.

        Args:
            model_output: iterable of dict-like entries with an "instances" key

        Returns:
            The same `model_output` object that was passed in
        """
        threshold = self.min_score
        for entry in model_output:
            detections = entry["instances"]
            if detections.has("scores"):
                keep_mask = detections.scores >= threshold
                entry["instances"] = detections[keep_mask]
        return model_output
|
|
|
|
class InferenceBasedLoader:
    """
    Data loader based on results inferred by a model. Consists of:
     - a data loader that provides batches of images
     - a model that is used to infer the results
     - a data sampler that converts inferred results to annotations
    """

    def __init__(
        self,
        model: nn.Module,
        data_loader: Iterable[List[Dict[str, Any]]],
        data_sampler: Optional[Callable[[ModelOutput], List[SampledData]]] = None,
        data_filter: Optional[Callable[[ModelOutput], ModelOutput]] = None,
        shuffle: bool = True,
        batch_size: int = 4,
        inference_batch_size: int = 4,
        drop_last: bool = False,
        category_to_class_mapping: Optional[dict] = None,
    ):
        """
        Constructor

        Args:
          model (torch.nn.Module): model used to produce data; it is switched
            to evaluation mode here
          data_loader (Iterable[List[Dict[str, Any]]]): iterable that provides
            dictionaries with "images" and "categories" fields to perform inference on
          data_sampler (Callable: ModelOutput -> SampledData): functor
              that produces annotation data from inference results;
              (optional, default: None)
          data_filter (Callable: ModelOutput -> ModelOutput): filter
              that selects model outputs for further processing
              (optional, default: None)
          shuffle (bool): if True, the input images get shuffled
          batch_size (int): batch size for the produced annotation data
          inference_batch_size (int): batch size for input images
          drop_last (bool): if True, drop the last batch if it is undersized
          category_to_class_mapping (dict): category to class mapping;
              categories missing from the mapping are assigned class 0

        Raises:
          ValueError: if `batch_size` or `inference_batch_size` is less than 1
        """
        # Non-positive sizes cannot be honoured: grouping images into chunks
        # of size <= 0 never makes progress, and a produced-batch size <= 0
        # leads to empty or malformed batches.
        if batch_size < 1:
            raise ValueError(f"batch_size must be a positive integer, got {batch_size}")
        if inference_batch_size < 1:
            raise ValueError(
                f"inference_batch_size must be a positive integer, got {inference_batch_size}"
            )
        self.model = model
        self.model.eval()
        self.data_loader = data_loader
        self.data_sampler = data_sampler
        self.data_filter = data_filter
        self.shuffle = shuffle
        self.batch_size = batch_size
        self.inference_batch_size = inference_batch_size
        self.drop_last = drop_last
        if category_to_class_mapping is not None:
            self.category_to_class_mapping = category_to_class_mapping
        else:
            self.category_to_class_mapping = {}

    def __iter__(self) -> Iterator[List[SampledData]]:
        """
        Iterate over batches of data produced from the underlying data loader.

        Every batch of the data loader is flattened into individual
        (image, category) entries, optionally shuffled, and passed through
        the model; data loader batches that contain no images are skipped.
        """
        for batch in self.data_loader:
            # Each element carries parallel "images" / "categories" sequences;
            # pair them up into one flat list of per-image entries.
            images_and_categories = [
                {"image": image, "category": category}
                for element in batch
                for image, category in zip(element["images"], element["categories"])
            ]
            if not images_and_categories:
                continue
            if self.shuffle:
                random.shuffle(images_and_categories)
            yield from self._produce_data(images_and_categories)

    def _produce_data(
        self, images_and_categories: List[Dict[str, Any]]
    ) -> Iterator[List[SampledData]]:
        """
        Produce batches of data from images

        Args:
          images_and_categories (List[Dict[str, Any]]):
            list of dictionaries, each with an "image" entry (a 3-dimensional
            tensor) and a "category" entry used to look up the dataset class

        Returns:
          Iterator over batches of data sampled from model outputs; each batch
          holds at most `batch_size` entries, and a trailing undersized batch
          is emitted only if `drop_last` is False
        """
        data_batches: List[SampledData] = []
        category_to_class_mapping = self.category_to_class_mapping
        batched_images_and_categories = _grouper(images_and_categories, self.inference_batch_size)
        for batch in batched_images_and_categories:
            # The last group is padded with None up to the inference batch
            # size; the padding is dropped while moving images to the device.
            batch = [
                {
                    "image": image_and_category["image"].to(self.model.device),
                    "category": image_and_category["category"],
                }
                for image_and_category in batch
                if image_and_category is not None
            ]
            if not batch:
                continue
            with torch.no_grad():
                model_output = self.model(batch)
            for model_output_i, batch_i in zip(model_output, batch):
                image_shape = batch_i["image"].shape
                assert len(image_shape) == 3, (
                    f"expected a 3-dimensional image tensor, got shape {tuple(image_shape)}"
                )
                model_output_i["image"] = batch_i["image"]
                # Unknown categories fall back to class 0.
                instance_class = category_to_class_mapping.get(batch_i["category"], 0)
                # Tag every detected instance with the class of its source image.
                model_output_i["instances"].dataset_classes = torch.tensor(
                    [instance_class] * len(model_output_i["instances"])
                )
            model_output_filtered = (
                model_output if self.data_filter is None else self.data_filter(model_output)
            )
            data = (
                model_output_filtered
                if self.data_sampler is None
                else self.data_sampler(model_output_filtered)
            )
            for data_i in data:
                # Entries without any instances carry no annotations; skip them.
                if len(data_i["instances"]):
                    data_batches.append(data_i)
                if len(data_batches) >= self.batch_size:
                    yield data_batches[: self.batch_size]
                    data_batches = data_batches[self.batch_size :]
        if not self.drop_last and data_batches:
            yield data_batches
|
|