Spaces:

wufan
/

unimer_demo

Runtime error

App Files Files Community

wufan commited on Sep 6, 2024

Commit

18e4b60

verified ·

1 Parent(s): c0d1798

Upload 111 files

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
examples/0000004.png +0 -0
examples/0000005.png +0 -0
examples/0000006.png +0 -0
examples/0000007.png +0 -0
examples/0000011.png +0 -0
unimernet/__init__.py +31 -0
unimernet/__pycache__/__init__.cpython-310.pyc +0 -0
unimernet/common/__init__.py +0 -0
unimernet/common/__pycache__/__init__.cpython-310.pyc +0 -0
unimernet/common/__pycache__/config.cpython-310.pyc +0 -0
unimernet/common/__pycache__/dist_utils.cpython-310.pyc +0 -0
unimernet/common/__pycache__/logger.cpython-310.pyc +0 -0
unimernet/common/__pycache__/registry.cpython-310.pyc +0 -0
unimernet/common/__pycache__/utils.cpython-310.pyc +0 -0
unimernet/common/config.py +468 -0
unimernet/common/dist_utils.py +137 -0
unimernet/common/gradcam.py +24 -0
unimernet/common/logger.py +195 -0
unimernet/common/optims.py +120 -0
unimernet/common/registry.py +329 -0
unimernet/common/utils.py +424 -0
unimernet/configs/datasets/formula/formula_eval.yaml +6 -0
unimernet/configs/datasets/formula/formula_train.yaml +6 -0
unimernet/configs/datasets/formula/multi_scale_formula_train.yaml +21 -0
unimernet/configs/default.yaml +10 -0
unimernet/configs/models/unimernet_base.yaml +31 -0
unimernet/datasets/__init__.py +0 -0
unimernet/datasets/__pycache__/__init__.cpython-310.pyc +0 -0
unimernet/datasets/__pycache__/data_utils.cpython-310.pyc +0 -0
unimernet/datasets/builders/__init__.py +69 -0
unimernet/datasets/builders/__pycache__/__init__.cpython-310.pyc +0 -0
unimernet/datasets/builders/__pycache__/base_dataset_builder.cpython-310.pyc +0 -0
unimernet/datasets/builders/__pycache__/formula.cpython-310.pyc +0 -0
unimernet/datasets/builders/base_dataset_builder.py +233 -0
unimernet/datasets/builders/formula.py +105 -0
unimernet/datasets/data_utils.py +284 -0
unimernet/datasets/datasets/__pycache__/base_dataset.cpython-310.pyc +0 -0
unimernet/datasets/datasets/__pycache__/formula.cpython-310.pyc +0 -0
unimernet/datasets/datasets/__pycache__/formula_multi_scale.cpython-310.pyc +0 -0
unimernet/datasets/datasets/base_dataset.py +103 -0
unimernet/datasets/datasets/dataloader_utils.py +200 -0
unimernet/datasets/datasets/formula.py +71 -0
unimernet/datasets/datasets/formula_multi_scale.py +32 -0
unimernet/models/__init__.py +198 -0
unimernet/models/__pycache__/__init__.cpython-310.pyc +0 -0
unimernet/models/__pycache__/base_model.cpython-310.pyc +0 -0
unimernet/models/__pycache__/clip_vit.cpython-310.pyc +0 -0
unimernet/models/__pycache__/eva_vit.cpython-310.pyc +0 -0
unimernet/models/base_model.py +251 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+unimernet/processors/formula_processor_helper/frost/frost1.png filter=lfs diff=lfs merge=lfs -text

examples/0000004.png ADDED Viewed

examples/0000005.png ADDED Viewed

examples/0000006.png ADDED Viewed

examples/0000007.png ADDED Viewed

examples/0000011.png ADDED Viewed

unimernet/__init__.py ADDED Viewed

	@@ -0,0 +1,31 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import os
+import sys
+from omegaconf import OmegaConf
+from unimernet.common.registry import registry
+from unimernet.datasets.builders import *
+from unimernet.models import *
+from unimernet.processors import *
+from unimernet.tasks import *
+root_dir = os.path.dirname(os.path.abspath(__file__))
+default_cfg = OmegaConf.load(os.path.join(root_dir, "configs/default.yaml"))
+registry.register_path("library_root", root_dir)
+repo_root = os.path.join(root_dir, "..")
+registry.register_path("repo_root", repo_root)
+cache_root = os.path.join(repo_root, default_cfg.env.cache_root)
+registry.register_path("cache_root", cache_root)
+registry.register("MAX_INT", sys.maxsize)
+registry.register("SPLIT_NAMES", ["train", "val", "test"])

unimernet/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (1.02 kB). View file

unimernet/common/__init__.py ADDED Viewed

File without changes

unimernet/common/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (160 Bytes). View file

unimernet/common/__pycache__/config.cpython-310.pyc ADDED Viewed

Binary file (12.1 kB). View file

unimernet/common/__pycache__/dist_utils.cpython-310.pyc ADDED Viewed

Binary file (3.77 kB). View file

unimernet/common/__pycache__/logger.cpython-310.pyc ADDED Viewed

Binary file (6.42 kB). View file

unimernet/common/__pycache__/registry.cpython-310.pyc ADDED Viewed

Binary file (8.39 kB). View file

unimernet/common/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (12.8 kB). View file

unimernet/common/config.py ADDED Viewed

	@@ -0,0 +1,468 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import logging
+import json
+from typing import Dict
+from omegaconf import OmegaConf
+from unimernet.common.registry import registry
+class Config:
+    def __init__(self, args):
+        self.config = {}
+        self.args = args
+        # Register the config and configuration for setup
+        registry.register("configuration", self)
+        user_config = self._build_opt_list(self.args.options)
+        config = OmegaConf.load(self.args.cfg_path)
+        runner_config = self.build_runner_config(config)
+        model_config = self.build_model_config(config, **user_config)
+        dataset_config = self.build_dataset_config(config)
+        # Validate the user-provided runner configuration
+        # model and dataset configuration are supposed to be validated by the respective classes
+        # [TODO] validate the model/dataset configuration
+        # self._validate_runner_config(runner_config)
+        # Override the default configuration with user options.
+        self.config = OmegaConf.merge(
+            runner_config, model_config, dataset_config, user_config
+        )
+    def _validate_runner_config(self, runner_config):
+        """
+        This method validates the configuration, such that
+            1) all the user specified options are valid;
+            2) no type mismatches between the user specified options and the config.
+        """
+        runner_config_validator = create_runner_config_validator()
+        runner_config_validator.validate(runner_config)
+    def _build_opt_list(self, opts):
+        opts_dot_list = self._convert_to_dot_list(opts)
+        return OmegaConf.from_dotlist(opts_dot_list)
+    @staticmethod
+    def build_model_config(config, **kwargs):
+        model = config.get("model", None)
+        assert model is not None, "Missing model configuration file."
+        model_cls = registry.get_model_class(model.arch)
+        assert model_cls is not None, f"Model '{model.arch}' has not been registered."
+        model_type = kwargs.get("model.model_type", None)
+        if not model_type:
+            model_type = model.get("model_type", None)
+        # else use the model type selected by user.
+        assert model_type is not None, "Missing model_type."
+        model_config_path = model_cls.default_config_path(model_type=model_type)
+        model_config = OmegaConf.create()
+        # hiararchy override, customized config > default config
+        model_config = OmegaConf.merge(
+            model_config,
+            OmegaConf.load(model_config_path),
+            {"model": config["model"]},
+        )
+        return model_config
+    @staticmethod
+    def build_runner_config(config):
+        return {"run": config.run}
+    @staticmethod
+    def build_dataset_config(config):
+        datasets = config.get("datasets", None)
+        if datasets is None:
+            raise KeyError(
+                "Expecting 'datasets' as the root key for dataset configuration."
+            )
+        dataset_config = OmegaConf.create()
+        for dataset_name in datasets:
+            builder_cls = registry.get_builder_class(dataset_name)
+            dataset_config_type = datasets[dataset_name].get("type", "default")
+            dataset_config_path = builder_cls.default_config_path(
+                type=dataset_config_type
+            )
+            # hiararchy override, customized config > default config
+            dataset_config = OmegaConf.merge(
+                dataset_config,
+                OmegaConf.load(dataset_config_path),
+                {"datasets": {dataset_name: config["datasets"][dataset_name]}},
+            )
+        return dataset_config
+    def _convert_to_dot_list(self, opts):
+        if opts is None:
+            opts = []
+        if len(opts) == 0:
+            return opts
+        has_equal = opts[0].find("=") != -1
+        if has_equal:
+            return opts
+        return [(opt + "=" + value) for opt, value in zip(opts[0::2], opts[1::2])]
+    def get_config(self):
+        return self.config
+    @property
+    def run_cfg(self):
+        return self.config.run
+    @property
+    def datasets_cfg(self):
+        return self.config.datasets
+    @property
+    def model_cfg(self):
+        return self.config.model
+    def pretty_print(self):
+        logging.info("\n=====  Running Parameters    =====")
+        logging.info(self._convert_node_to_json(self.config.run))
+        logging.info("\n======  Dataset Attributes  ======")
+        datasets = self.config.datasets
+        for dataset in datasets:
+            if dataset in self.config.datasets:
+                logging.info(f"\n======== {dataset} =======")
+                dataset_config = self.config.datasets[dataset]
+                logging.info(self._convert_node_to_json(dataset_config))
+            else:
+                logging.warning(f"No dataset named '{dataset}' in config. Skipping")
+        logging.info(f"\n======  Model Attributes  ======")
+        logging.info(self._convert_node_to_json(self.config.model))
+    def _convert_node_to_json(self, node):
+        container = OmegaConf.to_container(node, resolve=True)
+        return json.dumps(container, indent=4, sort_keys=True)
+    def to_dict(self):
+        return OmegaConf.to_container(self.config)
+def node_to_dict(node):
+    return OmegaConf.to_container(node)
+class ConfigValidator:
+    """
+    This is a preliminary implementation to centralize and validate the configuration.
+    May be altered in the future.
+    A helper class to validate configurations from yaml file.
+    This serves the following purposes:
+        1. Ensure all the options in the yaml are defined, raise error if not.
+        2. when type mismatches are found, the validator will raise an error.
+        3. a central place to store and display helpful messages for supported configurations.
+    """
+    class _Argument:
+        def __init__(self, name, choices=None, type=None, help=None):
+            self.name = name
+            self.val = None
+            self.choices = choices
+            self.type = type
+            self.help = help
+        def __str__(self):
+            s = f"{self.name}={self.val}"
+            if self.type is not None:
+                s += f", ({self.type})"
+            if self.choices is not None:
+                s += f", choices: {self.choices}"
+            if self.help is not None:
+                s += f", ({self.help})"
+            return s
+    def __init__(self, description):
+        self.description = description
+        self.arguments = dict()
+        self.parsed_args = None
+    def __getitem__(self, key):
+        assert self.parsed_args is not None, "No arguments parsed yet."
+        return self.parsed_args[key]
+    def __str__(self) -> str:
+        return self.format_help()
+    def add_argument(self, *args, **kwargs):
+        """
+        Assume the first argument is the name of the argument.
+        """
+        self.arguments[args[0]] = self._Argument(*args, **kwargs)
+    def validate(self, config=None):
+        """
+        Convert yaml config (dict-like) to list, required by argparse.
+        """
+        for k, v in config.items():
+            assert (
+                k in self.arguments
+            ), f"""{k} is not a valid argument. Support arguments are {self.format_arguments()}."""
+            if self.arguments[k].type is not None:
+                try:
+                    self.arguments[k].val = self.arguments[k].type(v)
+                except ValueError:
+                    raise ValueError(f"{k} is not a valid {self.arguments[k].type}.")
+            if self.arguments[k].choices is not None:
+                assert (
+                    v in self.arguments[k].choices
+                ), f"""{k} must be one of {self.arguments[k].choices}."""
+        return config
+    def format_arguments(self):
+        return str([f"{k}" for k in sorted(self.arguments.keys())])
+    def format_help(self):
+        # description + key-value pair string for each argument
+        help_msg = str(self.description)
+        return help_msg + ", available arguments: " + self.format_arguments()
+    def print_help(self):
+        # display help message
+        print(self.format_help())
+def create_runner_config_validator():
+    validator = ConfigValidator(description="Runner configurations")
+    validator.add_argument(
+        "runner",
+        type=str,
+        choices=["runner_base", "runner_iter"],
+        help="""Runner to use. The "runner_base" uses epoch-based training while iter-based
+            runner runs based on iters. Default: runner_base""",
+    )
+    # add argumetns for training dataset ratios
+    validator.add_argument(
+        "train_dataset_ratios",
+        type=Dict[str, float],
+        help="""Ratios of training dataset. This is used in iteration-based runner.
+        Do not support for epoch-based runner because how to define an epoch becomes tricky.
+        Default: None""",
+    )
+    validator.add_argument(
+        "max_iters",
+        type=float,
+        help="Maximum number of iterations to run.",
+    )
+    validator.add_argument(
+        "max_epoch",
+        type=int,
+        help="Maximum number of epochs to run.",
+    )
+    # add arguments for iters_per_inner_epoch
+    validator.add_argument(
+        "iters_per_inner_epoch",
+        type=float,
+        help="Number of iterations per inner epoch. This is required when runner is runner_iter.",
+    )
+    lr_scheds_choices = registry.list_lr_schedulers()
+    validator.add_argument(
+        "lr_sched",
+        type=str,
+        choices=lr_scheds_choices,
+        help="Learning rate scheduler to use, from {}".format(lr_scheds_choices),
+    )
+    task_choices = registry.list_tasks()
+    validator.add_argument(
+        "task",
+        type=str,
+        choices=task_choices,
+        help="Task to use, from {}".format(task_choices),
+    )
+    # add arguments for init_lr
+    validator.add_argument(
+        "init_lr",
+        type=float,
+        help="Initial learning rate. This will be the learning rate after warmup and before decay.",
+    )
+    # add arguments for min_lr
+    validator.add_argument(
+        "min_lr",
+        type=float,
+        help="Minimum learning rate (after decay).",
+    )
+    # add arguments for warmup_lr
+    validator.add_argument(
+        "warmup_lr",
+        type=float,
+        help="Starting learning rate for warmup.",
+    )
+    # add arguments for learning rate decay rate
+    validator.add_argument(
+        "lr_decay_rate",
+        type=float,
+        help="Learning rate decay rate. Required if using a decaying learning rate scheduler.",
+    )
+    # add arguments for weight decay
+    validator.add_argument(
+        "weight_decay",
+        type=float,
+        help="Weight decay rate.",
+    )
+    # add arguments for training batch size
+    validator.add_argument(
+        "batch_size_train",
+        type=int,
+        help="Training batch size.",
+    )
+    # add arguments for evaluation batch size
+    validator.add_argument(
+        "batch_size_eval",
+        type=int,
+        help="Evaluation batch size, including validation and testing.",
+    )
+    # add arguments for number of workers for data loading
+    validator.add_argument(
+        "num_workers",
+        help="Number of workers for data loading.",
+    )
+    # add arguments for warm up steps
+    validator.add_argument(
+        "warmup_steps",
+        type=int,
+        help="Number of warmup steps. Required if a warmup schedule is used.",
+    )
+    # add arguments for random seed
+    validator.add_argument(
+        "seed",
+        type=int,
+        help="Random seed.",
+    )
+    # add arguments for output directory
+    validator.add_argument(
+        "output_dir",
+        type=str,
+        help="Output directory to save checkpoints and logs.",
+    )
+    # add arguments for whether only use evaluation
+    validator.add_argument(
+        "evaluate",
+        help="Whether to only evaluate the model. If true, training will not be performed.",
+    )
+    # add arguments for splits used for training, e.g. ["train", "val"]
+    validator.add_argument(
+        "train_splits",
+        type=list,
+        help="Splits to use for training.",
+    )
+    # add arguments for splits used for validation, e.g. ["val"]
+    validator.add_argument(
+        "valid_splits",
+        type=list,
+        help="Splits to use for validation. If not provided, will skip the validation.",
+    )
+    # add arguments for splits used for testing, e.g. ["test"]
+    validator.add_argument(
+        "test_splits",
+        type=list,
+        help="Splits to use for testing. If not provided, will skip the testing.",
+    )
+    # add arguments for accumulating gradient for iterations
+    validator.add_argument(
+        "accum_grad_iters",
+        type=int,
+        help="Number of iterations to accumulate gradient for.",
+    )
+    # ====== distributed training ======
+    validator.add_argument(
+        "device",
+        type=str,
+        choices=["cpu", "cuda"],
+        help="Device to use. Support 'cuda' or 'cpu' as for now.",
+    )
+    validator.add_argument(
+        "world_size",
+        type=int,
+        help="Number of processes participating in the job.",
+    )
+    validator.add_argument("dist_url", type=str)
+    validator.add_argument("distributed", type=bool)
+    # add arguments to opt using distributed sampler during evaluation or not
+    validator.add_argument(
+        "use_dist_eval_sampler",
+        type=bool,
+        help="Whether to use distributed sampler during evaluation or not.",
+    )
+    # ====== task specific ======
+    # generation task specific arguments
+    # add arguments for maximal length of text output
+    validator.add_argument(
+        "max_len",
+        type=int,
+        help="Maximal length of text output.",
+    )
+    # add arguments for minimal length of text output
+    validator.add_argument(
+        "min_len",
+        type=int,
+        help="Minimal length of text output.",
+    )
+    # add arguments number of beams
+    validator.add_argument(
+        "num_beams",
+        type=int,
+        help="Number of beams used for beam search.",
+    )
+    # vqa task specific arguments
+    # add arguments for number of answer candidates
+    validator.add_argument(
+        "num_ans_candidates",
+        type=int,
+        help="""For ALBEF and BLIP, these models first rank answers according to likelihood to select answer candidates.""",
+    )
+    # add arguments for inference method
+    validator.add_argument(
+        "inference_method",
+        type=str,
+        choices=["genearte", "rank"],
+        help="""Inference method to use for question answering. If rank, requires a answer list.""",
+    )
+    # ====== model specific ======
+    validator.add_argument(
+        "k_test",
+        type=int,
+        help="Number of top k most similar samples from ITC/VTC selection to be tested.",
+    )
+    return validator

unimernet/common/dist_utils.py ADDED Viewed

	@@ -0,0 +1,137 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import datetime
+import functools
+import os
+import torch
+import torch.distributed as dist
+import timm.models.hub as timm_hub
+def setup_for_distributed(is_master):
+    """
+    This function disables printing when not in master process
+    """
+    import builtins as __builtin__
+    builtin_print = __builtin__.print
+    def print(*args, **kwargs):
+        force = kwargs.pop("force", False)
+        if is_master or force:
+            builtin_print(*args, **kwargs)
+    __builtin__.print = print
+def is_dist_avail_and_initialized():
+    if not dist.is_available():
+        return False
+    if not dist.is_initialized():
+        return False
+    return True
+def get_world_size():
+    if not is_dist_avail_and_initialized():
+        return 1
+    return dist.get_world_size()
+def get_rank():
+    if not is_dist_avail_and_initialized():
+        return 0
+    return dist.get_rank()
+def is_main_process():
+    return get_rank() == 0
+def init_distributed_mode(args):
+    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
+        args.rank = int(os.environ["RANK"])
+        args.world_size = int(os.environ["WORLD_SIZE"])
+        args.gpu = int(os.environ["LOCAL_RANK"])
+    elif "SLURM_PROCID" in os.environ:
+        args.rank = int(os.environ["SLURM_PROCID"])
+        args.gpu = args.rank % torch.cuda.device_count()
+    else:
+        print("Not using distributed mode")
+        args.distributed = False
+        return
+    args.distributed = True
+    torch.cuda.set_device(args.gpu)
+    args.dist_backend = "nccl"
+    print(
+        "| distributed init (rank {}, world {}): {}".format(
+            args.rank, args.world_size, args.dist_url
+        ),
+        flush=True,
+    )
+    torch.distributed.init_process_group(
+        backend=args.dist_backend,
+        init_method=args.dist_url,
+        world_size=args.world_size,
+        rank=args.rank,
+        timeout=datetime.timedelta(
+            days=365
+        ),  # allow auto-downloading and de-compressing
+    )
+    torch.distributed.barrier()
+    setup_for_distributed(args.rank == 0)
+def get_dist_info():
+    if torch.__version__ < "1.0":
+        initialized = dist._initialized
+    else:
+        initialized = dist.is_initialized()
+    if initialized:
+        rank = dist.get_rank()
+        world_size = dist.get_world_size()
+    else:  # non-distributed training
+        rank = 0
+        world_size = 1
+    return rank, world_size
+def main_process(func):
+    @functools.wraps(func)
+    def wrapper(*args, **kwargs):
+        rank, _ = get_dist_info()
+        if rank == 0:
+            return func(*args, **kwargs)
+    return wrapper
+def download_cached_file(url, check_hash=True, progress=False):
+    """
+    Download a file from a URL and cache it locally. If the file already exists, it is not downloaded again.
+    If distributed, only the main process downloads the file, and the other processes wait for the file to be downloaded.
+    """
+    def get_cached_file_path():
+        # a hack to sync the file path across processes
+        parts = torch.hub.urlparse(url)
+        filename = os.path.basename(parts.path)
+        cached_file = os.path.join(timm_hub.get_cache_dir(), filename)
+        return cached_file
+    if is_main_process():
+        timm_hub.download_cached_file(url, check_hash, progress)
+    if is_dist_avail_and_initialized():
+        dist.barrier()
+    return get_cached_file_path()

unimernet/common/gradcam.py ADDED Viewed

	@@ -0,0 +1,24 @@

+import numpy as np
+from matplotlib import pyplot as plt
+from scipy.ndimage import filters
+from skimage import transform as skimage_transform
+def getAttMap(img, attMap, blur=True, overlap=True):
+    attMap -= attMap.min()
+    if attMap.max() > 0:
+        attMap /= attMap.max()
+    attMap = skimage_transform.resize(attMap, (img.shape[:2]), order=3, mode="constant")
+    if blur:
+        attMap = filters.gaussian_filter(attMap, 0.02 * max(img.shape[:2]))
+        attMap -= attMap.min()
+        attMap /= attMap.max()
+    cmap = plt.get_cmap("jet")
+    attMapV = cmap(attMap)
+    attMapV = np.delete(attMapV, 3, 2)
+    if overlap:
+        attMap = (
+            1 * (1 - attMap**0.7).reshape(attMap.shape + (1,)) * img
+            + (attMap**0.7).reshape(attMap.shape + (1,)) * attMapV
+        )
+    return attMap

unimernet/common/logger.py ADDED Viewed

	@@ -0,0 +1,195 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import datetime
+import logging
+import time
+from collections import defaultdict, deque
+import torch
+import torch.distributed as dist
+from unimernet.common import dist_utils
+class SmoothedValue(object):
+    """Track a series of values and provide access to smoothed values over a
+    window or the global series average.
+    """
+    def __init__(self, window_size=20, fmt=None):
+        if fmt is None:
+            fmt = "{median:.4f} ({global_avg:.4f})"
+        self.deque = deque(maxlen=window_size)
+        self.total = 0.0
+        self.count = 0
+        self.fmt = fmt
+    def update(self, value, n=1):
+        self.deque.append(value)
+        self.count += n
+        self.total += value * n
+    def synchronize_between_processes(self):
+        """
+        Warning: does not synchronize the deque!
+        """
+        if not dist_utils.is_dist_avail_and_initialized():
+            return
+        t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
+        dist.barrier()
+        dist.all_reduce(t)
+        t = t.tolist()
+        self.count = int(t[0])
+        self.total = t[1]
+    @property
+    def median(self):
+        d = torch.tensor(list(self.deque))
+        return d.median().item()
+    @property
+    def avg(self):
+        d = torch.tensor(list(self.deque), dtype=torch.float32)
+        return d.mean().item()
+    @property
+    def global_avg(self):
+        return self.total / self.count
+    @property
+    def max(self):
+        return max(self.deque)
+    @property
+    def value(self):
+        return self.deque[-1]
+    def __str__(self):
+        return self.fmt.format(
+            median=self.median,
+            avg=self.avg,
+            global_avg=self.global_avg,
+            max=self.max,
+            value=self.value,
+        )
+class MetricLogger(object):
+    def __init__(self, delimiter="\t"):
+        self.meters = defaultdict(SmoothedValue)
+        self.delimiter = delimiter
+    def update(self, **kwargs):
+        for k, v in kwargs.items():
+            if isinstance(v, torch.Tensor):
+                v = v.item()
+            assert isinstance(v, (float, int))
+            self.meters[k].update(v)
+    def __getattr__(self, attr):
+        if attr in self.meters:
+            return self.meters[attr]
+        if attr in self.__dict__:
+            return self.__dict__[attr]
+        raise AttributeError(
+            "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
+        )
+    def __str__(self):
+        loss_str = []
+        for name, meter in self.meters.items():
+            loss_str.append("{}: {}".format(name, str(meter)))
+        return self.delimiter.join(loss_str)
+    def global_avg(self):
+        loss_str = []
+        for name, meter in self.meters.items():
+            loss_str.append("{}: {:.4f}".format(name, meter.global_avg))
+        return self.delimiter.join(loss_str)
+    def synchronize_between_processes(self):
+        for meter in self.meters.values():
+            meter.synchronize_between_processes()
+    def add_meter(self, name, meter):
+        self.meters[name] = meter
+    def log_every(self, iterable, print_freq, header=None):
+        i = 0
+        if not header:
+            header = ""
+        start_time = time.time()
+        end = time.time()
+        iter_time = SmoothedValue(fmt="{avg:.4f}")
+        data_time = SmoothedValue(fmt="{avg:.4f}")
+        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
+        log_msg = [
+            header,
+            "[{0" + space_fmt + "}/{1}]",
+            "eta: {eta}",
+            "{meters}",
+            "time: {time}",
+            "data: {data}",
+        ]
+        if torch.cuda.is_available():
+            log_msg.append("max mem: {memory:.0f}")
+        log_msg = self.delimiter.join(log_msg)
+        MB = 1024.0 * 1024.0
+        for obj in iterable:
+            data_time.update(time.time() - end)
+            yield obj
+            iter_time.update(time.time() - end)
+            if i % print_freq == 0 or i == len(iterable) - 1:
+                eta_seconds = iter_time.global_avg * (len(iterable) - i)
+                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
+                if torch.cuda.is_available():
+                    print(
+                        log_msg.format(
+                            i,
+                            len(iterable),
+                            eta=eta_string,
+                            meters=str(self),
+                            time=str(iter_time),
+                            data=str(data_time),
+                            memory=torch.cuda.max_memory_allocated() / MB,
+                        )
+                    )
+                else:
+                    print(
+                        log_msg.format(
+                            i,
+                            len(iterable),
+                            eta=eta_string,
+                            meters=str(self),
+                            time=str(iter_time),
+                            data=str(data_time),
+                        )
+                    )
+            i += 1
+            end = time.time()
+        total_time = time.time() - start_time
+        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+        print(
+            "{} Total time: {} ({:.4f} s / it)".format(
+                header, total_time_str, total_time / len(iterable)
+            )
+        )
+class AttrDict(dict):
+    def __init__(self, *args, **kwargs):
+        super(AttrDict, self).__init__(*args, **kwargs)
+        self.__dict__ = self
+def setup_logger():
+    logging.basicConfig(
+        level=logging.INFO if dist_utils.is_main_process() else logging.WARN,
+        format="%(asctime)s [%(levelname)s] %(message)s",
+        handlers=[logging.StreamHandler()],
+    )

unimernet/common/optims.py ADDED Viewed

	@@ -0,0 +1,120 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import math
+from unimernet.common.registry import registry
+@registry.register_lr_scheduler("linear_warmup_step_lr")
+class LinearWarmupStepLRScheduler:
+    def __init__(
+        self,
+        optimizer,
+        max_epoch,
+        min_lr,
+        init_lr,
+        decay_rate=1,
+        warmup_start_lr=-1,
+        warmup_steps=0,
+        **kwargs
+    ):
+        self.optimizer = optimizer
+        self.max_epoch = max_epoch
+        self.min_lr = min_lr
+        self.decay_rate = decay_rate
+        self.init_lr = init_lr
+        self.warmup_steps = warmup_steps
+        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
+    def step(self, cur_epoch, cur_step):
+        if cur_epoch == 0:
+            warmup_lr_schedule(
+                step=cur_step,
+                optimizer=self.optimizer,
+                max_step=self.warmup_steps,
+                init_lr=self.warmup_start_lr,
+                max_lr=self.init_lr,
+            )
+        else:
+            step_lr_schedule(
+                epoch=cur_epoch,
+                optimizer=self.optimizer,
+                init_lr=self.init_lr,
+                min_lr=self.min_lr,
+                decay_rate=self.decay_rate,
+            )
+@registry.register_lr_scheduler("linear_warmup_cosine_lr")
+class LinearWarmupCosineLRScheduler:
+    def __init__(
+        self,
+        optimizer,
+        max_epoch,
+        min_lr,
+        init_lr,
+        iters_per_epoch,
+        warmup_steps=0,
+        warmup_start_lr=-1,
+        **kwargs
+    ):
+        self.optimizer = optimizer
+        self.max_epoch = max_epoch
+        self.min_lr = min_lr
+        self.init_lr = init_lr
+        self.warmup_steps = warmup_steps
+        self.iters_per_epoch = iters_per_epoch
+        self.warmup_start_lr = warmup_start_lr if warmup_start_lr >= 0 else init_lr
+    def step(self, cur_epoch, cur_step):
+        # assuming the warmup iters less than one epoch
+        total_steps = cur_epoch * self.iters_per_epoch + cur_step
+        if total_steps < self.warmup_steps:
+            warmup_lr_schedule(
+                step=cur_step,
+                optimizer=self.optimizer,
+                max_step=self.warmup_steps,
+                init_lr=self.warmup_start_lr,
+                max_lr=self.init_lr,
+            )
+        else:
+            cosine_lr_schedule(
+                epoch=total_steps,
+                optimizer=self.optimizer,
+                max_epoch=self.max_epoch * self.iters_per_epoch,
+                init_lr=self.init_lr,
+                min_lr=self.min_lr,
+            )
+def cosine_lr_schedule(optimizer, epoch, max_epoch, init_lr, min_lr):
+    """Decay the learning rate"""
+    lr = (init_lr - min_lr) * 0.5 * (
+        1.0 + math.cos(math.pi * epoch / max_epoch)
+    ) + min_lr
+    for param_group in optimizer.param_groups:
+        param_group["lr"] = lr
+def warmup_lr_schedule(optimizer, step, max_step, init_lr, max_lr):
+    """Warmup the learning rate"""
+    lr = min(max_lr, init_lr + (max_lr - init_lr) * step / max(max_step, 1))
+    for param_group in optimizer.param_groups:
+        param_group["lr"] = lr
+def step_lr_schedule(optimizer, epoch, init_lr, min_lr, decay_rate):
+    """Decay the learning rate"""
+    lr = max(min_lr, init_lr * (decay_rate**epoch))
+    for param_group in optimizer.param_groups:
+        param_group["lr"] = lr

unimernet/common/registry.py ADDED Viewed

	@@ -0,0 +1,329 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+class Registry:
+    mapping = {
+        "builder_name_mapping": {},
+        "task_name_mapping": {},
+        "processor_name_mapping": {},
+        "model_name_mapping": {},
+        "lr_scheduler_name_mapping": {},
+        "runner_name_mapping": {},
+        "state": {},
+        "paths": {},
+    }
+    @classmethod
+    def register_builder(cls, name):
+        r"""Register a dataset builder to registry with key 'name'
+        Args:
+            name: Key with which the builder will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+            from unimernet.datasets.base_dataset_builder import BaseDatasetBuilder
+        """
+        def wrap(builder_cls):
+            from unimernet.datasets.builders.base_dataset_builder import BaseDatasetBuilder
+            assert issubclass(
+                builder_cls, BaseDatasetBuilder
+            ), "All builders must inherit BaseDatasetBuilder class, found {}".format(
+                builder_cls
+            )
+            if name in cls.mapping["builder_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["builder_name_mapping"][name]
+                    )
+                )
+            cls.mapping["builder_name_mapping"][name] = builder_cls
+            return builder_cls
+        return wrap
+    @classmethod
+    def register_task(cls, name):
+        r"""Register a task to registry with key 'name'
+        Args:
+            name: Key with which the task will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        def wrap(task_cls):
+            from unimernet.tasks.base_task import BaseTask
+            assert issubclass(
+                task_cls, BaseTask
+            ), "All tasks must inherit BaseTask class"
+            if name in cls.mapping["task_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["task_name_mapping"][name]
+                    )
+                )
+            cls.mapping["task_name_mapping"][name] = task_cls
+            return task_cls
+        return wrap
+    @classmethod
+    def register_model(cls, name):
+        r"""Register a task to registry with key 'name'
+        Args:
+            name: Key with which the task will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        def wrap(model_cls):
+            from unimernet.models import BaseModel
+            assert issubclass(
+                model_cls, BaseModel
+            ), "All models must inherit BaseModel class"
+            if name in cls.mapping["model_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["model_name_mapping"][name]
+                    )
+                )
+            cls.mapping["model_name_mapping"][name] = model_cls
+            return model_cls
+        return wrap
+    @classmethod
+    def register_processor(cls, name):
+        r"""Register a processor to registry with key 'name'
+        Args:
+            name: Key with which the task will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        def wrap(processor_cls):
+            from unimernet.processors import BaseProcessor
+            assert issubclass(
+                processor_cls, BaseProcessor
+            ), "All processors must inherit BaseProcessor class"
+            if name in cls.mapping["processor_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["processor_name_mapping"][name]
+                    )
+                )
+            cls.mapping["processor_name_mapping"][name] = processor_cls
+            return processor_cls
+        return wrap
+    @classmethod
+    def register_lr_scheduler(cls, name):
+        r"""Register a model to registry with key 'name'
+        Args:
+            name: Key with which the task will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        def wrap(lr_sched_cls):
+            if name in cls.mapping["lr_scheduler_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["lr_scheduler_name_mapping"][name]
+                    )
+                )
+            cls.mapping["lr_scheduler_name_mapping"][name] = lr_sched_cls
+            return lr_sched_cls
+        return wrap
+    @classmethod
+    def register_runner(cls, name):
+        r"""Register a model to registry with key 'name'
+        Args:
+            name: Key with which the task will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        def wrap(runner_cls):
+            if name in cls.mapping["runner_name_mapping"]:
+                raise KeyError(
+                    "Name '{}' already registered for {}.".format(
+                        name, cls.mapping["runner_name_mapping"][name]
+                    )
+                )
+            cls.mapping["runner_name_mapping"][name] = runner_cls
+            return runner_cls
+        return wrap
+    @classmethod
+    def register_path(cls, name, path):
+        r"""Register a path to registry with key 'name'
+        Args:
+            name: Key with which the path will be registered.
+        Usage:
+            from unimernet.common.registry import registry
+        """
+        assert isinstance(path, str), "All path must be str."
+        if name in cls.mapping["paths"]:
+            raise KeyError("Name '{}' already registered.".format(name))
+        cls.mapping["paths"][name] = path
+    @classmethod
+    def register(cls, name, obj):
+        r"""Register an item to registry with key 'name'
+        Args:
+            name: Key with which the item will be registered.
+        Usage::
+            from unimernet.common.registry import registry
+            registry.register("config", {})
+        """
+        path = name.split(".")
+        current = cls.mapping["state"]
+        for part in path[:-1]:
+            if part not in current:
+                current[part] = {}
+            current = current[part]
+        current[path[-1]] = obj
+    # @classmethod
+    # def get_trainer_class(cls, name):
+    #     return cls.mapping["trainer_name_mapping"].get(name, None)
+    @classmethod
+    def get_builder_class(cls, name):
+        return cls.mapping["builder_name_mapping"].get(name, None)
+    @classmethod
+    def get_model_class(cls, name):
+        return cls.mapping["model_name_mapping"].get(name, None)
+    @classmethod
+    def get_task_class(cls, name):
+        return cls.mapping["task_name_mapping"].get(name, None)
+    @classmethod
+    def get_processor_class(cls, name):
+        return cls.mapping["processor_name_mapping"].get(name, None)
+    @classmethod
+    def get_lr_scheduler_class(cls, name):
+        return cls.mapping["lr_scheduler_name_mapping"].get(name, None)
+    @classmethod
+    def get_runner_class(cls, name):
+        return cls.mapping["runner_name_mapping"].get(name, None)
+    @classmethod
+    def list_runners(cls):
+        return sorted(cls.mapping["runner_name_mapping"].keys())
+    @classmethod
+    def list_models(cls):
+        return sorted(cls.mapping["model_name_mapping"].keys())
+    @classmethod
+    def list_tasks(cls):
+        return sorted(cls.mapping["task_name_mapping"].keys())
+    @classmethod
+    def list_processors(cls):
+        return sorted(cls.mapping["processor_name_mapping"].keys())
+    @classmethod
+    def list_lr_schedulers(cls):
+        return sorted(cls.mapping["lr_scheduler_name_mapping"].keys())
+    @classmethod
+    def list_datasets(cls):
+        return sorted(cls.mapping["builder_name_mapping"].keys())
+    @classmethod
+    def get_path(cls, name):
+        return cls.mapping["paths"].get(name, None)
+    @classmethod
+    def get(cls, name, default=None, no_warning=False):
+        r"""Get an item from registry with key 'name'
+        Args:
+            name (string): Key whose value needs to be retrieved.
+            default: If passed and key is not in registry, default value will
+                     be returned with a warning. Default: None
+            no_warning (bool): If passed as True, warning when key doesn't exist
+                               will not be generated. Useful for MMF's
+                               internal operations. Default: False
+        """
+        original_name = name
+        name = name.split(".")
+        value = cls.mapping["state"]
+        for subname in name:
+            value = value.get(subname, default)
+            if value is default:
+                break
+        if (
+            "writer" in cls.mapping["state"]
+            and value == default
+            and no_warning is False
+        ):
+            cls.mapping["state"]["writer"].warning(
+                "Key {} is not present in registry, returning default value "
+                "of {}".format(original_name, default)
+            )
+        return value
+    @classmethod
+    def unregister(cls, name):
+        r"""Remove an item from registry with key 'name'
+        Args:
+            name: Key which needs to be removed.
+        Usage::
+            from mmf.common.registry import registry
+            config = registry.unregister("config")
+        """
+        return cls.mapping["state"].pop(name, None)
+registry = Registry()

unimernet/common/utils.py ADDED Viewed

	@@ -0,0 +1,424 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import io
+import json
+import logging
+import os
+import pickle
+import re
+import shutil
+import urllib
+import urllib.error
+import urllib.request
+from typing import Optional
+from urllib.parse import urlparse
+import numpy as np
+import pandas as pd
+import yaml
+from iopath.common.download import download
+from iopath.common.file_io import file_lock, g_pathmgr
+from unimernet.common.registry import registry
+from torch.utils.model_zoo import tqdm
+from torchvision.datasets.utils import (
+    check_integrity,
+    download_file_from_google_drive,
+    extract_archive,
+)
+def now():
+    from datetime import datetime
+    return datetime.now().strftime("%Y%m%d%H%M")[:-1]
+def is_url(url_or_filename):
+    parsed = urlparse(url_or_filename)
+    return parsed.scheme in ("http", "https")
+def get_cache_path(rel_path):
+    return os.path.expanduser(os.path.join(registry.get_path("cache_root"), rel_path))
+def get_abs_path(rel_path):
+    return os.path.join(registry.get_path("library_root"), rel_path)
+def load_json(filename):
+    with open(filename, "r") as f:
+        return json.load(f)
+# The following are adapted from torchvision and vissl
+# torchvision: https://github.com/pytorch/vision
+# vissl: https://github.com/facebookresearch/vissl/blob/main/vissl/utils/download.py
+def makedir(dir_path):
+    """
+    Create the directory if it does not exist.
+    """
+    is_success = False
+    try:
+        if not g_pathmgr.exists(dir_path):
+            g_pathmgr.mkdirs(dir_path)
+        is_success = True
+    except BaseException:
+        print(f"Error creating directory: {dir_path}")
+    return is_success
+def get_redirected_url(url: str):
+    """
+    Given a URL, returns the URL it redirects to or the
+    original URL in case of no indirection
+    """
+    import requests
+    with requests.Session() as session:
+        with session.get(url, stream=True, allow_redirects=True) as response:
+            if response.history:
+                return response.url
+            else:
+                return url
+def to_google_drive_download_url(view_url: str) -> str:
+    """
+    Utility function to transform a view URL of google drive
+    to a download URL for google drive
+    Example input:
+        https://drive.google.com/file/d/137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp/view
+    Example output:
+        https://drive.google.com/uc?export=download&id=137RyRjvTBkBiIfeYBNZBtViDHQ6_Ewsp
+    """
+    splits = view_url.split("/")
+    assert splits[-1] == "view"
+    file_id = splits[-2]
+    return f"https://drive.google.com/uc?export=download&id={file_id}"
+def download_google_drive_url(url: str, output_path: str, output_file_name: str):
+    """
+    Download a file from google drive
+    Downloading an URL from google drive requires confirmation when
+    the file of the size is too big (google drive notifies that
+    anti-viral checks cannot be performed on such files)
+    """
+    import requests
+    with requests.Session() as session:
+        # First get the confirmation token and append it to the URL
+        with session.get(url, stream=True, allow_redirects=True) as response:
+            for k, v in response.cookies.items():
+                if k.startswith("download_warning"):
+                    url = url + "&confirm=" + v
+        # Then download the content of the file
+        with session.get(url, stream=True, verify=True) as response:
+            makedir(output_path)
+            path = os.path.join(output_path, output_file_name)
+            total_size = int(response.headers.get("Content-length", 0))
+            with open(path, "wb") as file:
+                from tqdm import tqdm
+                with tqdm(total=total_size) as progress_bar:
+                    for block in response.iter_content(
+                        chunk_size=io.DEFAULT_BUFFER_SIZE
+                    ):
+                        file.write(block)
+                        progress_bar.update(len(block))
+def _get_google_drive_file_id(url: str) -> Optional[str]:
+    parts = urlparse(url)
+    if re.match(r"(drive|docs)[.]google[.]com", parts.netloc) is None:
+        return None
+    match = re.match(r"/file/d/(?P<id>[^/]*)", parts.path)
+    if match is None:
+        return None
+    return match.group("id")
+def _urlretrieve(url: str, filename: str, chunk_size: int = 1024) -> None:
+    with open(filename, "wb") as fh:
+        with urllib.request.urlopen(
+            urllib.request.Request(url, headers={"User-Agent": "vissl"})
+        ) as response:
+            with tqdm(total=response.length) as pbar:
+                for chunk in iter(lambda: response.read(chunk_size), ""):
+                    if not chunk:
+                        break
+                    pbar.update(chunk_size)
+                    fh.write(chunk)
+def download_url(
+    url: str,
+    root: str,
+    filename: Optional[str] = None,
+    md5: Optional[str] = None,
+) -> None:
+    """Download a file from a url and place it in root.
+    Args:
+        url (str): URL to download file from
+        root (str): Directory to place downloaded file in
+        filename (str, optional): Name to save the file under.
+                                  If None, use the basename of the URL.
+        md5 (str, optional): MD5 checksum of the download. If None, do not check
+    """
+    root = os.path.expanduser(root)
+    if not filename:
+        filename = os.path.basename(url)
+    fpath = os.path.join(root, filename)
+    makedir(root)
+    # check if file is already present locally
+    if check_integrity(fpath, md5):
+        print("Using downloaded and verified file: " + fpath)
+        return
+    # expand redirect chain if needed
+    url = get_redirected_url(url)
+    # check if file is located on Google Drive
+    file_id = _get_google_drive_file_id(url)
+    if file_id is not None:
+        return download_file_from_google_drive(file_id, root, filename, md5)
+    # download the file
+    try:
+        print("Downloading " + url + " to " + fpath)
+        _urlretrieve(url, fpath)
+    except (urllib.error.URLError, IOError) as e:  # type: ignore[attr-defined]
+        if url[:5] == "https":
+            url = url.replace("https:", "http:")
+            print(
+                "Failed download. Trying https -> http instead."
+                " Downloading " + url + " to " + fpath
+            )
+            _urlretrieve(url, fpath)
+        else:
+            raise e
+    # check integrity of downloaded file
+    if not check_integrity(fpath, md5):
+        raise RuntimeError("File not found or corrupted.")
+def download_and_extract_archive(
+    url: str,
+    download_root: str,
+    extract_root: Optional[str] = None,
+    filename: Optional[str] = None,
+    md5: Optional[str] = None,
+    remove_finished: bool = False,
+) -> None:
+    download_root = os.path.expanduser(download_root)
+    if extract_root is None:
+        extract_root = download_root
+    if not filename:
+        filename = os.path.basename(url)
+    download_url(url, download_root, filename, md5)
+    archive = os.path.join(download_root, filename)
+    print("Extracting {} to {}".format(archive, extract_root))
+    extract_archive(archive, extract_root, remove_finished)
+def cache_url(url: str, cache_dir: str) -> str:
+    """
+    This implementation downloads the remote resource and caches it locally.
+    The resource will only be downloaded if not previously requested.
+    """
+    parsed_url = urlparse(url)
+    dirname = os.path.join(cache_dir, os.path.dirname(parsed_url.path.lstrip("/")))
+    makedir(dirname)
+    filename = url.split("/")[-1]
+    cached = os.path.join(dirname, filename)
+    with file_lock(cached):
+        if not os.path.isfile(cached):
+            logging.info(f"Downloading {url} to {cached} ...")
+            cached = download(url, dirname, filename=filename)
+    logging.info(f"URL {url} cached in {cached}")
+    return cached
+# TODO (prigoyal): convert this into RAII-style API
+def create_file_symlink(file1, file2):
+    """
+    Simply create the symlinks for a given file1 to file2.
+    Useful during model checkpointing to symlinks to the
+    latest successful checkpoint.
+    """
+    try:
+        if g_pathmgr.exists(file2):
+            g_pathmgr.rm(file2)
+        g_pathmgr.symlink(file1, file2)
+    except Exception as e:
+        logging.info(f"Could NOT create symlink. Error: {e}")
+def save_file(data, filename, append_to_json=True, verbose=True):
+    """
+    Common i/o utility to handle saving data to various file formats.
+    Supported:
+        .pkl, .pickle, .npy, .json
+    Specifically for .json, users have the option to either append (default)
+    or rewrite by passing in Boolean value to append_to_json.
+    """
+    if verbose:
+        logging.info(f"Saving data to file: {filename}")
+    file_ext = os.path.splitext(filename)[1]
+    if file_ext in [".pkl", ".pickle"]:
+        with g_pathmgr.open(filename, "wb") as fopen:
+            pickle.dump(data, fopen, pickle.HIGHEST_PROTOCOL)
+    elif file_ext == ".npy":
+        with g_pathmgr.open(filename, "wb") as fopen:
+            np.save(fopen, data)
+    elif file_ext == ".json":
+        if append_to_json:
+            with g_pathmgr.open(filename, "a") as fopen:
+                fopen.write(json.dumps(data, sort_keys=True) + "\n")
+                fopen.flush()
+        else:
+            with g_pathmgr.open(filename, "w") as fopen:
+                fopen.write(json.dumps(data, sort_keys=True) + "\n")
+                fopen.flush()
+    elif file_ext == ".yaml":
+        with g_pathmgr.open(filename, "w") as fopen:
+            dump = yaml.dump(data)
+            fopen.write(dump)
+            fopen.flush()
+    else:
+        raise Exception(f"Saving {file_ext} is not supported yet")
+    if verbose:
+        logging.info(f"Saved data to file: {filename}")
+def load_file(filename, mmap_mode=None, verbose=True, allow_pickle=False):
+    """
+    Common i/o utility to handle loading data from various file formats.
+    Supported:
+        .pkl, .pickle, .npy, .json
+    For the npy files, we support reading the files in mmap_mode.
+    If the mmap_mode of reading is not successful, we load data without the
+    mmap_mode.
+    """
+    if verbose:
+        logging.info(f"Loading data from file: {filename}")
+    file_ext = os.path.splitext(filename)[1]
+    if file_ext == ".txt":
+        with g_pathmgr.open(filename, "r") as fopen:
+            data = fopen.readlines()
+    elif file_ext in [".pkl", ".pickle"]:
+        with g_pathmgr.open(filename, "rb") as fopen:
+            data = pickle.load(fopen, encoding="latin1")
+    elif file_ext == ".npy":
+        if mmap_mode:
+            try:
+                with g_pathmgr.open(filename, "rb") as fopen:
+                    data = np.load(
+                        fopen,
+                        allow_pickle=allow_pickle,
+                        encoding="latin1",
+                        mmap_mode=mmap_mode,
+                    )
+            except ValueError as e:
+                logging.info(
+                    f"Could not mmap {filename}: {e}. Trying without g_pathmgr"
+                )
+                data = np.load(
+                    filename,
+                    allow_pickle=allow_pickle,
+                    encoding="latin1",
+                    mmap_mode=mmap_mode,
+                )
+                logging.info("Successfully loaded without g_pathmgr")
+            except Exception:
+                logging.info("Could not mmap without g_pathmgr. Trying without mmap")
+                with g_pathmgr.open(filename, "rb") as fopen:
+                    data = np.load(fopen, allow_pickle=allow_pickle, encoding="latin1")
+        else:
+            with g_pathmgr.open(filename, "rb") as fopen:
+                data = np.load(fopen, allow_pickle=allow_pickle, encoding="latin1")
+    elif file_ext == ".json":
+        with g_pathmgr.open(filename, "r") as fopen:
+            data = json.load(fopen)
+    elif file_ext == ".yaml":
+        with g_pathmgr.open(filename, "r") as fopen:
+            data = yaml.load(fopen, Loader=yaml.FullLoader)
+    elif file_ext == ".csv":
+        with g_pathmgr.open(filename, "r") as fopen:
+            data = pd.read_csv(fopen)
+    else:
+        raise Exception(f"Reading from {file_ext} is not supported yet")
+    return data
+def abspath(resource_path: str):
+    """
+    Make a path absolute, but take into account prefixes like
+    "http://" or "manifold://"
+    """
+    regex = re.compile(r"^\w+://")
+    if regex.match(resource_path) is None:
+        return os.path.abspath(resource_path)
+    else:
+        return resource_path
+def makedir(dir_path):
+    """
+    Create the directory if it does not exist.
+    """
+    is_success = False
+    try:
+        if not g_pathmgr.exists(dir_path):
+            g_pathmgr.mkdirs(dir_path)
+        is_success = True
+    except BaseException:
+        logging.info(f"Error creating directory: {dir_path}")
+    return is_success
+def is_url(input_url):
+    """
+    Check if an input string is a url. look for http(s):// and ignoring the case
+    """
+    is_url = re.match(r"^(?:http)s?://", input_url, re.IGNORECASE) is not None
+    return is_url
+def cleanup_dir(dir):
+    """
+    Utility for deleting a directory. Useful for cleaning the storage space
+    that contains various training artifacts like checkpoints, data etc.
+    """
+    if os.path.exists(dir):
+        logging.info(f"Deleting directory: {dir}")
+        shutil.rmtree(dir)
+    logging.info(f"Deleted contents of directory: {dir}")
+def get_file_size(filename):
+    """
+    Given a file, get the size of file in MB
+    """
+    size_in_mb = os.path.getsize(filename) / float(1024**2)
+    return size_in_mb

unimernet/configs/datasets/formula/formula_eval.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+datasets:
+  formula_rec_eval:
+    data_type: images
+    build_info:
+      images: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/val
+      annotation: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/pdfmath.txt

unimernet/configs/datasets/formula/formula_train.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+datasets:
+  formula_rec_train:
+    data_type: images
+    build_info:
+      images: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/train
+      annotation: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/pdfmath.txt

unimernet/configs/datasets/formula/multi_scale_formula_train.yaml ADDED Viewed

	@@ -0,0 +1,21 @@

+datasets:
+  multi_scale_formula_rec_train:
+    data_type: images
+    build_info:
+      images: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/train
+      annotation: /mnt/petrelfs/share_data/hanxiao/latex-ocr/pdf/pdfmath.txt
+    vis_processor:
+      train:
+        name: "formula_image_multi_scale_train"
+        all_scales:
+          - [ 96, 336 ]
+          - [ 128, 448 ]
+          - [ 192, 672 ]
+          - [ 288, 1008 ]
+          - [ 384, 1344 ]
+    text_processor:
+      train:
+        name: "blip_caption"
+        max_words: 256

unimernet/configs/default.yaml ADDED Viewed

	@@ -0,0 +1,10 @@

+ # Copyright (c) 2022, salesforce.com, inc.
+ # All rights reserved.
+ # SPDX-License-Identifier: BSD-3-Clause
+ # For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+env:
+  # For default users
+  # cache_root: "cache"
+  # For internal use with persistent storage
+  cache_root: "/export/home/.cache/vigc"

unimernet/configs/models/unimernet_base.yaml ADDED Viewed

	@@ -0,0 +1,31 @@

+model:
+  arch: unimernet
+  load_finetuned: False
+  load_pretrained: False
+  pretrained: "path/to/pretrained/weight"
+  finetuned: ""
+  tokenizer_name: nougat
+  tokenizer_config:
+    path: ./models/unimernet
+  model_name: unimernet
+  model_config:
+    max_seq_len: 384
+preprocess:
+  vis_processor:
+    train:
+      name: "formula_image_train"
+      image_size:
+        - 192
+        - 672
+    eval:
+      name: "formula_image_eval"
+      image_size:
+        - 192
+        - 672
+  text_processor:
+    train:
+      name: "blip_caption"
+    eval:
+      name: "blip_caption"

unimernet/datasets/__init__.py ADDED Viewed

File without changes

unimernet/datasets/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (162 Bytes). View file

unimernet/datasets/__pycache__/data_utils.cpython-310.pyc ADDED Viewed

Binary file (8.18 kB). View file

unimernet/datasets/builders/__init__.py ADDED Viewed

	@@ -0,0 +1,69 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+from unimernet.datasets.builders.base_dataset_builder import load_dataset_config
+from unimernet.common.registry import registry
+from unimernet.datasets.builders.formula import FormulaRecTrainBuilder, FormulaRecEvalBuilder, \
+    MultiScaleFormulaRecTrainBuilder
+__all__ = [
+    "FormulaRecTrainBuilder",
+    "FormulaRecEvalBuilder",
+    "MultiScaleFormulaRecTrainBuilder",
+]
+def load_dataset(name, cfg_path=None, vis_path=None, data_type=None):
+    """
+    Example
+    >>> dataset = load_dataset("coco_caption", cfg=None)
+    >>> splits = dataset.keys()
+    >>> print([len(dataset[split]) for split in splits])
+    """
+    if cfg_path is None:
+        cfg = None
+    else:
+        cfg = load_dataset_config(cfg_path)
+    try:
+        builder = registry.get_builder_class(name)(cfg)
+    except TypeError:
+        print(
+            f"Dataset {name} not found. Available datasets:\n"
+            + ", ".join([str(k) for k in dataset_zoo.get_names()])
+        )
+        exit(1)
+    if vis_path is not None:
+        if data_type is None:
+            # use default data type in the config
+            data_type = builder.config.data_type
+        assert (
+                data_type in builder.config.build_info
+        ), f"Invalid data_type {data_type} for {name}."
+        builder.config.build_info.get(data_type).storage = vis_path
+    dataset = builder.build_datasets()
+    return dataset
+class DatasetZoo:
+    def __init__(self) -> None:
+        self.dataset_zoo = {
+            k: list(v.DATASET_CONFIG_DICT.keys())
+            for k, v in sorted(registry.mapping["builder_name_mapping"].items())
+        }
+    def get_names(self):
+        return list(self.dataset_zoo.keys())
+dataset_zoo = DatasetZoo()

unimernet/datasets/builders/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (2.4 kB). View file

unimernet/datasets/builders/__pycache__/base_dataset_builder.cpython-310.pyc ADDED Viewed

Binary file (6.05 kB). View file

unimernet/datasets/builders/__pycache__/formula.cpython-310.pyc ADDED Viewed

Binary file (2.44 kB). View file

unimernet/datasets/builders/base_dataset_builder.py ADDED Viewed

	@@ -0,0 +1,233 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import logging
+import os
+import shutil
+import warnings
+import unimernet.common.utils as utils
+import torch.distributed as dist
+from unimernet.common.dist_utils import is_dist_avail_and_initialized, is_main_process
+from unimernet.common.registry import registry
+from unimernet.processors.base_processor import BaseProcessor
+from omegaconf import OmegaConf
+from torchvision.datasets.utils import download_url
+class BaseDatasetBuilder:
+    train_dataset_cls, eval_dataset_cls = None, None
+    def __init__(self, cfg=None):
+        super().__init__()
+        if cfg is None:
+            # help to create datasets from default config.
+            self.config = load_dataset_config(self.default_config_path())
+        elif isinstance(cfg, str):
+            self.config = load_dataset_config(cfg)
+        else:
+            # when called from task.build_dataset()
+            self.config = cfg
+        self.data_type = self.config.data_type
+        self.vis_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+        self.text_processors = {"train": BaseProcessor(), "eval": BaseProcessor()}
+    def build_datasets(self):
+        # download, split, etc...
+        # only called on 1 GPU/TPU in distributed
+        if is_main_process():
+            self._download_data()
+        if is_dist_avail_and_initialized():
+            dist.barrier()
+        # at this point, all the annotations and image/videos should be all downloaded to the specified locations.
+        logging.info("Building datasets...")
+        datasets = self.build()  # dataset['train'/'val'/'test']
+        return datasets
+    def build_processors(self):
+        vis_proc_cfg = self.config.get("vis_processor")
+        txt_proc_cfg = self.config.get("text_processor")
+        if vis_proc_cfg is not None:
+            vis_train_cfg = vis_proc_cfg.get("train")
+            vis_eval_cfg = vis_proc_cfg.get("eval")
+            self.vis_processors["train"] = self._build_proc_from_cfg(vis_train_cfg)
+            self.vis_processors["eval"] = self._build_proc_from_cfg(vis_eval_cfg)
+        if txt_proc_cfg is not None:
+            txt_train_cfg = txt_proc_cfg.get("train")
+            txt_eval_cfg = txt_proc_cfg.get("eval")
+            self.text_processors["train"] = self._build_proc_from_cfg(txt_train_cfg)
+            self.text_processors["eval"] = self._build_proc_from_cfg(txt_eval_cfg)
+    @staticmethod
+    def _build_proc_from_cfg(cfg):
+        return (
+            registry.get_processor_class(cfg.name).from_config(cfg)
+            if cfg is not None
+            else None
+        )
+    @classmethod
+    def default_config_path(cls, type="default"):
+        return utils.get_abs_path(cls.DATASET_CONFIG_DICT[type])
+    def _download_data(self):
+        self._download_ann()
+        self._download_vis()
+    def _download_ann(self):
+        """
+        Download annotation files if necessary.
+        All the vision-language datasets should have annotations of unified format.
+        storage_path can be:
+          (1) relative/absolute: will be prefixed with env.cache_root to make full path if relative.
+          (2) basename/dirname: will be suffixed with base name of URL if dirname is provided.
+        Local annotation paths should be relative.
+        """
+        anns = self.config.build_info.annotations
+        splits = anns.keys()
+        cache_root = registry.get_path("cache_root")
+        for split in splits:
+            info = anns[split]
+            urls, storage_paths = info.get("url", None), info.storage
+            if isinstance(urls, str):
+                urls = [urls]
+            if isinstance(storage_paths, str):
+                storage_paths = [storage_paths]
+            assert len(urls) == len(storage_paths)
+            for url_or_filename, storage_path in zip(urls, storage_paths):
+                # if storage_path is relative, make it full by prefixing with cache_root.
+                if not os.path.isabs(storage_path):
+                    storage_path = os.path.join(cache_root, storage_path)
+                dirname = os.path.dirname(storage_path)
+                if not os.path.exists(dirname):
+                    os.makedirs(dirname)
+                if os.path.isfile(url_or_filename):
+                    src, dst = url_or_filename, storage_path
+                    if not os.path.exists(dst):
+                        shutil.copyfile(src=src, dst=dst)
+                    else:
+                        logging.info("Using existing file {}.".format(dst))
+                else:
+                    if os.path.isdir(storage_path):
+                        # if only dirname is provided, suffix with basename of URL.
+                        raise ValueError(
+                            "Expecting storage_path to be a file path, got directory {}".format(
+                                storage_path
+                            )
+                        )
+                    else:
+                        filename = os.path.basename(storage_path)
+                    download_url(url=url_or_filename, root=dirname, filename=filename)
+    def _download_vis(self):
+        storage_path = self.config.build_info.get(self.data_type).storage
+        storage_path = utils.get_cache_path(storage_path)
+        if not os.path.exists(storage_path):
+            warnings.warn(
+                f"""
+                The specified path {storage_path} for visual inputs does not exist.
+                Please provide a correct path to the visual inputs or
+                refer to datasets/download_scripts/README.md for downloading instructions.
+                """
+            )
+    def build(self):
+        """
+        Create by split datasets inheriting torch.utils.data.Datasets.
+        # build() can be dataset-specific. Overwrite to customize.
+        """
+        self.build_processors()
+        build_info = self.config.build_info
+        ann_info = build_info.annotations
+        vis_info = build_info.get(self.data_type)
+        datasets = dict()
+        for split in ann_info.keys():
+            if split not in ["train", "val", "test"]:
+                continue
+            is_train = split == "train"
+            # processors
+            vis_processor = (
+                self.vis_processors["train"]
+                if is_train
+                else self.vis_processors["eval"]
+            )
+            text_processor = (
+                self.text_processors["train"]
+                if is_train
+                else self.text_processors["eval"]
+            )
+            # annotation path
+            ann_paths = ann_info.get(split).storage
+            if isinstance(ann_paths, str):
+                ann_paths = [ann_paths]
+            abs_ann_paths = []
+            for ann_path in ann_paths:
+                if not os.path.isabs(ann_path):
+                    ann_path = utils.get_cache_path(ann_path)
+                abs_ann_paths.append(ann_path)
+            ann_paths = abs_ann_paths
+            # visual data storage path
+            vis_path = vis_info.storage
+            if not os.path.isabs(vis_path):
+                # vis_path = os.path.join(utils.get_cache_path(), vis_path)
+                vis_path = utils.get_cache_path(vis_path)
+            if not os.path.exists(vis_path):
+                warnings.warn("storage path {} does not exist.".format(vis_path))
+            # create datasets
+            dataset_cls = self.train_dataset_cls if is_train else self.eval_dataset_cls
+            datasets[split] = dataset_cls(
+                vis_processor=vis_processor,
+                text_processor=text_processor,
+                ann_paths=ann_paths,
+                vis_root=vis_path,
+            )
+        return datasets
+def load_dataset_config(cfg_path):
+    cfg = OmegaConf.load(cfg_path).datasets
+    cfg = cfg[list(cfg.keys())[0]]
+    return cfg

unimernet/datasets/builders/formula.py ADDED Viewed

	@@ -0,0 +1,105 @@

+import logging
+from unimernet.common.registry import registry
+from unimernet.datasets.builders.base_dataset_builder import BaseDatasetBuilder
+from unimernet.datasets.datasets.formula import Im2LatexDataset
+from unimernet.datasets.datasets.formula_multi_scale import MultiScaleIm2LatexDataset
+@registry.register_builder("formula_rec_train")
+class FormulaRecTrainBuilder(BaseDatasetBuilder):
+    train_dataset_cls = Im2LatexDataset
+    DATASET_CONFIG_DICT = {
+        "default": "configs/datasets/formula/formula_train.yaml"
+    }
+    LOG_INFO = "Formula Recgnition Train"
+    def build_datasets(self):
+        logging.info(f"Building {self.LOG_INFO} datasets ...")
+        self.build_processors()
+        build_info = self.config.build_info
+        anno_path = build_info.annotation,
+        vis_root = build_info.images
+        anno_path = [anno_path] if isinstance(anno_path, str) else anno_path
+        vis_root = [vis_root] if isinstance(vis_root, str) else vis_root
+        datasets = dict()
+        # create datasets
+        dataset_cls = self.train_dataset_cls
+        datasets['train'] = dataset_cls(
+            vis_processor=self.vis_processors["train"],
+            text_processor=self.text_processors["train"],
+            vis_root=vis_root,
+            anno_path=anno_path,
+        )
+        print(datasets['train'][0])
+        return datasets
+@registry.register_builder("multi_scale_formula_rec_train")
+class MultiScaleFormulaRecTrainBuilder(BaseDatasetBuilder):
+    train_dataset_cls = MultiScaleIm2LatexDataset
+    DATASET_CONFIG_DICT = {
+        "default": "configs/datasets/formula/multi_scale_formula_train.yaml"
+    }
+    LOG_INFO = "Multi Scale Formula Recgnition Train"
+    def build_datasets(self):
+        logging.info(f"Building {self.LOG_INFO} datasets ...")
+        self.build_processors()
+        build_info = self.config.build_info
+        anno_path = build_info.annotation,
+        vis_root = build_info.images
+        anno_path = [anno_path] if isinstance(anno_path, str) else anno_path
+        vis_root = [vis_root] if isinstance(vis_root, str) else vis_root
+        datasets = dict()
+        # create datasets
+        dataset_cls = self.train_dataset_cls
+        datasets['train'] = dataset_cls(
+            vis_processor=self.vis_processors["train"],
+            text_processor=self.text_processors["train"],
+            vis_root=vis_root,
+            anno_path=anno_path,
+        )
+        print(datasets['train'][0])
+        return datasets
+@registry.register_builder("formula_rec_eval")
+class FormulaRecEvalBuilder(BaseDatasetBuilder):
+    eval_dataset_cls = Im2LatexDataset
+    DATASET_CONFIG_DICT = {
+        "default": "configs/datasets/formula/formula_eval.yaml"
+    }
+    LOG_INFO = "Formula Recgnition Eval"
+    def build_datasets(self):
+        logging.info(f"Building {self.LOG_INFO} datasets ...")
+        self.build_processors()
+        build_info = self.config.build_info
+        anno_path = build_info.annotation,
+        vis_root = build_info.images
+        anno_path = [anno_path] if isinstance(anno_path, str) else anno_path
+        vis_root = [vis_root] if isinstance(vis_root, str) else vis_root
+        datasets = dict()
+        # create datasets
+        dataset_cls = self.eval_dataset_cls
+        datasets['eval'] = dataset_cls(
+            vis_processor=self.vis_processors["eval"],
+            text_processor=self.text_processors["eval"],
+            vis_root=vis_root,
+            anno_path=anno_path,
+        )
+        print(datasets['eval'][0])
+        return datasets

unimernet/datasets/data_utils.py ADDED Viewed

	@@ -0,0 +1,284 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import gzip
+import logging
+import os
+import random as rnd
+import tarfile
+import zipfile
+import decord
+import webdataset as wds
+import numpy as np
+import torch
+from torch.utils.data.dataset import IterableDataset, ChainDataset
+from decord import VideoReader
+from unimernet.common.registry import registry
+from unimernet.datasets.datasets.base_dataset import ConcatDataset
+from tqdm import tqdm
+decord.bridge.set_bridge("torch")
+MAX_INT = registry.get("MAX_INT")
+def load_video(video_path, n_frms=MAX_INT, height=-1, width=-1, sampling="uniform"):
+    vr = VideoReader(uri=video_path, height=height, width=width)
+    vlen = len(vr)
+    start, end = 0, vlen
+    n_frms = min(n_frms, vlen)
+    if sampling == "uniform":
+        indices = np.arange(start, end, vlen / n_frms).astype(int)
+    elif sampling == "headtail":
+        indices_h = sorted(rnd.sample(range(vlen // 2), n_frms // 2))
+        indices_t = sorted(rnd.sample(range(vlen // 2, vlen), n_frms // 2))
+        indices = indices_h + indices_t
+    else:
+        raise NotImplementedError
+    # get_batch -> T, H, W, C
+    frms = vr.get_batch(indices).permute(3, 0, 1, 2).float()  # (C, T, H, W)
+    return frms
+def apply_to_sample(f, sample):
+    if len(sample) == 0:
+        return {}
+    def _apply(x):
+        if torch.is_tensor(x):
+            return f(x)
+        elif isinstance(x, dict):
+            return {key: _apply(value) for key, value in x.items()}
+        elif isinstance(x, list):
+            return [_apply(x) for x in x]
+        else:
+            return x
+    return _apply(sample)
+def move_to_cuda(sample):
+    def _move_to_cuda(tensor):
+        return tensor.cuda()
+    return apply_to_sample(_move_to_cuda, sample)
+def prepare_sample(samples, cuda_enabled=True):
+    if cuda_enabled:
+        samples = move_to_cuda(samples)
+    # TODO fp16 support
+    return samples
+def reorg_datasets_by_split(datasets):
+    """
+    Organizes datasets by split.
+    Args:
+        datasets: dict of torch.utils.data.Dataset objects by name.
+    Returns:
+        Dict of datasets by split {split_name: List[Datasets]}.
+    """
+    # if len(datasets) == 1:
+    #     return datasets[list(datasets.keys())[0]]
+    # else:
+    reorg_datasets = dict()
+    # reorganize by split
+    for _, dataset in datasets.items():
+        for split_name, dataset_split in dataset.items():
+            if split_name not in reorg_datasets:
+                reorg_datasets[split_name] = [dataset_split]
+            else:
+                reorg_datasets[split_name].append(dataset_split)
+    return reorg_datasets
+def concat_datasets(datasets):
+    """
+    Concatenates multiple datasets into a single dataset.
+    It supports may-style datasets and DataPipeline from WebDataset. Currently, does not support
+    generic IterableDataset because it requires creating separate samplers.
+    Now only supports conctenating training datasets and assuming validation and testing
+    have only a single dataset. This is because metrics should not be computed on the concatenated
+    datasets.
+    Args:
+        datasets: dict of torch.utils.data.Dataset objects by split.
+    Returns:
+        Dict of concatenated datasets by split, "train" is the concatenation of multiple datasets,
+        "val" and "test" remain the same.
+        If the input training datasets contain both map-style and DataPipeline datasets, returns
+        a tuple, where the first element is a concatenated map-style dataset and the second
+        element is a chained DataPipeline dataset.
+    """
+    # concatenate datasets in the same split
+    for split_name in datasets:
+        if split_name != "train":
+            assert (
+                len(datasets[split_name]) == 1
+            ), "Do not support multiple {} datasets.".format(split_name)
+            datasets[split_name] = datasets[split_name][0]
+        else:
+            iterable_datasets, map_datasets = [], []
+            for dataset in datasets[split_name]:
+                if isinstance(dataset, wds.DataPipeline):
+                    logging.info(
+                        "Dataset {} is IterableDataset, can't be concatenated.".format(
+                            dataset
+                        )
+                    )
+                    iterable_datasets.append(dataset)
+                elif isinstance(dataset, IterableDataset):
+                    raise NotImplementedError(
+                        "Do not support concatenation of generic IterableDataset."
+                    )
+                else:
+                    map_datasets.append(dataset)
+            # if len(iterable_datasets) > 0:
+            # concatenate map-style datasets and iterable-style datasets separately
+            chained_datasets = (
+                ChainDataset(iterable_datasets) if len(iterable_datasets) > 0 else None
+            )
+            concat_datasets = (
+                ConcatDataset(map_datasets) if len(map_datasets) > 0 else None
+            )
+            train_datasets = concat_datasets, chained_datasets
+            train_datasets = tuple([x for x in train_datasets if x is not None])
+            train_datasets = (
+                train_datasets[0] if len(train_datasets) == 1 else train_datasets
+            )
+            datasets[split_name] = train_datasets
+    return datasets
+def extract_archive(from_path, to_path=None, overwrite=False):
+    """Extract archive.
+    Args:
+        from_path: the path of the archive.
+        to_path: the root path of the extracted files (directory of from_path)
+        overwrite: overwrite existing files (False)
+    Returns:
+        List of paths to extracted files even if not overwritten.
+    Examples:
+        >>> url = 'http://www.quest.dcs.shef.ac.uk/wmt16_files_mmt/validation.tar.gz'
+        >>> from_path = './validation.tar.gz'
+        >>> to_path = './'
+        >>> torchtext.utils.download_from_url(url, from_path)
+        >>> torchtext.utils.extract_archive(from_path, to_path)
+        >>> ['.data/val.de', '.data/val.en']
+        >>> torchtext.utils.download_from_url(url, from_path)
+        >>> torchtext.utils.extract_archive(from_path, to_path)
+        >>> ['.data/val.de', '.data/val.en']
+    """
+    if to_path is None:
+        to_path = os.path.dirname(from_path)
+    if from_path.endswith((".tar.gz", ".tgz")):
+        logging.info("Opening tar file {} to {}.".format(from_path, to_path))
+        with tarfile.open(from_path, "r") as tar:
+            files = []
+            for file_ in tqdm(tar):
+                file_path = os.path.join(to_path, file_.name)
+                if file_.isfile():
+                    files.append(file_path)
+                    if os.path.exists(file_path):
+                        logging.info("{} already extracted.".format(file_path))
+                        if not overwrite:
+                            continue
+                tar.extract(file_, to_path)
+            logging.info("Finished extracting tar file {}.".format(from_path))
+            return files
+    elif from_path.endswith(".zip"):
+        assert zipfile.is_zipfile(from_path), from_path
+        logging.info("Opening zip file {} to {}.".format(from_path, to_path))
+        with zipfile.ZipFile(from_path, "r") as zfile:
+            files = []
+            for file_ in tqdm(zfile.namelist()):
+                file_path = os.path.join(to_path, file_)
+                files.append(file_path)
+                if os.path.exists(file_path):
+                    logging.info("{} already extracted.".format(file_path))
+                    if not overwrite:
+                        continue
+                zfile.extract(file_, to_path)
+        files = [f for f in files if os.path.isfile(f)]
+        logging.info("Finished extracting zip file {}.".format(from_path))
+        return files
+    elif from_path.endswith(".gz"):
+        logging.info("Opening gz file {} to {}.".format(from_path, to_path))
+        default_block_size = 65536
+        filename = from_path[:-3]
+        files = [filename]
+        with gzip.open(from_path, "rb") as gzfile, open(filename, "wb") as d_file:
+            while True:
+                block = gzfile.read(default_block_size)
+                if not block:
+                    break
+                else:
+                    d_file.write(block)
+            d_file.write(block)
+        logging.info("Finished extracting gz file {}.".format(from_path))
+        return files
+    else:
+        raise NotImplementedError(
+            "We currently only support tar.gz, .tgz, .gz and zip achives."
+        )
+def save_frames_grid(img_array, out_path):
+    import torch
+    from PIL import Image
+    from torchvision.utils import make_grid
+    if len(img_array.shape) == 3:
+        img_array = img_array.unsqueeze(0)
+    elif len(img_array.shape) == 5:
+        b, t, c, h, w = img_array.shape
+        img_array = img_array.view(-1, c, h, w)
+    elif len(img_array.shape) == 4:
+        pass
+    else:
+        raise NotImplementedError(
+            "Supports only (b,t,c,h,w)-shaped inputs. First two dimensions can be ignored."
+        )
+    assert img_array.shape[1] == 3, "Exepcting input shape of (H, W, 3), i.e. RGB-only."
+    grid = make_grid(img_array)
+    ndarr = grid.permute(1, 2, 0).to("cpu", torch.uint8).numpy()
+    img = Image.fromarray(ndarr)
+    img.save(out_path)

unimernet/datasets/datasets/__pycache__/base_dataset.cpython-310.pyc ADDED Viewed

Binary file (3.55 kB). View file

unimernet/datasets/datasets/__pycache__/formula.cpython-310.pyc ADDED Viewed

Binary file (2.85 kB). View file

unimernet/datasets/datasets/__pycache__/formula_multi_scale.cpython-310.pyc ADDED Viewed

Binary file (1.23 kB). View file

unimernet/datasets/datasets/base_dataset.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import json
+from PIL import Image, ImageFile
+import os.path as osp
+ImageFile.LOAD_TRUNCATED_IMAGES = True
+from io import BytesIO
+from typing import Iterable
+from torch.utils.data import Dataset, ConcatDataset
+import torch
+class BaseDataset(Dataset):
+    def __init__(self, vis_processor, text_processor, vis_root, anno_path):
+        self.vis_root = vis_root
+        # if isinstance(anno_path, tuple) or isinstance(anno_path, list):
+        #     anno_path = anno_path[0]
+        self.anno_path = anno_path
+        self.vis_processor = vis_processor
+        self.text_processor = text_processor
+        self.samples = self.init_samples()
+        self.reader = self.init_reader()
+        print('total {} {} samples'.format(self.__len__(), self.__class__.__name__))
+        for idx in range(10):
+            self.__getitem__(idx)
+    def __len__(self):
+        return len(self.samples)
+    def __getitem__(self, index):
+        raise NotImplementedError
+    def init_samples(self):
+        # read annotation from ceph
+        if self.anno_path.startswith('cluster'):
+            from petrel_client.client import Client
+            client = Client("~/petreloss.conf")
+            samples = json.loads(client.get(self.anno_path))
+        else:
+            samples = json.load(open(self.anno_path, 'r'))
+        return samples
+    def init_reader(self):
+        if self.vis_root.startswith('cluster'):
+            from petrel_client.client import Client
+            client = Client("~/petreloss.conf")
+            reader = {'type': 'PetrelReader', 'body': client.get}
+        else:
+            reader = {'type': 'LocalReader', 'body': Image.open}
+        return reader
+    def _read_image(self, sample, image_key="image"):
+        img_file = sample[image_key]
+        image_path = osp.join(self.vis_root, img_file)
+        image = self.reader['body'](image_path)
+        if isinstance(image, bytes):
+            bytes_stream = BytesIO(image)
+            image = Image.open(bytes_stream)
+        image = image.convert("RGB")
+        return image
+    def collater(self, samples):
+        image_list, question_list, answer_list = [], [], []
+        for sample in samples:
+            image_list.append(sample["image"])
+            question_list.append(sample["text_input"])
+            answer_list.append(sample["text_output"])
+        return {
+            "image": torch.stack(image_list, dim=0),
+            "text_input": question_list,
+            "text_output": answer_list,
+            "data_type": "vqa",
+        }
+class ConcatDataset(ConcatDataset):
+    def __init__(self, datasets: Iterable[Dataset]) -> None:
+        super().__init__(datasets)
+    def collater(self, samples):
+        # TODO For now only supports datasets with same underlying collater implementations
+        all_keys = set()
+        for s in samples:
+            all_keys.update(s)
+        shared_keys = all_keys
+        for s in samples:
+            shared_keys = shared_keys & set(s.keys())
+        samples_shared_keys = []
+        for s in samples:
+            samples_shared_keys.append({k: s[k] for k in s.keys() if k in shared_keys})
+        return self.datasets[0].collater(samples_shared_keys)

unimernet/datasets/datasets/dataloader_utils.py ADDED Viewed

	@@ -0,0 +1,200 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import time
+import random
+import torch
+from unimernet.datasets.data_utils import move_to_cuda
+from torch.utils.data import DataLoader
+class MultiIterLoader:
+    """
+    A simple wrapper for iterating over multiple iterators.
+    Args:
+        loaders (List[Loader]): List of Iterator loaders.
+        ratios (List[float]): List of ratios to sample from each loader. If None, all loaders are sampled uniformly.
+    """
+    def __init__(self, loaders, ratios=None):
+        # assert all loaders has __next__ method
+        for loader in loaders:
+            assert hasattr(
+                loader, "__next__"
+            ), "Loader {} has no __next__ method.".format(loader)
+        if ratios is None:
+            ratios = [1.0] * len(loaders)
+        else:
+            assert len(ratios) == len(loaders)
+            ratios = [float(ratio) / sum(ratios) for ratio in ratios]
+        self.loaders = loaders
+        self.ratios = ratios
+    def __next__(self):
+        # random sample from each loader by ratio
+        loader_idx = random.choices(range(len(self.loaders)), self.ratios, k=1)[0]
+        return next(self.loaders[loader_idx])
+    def __len__(self):
+        return sum([len(_) for _ in self.loaders if hasattr(_, "__len__")])
+class ConcatLoader:
+    """
+    A simple wrapper for iterating over multiple iterators.
+    Args:
+        loaders (List[Loader]): List of Iterator loaders.
+        ratios (List[float]): List of ratios to sample from each loader. If None, all loaders are sampled uniformly.
+    """
+    def __init__(self, loaders):
+        # assert all loaders has __next__ method
+        for loader in loaders:
+            assert hasattr(
+                loader, "__len__"
+            ), "Loader {} has no __len__ method.".format(loader)
+        self._epoch = 0
+        self._loader_lens = [len(_) for _ in loaders]
+        self._rest_lens = self._loader_lens.copy()
+        self.loaders = loaders
+    def __next__(self):
+        # random sample from each loader by ratio
+        loader_idx = random.choices(range(len(self.loaders)), self._rest_lens, k=1)[0]
+        self._rest_lens[loader_idx] -= 1
+        if sum(self._rest_lens) == 0:
+            self._epoch += 1
+            self._rest_lens = self._loader_lens.copy()
+        return next(self.loaders[loader_idx])
+    def __len__(self):
+        return sum([len(_) for _ in self.loaders if hasattr(_, "__len__")])
+class PrefetchLoader(object):
+    """
+    Modified from https://github.com/ChenRocks/UNITER.
+    overlap compute and cuda data transfer
+    (copied and then modified from nvidia apex)
+    """
+    def __init__(self, loader):
+        self.loader = loader
+        self.stream = torch.cuda.Stream()
+    def __iter__(self):
+        loader_it = iter(self.loader)
+        self.preload(loader_it)
+        batch = self.next(loader_it)
+        while batch is not None:
+            is_tuple = isinstance(batch, tuple)
+            if is_tuple:
+                task, batch = batch
+            if is_tuple:
+                yield task, batch
+            else:
+                yield batch
+            batch = self.next(loader_it)
+    def __len__(self):
+        return len(self.loader)
+    def preload(self, it):
+        try:
+            self.batch = next(it)
+        except StopIteration:
+            self.batch = None
+            return
+        # if record_stream() doesn't work, another option is to make sure
+        # device inputs are created on the main stream.
+        # self.next_input_gpu = torch.empty_like(self.next_input,
+        #                                        device='cuda')
+        # self.next_target_gpu = torch.empty_like(self.next_target,
+        #                                         device='cuda')
+        # Need to make sure the memory allocated for next_* is not still in use
+        # by the main stream at the time we start copying to next_*:
+        # self.stream.wait_stream(torch.cuda.current_stream())
+        with torch.cuda.stream(self.stream):
+            self.batch = move_to_cuda(self.batch)
+            # more code for the alternative if record_stream() doesn't work:
+            # copy_ will record the use of the pinned source tensor in this
+            # side stream.
+            # self.next_input_gpu.copy_(self.next_input, non_blocking=True)
+            # self.next_target_gpu.copy_(self.next_target, non_blocking=True)
+            # self.next_input = self.next_input_gpu
+            # self.next_target = self.next_target_gpu
+    def next(self, it):
+        torch.cuda.current_stream().wait_stream(self.stream)
+        batch = self.batch
+        if batch is not None:
+            record_cuda_stream(batch)
+        self.preload(it)
+        return batch
+    def __getattr__(self, name):
+        method = self.loader.__getattribute__(name)
+        return method
+def record_cuda_stream(batch):
+    if isinstance(batch, torch.Tensor):
+        batch.record_stream(torch.cuda.current_stream())
+    elif isinstance(batch, list) or isinstance(batch, tuple):
+        for t in batch:
+            record_cuda_stream(t)
+    elif isinstance(batch, dict):
+        for t in batch.values():
+            record_cuda_stream(t)
+    else:
+        pass
+class IterLoader:
+    """
+    A wrapper to convert DataLoader as an infinite iterator.
+    Modified from:
+        https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/iter_based_runner.py
+    """
+    def __init__(self, dataloader: DataLoader, use_distributed: bool = False):
+        self._dataloader = dataloader
+        self.iter_loader = iter(self._dataloader)
+        self._use_distributed = use_distributed
+        self._epoch = 0
+    @property
+    def epoch(self) -> int:
+        return self._epoch
+    def __next__(self):
+        try:
+            data = next(self.iter_loader)
+        except StopIteration:
+            self._epoch += 1
+            if hasattr(self._dataloader.sampler, "set_epoch") and self._use_distributed:
+                self._dataloader.sampler.set_epoch(self._epoch)
+            time.sleep(2)  # Prevent possible deadlock during epoch transition
+            self.iter_loader = iter(self._dataloader)
+            data = next(self.iter_loader)
+        return data
+    def __iter__(self):
+        return self
+    def __len__(self):
+        return len(self._dataloader)

unimernet/datasets/datasets/formula.py ADDED Viewed

	@@ -0,0 +1,71 @@

+import torch
+from .base_dataset import BaseDataset
+import os.path as osp
+import glob
+from io import BytesIO
+from PIL import Image
+class Im2LatexDataset(BaseDataset):
+    def init_samples(self):
+        samples = []
+        for vis_root, anno_path in zip(self.vis_root, self.anno_path):
+            images = [path.replace('\\', '/') for path in glob.glob(osp.join(vis_root, '*.png'))]
+            indices = [int(osp.basename(img).split('.')[0]) for img in images]
+            eqs = open(anno_path, 'r').read().split('\n')
+            eqs = [eqs[_] for _ in indices]
+            for i, e in zip(images, eqs):
+                samples.append({"image": i, "equation": e, "vis_root": vis_root})
+        return samples
+    def __getitem__(self, index):
+        ann = self.samples[index]
+        try:
+            image = self.vis_processor(self._read_image(ann))
+        except Exception:
+            return self[(index + 1) % len(self)]
+        if image is None:
+            return self[(index + 1) % len(self)]
+        equation = ann["equation"]
+        return {"image": image, "text_input": equation, "id": index}
+    def _read_image(self, sample, image_key="image"):
+        img_file = sample[image_key]
+        vis_root = sample["vis_root"]
+        image_path = osp.join(vis_root, img_file)
+        image = self.reader['body'](image_path)
+        if isinstance(image, bytes):
+            bytes_stream = BytesIO(image)
+            image = Image.open(bytes_stream)
+        image = image.convert("RGB")
+        return image
+    def init_reader(self):
+        if not isinstance(self.vis_root, str):
+            vis_root = self.vis_root[0]
+        else:
+            vis_root = self.vis_root
+        if vis_root.startswith('cluster'):
+            from petrel_client.client import Client
+            client = Client("~/petreloss.conf")
+            reader = {'type': 'PetrelReader', 'body': client.get}
+        else:
+            reader = {'type': 'LocalReader', 'body': Image.open}
+        return reader
+    def collater(self, samples):
+        image_list, question_list, id_list = [], [], []
+        for sample in samples:
+            image_list.append(sample["image"])
+            question_list.append(sample["text_input"])
+            id_list.append(sample["id"])
+        return {
+            "image": torch.stack(image_list, dim=0),
+            "text_input": question_list,
+            "id": id_list
+        }

unimernet/datasets/datasets/formula_multi_scale.py ADDED Viewed

	@@ -0,0 +1,32 @@

+import torch
+from .formula import Im2LatexDataset
+class MultiScaleIm2LatexDataset(Im2LatexDataset):
+    def __getitem__(self, index):
+        ann = self.samples[index]
+        try:
+            pil_image = self._read_image(ann)
+            image = self.vis_processor(pil_image)
+        except Exception:
+            return self[(index + 1) % len(self)]
+        if image is None:
+            return self[(index + 1) % len(self)]
+        equation = ann["equation"]
+        return {"image": image, "text_input": equation, "id": index, "raw_image": pil_image}
+    def collater(self, samples):
+        self.vis_processor.reset_scale()
+        image_list, question_list, id_list = [], [], []
+        for sample in samples:
+            image_list.append(self.vis_processor(sample["raw_image"]))
+            question_list.append(sample["text_input"])
+            id_list.append(sample["id"])
+        return {
+            "image": torch.stack(image_list, dim=0),
+            "text_input": question_list,
+            "id": id_list
+        }

unimernet/models/__init__.py ADDED Viewed

	@@ -0,0 +1,198 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import logging
+import torch
+from omegaconf import OmegaConf
+from unimernet.common.registry import registry
+from unimernet.models.base_model import BaseModel
+from unimernet.processors.base_processor import BaseProcessor
+from unimernet.models.unimernet.unimernet import UniMERModel
+__all__ = [
+    "load_model",
+    "BaseModel",
+    "UniMERModel",
+]
+def load_model(name, model_type, is_eval=False, device="cpu", checkpoint=None):
+    """
+    Load supported models.
+    To list all available models and types in registry:
+    >>> from unimernet.models import model_zoo
+    >>> print(model_zoo)
+    Args:
+        name (str): name of the model.
+        model_type (str): type of the model.
+        is_eval (bool): whether the model is in eval mode. Default: False.
+        device (str): device to use. Default: "cpu".
+        checkpoint (str): path or to checkpoint. Default: None.
+            Note that expecting the checkpoint to have the same keys in state_dict as the model.
+    Returns:
+        model (torch.nn.Module): model.
+    """
+    model = registry.get_model_class(name).from_pretrained(model_type=model_type)
+    if checkpoint is not None:
+        model.load_checkpoint(checkpoint)
+    if is_eval:
+        model.eval()
+    if device == "cpu":
+        model = model.float()
+    return model.to(device)
+def load_preprocess(config):
+    """
+    Load preprocessor configs and construct preprocessors.
+    If no preprocessor is specified, return BaseProcessor, which does not do any preprocessing.
+    Args:
+        config (dict): preprocessor configs.
+    Returns:
+        vis_processors (dict): preprocessors for visual inputs.
+        txt_processors (dict): preprocessors for text inputs.
+        Key is "train" or "eval" for processors used in training and evaluation respectively.
+    """
+    def _build_proc_from_cfg(cfg):
+        return (
+            registry.get_processor_class(cfg.name).from_config(cfg)
+            if cfg is not None
+            else BaseProcessor()
+        )
+    vis_processors = dict()
+    txt_processors = dict()
+    vis_proc_cfg = config.get("vis_processor")
+    txt_proc_cfg = config.get("text_processor")
+    if vis_proc_cfg is not None:
+        vis_train_cfg = vis_proc_cfg.get("train")
+        vis_eval_cfg = vis_proc_cfg.get("eval")
+    else:
+        vis_train_cfg = None
+        vis_eval_cfg = None
+    vis_processors["train"] = _build_proc_from_cfg(vis_train_cfg)
+    vis_processors["eval"] = _build_proc_from_cfg(vis_eval_cfg)
+    if txt_proc_cfg is not None:
+        txt_train_cfg = txt_proc_cfg.get("train")
+        txt_eval_cfg = txt_proc_cfg.get("eval")
+    else:
+        txt_train_cfg = None
+        txt_eval_cfg = None
+    txt_processors["train"] = _build_proc_from_cfg(txt_train_cfg)
+    txt_processors["eval"] = _build_proc_from_cfg(txt_eval_cfg)
+    return vis_processors, txt_processors
+def load_model_and_preprocess(name, model_type, is_eval=False, device="cpu"):
+    """
+    Load model and its related preprocessors.
+    List all available models and types in registry:
+    >>> from unimernet.models import model_zoo
+    >>> print(model_zoo)
+    Args:
+        name (str): name of the model.
+        model_type (str): type of the model.
+        is_eval (bool): whether the model is in eval mode. Default: False.
+        device (str): device to use. Default: "cpu".
+    Returns:
+        model (torch.nn.Module): model.
+        vis_processors (dict): preprocessors for visual inputs.
+        txt_processors (dict): preprocessors for text inputs.
+    """
+    model_cls = registry.get_model_class(name)
+    # load model
+    model = model_cls.from_pretrained(model_type=model_type)
+    if is_eval:
+        model.eval()
+    # load preprocess
+    cfg = OmegaConf.load(model_cls.default_config_path(model_type))
+    if cfg is not None:
+        preprocess_cfg = cfg.preprocess
+        vis_processors, txt_processors = load_preprocess(preprocess_cfg)
+    else:
+        vis_processors, txt_processors = None, None
+        logging.info(
+            f"""No default preprocess for model {name} ({model_type}).
+                This can happen if the model is not finetuned on downstream datasets,
+                or it is not intended for direct use without finetuning.
+            """
+        )
+    if device == "cpu" or device == torch.device("cpu"):
+        model = model.float()
+    return model.to(device), vis_processors, txt_processors
+class ModelZoo:
+    """
+    A utility class to create string representation of available model architectures and types.
+    >>> from unimernet.models import model_zoo
+    >>> # list all available models
+    >>> print(model_zoo)
+    >>> # show total number of models
+    >>> print(len(model_zoo))
+    """
+    def __init__(self) -> None:
+        self.model_zoo = {
+            k: list(v.PRETRAINED_MODEL_CONFIG_DICT.keys())
+            for k, v in registry.mapping["model_name_mapping"].items()
+        }
+    def __str__(self) -> str:
+        return (
+                "=" * 50
+                + "\n"
+                + f"{'Architectures':<30} {'Types'}\n"
+                + "=" * 50
+                + "\n"
+                + "\n".join(
+            [
+                f"{name:<30} {', '.join(types)}"
+                for name, types in self.model_zoo.items()
+            ]
+        )
+        )
+    def __iter__(self):
+        return iter(self.model_zoo.items())
+    def __len__(self):
+        return sum([len(v) for v in self.model_zoo.values()])
+model_zoo = ModelZoo()

unimernet/models/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (6.04 kB). View file

unimernet/models/__pycache__/base_model.cpython-310.pyc ADDED Viewed

Binary file (8.8 kB). View file

unimernet/models/__pycache__/clip_vit.cpython-310.pyc ADDED Viewed

Binary file (9.22 kB). View file

unimernet/models/__pycache__/eva_vit.cpython-310.pyc ADDED Viewed

Binary file (14.1 kB). View file

unimernet/models/base_model.py ADDED Viewed

	@@ -0,0 +1,251 @@

+"""
+ Copyright (c) 2022, salesforce.com, inc.
+ All rights reserved.
+ SPDX-License-Identifier: BSD-3-Clause
+ For full license text, see the LICENSE file in the repo root or https://opensource.org/licenses/BSD-3-Clause
+"""
+import logging
+import os
+import numpy as np
+import torch
+import torch.nn as nn
+from unimernet.common.dist_utils import download_cached_file, is_dist_avail_and_initialized
+from unimernet.common.utils import get_abs_path, is_url
+from omegaconf import OmegaConf
+class BaseModel(nn.Module):
+    """Base class for models."""
+    def __init__(self):
+        super().__init__()
+    @property
+    def device(self):
+        return list(self.parameters())[0].device
+    def load_checkpoint(self, url_or_filename):
+        """
+        Load from a finetuned checkpoint.
+        This should expect no mismatch in the model keys and the checkpoint keys.
+        """
+        if is_url(url_or_filename):
+            cached_file = download_cached_file(
+                url_or_filename, check_hash=False, progress=True
+            )
+            checkpoint = torch.load(cached_file, map_location="cpu")
+        elif os.path.isfile(url_or_filename):
+            checkpoint = torch.load(url_or_filename, map_location="cpu")
+        else:
+            raise RuntimeError("checkpoint url or path is invalid")
+        if "model" in checkpoint.keys():
+            state_dict = checkpoint["model"]
+        else:
+            state_dict = checkpoint
+        msg = self.load_state_dict(state_dict, strict=False)
+        # logging.info("Missing keys {}".format(msg.missing_keys))
+        logging.info(f"Missing keys exist when loading '{url_or_filename}'.")
+        logging.info("load checkpoint from %s" % url_or_filename)
+        return msg
+    @classmethod
+    def from_pretrained(cls, model_type):
+        """
+        Build a pretrained model from default configuration file, specified by model_type.
+        Args:
+            - model_type (str): model type, specifying architecture and checkpoints.
+        Returns:
+            - model (nn.Module): pretrained or finetuned model, depending on the configuration.
+        """
+        model_cfg = OmegaConf.load(cls.default_config_path(model_type)).model
+        model = cls.from_config(model_cfg)
+        return model
+    @classmethod
+    def default_config_path(cls, model_type):
+        assert (
+                model_type in cls.PRETRAINED_MODEL_CONFIG_DICT
+        ), "Unknown model type {}".format(model_type)
+        return get_abs_path(cls.PRETRAINED_MODEL_CONFIG_DICT[model_type])
+    def load_checkpoint_from_config(self, cfg, **kwargs):
+        """
+        Load checkpoint as specified in the config file.
+        If load_finetuned is True, load the finetuned model; otherwise, load the pretrained model.
+        When loading the pretrained model, each task-specific architecture may define their
+        own load_from_pretrained() method.
+        """
+        load_pretrained = cfg.get("load_pretrained", True)
+        load_finetuned = cfg.get("load_finetuned", False)
+        if load_pretrained:
+            # load pre-trained weights
+            pretrain_path = cfg.get("pretrained", None)
+            assert pretrain_path, "Found load_finetuned is False, but pretrain_path is None."
+            self.load_from_pretrained(url_or_filename=pretrain_path, **kwargs)
+            logging.info(f"Loaded pretrained model '{pretrain_path}'.")
+        if load_finetuned:
+            finetune_path = cfg.get("finetuned", None)
+            assert finetune_path is not None, "Found load_finetuned is True, but finetune_path is None."
+            self.load_checkpoint(url_or_filename=finetune_path)
+            logging.info(f"Loaded finetuned model '{finetune_path}'.")
+    def before_evaluation(self, **kwargs):
+        pass
+    def show_n_params(self, return_str=True):
+        tot = 0
+        for p in self.parameters():
+            w = 1
+            for x in p.shape:
+                w *= x
+            tot += w
+        if return_str:
+            if tot >= 1e6:
+                return "{:.1f}M".format(tot / 1e6)
+            else:
+                return "{:.1f}K".format(tot / 1e3)
+        else:
+            return tot
+class BaseEncoder(nn.Module):
+    """
+    Base class for primitive encoders, such as ViT, TimeSformer, etc.
+    """
+    def __init__(self):
+        super().__init__()
+    def forward_features(self, samples, **kwargs):
+        raise NotImplementedError
+    @property
+    def device(self):
+        return list(self.parameters())[0].device
+class SharedQueueMixin:
+    @torch.no_grad()
+    def _dequeue_and_enqueue(self, image_feat, text_feat, idxs=None):
+        # gather keys before updating queue
+        image_feats = concat_all_gather(image_feat)
+        text_feats = concat_all_gather(text_feat)
+        batch_size = image_feats.shape[0]
+        ptr = int(self.queue_ptr)
+        assert self.queue_size % batch_size == 0  # for simplicity
+        # replace the keys at ptr (dequeue and enqueue)
+        self.image_queue[:, ptr: ptr + batch_size] = image_feats.T
+        self.text_queue[:, ptr: ptr + batch_size] = text_feats.T
+        if idxs is not None:
+            idxs = concat_all_gather(idxs)
+            self.idx_queue[:, ptr: ptr + batch_size] = idxs.T
+        ptr = (ptr + batch_size) % self.queue_size  # move pointer
+        self.queue_ptr[0] = ptr
+class MomentumDistilationMixin:
+    @torch.no_grad()
+    def copy_params(self):
+        for model_pair in self.model_pairs:
+            for param, param_m in zip(
+                    model_pair[0].parameters(), model_pair[1].parameters()
+            ):
+                param_m.data.copy_(param.data)  # initialize
+                param_m.requires_grad = False  # not update by gradient
+    @torch.no_grad()
+    def _momentum_update(self):
+        for model_pair in self.model_pairs:
+            for param, param_m in zip(
+                    model_pair[0].parameters(), model_pair[1].parameters()
+            ):
+                param_m.data = param_m.data * self.momentum + param.data * (
+                        1.0 - self.momentum
+                )
+class GatherLayer(torch.autograd.Function):
+    """
+    Gather tensors from all workers with support for backward propagation:
+    This implementation does not cut the gradients as torch.distributed.all_gather does.
+    """
+    @staticmethod
+    def forward(ctx, x):
+        output = [
+            torch.zeros_like(x) for _ in range(torch.distributed.get_world_size())
+        ]
+        torch.distributed.all_gather(output, x)
+        return tuple(output)
+    @staticmethod
+    def backward(ctx, *grads):
+        all_gradients = torch.stack(grads)
+        torch.distributed.all_reduce(all_gradients)
+        return all_gradients[torch.distributed.get_rank()]
+def all_gather_with_grad(tensors):
+    """
+    Performs all_gather operation on the provided tensors.
+    Graph remains connected for backward grad computation.
+    """
+    # Queue the gathered tensors
+    world_size = torch.distributed.get_world_size()
+    # There is no need for reduction in the single-proc case
+    if world_size == 1:
+        return tensors
+    # tensor_all = GatherLayer.apply(tensors)
+    tensor_all = GatherLayer.apply(tensors)
+    return torch.cat(tensor_all, dim=0)
+@torch.no_grad()
+def concat_all_gather(tensor):
+    """
+    Performs all_gather operation on the provided tensors.
+    *** Warning ***: torch.distributed.all_gather has no gradient.
+    """
+    # if use distributed training
+    if not is_dist_avail_and_initialized():
+        return tensor
+    tensors_gather = [
+        torch.ones_like(tensor) for _ in range(torch.distributed.get_world_size())
+    ]
+    torch.distributed.all_gather(tensors_gather, tensor, async_op=False)
+    output = torch.cat(tensors_gather, dim=0)
+    return output
+def tile(x, dim, n_tile):
+    init_dim = x.size(dim)
+    repeat_idx = [1] * x.dim()
+    repeat_idx[dim] = n_tile
+    x = x.repeat(*(repeat_idx))
+    order_index = torch.LongTensor(
+        np.concatenate([init_dim * np.arange(n_tile) + i for i in range(init_dim)])
+    )
+    return torch.index_select(x, dim, order_index.to(x.device))