diff --git a/live2diff/MiDaS/.gitignore b/live2diff/MiDaS/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..a13c80028de3d297de4a3f09cee1b20759acc006 --- /dev/null +++ b/live2diff/MiDaS/.gitignore @@ -0,0 +1,110 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +*.png +*.pfm +*.jpg +*.jpeg +*.pt \ No newline at end of file diff --git a/live2diff/MiDaS/Dockerfile b/live2diff/MiDaS/Dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..73ab8246971b97b7e486323ce2571c590d049fa5 --- /dev/null +++ b/live2diff/MiDaS/Dockerfile @@ -0,0 +1,29 @@ +# enables cuda support in docker +FROM nvidia/cuda:10.2-cudnn7-runtime-ubuntu18.04 + +# install python 3.6, pip and requirements for 
opencv-python +# (see https://github.com/NVIDIA/nvidia-docker/issues/864) +RUN apt-get update && apt-get -y install \ + python3 \ + python3-pip \ + libsm6 \ + libxext6 \ + libxrender-dev \ + curl \ + && rm -rf /var/lib/apt/lists/* + +# install python dependencies +RUN pip3 install --upgrade pip +RUN pip3 install torch~=1.8 torchvision opencv-python-headless~=3.4 timm + +# copy inference code +WORKDIR /opt/MiDaS +COPY ./midas ./midas +COPY ./*.py ./ + +# download model weights so the docker image can be used offline (the weights directory is not copied above, so it is created here) +RUN mkdir -p weights && cd weights && curl -OL https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/dpt_hybrid-midas-501f0c75.pt +RUN python3 run.py --model_type dpt_hybrid; exit 0 + +# entrypoint (don't forget to mount input and output directories) +CMD python3 run.py --model_type dpt_hybrid diff --git a/live2diff/MiDaS/LICENSE b/live2diff/MiDaS/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..277b5c11be103f028a8d10985139f1da10c2f08e --- /dev/null +++ b/live2diff/MiDaS/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Intel ISL (Intel Intelligent Systems Lab) + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/live2diff/MiDaS/README.md b/live2diff/MiDaS/README.md new file mode 100644 index 0000000000000000000000000000000000000000..027f214381ee01e4afe4f9ba3e506df8a092f7fd --- /dev/null +++ b/live2diff/MiDaS/README.md @@ -0,0 +1,161 @@ +## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer + +This repository contains code to compute depth from a single image. It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): + +>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + + +and our [preprint](https://arxiv.org/abs/2103.13413): + +> Vision Transformers for Dense Prediction +> René Ranftl, Alexey Bochkovskiy, Vladlen Koltun + + +MiDaS was trained on 10 datasets (ReDWeb, DIML, Movies, MegaDepth, WSVD, TartanAir, HRWSI, ApolloScape, BlendedMVS, IRS) with +multi-objective optimization. +The original model that was trained on 5 datasets (`MIX 5` in the paper) can be found [here](https://github.com/intel-isl/MiDaS/releases/tag/v2). + + +### Changelog +* [Sep 2021] Integrated to [Huggingface Spaces](https://huggingface.co/spaces) with [Gradio](https://github.com/gradio-app/gradio). See [Gradio Web Demo](https://huggingface.co/spaces/akhaliq/DPT-Large). 
+* [Apr 2021] Released MiDaS v3.0: + - New models based on [Dense Prediction Transformers](https://arxiv.org/abs/2103.13413) are on average [21% more accurate](#Accuracy) than MiDaS v2.1 + - Additional models can be found [here](https://github.com/intel-isl/DPT) +* [Nov 2020] Released MiDaS v2.1: + - New model that was trained on 10 datasets and is on average about [10% more accurate](#Accuracy) than [MiDaS v2.0](https://github.com/intel-isl/MiDaS/releases/tag/v2) + - New light-weight model that achieves [real-time performance](https://github.com/intel-isl/MiDaS/tree/master/mobile) on mobile platforms. + - Sample applications for [iOS](https://github.com/intel-isl/MiDaS/tree/master/mobile/ios) and [Android](https://github.com/intel-isl/MiDaS/tree/master/mobile/android) + - [ROS package](https://github.com/intel-isl/MiDaS/tree/master/ros) for easy deployment on robots +* [Jul 2020] Added TensorFlow and ONNX code. Added [online demo](http://35.202.76.57/). +* [Dec 2019] Released new version of MiDaS - the new model is significantly more accurate and robust +* [Jul 2019] Initial release of MiDaS ([Link](https://github.com/intel-isl/MiDaS/releases/tag/v1)) + +### Setup + +1) Pick one or more models and download corresponding weights to the `weights` folder: + +- For highest quality: [dpt_large](https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt) +- For moderately less quality, but better speed on CPU and slower GPUs: [dpt_hybrid](https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt) +- For real-time applications on resource-constrained devices: [midas_v21_small](https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt) +- Legacy convolutional model: [midas_v21](https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt) + +2) Set up dependencies: + + ```shell + conda install pytorch torchvision opencv + pip install timm + ``` + + The code was tested 
with Python 3.7, PyTorch 1.8.0, OpenCV 4.5.1, and timm 0.4.5. + + +### Usage + +1) Place one or more input images in the folder `input`. + +2) Run the model: + + ```shell + python run.py --model_type dpt_large + python run.py --model_type dpt_hybrid + python run.py --model_type midas_v21_small + python run.py --model_type midas_v21 + ``` + +3) The resulting inverse depth maps are written to the `output` folder. + + +#### via Docker + +1) Make sure you have installed Docker and the + [NVIDIA Docker runtime](https://github.com/NVIDIA/nvidia-docker/wiki/Installation-\(Native-GPU-Support\)). + +2) Build the Docker image: + + ```shell + docker build -t midas . + ``` + +3) Run inference: + + ```shell + docker run --rm --gpus all -v $PWD/input:/opt/MiDaS/input -v $PWD/output:/opt/MiDaS/output midas + ``` + + This command passes through all of your NVIDIA GPUs to the container, mounts the + `input` and `output` directories and then runs the inference. + +#### via PyTorch Hub + +The pretrained model is also available on [PyTorch Hub](https://pytorch.org/hub/intelisl_midas_v2/) + +#### via TensorFlow or ONNX + +See [README](https://github.com/intel-isl/MiDaS/tree/master/tf) in the `tf` subdirectory. + +Currently only supports MiDaS v2.1. DPT-based models to be added. + + +#### via Mobile (iOS / Android) + +See [README](https://github.com/intel-isl/MiDaS/tree/master/mobile) in the `mobile` subdirectory. + +#### via ROS1 (Robot Operating System) + +See [README](https://github.com/intel-isl/MiDaS/tree/master/ros) in the `ros` subdirectory. + +Currently only supports MiDaS v2.1. DPT-based models to be added. 
+ + +### Accuracy + +Zero-shot error (lower is better) and speed (FPS): + +| Model | DIW, WHDR | Eth3d, AbsRel | Sintel, AbsRel | Kitti, δ>1.25 | NyuDepthV2, δ>1.25 | TUM, δ>1.25 | Speed, FPS | +|---|---|---|---|---|---|---|---| +| **Small models:** | | | | | | | iPhone 11 | +| MiDaS v2 small | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 | 0.6 | +| MiDaS v2.1 small [URL](https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21_small-70d6b9c8.pt) | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** | 30 | +| | | | | | | | | +| **Big models:** | | | | | | | GPU RTX 3090 | +| MiDaS v2 large [URL](https://github.com/intel-isl/MiDaS/releases/download/v2/model-f46da743.pt) | 0.1246 | 0.1290 | 0.3270 | 23.90 | 9.55 | 14.29 | 51 | +| MiDaS v2.1 large [URL](https://github.com/AlexeyAB/MiDaS/releases/download/midas_dpt/midas_v21-f6b98070.pt) | 0.1295 | 0.1155 | 0.3285 | 16.08 | 8.71 | 12.51 | 51 | +| MiDaS v3.0 DPT-Hybrid [URL](https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt) | 0.1106 | 0.0934 | 0.2741 | 11.56 | 8.69 | 10.89 | 46 | +| MiDaS v3.0 DPT-Large [URL](https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt) | **0.1082** | **0.0888** | **0.2697** | **8.46** | **8.32** | **9.97** | 47 | + + + +### Citation + +Please cite our paper if you use this code or any of the models: +``` +@ARTICLE {Ranftl2022, + author = "Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun", + title = "Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-Shot Cross-Dataset Transfer", + journal = "IEEE Transactions on Pattern Analysis and Machine Intelligence", + year = "2022", + volume = "44", + number = "3" +} +``` + +If you use a DPT-based model, please also cite: + +``` +@article{Ranftl2021, + author = {Ren\'{e} Ranftl and Alexey Bochkovskiy and Vladlen Koltun}, + title = {Vision Transformers for Dense Prediction}, + journal = {ICCV}, + year = {2021}, +} +``` + +### Acknowledgements + +Our work builds on and uses
code from [timm](https://github.com/rwightman/pytorch-image-models). +We'd like to thank the author for making these libraries available. + +### License + +MIT License diff --git a/live2diff/MiDaS/hubconf.py b/live2diff/MiDaS/hubconf.py new file mode 100644 index 0000000000000000000000000000000000000000..5be7088d5744dd73f8de72c242eee322a490330d --- /dev/null +++ b/live2diff/MiDaS/hubconf.py @@ -0,0 +1,154 @@ +dependencies = ["torch"] + +import torch + +from midas.dpt_depth import DPTDepthModel +from midas.midas_net import MidasNet +from midas.midas_net_custom import MidasNet_small + +def DPT_Large(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT-Large model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="vitl16_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_large-midas-2f21e586.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def DPT_Hybrid(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS DPT-Hybrid model for monocular depth estimation + pretrained (bool): load pretrained weights into model + """ + + model = DPTDepthModel( + path=None, + backbone="vitb_rn50_384", + non_negative=True, + ) + + if pretrained: + checkpoint = ( + "https://github.com/intel-isl/DPT/releases/download/1_0/dpt_hybrid-midas-501f0c75.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def MiDaS(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS v2.1 model for monocular depth estimation + pretrained (bool): load pretrained weights 
into model + """ + + model = MidasNet() + + if pretrained: + checkpoint = ( + "https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + +def MiDaS_small(pretrained=True, **kwargs): + """ # This docstring shows up in hub.help() + MiDaS small model for monocular depth estimation on resource-constrained devices + pretrained (bool): load pretrained weights into model + """ + + model = MidasNet_small(None, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True}) + + if pretrained: + checkpoint = ( + "https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-small-70d6b9c8.pt" + ) + state_dict = torch.hub.load_state_dict_from_url( + checkpoint, map_location=torch.device('cpu'), progress=True, check_hash=True + ) + model.load_state_dict(state_dict) + + return model + + +def transforms(): + import cv2 + from torchvision.transforms import Compose + from midas.transforms import Resize, NormalizeImage, PrepareForNet + from midas import transforms + + transforms.default_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.small_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 256, + 256, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), + 
PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + transforms.dpt_transform = Compose( + [ + lambda img: {"image": img / 255.0}, + Resize( + 384, + 384, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method="minimal", + image_interpolation_method=cv2.INTER_CUBIC, + ), + NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), + PrepareForNet(), + lambda sample: torch.from_numpy(sample["image"]).unsqueeze(0), + ] + ) + + return transforms diff --git a/live2diff/MiDaS/input/.placeholder b/live2diff/MiDaS/input/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/live2diff/MiDaS/midas/base_model.py b/live2diff/MiDaS/midas/base_model.py new file mode 100644 index 0000000000000000000000000000000000000000..5cf430239b47ec5ec07531263f26f5c24a2311cd --- /dev/null +++ b/live2diff/MiDaS/midas/base_model.py @@ -0,0 +1,16 @@ +import torch + + +class BaseModel(torch.nn.Module): + def load(self, path): + """Load model from file. 
+ + Args: + path (str): file path + """ + parameters = torch.load(path, map_location=torch.device('cpu')) + + if "optimizer" in parameters: + parameters = parameters["model"] + + self.load_state_dict(parameters) diff --git a/live2diff/MiDaS/midas/blocks.py b/live2diff/MiDaS/midas/blocks.py new file mode 100644 index 0000000000000000000000000000000000000000..2145d18fa98060a618536d9a64fe6589e9be4f78 --- /dev/null +++ b/live2diff/MiDaS/midas/blocks.py @@ -0,0 +1,342 @@ +import torch +import torch.nn as nn + +from .vit import ( + _make_pretrained_vitb_rn50_384, + _make_pretrained_vitl16_384, + _make_pretrained_vitb16_384, + forward_vit, +) + +def _make_encoder(backbone, features, use_pretrained, groups=1, expand=False, exportable=True, hooks=None, use_vit_only=False, use_readout="ignore",): + if backbone == "vitl16_384": + pretrained = _make_pretrained_vitl16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [256, 512, 1024, 1024], features, groups=groups, expand=expand + ) # ViT-L/16 - 85.0% Top1 (backbone) + elif backbone == "vitb_rn50_384": + pretrained = _make_pretrained_vitb_rn50_384( + use_pretrained, + hooks=hooks, + use_vit_only=use_vit_only, + use_readout=use_readout, + ) + scratch = _make_scratch( + [256, 512, 768, 768], features, groups=groups, expand=expand + ) # ViT-H/16 - 85.0% Top1 (backbone) + elif backbone == "vitb16_384": + pretrained = _make_pretrained_vitb16_384( + use_pretrained, hooks=hooks, use_readout=use_readout + ) + scratch = _make_scratch( + [96, 192, 384, 768], features, groups=groups, expand=expand + ) # ViT-B/16 - 84.6% Top1 (backbone) + elif backbone == "resnext101_wsl": + pretrained = _make_pretrained_resnext101_wsl(use_pretrained) + scratch = _make_scratch([256, 512, 1024, 2048], features, groups=groups, expand=expand) # efficientnet_lite3 + elif backbone == "efficientnet_lite3": + pretrained = _make_pretrained_efficientnet_lite3(use_pretrained, exportable=exportable) + scratch = 
_make_scratch([32, 48, 136, 384], features, groups=groups, expand=expand) # efficientnet_lite3 + else: + print(f"Backbone '{backbone}' not implemented") + assert False + + return pretrained, scratch + + +def _make_scratch(in_shape, out_shape, groups=1, expand=False): + scratch = nn.Module() + + out_shape1 = out_shape + out_shape2 = out_shape + out_shape3 = out_shape + out_shape4 = out_shape + if expand==True: + out_shape1 = out_shape + out_shape2 = out_shape*2 + out_shape3 = out_shape*4 + out_shape4 = out_shape*8 + + scratch.layer1_rn = nn.Conv2d( + in_shape[0], out_shape1, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer2_rn = nn.Conv2d( + in_shape[1], out_shape2, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer3_rn = nn.Conv2d( + in_shape[2], out_shape3, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + scratch.layer4_rn = nn.Conv2d( + in_shape[3], out_shape4, kernel_size=3, stride=1, padding=1, bias=False, groups=groups + ) + + return scratch + + +def _make_pretrained_efficientnet_lite3(use_pretrained, exportable=False): + efficientnet = torch.hub.load( + "rwightman/gen-efficientnet-pytorch", + "tf_efficientnet_lite3", + pretrained=use_pretrained, + exportable=exportable + ) + return _make_efficientnet_backbone(efficientnet) + + +def _make_efficientnet_backbone(effnet): + pretrained = nn.Module() + + pretrained.layer1 = nn.Sequential( + effnet.conv_stem, effnet.bn1, effnet.act1, *effnet.blocks[0:2] + ) + pretrained.layer2 = nn.Sequential(*effnet.blocks[2:3]) + pretrained.layer3 = nn.Sequential(*effnet.blocks[3:5]) + pretrained.layer4 = nn.Sequential(*effnet.blocks[5:9]) + + return pretrained + + +def _make_resnet_backbone(resnet): + pretrained = nn.Module() + pretrained.layer1 = nn.Sequential( + resnet.conv1, resnet.bn1, resnet.relu, resnet.maxpool, resnet.layer1 + ) + + pretrained.layer2 = resnet.layer2 + pretrained.layer3 = resnet.layer3 + pretrained.layer4 = resnet.layer4 + + 
return pretrained + + +def _make_pretrained_resnext101_wsl(use_pretrained): + resnet = torch.hub.load("facebookresearch/WSL-Images", "resnext101_32x8d_wsl") + return _make_resnet_backbone(resnet) + + + +class Interpolate(nn.Module): + """Interpolation module. + """ + + def __init__(self, scale_factor, mode, align_corners=False): + """Init. + + Args: + scale_factor (float): scaling + mode (str): interpolation mode + """ + super(Interpolate, self).__init__() + + self.interp = nn.functional.interpolate + self.scale_factor = scale_factor + self.mode = mode + self.align_corners = align_corners + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: interpolated data + """ + + x = self.interp( + x, scale_factor=self.scale_factor, mode=self.mode, align_corners=self.align_corners + ) + + return x + + +class ResidualConvUnit(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True + ) + + self.relu = nn.ReLU(inplace=True) + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + out = self.relu(x) + out = self.conv1(out) + out = self.relu(out) + out = self.conv2(out) + + return out + x + + +class FeatureFusionBlock(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features): + """Init. + + Args: + features (int): number of features + """ + super(FeatureFusionBlock, self).__init__() + + self.resConfUnit1 = ResidualConvUnit(features) + self.resConfUnit2 = ResidualConvUnit(features) + + def forward(self, *xs): + """Forward pass. 
+ + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + output += self.resConfUnit1(xs[1]) + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True + ) + + return output + + + + +class ResidualConvUnit_custom(nn.Module): + """Residual convolution module. + """ + + def __init__(self, features, activation, bn): + """Init. + + Args: + features (int): number of features + """ + super().__init__() + + self.bn = bn + + self.groups=1 + + self.conv1 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + self.conv2 = nn.Conv2d( + features, features, kernel_size=3, stride=1, padding=1, bias=True, groups=self.groups + ) + + if self.bn==True: + self.bn1 = nn.BatchNorm2d(features) + self.bn2 = nn.BatchNorm2d(features) + + self.activation = activation + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, x): + """Forward pass. + + Args: + x (tensor): input + + Returns: + tensor: output + """ + + out = self.activation(x) + out = self.conv1(out) + if self.bn==True: + out = self.bn1(out) + + out = self.activation(out) + out = self.conv2(out) + if self.bn==True: + out = self.bn2(out) + + if self.groups > 1: + out = self.conv_merge(out) + + return self.skip_add.add(out, x) + + # return out + x + + +class FeatureFusionBlock_custom(nn.Module): + """Feature fusion block. + """ + + def __init__(self, features, activation, deconv=False, bn=False, expand=False, align_corners=True): + """Init. 
+ + Args: + features (int): number of features + """ + super(FeatureFusionBlock_custom, self).__init__() + + self.deconv = deconv + self.align_corners = align_corners + + self.groups=1 + + self.expand = expand + out_features = features + if self.expand==True: + out_features = features//2 + + self.out_conv = nn.Conv2d(features, out_features, kernel_size=1, stride=1, padding=0, bias=True, groups=1) + + self.resConfUnit1 = ResidualConvUnit_custom(features, activation, bn) + self.resConfUnit2 = ResidualConvUnit_custom(features, activation, bn) + + self.skip_add = nn.quantized.FloatFunctional() + + def forward(self, *xs): + """Forward pass. + + Returns: + tensor: output + """ + output = xs[0] + + if len(xs) == 2: + res = self.resConfUnit1(xs[1]) + output = self.skip_add.add(output, res) + # output += res + + output = self.resConfUnit2(output) + + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=self.align_corners + ) + + output = self.out_conv(output) + + return output + diff --git a/live2diff/MiDaS/midas/dpt_depth.py b/live2diff/MiDaS/midas/dpt_depth.py new file mode 100644 index 0000000000000000000000000000000000000000..4e9aab5d2767dffea39da5b3f30e2798688216f1 --- /dev/null +++ b/live2diff/MiDaS/midas/dpt_depth.py @@ -0,0 +1,109 @@ +import torch +import torch.nn as nn +import torch.nn.functional as F + +from .base_model import BaseModel +from .blocks import ( + FeatureFusionBlock, + FeatureFusionBlock_custom, + Interpolate, + _make_encoder, + forward_vit, +) + + +def _make_fusion_block(features, use_bn): + return FeatureFusionBlock_custom( + features, + nn.ReLU(False), + deconv=False, + bn=use_bn, + expand=False, + align_corners=True, + ) + + +class DPT(BaseModel): + def __init__( + self, + head, + features=256, + backbone="vitb_rn50_384", + readout="project", + channels_last=False, + use_bn=False, + ): + + super(DPT, self).__init__() + + self.channels_last = channels_last + + hooks = { + "vitb_rn50_384": [0, 1, 8, 11], + 
"vitb16_384": [2, 5, 8, 11], + "vitl16_384": [5, 11, 17, 23], + } + + # Instantiate backbone and reassemble blocks + self.pretrained, self.scratch = _make_encoder( + backbone, + features, + False, # Set to true of you want to train from scratch, uses ImageNet weights + groups=1, + expand=False, + exportable=False, + hooks=hooks[backbone], + use_readout=readout, + ) + + self.scratch.refinenet1 = _make_fusion_block(features, use_bn) + self.scratch.refinenet2 = _make_fusion_block(features, use_bn) + self.scratch.refinenet3 = _make_fusion_block(features, use_bn) + self.scratch.refinenet4 = _make_fusion_block(features, use_bn) + + self.scratch.output_conv = head + + + def forward(self, x): + if self.channels_last == True: + x.contiguous(memory_format=torch.channels_last) + + layer_1, layer_2, layer_3, layer_4 = forward_vit(self.pretrained, x) + + layer_1_rn = self.scratch.layer1_rn(layer_1) + layer_2_rn = self.scratch.layer2_rn(layer_2) + layer_3_rn = self.scratch.layer3_rn(layer_3) + layer_4_rn = self.scratch.layer4_rn(layer_4) + + path_4 = self.scratch.refinenet4(layer_4_rn) + path_3 = self.scratch.refinenet3(path_4, layer_3_rn) + path_2 = self.scratch.refinenet2(path_3, layer_2_rn) + path_1 = self.scratch.refinenet1(path_2, layer_1_rn) + + out = self.scratch.output_conv(path_1) + + return out + + +class DPTDepthModel(DPT): + def __init__(self, path=None, non_negative=True, **kwargs): + features = kwargs["features"] if "features" in kwargs else 256 + + head = nn.Sequential( + nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear", align_corners=True), + nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + nn.Identity(), + ) + + super().__init__(head, **kwargs) + + if path is not None: + self.load(path) + + def forward(self, x): + return 
super().forward(x).squeeze(dim=1) + diff --git a/live2diff/MiDaS/midas/midas_net.py b/live2diff/MiDaS/midas/midas_net.py new file mode 100644 index 0000000000000000000000000000000000000000..8a954977800b0a0f48807e80fa63041910e33c1f --- /dev/null +++ b/live2diff/MiDaS/midas/midas_net.py @@ -0,0 +1,76 @@ +"""MidashNet: Network for monocular depth estimation trained by mixing several datasets. +This file contains code that is adapted from +https://github.com/thomasjpfan/pytorch_refinenet/blob/master/pytorch_refinenet/refinenet/refinenet_4cascade.py +""" +import torch +import torch.nn as nn + +from .base_model import BaseModel +from .blocks import FeatureFusionBlock, Interpolate, _make_encoder + + +class MidasNet(BaseModel): + """Network for monocular depth estimation. + """ + + def __init__(self, path=None, features=256, non_negative=True): + """Init. + + Args: + path (str, optional): Path to saved model. Defaults to None. + features (int, optional): Number of features. Defaults to 256. + backbone (str, optional): Backbone network for encoder. Defaults to resnet50 + """ + print("Loading weights: ", path) + + super(MidasNet, self).__init__() + + use_pretrained = False if path is None else True + + self.pretrained, self.scratch = _make_encoder(backbone="resnext101_wsl", features=features, use_pretrained=use_pretrained) + + self.scratch.refinenet4 = FeatureFusionBlock(features) + self.scratch.refinenet3 = FeatureFusionBlock(features) + self.scratch.refinenet2 = FeatureFusionBlock(features) + self.scratch.refinenet1 = FeatureFusionBlock(features) + + self.scratch.output_conv = nn.Sequential( + nn.Conv2d(features, 128, kernel_size=3, stride=1, padding=1), + Interpolate(scale_factor=2, mode="bilinear"), + nn.Conv2d(128, 32, kernel_size=3, stride=1, padding=1), + nn.ReLU(True), + nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0), + nn.ReLU(True) if non_negative else nn.Identity(), + ) + + if path: + self.load(path) + + def forward(self, x): + """Forward pass. 
class MidasNet_small(BaseModel):
    """Small MiDaS network for monocular depth estimation.

    An encoder created by ``_make_encoder`` produces four feature maps that
    are reduced by the ``scratch.layer*_rn`` modules, merged top-down by four
    ``FeatureFusionBlock_custom`` modules and finally turned into a
    single-channel map by ``scratch.output_conv``.
    """

    def __init__(self, path=None, features=64, backbone="efficientnet_lite3", non_negative=True, exportable=True, channels_last=False, align_corners=True,
        blocks=None):
        """Init.

        Args:
            path (str, optional): Path to saved model. When given, the encoder
                is requested without pretrained weights and ``self.load(path)``
                is called at the end of construction. Defaults to None.
            features (int, optional): Base number of decoder features.
                Defaults to 64.
            backbone (str, optional): Backbone network for the encoder,
                forwarded to ``_make_encoder``. Defaults to
                "efficientnet_lite3".
            non_negative (bool, optional): If True the output head ends with a
                ReLU, otherwise with an identity. Defaults to True.
            exportable (bool, optional): Forwarded unchanged to
                ``_make_encoder``. Defaults to True.
            channels_last (bool, optional): If True, ``forward`` converts its
                input to the ``torch.channels_last`` memory format.
                Defaults to False.
            align_corners (bool, optional): Forwarded to every fusion block.
                Defaults to True.
            blocks (dict, optional): Block options. ``{'expand': True}`` makes
                the deeper fusion stages use 2x/4x/8x ``features`` channels.
                Defaults to ``{'expand': True}``.
        """
        print("Loading weights: ", path)

        super(MidasNet_small, self).__init__()

        # Weights coming from `path` replace whatever the encoder would download.
        use_pretrained = not path

        self.channels_last = channels_last
        # A fresh dict per instance: a dict literal in the signature would be
        # shared (and mutable) across every model created with the default.
        self.blocks = {'expand': True} if blocks is None else blocks
        self.backbone = backbone

        self.groups = 1

        features1 = features
        features2 = features
        features3 = features
        features4 = features
        self.expand = False
        if "expand" in self.blocks and self.blocks['expand'] == True:
            self.expand = True
            features1 = features
            features2 = features * 2
            features3 = features * 4
            features4 = features * 8

        self.pretrained, self.scratch = _make_encoder(self.backbone, features, use_pretrained, groups=self.groups, expand=self.expand, exportable=exportable)

        self.scratch.activation = nn.ReLU(False)

        self.scratch.refinenet4 = FeatureFusionBlock_custom(features4, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
        self.scratch.refinenet3 = FeatureFusionBlock_custom(features3, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
        self.scratch.refinenet2 = FeatureFusionBlock_custom(features2, self.scratch.activation, deconv=False, bn=False, expand=self.expand, align_corners=align_corners)
        # The last fusion stage is built without `expand`, so it keeps `features1` channels.
        self.scratch.refinenet1 = FeatureFusionBlock_custom(features1, self.scratch.activation, deconv=False, bn=False, align_corners=align_corners)

        self.scratch.output_conv = nn.Sequential(
            nn.Conv2d(features, features // 2, kernel_size=3, stride=1, padding=1, groups=self.groups),
            Interpolate(scale_factor=2, mode="bilinear"),
            nn.Conv2d(features // 2, 32, kernel_size=3, stride=1, padding=1),
            self.scratch.activation,
            nn.Conv2d(32, 1, kernel_size=1, stride=1, padding=0),
            nn.ReLU(True) if non_negative else nn.Identity(),
            nn.Identity(),
        )

        if path:
            self.load(path)

    def forward(self, x):
        """Forward pass.

        Args:
            x (tensor): input data (image)

        Returns:
            tensor: depth (output of the head with dimension 1 squeezed)
        """
        if self.channels_last:
            print("self.channels_last = ", self.channels_last)
            # Tensor.contiguous() is not in-place; the result must be kept,
            # otherwise the memory-format conversion is silently dropped.
            x = x.contiguous(memory_format=torch.channels_last)

        layer_1 = self.pretrained.layer1(x)
        layer_2 = self.pretrained.layer2(layer_1)
        layer_3 = self.pretrained.layer3(layer_2)
        layer_4 = self.pretrained.layer4(layer_3)

        layer_1_rn = self.scratch.layer1_rn(layer_1)
        layer_2_rn = self.scratch.layer2_rn(layer_2)
        layer_3_rn = self.scratch.layer3_rn(layer_3)
        layer_4_rn = self.scratch.layer4_rn(layer_4)

        # Top-down refinement: each stage merges the coarser path with the
        # next finer encoder feature.
        path_4 = self.scratch.refinenet4(layer_4_rn)
        path_3 = self.scratch.refinenet3(path_4, layer_3_rn)
        path_2 = self.scratch.refinenet2(path_3, layer_2_rn)
        path_1 = self.scratch.refinenet1(path_2, layer_1_rn)

        out = self.scratch.output_conv(path_1)

        return torch.squeeze(out, dim=1)


def fuse_model(m):
    """Fuse Conv2d + BatchNorm2d (+ ReLU) runs of ``m`` in place.

    Walks ``m.named_modules()`` while remembering the two previously visited
    modules. When those two are exactly ``nn.Conv2d`` followed by
    ``nn.BatchNorm2d`` they are fused via ``torch.quantization.fuse_modules``,
    together with the current module if it is exactly ``nn.ReLU``.

    NOTE(review): the check only fires on the module that follows the pair,
    so a Conv2d/BatchNorm2d pair visited last is never fused.

    Args:
        m (nn.Module): model to fuse; modified in place.
    """
    prev_previous_type = None
    prev_previous_name = ''
    previous_type = None
    previous_name = ''
    for name, module in m.named_modules():
        conv_then_bn = prev_previous_type is nn.Conv2d and previous_type is nn.BatchNorm2d
        if conv_then_bn and type(module) is nn.ReLU:
            torch.quantization.fuse_modules(m, [prev_previous_name, previous_name, name], inplace=True)
        elif conv_then_bn:
            torch.quantization.fuse_modules(m, [prev_previous_name, previous_name], inplace=True)

        # Slide the two-module window forward.
        prev_previous_type = previous_type
        prev_previous_name = previous_name
        previous_type = type(module)
        previous_name = name
def apply_min_size(sample, size, image_interpolation_method=cv2.INTER_AREA):
    """Resize the sample to ensure the given size. Keeps aspect ratio.

    The sample is only ever enlarged: if ``sample["disparity"]`` already
    covers ``size`` in both of its first two dimensions nothing is modified.
    Otherwise "image", "disparity" and "mask" are resized in place in the
    ``sample`` dict with one common scale factor.

    Args:
        sample (dict): sample with the keys "image", "disparity" and "mask"
        size (tuple): minimum size, compared index by index against
            ``sample["disparity"].shape``
        image_interpolation_method: OpenCV interpolation flag used for the
            image; disparity and mask always use nearest-neighbour.

    Returns:
        tuple: new size (the unchanged shape when no resize was needed)
    """
    shape = list(sample["disparity"].shape)

    if shape[0] >= size[0] and shape[1] >= size[1]:
        # Previously this path returned the sample dict, so callers received a
        # dict or a tuple depending on the input size.
        return tuple(shape)

    # Use the larger of the two ratios so that both dimensions reach `size`.
    scale = max(size[0] / shape[0], size[1] / shape[1])

    shape[0] = math.ceil(scale * shape[0])
    shape[1] = math.ceil(scale * shape[1])

    # cv2.resize takes the target size in reversed order relative to `shape`.
    target = tuple(shape[::-1])

    sample["image"] = cv2.resize(
        sample["image"], target, interpolation=image_interpolation_method
    )

    sample["disparity"] = cv2.resize(
        sample["disparity"], target, interpolation=cv2.INTER_NEAREST
    )
    # The mask is resized as float32 and converted back to bool afterwards.
    sample["mask"] = cv2.resize(
        sample["mask"].astype(np.float32),
        target,
        interpolation=cv2.INTER_NEAREST,
    )
    sample["mask"] = sample["mask"].astype(bool)

    return tuple(shape)
def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
    """Snap ``x`` to a multiple of the configured ``ensure_multiple_of``.

    ``x`` is rounded to the nearest multiple first. If that exceeds
    ``max_val`` the value is rounded down instead; if the result is then
    below ``min_val`` it is rounded up.

    Args:
        x: value to constrain.
        min_val: lower bound for the result. Defaults to 0.
        max_val: optional upper bound for the result. Defaults to None.

    Returns:
        the constrained value, converted with ``astype(int)``.
    """
    step = self.__multiple_of
    ratio = x / step

    snapped = (np.round(ratio) * step).astype(int)

    if max_val is not None and snapped > max_val:
        snapped = (np.floor(ratio) * step).astype(int)

    if snapped < min_val:
        snapped = (np.ceil(ratio) * step).astype(int)

    return snapped
class NormalizeImage:
    """Normalize the sample image with a given mean and std.
    """

    def __init__(self, mean, std):
        """Store the statistics used by ``__call__``.

        Args:
            mean: value subtracted from the image.
            std: value the centred image is divided by.
        """
        self.__mean = mean
        self.__std = std

    def __call__(self, sample):
        """Replace ``sample["image"]`` by ``(image - mean) / std``.

        Args:
            sample (dict): sample holding an "image" entry.

        Returns:
            dict: the same ``sample`` object, updated in place.
        """
        centred = sample["image"] - self.__mean
        sample["image"] = centred / self.__std
        return sample
class Slice(nn.Module):
    """Drop the first ``start_index`` entries along dimension 1."""

    def __init__(self, start_index=1):
        super().__init__()
        self.start_index = start_index

    def forward(self, x):
        first = self.start_index
        return x[:, first:]


class AddReadout(nn.Module):
    """Add the readout entry to every remaining entry along dimension 1.

    The readout is ``x[:, 0]``, or the mean of ``x[:, 0]`` and ``x[:, 1]``
    when ``start_index`` is 2.
    """

    def __init__(self, start_index=1):
        super().__init__()
        self.start_index = start_index

    def forward(self, x):
        readout = x[:, 0]
        if self.start_index == 2:
            readout = (x[:, 0] + x[:, 1]) / 2
        tokens = x[:, self.start_index :]
        return tokens + readout.unsqueeze(1)


class ProjectReadout(nn.Module):
    """Concatenate ``x[:, 0]`` to every remaining entry and project back.

    The projection is ``Linear(2 * in_features, in_features)`` followed by
    GELU, so the last dimension keeps size ``in_features``.
    """

    def __init__(self, in_features, start_index=1):
        super().__init__()
        self.start_index = start_index

        self.project = nn.Sequential(nn.Linear(2 * in_features, in_features), nn.GELU())

    def forward(self, x):
        tokens = x[:, self.start_index :]
        # Only x[:, 0] is used as readout here, whatever start_index is.
        readout = x[:, 0].unsqueeze(1).expand_as(tokens)
        return self.project(torch.cat((tokens, readout), -1))
class View(nn.Module):
    """Replace dimension ``dim`` of the input by the dimensions in ``shape``."""

    def __init__(self, dim, shape):
        super().__init__()
        self.dim = dim
        self.shape = shape

    def forward(self, input):
        dims = list(input.shape)
        new_shape = dims[: self.dim] + list(self.shape) + dims[self.dim + 1 :]
        return input.view(*new_shape)


# NOTE: this rebinds the attribute on the torch.nn module itself, so every
# later `nn.Unflatten(...)` lookup resolves to View.
nn.Unflatten = View


def forward_vit(pretrained, x):
    """Run the transformer on ``x`` and return the four post-processed activations.

    ``pretrained.model.forward_flex(x)`` is called for its side effect only;
    the features are then read from ``pretrained.activations["1".."4"]``.
    Each activation goes through the first two stages of its
    ``act_postprocessN``, is unflattened at dimension 2 to the patch grid if
    it is still 3-D, and then through the stages from index 3 onwards.

    Args:
        pretrained: wrapper exposing ``model``, ``activations`` and
            ``act_postprocess1`` .. ``act_postprocess4``.
        x (tensor): 4-D input.

    Returns:
        tuple: ``(layer_1, layer_2, layer_3, layer_4)``
    """
    _, _, h, w = x.shape

    # The return value is not needed; the activations dict is read below.
    pretrained.model.forward_flex(x)

    keys = ("1", "2", "3", "4")
    post_ops = [getattr(pretrained, "act_postprocess" + key) for key in keys]
    layers = [
        post[0:2](pretrained.activations[key]) for key, post in zip(keys, post_ops)
    ]

    patch_size = pretrained.model.patch_size
    grid = torch.Size([h // patch_size[1], w // patch_size[0]])
    unflatten = nn.Sequential(nn.Unflatten(2, grid))

    results = []
    for post, layer in zip(post_ops, layers):
        if layer.ndim == 3:
            layer = unflatten(layer)
        results.append(post[3 : len(post)](layer))

    return tuple(results)
def forward_flex(self, x):
    """Transformer forward pass with the position embedding resized to the input.

    Written to be bound to a model instance. Uses ``self._resize_pos_embed``,
    ``pos_embed``, ``patch_size``, ``patch_embed``, ``cls_token``, optionally
    ``dist_token``, ``pos_drop``, ``blocks`` and ``norm``.

    Args:
        x (tensor): 4-D input.

    Returns:
        tensor: output of ``self.norm`` after all blocks.
    """
    _, _, h, w = x.shape

    pos_embed = self._resize_pos_embed(
        self.pos_embed, h // self.patch_size[1], w // self.patch_size[0]
    )

    batch = x.shape[0]

    patch_embed = self.patch_embed
    if hasattr(patch_embed, "backbone"):
        x = patch_embed.backbone(x)
        if isinstance(x, (list, tuple)):
            x = x[-1]  # last feature if backbone outputs list/tuple of features

    x = patch_embed.proj(x).flatten(2).transpose(1, 2)

    # Prepend the class token, plus the distillation token when the model has one.
    prefix = [self.cls_token.expand(batch, -1, -1)]
    if getattr(self, "dist_token", None) is not None:
        prefix.append(self.dist_token.expand(batch, -1, -1))
    x = torch.cat((*prefix, x), dim=1)

    x = self.pos_drop(x + pos_embed)

    for blk in self.blocks:
        x = blk(x)

    return self.norm(x)


# Module-level store written by the hooks from get_activation(); it is one
# dict for the whole module, not one per model.
activations = {}


def get_activation(name):
    """Return a forward hook that stores the module output under ``name``.

    Args:
        name (str): key used in the module-level ``activations`` dict.

    Returns:
        callable: hook with the ``(model, input, output)`` signature.
    """
    def hook(model, input, output):
        activations[name] = output

    return hook


def get_readout_oper(vit_features, features, use_readout, start_index=1):
    """Create one readout operation per entry of ``features``.

    Args:
        vit_features (int): feature size passed to ``ProjectReadout``.
        features (list): only its length (number of operations) is used.
        use_readout (str): 'ignore', 'add' or 'project'.
        start_index (int, optional): forwarded to the readout modules.
            Defaults to 1.

    Returns:
        list: readout modules. For 'ignore' and 'add' the list repeats one
        shared module instance; for 'project' every entry is a new module.

    Raises:
        AssertionError: if ``use_readout`` is not one of the three modes.
    """
    if use_readout == "ignore":
        readout_oper = [Slice(start_index)] * len(features)
    elif use_readout == "add":
        readout_oper = [AddReadout(start_index)] * len(features)
    elif use_readout == "project":
        readout_oper = [ProjectReadout(vit_features, start_index) for _ in features]
    else:
        # Raised explicitly: an `assert` statement disappears under `python -O`
        # and the function would then fail on the unbound `readout_oper`.
        raise AssertionError(
            "wrong operation for readout token, use_readout can be 'ignore', 'add', or 'project'"
        )

    return readout_oper
start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + # 32, 48, 136, 384 + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + 
def _make_pretrained_vitl16_384(pretrained, use_readout="ignore", hooks=None):
    """Create the timm model "vit_large_patch16_384" and wrap it as a backbone.

    Args:
        pretrained: forwarded to ``timm.create_model(..., pretrained=...)``.
        use_readout (str, optional): readout mode forwarded to the backbone
            builder. Defaults to "ignore".
        hooks (list, optional): indices into ``model.blocks`` whose outputs
            are captured. Defaults to ``[5, 11, 17, 23]``.

    Returns:
        the module returned by ``_make_vit_b16_backbone``.
    """
    model = timm.create_model("vit_large_patch16_384", pretrained=pretrained)

    hooks = [5, 11, 17, 23] if hooks is None else hooks
    return _make_vit_b16_backbone(
        model,
        features=[256, 512, 1024, 1024],
        hooks=hooks,
        vit_features=1024,
        use_readout=use_readout,
    )


def _make_pretrained_vitb16_384(pretrained, use_readout="ignore", hooks=None):
    """Create the timm model "vit_base_patch16_384" and wrap it as a backbone.

    Args:
        pretrained: forwarded to ``timm.create_model(..., pretrained=...)``.
        use_readout (str, optional): readout mode forwarded to the backbone
            builder. Defaults to "ignore".
        hooks (list, optional): indices into ``model.blocks`` whose outputs
            are captured. Defaults to ``[2, 5, 8, 11]``.

    Returns:
        the module returned by ``_make_vit_b16_backbone``.
    """
    model = timm.create_model("vit_base_patch16_384", pretrained=pretrained)

    hooks = [2, 5, 8, 11] if hooks is None else hooks
    return _make_vit_b16_backbone(
        model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
    )


def _make_pretrained_deitb16_384(pretrained, use_readout="ignore", hooks=None):
    """Create the timm model "vit_deit_base_patch16_384" and wrap it as a backbone.

    Args:
        pretrained: forwarded to ``timm.create_model(..., pretrained=...)``.
        use_readout (str, optional): readout mode forwarded to the backbone
            builder. Defaults to "ignore".
        hooks (list, optional): indices into ``model.blocks`` whose outputs
            are captured. Defaults to ``[2, 5, 8, 11]``.

    Returns:
        the module returned by ``_make_vit_b16_backbone``.
    """
    model = timm.create_model("vit_deit_base_patch16_384", pretrained=pretrained)

    hooks = [2, 5, 8, 11] if hooks is None else hooks
    return _make_vit_b16_backbone(
        model, features=[96, 192, 384, 768], hooks=hooks, use_readout=use_readout
    )


def _make_pretrained_deitb16_distil_384(pretrained, use_readout="ignore", hooks=None):
    """Create the timm model "vit_deit_base_distilled_patch16_384" and wrap it.

    The backbone is built with ``start_index=2``.

    Args:
        pretrained: forwarded to ``timm.create_model(..., pretrained=...)``.
        use_readout (str, optional): readout mode forwarded to the backbone
            builder. Defaults to "ignore".
        hooks (list, optional): indices into ``model.blocks`` whose outputs
            are captured. Defaults to ``[2, 5, 8, 11]``.

    Returns:
        the module returned by ``_make_vit_b16_backbone``.
    """
    model = timm.create_model(
        "vit_deit_base_distilled_patch16_384", pretrained=pretrained
    )

    hooks = [2, 5, 8, 11] if hooks is None else hooks
    return _make_vit_b16_backbone(
        model,
        features=[96, 192, 384, 768],
        hooks=hooks,
        use_readout=use_readout,
        start_index=2,
    )
512, 768, 768], + size=[384, 384], + hooks=[0, 1, 8, 11], + vit_features=768, + use_vit_only=False, + use_readout="ignore", + start_index=1, +): + pretrained = nn.Module() + + pretrained.model = model + + if use_vit_only == True: + pretrained.model.blocks[hooks[0]].register_forward_hook(get_activation("1")) + pretrained.model.blocks[hooks[1]].register_forward_hook(get_activation("2")) + else: + pretrained.model.patch_embed.backbone.stages[0].register_forward_hook( + get_activation("1") + ) + pretrained.model.patch_embed.backbone.stages[1].register_forward_hook( + get_activation("2") + ) + + pretrained.model.blocks[hooks[2]].register_forward_hook(get_activation("3")) + pretrained.model.blocks[hooks[3]].register_forward_hook(get_activation("4")) + + pretrained.activations = activations + + readout_oper = get_readout_oper(vit_features, features, use_readout, start_index) + + if use_vit_only == True: + pretrained.act_postprocess1 = nn.Sequential( + readout_oper[0], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[0], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[0], + out_channels=features[0], + kernel_size=4, + stride=4, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + + pretrained.act_postprocess2 = nn.Sequential( + readout_oper[1], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[1], + kernel_size=1, + stride=1, + padding=0, + ), + nn.ConvTranspose2d( + in_channels=features[1], + out_channels=features[1], + kernel_size=2, + stride=2, + padding=0, + bias=True, + dilation=1, + groups=1, + ), + ) + else: + pretrained.act_postprocess1 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + pretrained.act_postprocess2 = nn.Sequential( + nn.Identity(), nn.Identity(), nn.Identity() + ) + + 
pretrained.act_postprocess3 = nn.Sequential( + readout_oper[2], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[2], + kernel_size=1, + stride=1, + padding=0, + ), + ) + + pretrained.act_postprocess4 = nn.Sequential( + readout_oper[3], + Transpose(1, 2), + nn.Unflatten(2, torch.Size([size[0] // 16, size[1] // 16])), + nn.Conv2d( + in_channels=vit_features, + out_channels=features[3], + kernel_size=1, + stride=1, + padding=0, + ), + nn.Conv2d( + in_channels=features[3], + out_channels=features[3], + kernel_size=3, + stride=2, + padding=1, + ), + ) + + pretrained.model.start_index = start_index + pretrained.model.patch_size = [16, 16] + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. + pretrained.model.forward_flex = types.MethodType(forward_flex, pretrained.model) + + # We inject this function into the VisionTransformer instances so that + # we can use it with interpolated position embeddings without modifying the library source. 
def _make_pretrained_vitb_rn50_384(
    pretrained, use_readout="ignore", hooks=None, use_vit_only=False
):
    """Create the timm model "vit_base_resnet50_384" and wrap it as a backbone.

    Args:
        pretrained: forwarded to ``timm.create_model(..., pretrained=...)``.
        use_readout (str, optional): readout mode forwarded to the backbone
            builder. Defaults to "ignore".
        hooks (list, optional): hook indices forwarded to the backbone
            builder. Defaults to ``[0, 1, 8, 11]``.
        use_vit_only (bool, optional): forwarded to the backbone builder.
            Defaults to False.

    Returns:
        the module returned by ``_make_vit_b_rn50_backbone``.
    """
    model = timm.create_model("vit_base_resnet50_384", pretrained=pretrained)

    hooks = [0, 1, 8, 11] if hooks is None else hooks
    return _make_vit_b_rn50_backbone(
        model,
        features=[256, 512, 768, 768],
        size=[384, 384],
        hooks=hooks,
        use_vit_only=use_vit_only,
        use_readout=use_readout,
    )
+ +### Inference speed (FPS) on iOS / Android + +**Frames Per Second** (the higher - the better): + +| Model | iPhone CPU | iPhone GPU | iPhone NPU | OnePlus8 CPU | OnePlus8 GPU | OnePlus8 NNAPI | +|---|---|---|---|---|---|---| +| Old small model 384x384 | 0.6 | N/A | N/A | 0.45 | 0.50 | 0.50 | +| New small model 256x256 | 8 | 22 | **30** | 6 | **22** | 4 | +| SpeedUp, X times | **12.8x** | - | - | **13.2x** | **44x** | **8x** | + +N/A - run-time error (no data available) + + +#### Models: + +* Old small model - ResNet50 default-decoder 1x384x384x3, batch=1 FP32 (converters: Pytorch -> ONNX - [onnx_tf](https://github.com/onnx/onnx-tensorflow) -> (saved model) PB -> TFlite) + + (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor) + +* New small model - EfficientNet-Lite3 small-decoder 1x256x256x3, batch=1 FP32 (custom converter: Pytorch -> TFlite) + + (Trained on datasets: RedWeb, MegaDepth, WSVD, 3D Movies, DIML indoor, HRWSI, IRS, TartanAir, BlendedMVS, ApolloScape) + +#### Frameworks for training and conversions: +``` +pip install torch==1.6.0 torchvision==0.7.0 +pip install tf-nightly-gpu==2.5.0.dev20201031 tensorflow-addons==0.11.2 numpy==1.18.0 +git clone --depth 1 --branch v1.6.0 https://github.com/onnx/onnx-tensorflow +``` + +#### SoC - OS - Library: + +* iPhone 11 (A13 Bionic) - iOS 13.7 - TensorFlowLiteSwift 0.0.1-nightly +* OnePlus 8 (Snapdragon 865) - Android 10 - org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly + + +### Citation + +This repository contains code to compute depth from a single image.
It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): + +>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + +Please cite our paper if you use this code or any of the models: +``` +@article{Ranftl2020, + author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, + title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, + year = {2020}, +} +``` + diff --git a/live2diff/MiDaS/mobile/android/.gitignore b/live2diff/MiDaS/mobile/android/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..2fbe357549c64ae2966d5c3013a9179427b7b396 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/.gitignore @@ -0,0 +1,13 @@ +*.iml +.gradle +/local.properties +/.idea/libraries +/.idea/modules.xml +/.idea/workspace.xml +.DS_Store +/build +/captures +.externalNativeBuild + +/.gradle/ +/.idea/ \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/EXPLORE_THE_CODE.md b/live2diff/MiDaS/mobile/android/EXPLORE_THE_CODE.md new file mode 100644 index 0000000000000000000000000000000000000000..72014bdfa2cd701a6453debbc8e53fcc15c0a5dc --- /dev/null +++ b/live2diff/MiDaS/mobile/android/EXPLORE_THE_CODE.md @@ -0,0 +1,414 @@ +# TensorFlow Lite Android image classification example + +This document walks through the code of a simple Android mobile application that +demonstrates +[image classification](https://www.tensorflow.org/lite/models/image_classification/overview) +using the device camera. + +## Explore the code + +We're now going to walk through the most important parts of the sample code. 
+ +### Get camera input + +This mobile application gets the camera input using the functions defined in the +file +[`CameraActivity.java`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraActivity.java). +This file depends on +[`AndroidManifest.xml`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/app/src/main/AndroidManifest.xml) +to set the camera orientation. + +`CameraActivity` also contains code to capture user preferences from the UI and +make them available to other classes via convenience methods. + +```java +model = Model.valueOf(modelSpinner.getSelectedItem().toString().toUpperCase()); +device = Device.valueOf(deviceSpinner.getSelectedItem().toString()); +numThreads = Integer.parseInt(threadsTextView.getText().toString().trim()); +``` + +### Classifier + +This Image Classification Android reference app demonstrates two implementation +solutions, +[`lib_task_api`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/lib_task_api) +that leverages the out-of-the-box API from the +[TensorFlow Lite Task Library](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier), +and +[`lib_support`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/lib_support) +that creates the custom inference pipeline using the +[TensorFlow Lite Support Library](https://www.tensorflow.org/lite/inference_with_metadata/lite_support).
+ +Both solutions implement the file `Classifier.java` (see +[the one in lib_task_api](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java) +and +[the one in lib_support](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java)) +that contains most of the complex logic for processing the camera input and +running inference. + +Two subclasses of the `Classifier` exist, as in `ClassifierFloatMobileNet.java` +and `ClassifierQuantizedMobileNet.java`, which contain settings for both +floating point and +[quantized](https://www.tensorflow.org/lite/performance/post_training_quantization) +models. + +The `Classifier` class implements a static method, `create`, which is used to +instantiate the appropriate subclass based on the supplied model type (quantized +vs floating point). + +#### Using the TensorFlow Lite Task Library + +Inference can be done using just a few lines of code with the +[`ImageClassifier`](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier) +in the TensorFlow Lite Task Library. + +##### Load model and create ImageClassifier + +`ImageClassifier` expects a model populated with the +[model metadata](https://www.tensorflow.org/lite/convert/metadata) and the label +file. See the +[model compatibility requirements](https://www.tensorflow.org/lite/inference_with_metadata/task_library/image_classifier#model_compatibility_requirements) +for more details. + +`ImageClassifierOptions` allows manipulation on various inference options, such +as setting the maximum number of top scored results to return using +`setMaxResults(MAX_RESULTS)`, and setting the score threshold using +`setScoreThreshold(scoreThreshold)`. + +```java +// Create the ImageClassifier instance. 
+ImageClassifierOptions options = + ImageClassifierOptions.builder().setMaxResults(MAX_RESULTS).build(); +imageClassifier = ImageClassifier.createFromFileAndOptions(activity, + getModelPath(), options); +``` + +`ImageClassifier` currently does not support configuring delegates and +multithread, but those are on our roadmap. Please stay tuned! + +##### Run inference + +`ImageClassifier` contains builtin logic to preprocess the input image, such as +rotating and resizing an image. Processing options can be configured through +`ImageProcessingOptions`. In the following example, input images are rotated to +the up-right angle and cropped to the center as the model expects a square input +(`224x224`). See the +[Java doc of `ImageClassifier`](https://github.com/tensorflow/tflite-support/blob/195b574f0aa9856c618b3f1ad87bd185cddeb657/tensorflow_lite_support/java/src/java/org/tensorflow/lite/task/core/vision/ImageProcessingOptions.java#L22) +for more details about how the underlying image processing is performed. + +```java +TensorImage inputImage = TensorImage.fromBitmap(bitmap); +int width = bitmap.getWidth(); +int height = bitmap.getHeight(); +int cropSize = min(width, height); +ImageProcessingOptions imageOptions = + ImageProcessingOptions.builder() + .setOrientation(getOrientation(sensorOrientation)) + // Set the ROI to the center of the image. + .setRoi( + new Rect( + /*left=*/ (width - cropSize) / 2, + /*top=*/ (height - cropSize) / 2, + /*right=*/ (width + cropSize) / 2, + /*bottom=*/ (height + cropSize) / 2)) + .build(); + +List results = imageClassifier.classify(inputImage, + imageOptions); +``` + +The output of `ImageClassifier` is a list of `Classifications` instance, where +each `Classifications` element is a single head classification result. All the +demo models are single head models, therefore, `results` only contains one +`Classifications` object. Use `Classifications.getCategories()` to get a list of +top-k categories as specified with `MAX_RESULTS`. 
Each `Category` object +contains the string label and the score of that category. + +To match the implementation of +[`lib_support`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/lib_support), +`results` is converted into `List` in the method, +`getRecognitions`. + +#### Using the TensorFlow Lite Support Library + +##### Load model and create interpreter + +To perform inference, we need to load a model file and instantiate an +`Interpreter`. This happens in the constructor of the `Classifier` class, along +with loading the list of class labels. Information about the device type and +number of threads is used to configure the `Interpreter` via the +`Interpreter.Options` instance passed into its constructor. Note that if a GPU, +DSP (Digital Signal Processor) or NPU (Neural Processing Unit) is available, a +[`Delegate`](https://www.tensorflow.org/lite/performance/delegates) can be used +to take full advantage of this hardware. + +Please note that there are performance edge cases and developers are advised to +test with a representative set of devices prior to production. + +```java +protected Classifier(Activity activity, Device device, int numThreads) throws + IOException { + tfliteModel = FileUtil.loadMappedFile(activity, getModelPath()); + switch (device) { + case NNAPI: + nnApiDelegate = new NnApiDelegate(); + tfliteOptions.addDelegate(nnApiDelegate); + break; + case GPU: + gpuDelegate = new GpuDelegate(); + tfliteOptions.addDelegate(gpuDelegate); + break; + case CPU: + break; + } + tfliteOptions.setNumThreads(numThreads); + tflite = new Interpreter(tfliteModel, tfliteOptions); + labels = FileUtil.loadLabels(activity, getLabelPath()); +... +``` + +For Android devices, we recommend pre-loading and memory mapping the model file +to offer faster load times and reduce the dirty pages in memory. The method +`FileUtil.loadMappedFile` does this, returning a `MappedByteBuffer` containing +the model.
+ +The `MappedByteBuffer` is passed into the `Interpreter` constructor, along with +an `Interpreter.Options` object. This object can be used to configure the +interpreter, for example by setting the number of threads (`.setNumThreads(1)`) +or enabling [NNAPI](https://developer.android.com/ndk/guides/neuralnetworks) +(`.addDelegate(nnApiDelegate)`). + +##### Pre-process bitmap image + +Next in the `Classifier` constructor, we take the input camera bitmap image, +convert it to a `TensorImage` format for efficient processing and pre-process +it. The steps are shown in the private `loadImage` method: + +```java +/** Loads input image, and applies preprocessing. */ +private TensorImage loadImage(final Bitmap bitmap, int sensorOrientation) { + // Loads bitmap into a TensorImage. + image.load(bitmap); + + // Creates processor for the TensorImage. + int cropSize = Math.min(bitmap.getWidth(), bitmap.getHeight()); + int numRoration = sensorOrientation / 90; + ImageProcessor imageProcessor = + new ImageProcessor.Builder() + .add(new ResizeWithCropOrPadOp(cropSize, cropSize)) + .add(new ResizeOp(imageSizeX, imageSizeY, ResizeMethod.BILINEAR)) + .add(new Rot90Op(numRoration)) + .add(getPreprocessNormalizeOp()) + .build(); + return imageProcessor.process(inputImageBuffer); +} +``` + +The pre-processing is largely the same for quantized and float models with one +exception: Normalization. + +In `ClassifierFloatMobileNet`, the normalization parameters are defined as: + +```java +private static final float IMAGE_MEAN = 127.5f; +private static final float IMAGE_STD = 127.5f; +``` + +In `ClassifierQuantizedMobileNet`, normalization is not required. Thus the +normalization parameters are defined as: + +```java +private static final float IMAGE_MEAN = 0.0f; +private static final float IMAGE_STD = 1.0f; +``` + +##### Allocate output object + +Initialize the output `TensorBuffer` for the output of the model. + +```java +/** Output probability TensorBuffer.
*/ +private final TensorBuffer outputProbabilityBuffer; + +//... +// Get the array size for the output buffer from the TensorFlow Lite model file +int probabilityTensorIndex = 0; +int[] probabilityShape = + tflite.getOutputTensor(probabilityTensorIndex).shape(); // {1, 1001} +DataType probabilityDataType = + tflite.getOutputTensor(probabilityTensorIndex).dataType(); + +// Creates the output tensor and its processor. +outputProbabilityBuffer = + TensorBuffer.createFixedSize(probabilityShape, probabilityDataType); + +// Creates the post processor for the output probability. +probabilityProcessor = + new TensorProcessor.Builder().add(getPostprocessNormalizeOp()).build(); +``` + +For quantized models, we need to de-quantize the prediction with the NormalizeOp +(as they are all essentially linear transformations). For float models, +de-quantization is not required. But to unify the API, de-quantization is added to +float models too. Mean and std are set to 0.0f and 1.0f, respectively. To be more +specific, + +In `ClassifierQuantizedMobileNet`, the normalization parameters are defined as: + +```java +private static final float PROBABILITY_MEAN = 0.0f; +private static final float PROBABILITY_STD = 255.0f; +``` + +In `ClassifierFloatMobileNet`, the normalization parameters are defined as: + +```java +private static final float PROBABILITY_MEAN = 0.0f; +private static final float PROBABILITY_STD = 1.0f; +``` + +##### Run inference + +Inference is performed using the following in the `Classifier` class: + +```java +tflite.run(inputImageBuffer.getBuffer(), + outputProbabilityBuffer.getBuffer().rewind()); +``` + +##### Recognize image + +Rather than call `run` directly, the method `recognizeImage` is used. It accepts +a bitmap and sensor orientation, runs inference, and returns a sorted `List` of +`Recognition` instances, each corresponding to a label. The method will return a +number of results bounded by `MAX_RESULTS`, which is 3 by default.
+ +`Recognition` is a simple class that contains information about a specific +recognition result, including its `title` and `confidence`. Using the +post-processing normalization method specified, the confidence is converted to +between 0 and 1 of a given class being represented by the image. + +```java +/** Gets the label to probability map. */ +Map labeledProbability = + new TensorLabel(labels, + probabilityProcessor.process(outputProbabilityBuffer)) + .getMapWithFloatValue(); +``` + +A `PriorityQueue` is used for sorting. + +```java +/** Gets the top-k results. */ +private static List getTopKProbability( + Map labelProb) { + // Find the best classifications. + PriorityQueue pq = + new PriorityQueue<>( + MAX_RESULTS, + new Comparator() { + @Override + public int compare(Recognition lhs, Recognition rhs) { + // Intentionally reversed to put high confidence at the head of + // the queue. + return Float.compare(rhs.getConfidence(), lhs.getConfidence()); + } + }); + + for (Map.Entry entry : labelProb.entrySet()) { + pq.add(new Recognition("" + entry.getKey(), entry.getKey(), + entry.getValue(), null)); + } + + final ArrayList recognitions = new ArrayList<>(); + int recognitionsSize = Math.min(pq.size(), MAX_RESULTS); + for (int i = 0; i < recognitionsSize; ++i) { + recognitions.add(pq.poll()); + } + return recognitions; +} +``` + +### Display results + +The classifier is invoked and inference results are displayed by the +`processImage()` function in +[`ClassifierActivity.java`](https://github.com/tensorflow/examples/tree/master/lite/examples/image_classification/android/app/src/main/java/org/tensorflow/lite/examples/classification/ClassifierActivity.java). + +`ClassifierActivity` is a subclass of `CameraActivity` that contains method +implementations that render the camera image, run classification, and display +the results. 
The method `processImage()` runs classification on a background +thread as fast as possible, rendering information on the UI thread to avoid +blocking inference and creating latency. + +```java +@Override +protected void processImage() { + rgbFrameBitmap.setPixels(getRgbBytes(), 0, previewWidth, 0, 0, previewWidth, + previewHeight); + final int imageSizeX = classifier.getImageSizeX(); + final int imageSizeY = classifier.getImageSizeY(); + + runInBackground( + new Runnable() { + @Override + public void run() { + if (classifier != null) { + final long startTime = SystemClock.uptimeMillis(); + final List results = + classifier.recognizeImage(rgbFrameBitmap, sensorOrientation); + lastProcessingTimeMs = SystemClock.uptimeMillis() - startTime; + LOGGER.v("Detect: %s", results); + + runOnUiThread( + new Runnable() { + @Override + public void run() { + showResultsInBottomSheet(results); + showFrameInfo(previewWidth + "x" + previewHeight); + showCropInfo(imageSizeX + "x" + imageSizeY); + showCameraResolution(imageSizeX + "x" + imageSizeY); + showRotationInfo(String.valueOf(sensorOrientation)); + showInference(lastProcessingTimeMs + "ms"); + } + }); + } + readyForNextImage(); + } + }); +} +``` + +Another important role of `ClassifierActivity` is to determine user preferences +(by interrogating `CameraActivity`), and instantiate the appropriately +configured `Classifier` subclass. This happens when the video feed begins (via +`onPreviewSizeChosen()`) and when options are changed in the UI (via +`onInferenceConfigurationChanged()`). 
+ +```java +private void recreateClassifier(Model model, Device device, int numThreads) { + if (classifier != null) { + LOGGER.d("Closing classifier."); + classifier.close(); + classifier = null; + } + if (device == Device.GPU && model == Model.QUANTIZED) { + LOGGER.d("Not creating classifier: GPU doesn't support quantized models."); + runOnUiThread( + () -> { + Toast.makeText(this, "GPU does not yet supported quantized models.", + Toast.LENGTH_LONG) + .show(); + }); + return; + } + try { + LOGGER.d( + "Creating classifier (model=%s, device=%s, numThreads=%d)", model, + device, numThreads); + classifier = Classifier.create(this, model, device, numThreads); + } catch (IOException e) { + LOGGER.e(e, "Failed to create classifier."); + } +} +``` diff --git a/live2diff/MiDaS/mobile/android/LICENSE b/live2diff/MiDaS/mobile/android/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..6606ec028d1c629986e7019fe3564f5b4bfe425d --- /dev/null +++ b/live2diff/MiDaS/mobile/android/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Alexey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/live2diff/MiDaS/mobile/android/README.md b/live2diff/MiDaS/mobile/android/README.md new file mode 100644 index 0000000000000000000000000000000000000000..aecdb6b2ab38d07258c6169f83822301f63b9321 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/README.md @@ -0,0 +1,21 @@ +# MiDaS on Android smartphone by using TensorFlow-lite (TFLite) + + +* Either use Android Studio for compilation. + +* Or use ready to install apk-file: + * Or use URL: https://i.diawi.com/CVb8a9 + * Or use QR-code: + +Scan QR-code or open URL -> Press `Install application` -> Press `Download` and wait for download -> Open -> Install -> Open -> Press: Allow MiDaS to take photo and video from the camera While using the APP + +![CVb8a9](https://user-images.githubusercontent.com/4096485/97727213-38552500-1ae1-11eb-8b76-4ea11216f76d.png) + +---- + +To use another model, you should convert it to `model_opt.tflite` and place it to the directory: `models\src\main\assets` + + +---- + +Original repository: https://github.com/intel-isl/MiDaS diff --git a/live2diff/MiDaS/mobile/android/app/.gitignore b/live2diff/MiDaS/mobile/android/app/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..1ae74c6780c277d75fedfb7511ff51f69941b48b --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/.gitignore @@ -0,0 +1,3 @@ +/build + +/build/ \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/app/build.gradle b/live2diff/MiDaS/mobile/android/app/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..94e9886a55c7d54f71b424bb246c849dd6bd795d --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/build.gradle @@ -0,0 +1,56 @@ +apply plugin: 'com.android.application' + +android { + 
compileSdkVersion 28 + defaultConfig { + applicationId "org.tensorflow.lite.examples.classification" + minSdkVersion 21 + targetSdkVersion 28 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + } + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro' + } + } + aaptOptions { + noCompress "tflite" + } + compileOptions { + sourceCompatibility = '1.8' + targetCompatibility = '1.8' + } + lintOptions { + abortOnError false + } + flavorDimensions "tfliteInference" + productFlavors { + // The TFLite inference is built using the TFLite Support library. + support { + dimension "tfliteInference" + } + // The TFLite inference is built using the TFLite Task library. + taskApi { + dimension "tfliteInference" + } + } + +} + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) + supportImplementation project(":lib_support") + taskApiImplementation project(":lib_task_api") + implementation 'androidx.appcompat:appcompat:1.0.0' + implementation 'androidx.coordinatorlayout:coordinatorlayout:1.0.0' + implementation 'com.google.android.material:material:1.0.0' + + androidTestImplementation 'androidx.test.ext:junit:1.1.1' + androidTestImplementation 'com.google.truth:truth:1.0.1' + androidTestImplementation 'androidx.test:runner:1.2.0' + androidTestImplementation 'androidx.test:rules:1.1.0' +} diff --git a/live2diff/MiDaS/mobile/android/app/proguard-rules.pro b/live2diff/MiDaS/mobile/android/app/proguard-rules.pro new file mode 100644 index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. 
+# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. +#-renamesourcefileattribute SourceFile diff --git a/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_support.txt b/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_support.txt new file mode 100644 index 0000000000000000000000000000000000000000..bdfad31f9b3e694817025d8b8f2ca0b40aa436bb --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_support.txt @@ -0,0 +1,3 @@ +red_fox 0.79403335 +kit_fox 0.16753247 +grey_fox 0.03619214 diff --git a/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_task_api.txt b/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_task_api.txt new file mode 100644 index 0000000000000000000000000000000000000000..3668ce54df0d1e57e31c58281d6085b83928f991 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/androidTest/assets/fox-mobilenet_v1_1.0_224_task_api.txt @@ -0,0 +1,3 @@ +red_fox 0.85 +kit_fox 0.13 +grey_fox 0.02 diff --git a/live2diff/MiDaS/mobile/android/app/src/androidTest/java/AndroidManifest.xml b/live2diff/MiDaS/mobile/android/app/src/androidTest/java/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..3653d8799092492ebbb16c7c956eb50e3d404aa4 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/androidTest/java/AndroidManifest.xml @@ -0,0 +1,5 
@@ + + + + \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/app/src/androidTest/java/org/tensorflow/lite/examples/classification/ClassifierTest.java b/live2diff/MiDaS/mobile/android/app/src/androidTest/java/org/tensorflow/lite/examples/classification/ClassifierTest.java new file mode 100644 index 0000000000000000000000000000000000000000..0194132890aae659c2a70d33106306ed665b22e8 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/androidTest/java/org/tensorflow/lite/examples/classification/ClassifierTest.java @@ -0,0 +1,121 @@ +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.tensorflow.lite.examples.classification; + +import static com.google.common.truth.Truth.assertThat; + +import android.content.res.AssetManager; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.util.Log; +import androidx.test.ext.junit.runners.AndroidJUnit4; +import androidx.test.platform.app.InstrumentationRegistry; +import androidx.test.rule.ActivityTestRule; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Scanner; +import org.junit.Assert; +import org.junit.Rule; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.tensorflow.lite.examples.classification.tflite.Classifier; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Model; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; + +/** Golden test for the Image Classification reference app: classifies each image listed in INPUTS and checks every returned title and confidence (within 0.01) against the golden file selected by Classifier.TAG. */ +@RunWith(AndroidJUnit4.class) +public class ClassifierTest { + + @Rule + public ActivityTestRule rule = + new ActivityTestRule<>(ClassifierActivity.class); + + private static final String[] INPUTS = {"fox.jpg"}; + private static final String[] GOLDEN_OUTPUTS_SUPPORT = {"fox-mobilenet_v1_1.0_224_support.txt"}; + private static final String[] GOLDEN_OUTPUTS_TASK = {"fox-mobilenet_v1_1.0_224_task_api.txt"}; + + @Test + public void classificationResultsShouldNotChange() throws IOException { + ClassifierActivity activity = rule.getActivity(); + Classifier classifier = Classifier.create(activity, Model.FLOAT_MOBILENET, Device.CPU, 1); + for (int i = 0; i < INPUTS.length; i++) { + String imageFileName = INPUTS[i]; + String goldenOutputFileName; + // TODO(b/169379396): investigate the impact of the resize algorithm on accuracy.
+ // This is a temporary workaround to set different golden test results as the preprocessing + // of lib_support and lib_task_api are different. Will merge them once the above TODO is + // resolved. + if (Classifier.TAG.equals("ClassifierWithSupport")) { + goldenOutputFileName = GOLDEN_OUTPUTS_SUPPORT[i]; + } else { + goldenOutputFileName = GOLDEN_OUTPUTS_TASK[i]; + } + Bitmap input = loadImage(imageFileName); + List goldenOutput = loadRecognitions(goldenOutputFileName); + + List result = classifier.recognizeImage(input, 0); + Iterator goldenOutputIterator = goldenOutput.iterator(); + + for (Recognition actual : result) { + Assert.assertTrue(goldenOutputIterator.hasNext()); + Recognition expected = goldenOutputIterator.next(); + assertThat(actual.getTitle()).isEqualTo(expected.getTitle()); + assertThat(actual.getConfidence()).isWithin(0.01f).of(expected.getConfidence()); + } + } + } + + private static Bitmap loadImage(String fileName) { + AssetManager assetManager = + InstrumentationRegistry.getInstrumentation().getContext().getAssets(); + InputStream inputStream = null; + try { + inputStream = assetManager.open(fileName); + } catch (IOException e) { + Log.e("Test", "Cannot load image from assets"); + } + return BitmapFactory.decodeStream(inputStream); + } + + private static List loadRecognitions(String fileName) { + AssetManager assetManager = + InstrumentationRegistry.getInstrumentation().getContext().getAssets(); + InputStream inputStream = null; + try { + inputStream = assetManager.open(fileName); + } catch (IOException e) { + Log.e("Test", "Cannot load probability results from assets"); + } + Scanner scanner = new Scanner(inputStream); + List result = new ArrayList<>(); + while (scanner.hasNext()) { + String category = scanner.next(); + category = category.replace('_', ' '); + if (!scanner.hasNextFloat()) { + break; + } + float probability = scanner.nextFloat(); + Recognition recognition = new Recognition(null, category, probability, null); +
result.add(recognition); + } + return result; + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/AndroidManifest.xml b/live2diff/MiDaS/mobile/android/app/src/main/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..7a414d5176a117262dce56c2220e6b71791287de --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/AndroidManifest.xml @@ -0,0 +1,28 @@ + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraActivity.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraActivity.java new file mode 100644 index 0000000000000000000000000000000000000000..d1eb26c862c04bf573ecc4eb127e7460f0b100fc --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraActivity.java @@ -0,0 +1,717 @@ +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.tensorflow.lite.examples.classification; + +import android.Manifest; +import android.app.Fragment; +import android.content.Context; +import android.content.pm.PackageManager; +import android.graphics.Bitmap; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Paint; +import android.graphics.RectF; +import android.hardware.Camera; +import android.hardware.camera2.CameraAccessException; +import android.hardware.camera2.CameraCharacteristics; +import android.hardware.camera2.CameraManager; +import android.hardware.camera2.params.StreamConfigurationMap; +import android.media.Image; +import android.media.Image.Plane; +import android.media.ImageReader; +import android.media.ImageReader.OnImageAvailableListener; +import android.os.Build; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.os.Trace; +import androidx.annotation.NonNull; +import androidx.annotation.UiThread; +import androidx.appcompat.app.AppCompatActivity; +import android.util.Size; +import android.view.Surface; +import android.view.TextureView; +import android.view.View; +import android.view.ViewTreeObserver; +import android.view.WindowManager; +import android.widget.AdapterView; +import android.widget.ImageView; +import android.widget.LinearLayout; +import android.widget.Spinner; +import android.widget.TextView; +import android.widget.Toast; +import com.google.android.material.bottomsheet.BottomSheetBehavior; +import java.nio.ByteBuffer; +import java.util.List; +import org.tensorflow.lite.examples.classification.env.ImageUtils; +import org.tensorflow.lite.examples.classification.env.Logger; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Model; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; + +public abstract class CameraActivity extends AppCompatActivity + 
implements OnImageAvailableListener, + Camera.PreviewCallback, + View.OnClickListener, + AdapterView.OnItemSelectedListener { + private static final Logger LOGGER = new Logger(); + + private static final int PERMISSIONS_REQUEST = 1; + + private static final String PERMISSION_CAMERA = Manifest.permission.CAMERA; + protected int previewWidth = 0; + protected int previewHeight = 0; + private Handler handler; + private HandlerThread handlerThread; + private boolean useCamera2API; + private boolean isProcessingFrame = false; + private byte[][] yuvBytes = new byte[3][]; + private int[] rgbBytes = null; + private int yRowStride; + private Runnable postInferenceCallback; + private Runnable imageConverter; + private LinearLayout bottomSheetLayout; + private LinearLayout gestureLayout; + private BottomSheetBehavior sheetBehavior; + protected TextView recognitionTextView, + recognition1TextView, + recognition2TextView, + recognitionValueTextView, + recognition1ValueTextView, + recognition2ValueTextView; + protected TextView frameValueTextView, + cropValueTextView, + cameraResolutionTextView, + rotationTextView, + inferenceTimeTextView; + protected ImageView bottomSheetArrowImageView; + private ImageView plusImageView, minusImageView; + private Spinner modelSpinner; + private Spinner deviceSpinner; + private TextView threadsTextView; + + //private Model model = Model.QUANTIZED_EFFICIENTNET; + //private Device device = Device.CPU; + private Model model = Model.FLOAT_EFFICIENTNET; + private Device device = Device.GPU; + private int numThreads = -1; + + @Override + protected void onCreate(final Bundle savedInstanceState) { + LOGGER.d("onCreate " + this); + super.onCreate(null); + getWindow().addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON); + + setContentView(R.layout.tfe_ic_activity_camera); + + if (hasPermission()) { + setFragment(); + } else { + requestPermission(); + } + + threadsTextView = findViewById(R.id.threads); + plusImageView = findViewById(R.id.plus); + 
minusImageView = findViewById(R.id.minus); + modelSpinner = findViewById(R.id.model_spinner); + deviceSpinner = findViewById(R.id.device_spinner); + bottomSheetLayout = findViewById(R.id.bottom_sheet_layout); + gestureLayout = findViewById(R.id.gesture_layout); + sheetBehavior = BottomSheetBehavior.from(bottomSheetLayout); + bottomSheetArrowImageView = findViewById(R.id.bottom_sheet_arrow); + + ViewTreeObserver vto = gestureLayout.getViewTreeObserver(); + vto.addOnGlobalLayoutListener( + new ViewTreeObserver.OnGlobalLayoutListener() { + @Override + public void onGlobalLayout() { + if (Build.VERSION.SDK_INT < Build.VERSION_CODES.JELLY_BEAN) { + gestureLayout.getViewTreeObserver().removeGlobalOnLayoutListener(this); + } else { + gestureLayout.getViewTreeObserver().removeOnGlobalLayoutListener(this); + } + // int width = bottomSheetLayout.getMeasuredWidth(); + int height = gestureLayout.getMeasuredHeight(); + + sheetBehavior.setPeekHeight(height); + } + }); + sheetBehavior.setHideable(false); + + sheetBehavior.setBottomSheetCallback( + new BottomSheetBehavior.BottomSheetCallback() { + @Override + public void onStateChanged(@NonNull View bottomSheet, int newState) { + switch (newState) { + case BottomSheetBehavior.STATE_HIDDEN: + break; + case BottomSheetBehavior.STATE_EXPANDED: + { + bottomSheetArrowImageView.setImageResource(R.drawable.icn_chevron_down); + } + break; + case BottomSheetBehavior.STATE_COLLAPSED: + { + bottomSheetArrowImageView.setImageResource(R.drawable.icn_chevron_up); + } + break; + case BottomSheetBehavior.STATE_DRAGGING: + break; + case BottomSheetBehavior.STATE_SETTLING: + bottomSheetArrowImageView.setImageResource(R.drawable.icn_chevron_up); + break; + } + } + + @Override + public void onSlide(@NonNull View bottomSheet, float slideOffset) {} + }); + + recognitionTextView = findViewById(R.id.detected_item); + recognitionValueTextView = findViewById(R.id.detected_item_value); + recognition1TextView = findViewById(R.id.detected_item1); + 
recognition1ValueTextView = findViewById(R.id.detected_item1_value); + recognition2TextView = findViewById(R.id.detected_item2); + recognition2ValueTextView = findViewById(R.id.detected_item2_value); + + frameValueTextView = findViewById(R.id.frame_info); + cropValueTextView = findViewById(R.id.crop_info); + cameraResolutionTextView = findViewById(R.id.view_info); + rotationTextView = findViewById(R.id.rotation_info); + inferenceTimeTextView = findViewById(R.id.inference_info); + + modelSpinner.setOnItemSelectedListener(this); + deviceSpinner.setOnItemSelectedListener(this); + + plusImageView.setOnClickListener(this); + minusImageView.setOnClickListener(this); + + model = Model.valueOf(modelSpinner.getSelectedItem().toString().toUpperCase()); + device = Device.valueOf(deviceSpinner.getSelectedItem().toString()); + numThreads = Integer.parseInt(threadsTextView.getText().toString().trim()); + } + + protected int[] getRgbBytes() { + imageConverter.run(); + return rgbBytes; + } + + protected int getLuminanceStride() { + return yRowStride; + } + + protected byte[] getLuminance() { + return yuvBytes[0]; + } + + /** Callback for android.hardware.Camera API */ + @Override + public void onPreviewFrame(final byte[] bytes, final Camera camera) { + if (isProcessingFrame) { + LOGGER.w("Dropping frame!"); + return; + } + + try { + // Initialize the storage bitmaps once when the resolution is known. 
+ if (rgbBytes == null) { + Camera.Size previewSize = camera.getParameters().getPreviewSize(); + previewHeight = previewSize.height; + previewWidth = previewSize.width; + rgbBytes = new int[previewWidth * previewHeight]; + onPreviewSizeChosen(new Size(previewSize.width, previewSize.height), 90); + } + } catch (final Exception e) { + LOGGER.e(e, "Exception!"); + return; + } + + isProcessingFrame = true; + yuvBytes[0] = bytes; + yRowStride = previewWidth; + + imageConverter = + new Runnable() { + @Override + public void run() { + ImageUtils.convertYUV420SPToARGB8888(bytes, previewWidth, previewHeight, rgbBytes); + } + }; + + postInferenceCallback = + new Runnable() { + @Override + public void run() { + camera.addCallbackBuffer(bytes); + isProcessingFrame = false; + } + }; + processImage(); + } + + /** Callback for Camera2 API */ + @Override + public void onImageAvailable(final ImageReader reader) { + // We need wait until we have some size from onPreviewSizeChosen + if (previewWidth == 0 || previewHeight == 0) { + return; + } + if (rgbBytes == null) { + rgbBytes = new int[previewWidth * previewHeight]; + } + try { + final Image image = reader.acquireLatestImage(); + + if (image == null) { + return; + } + + if (isProcessingFrame) { + image.close(); + return; + } + isProcessingFrame = true; + Trace.beginSection("imageAvailable"); + final Plane[] planes = image.getPlanes(); + fillBytes(planes, yuvBytes); + yRowStride = planes[0].getRowStride(); + final int uvRowStride = planes[1].getRowStride(); + final int uvPixelStride = planes[1].getPixelStride(); + + imageConverter = + new Runnable() { + @Override + public void run() { + ImageUtils.convertYUV420ToARGB8888( + yuvBytes[0], + yuvBytes[1], + yuvBytes[2], + previewWidth, + previewHeight, + yRowStride, + uvRowStride, + uvPixelStride, + rgbBytes); + } + }; + + postInferenceCallback = + new Runnable() { + @Override + public void run() { + image.close(); + isProcessingFrame = false; + } + }; + + processImage(); + } catch 
(final Exception e) { + LOGGER.e(e, "Exception!"); + Trace.endSection(); + return; + } + Trace.endSection(); + } + + @Override + public synchronized void onStart() { + LOGGER.d("onStart " + this); + super.onStart(); + } + + @Override + public synchronized void onResume() { + LOGGER.d("onResume " + this); + super.onResume(); + + handlerThread = new HandlerThread("inference"); + handlerThread.start(); + handler = new Handler(handlerThread.getLooper()); + } + + @Override + public synchronized void onPause() { + LOGGER.d("onPause " + this); + + handlerThread.quitSafely(); + try { + handlerThread.join(); + handlerThread = null; + handler = null; + } catch (final InterruptedException e) { + LOGGER.e(e, "Exception!"); + } + + super.onPause(); + } + + @Override + public synchronized void onStop() { + LOGGER.d("onStop " + this); + super.onStop(); + } + + @Override + public synchronized void onDestroy() { + LOGGER.d("onDestroy " + this); + super.onDestroy(); + } + + protected synchronized void runInBackground(final Runnable r) { + if (handler != null) { + handler.post(r); + } + } + + @Override + public void onRequestPermissionsResult( + final int requestCode, final String[] permissions, final int[] grantResults) { + super.onRequestPermissionsResult(requestCode, permissions, grantResults); + if (requestCode == PERMISSIONS_REQUEST) { + if (allPermissionsGranted(grantResults)) { + setFragment(); + } else { + requestPermission(); + } + } + } + + private static boolean allPermissionsGranted(final int[] grantResults) { + for (int result : grantResults) { + if (result != PackageManager.PERMISSION_GRANTED) { + return false; + } + } + return true; + } + + private boolean hasPermission() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) { + return checkSelfPermission(PERMISSION_CAMERA) == PackageManager.PERMISSION_GRANTED; + } else { + return true; + } + } + + private void requestPermission() { + if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.M) { + if 
(shouldShowRequestPermissionRationale(PERMISSION_CAMERA)) { + Toast.makeText( + CameraActivity.this, + "Camera permission is required for this demo", + Toast.LENGTH_LONG) + .show(); + } + requestPermissions(new String[] {PERMISSION_CAMERA}, PERMISSIONS_REQUEST); + } + } + + // Returns true if the device supports the required hardware level, or better. + private boolean isHardwareLevelSupported( + CameraCharacteristics characteristics, int requiredLevel) { + int deviceLevel = characteristics.get(CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL); + if (deviceLevel == CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_LEGACY) { + return requiredLevel == deviceLevel; + } + // deviceLevel is not LEGACY, can use numerical sort + return requiredLevel <= deviceLevel; + } + + private String chooseCamera() { + final CameraManager manager = (CameraManager) getSystemService(Context.CAMERA_SERVICE); + try { + for (final String cameraId : manager.getCameraIdList()) { + final CameraCharacteristics characteristics = manager.getCameraCharacteristics(cameraId); + + // We don't use a front facing camera in this sample. + final Integer facing = characteristics.get(CameraCharacteristics.LENS_FACING); + if (facing != null && facing == CameraCharacteristics.LENS_FACING_FRONT) { + continue; + } + + final StreamConfigurationMap map = + characteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP); + + if (map == null) { + continue; + } + + // Fallback to camera1 API for internal cameras that don't have full support. + // This should help with legacy situations where using the camera2 API causes + // distorted or otherwise broken previews. 
+ useCamera2API = + (facing == CameraCharacteristics.LENS_FACING_EXTERNAL) + || isHardwareLevelSupported( + characteristics, CameraCharacteristics.INFO_SUPPORTED_HARDWARE_LEVEL_FULL); + LOGGER.i("Camera API lv2?: %s", useCamera2API); + return cameraId; + } + } catch (CameraAccessException e) { + LOGGER.e(e, "Not allowed to access camera"); + } + + return null; + } + + protected void setFragment() { + String cameraId = chooseCamera(); + + Fragment fragment; + if (useCamera2API) { + CameraConnectionFragment camera2Fragment = + CameraConnectionFragment.newInstance( + new CameraConnectionFragment.ConnectionCallback() { + @Override + public void onPreviewSizeChosen(final Size size, final int rotation) { + previewHeight = size.getHeight(); + previewWidth = size.getWidth(); + CameraActivity.this.onPreviewSizeChosen(size, rotation); + } + }, + this, + getLayoutId(), + getDesiredPreviewFrameSize()); + + camera2Fragment.setCamera(cameraId); + fragment = camera2Fragment; + } else { + fragment = + new LegacyCameraConnectionFragment(this, getLayoutId(), getDesiredPreviewFrameSize()); + } + + getFragmentManager().beginTransaction().replace(R.id.container, fragment).commit(); + } + + protected void fillBytes(final Plane[] planes, final byte[][] yuvBytes) { + // Because of the variable row stride it's not possible to know in + // advance the actual necessary dimensions of the yuv planes. 
+ for (int i = 0; i < planes.length; ++i) { + final ByteBuffer buffer = planes[i].getBuffer(); + if (yuvBytes[i] == null) { + LOGGER.d("Initializing buffer %d at size %d", i, buffer.capacity()); + yuvBytes[i] = new byte[buffer.capacity()]; + } + buffer.get(yuvBytes[i]); + } + } + + protected void readyForNextImage() { + if (postInferenceCallback != null) { + postInferenceCallback.run(); + } + } + + protected int getScreenOrientation() { + switch (getWindowManager().getDefaultDisplay().getRotation()) { + case Surface.ROTATION_270: + return 270; + case Surface.ROTATION_180: + return 180; + case Surface.ROTATION_90: + return 90; + default: + return 0; + } + } + + @UiThread + protected void showResultsInTexture(float[] img_array, int imageSizeX, int imageSizeY) { + float maxval = Float.NEGATIVE_INFINITY; + float minval = Float.POSITIVE_INFINITY; + for (float cur : img_array) { + maxval = Math.max(maxval, cur); + minval = Math.min(minval, cur); + } + float multiplier = 0; + if ((maxval - minval) > 0) multiplier = 255 / (maxval - minval); + + int[] img_normalized = new int[img_array.length]; + for (int i = 0; i < img_array.length; ++i) { + float val = (float) (multiplier * (img_array[i] - minval)); + img_normalized[i] = (int) val; + } + + + + TextureView textureView = findViewById(R.id.textureView3); + //AutoFitTextureView textureView = (AutoFitTextureView) findViewById(R.id.texture); + + if(textureView.isAvailable()) { + int width = imageSizeX; + int height = imageSizeY; + + Canvas canvas = textureView.lockCanvas(); + canvas.drawColor(Color.BLUE); + Paint paint = new Paint(); + paint.setStyle(Paint.Style.FILL); + paint.setARGB(255, 150, 150, 150); + + int canvas_size = Math.min(canvas.getWidth(), canvas.getHeight()); + + Bitmap bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.RGB_565); + + for (int ii = 0; ii < width; ii++) //pass the screen pixels in 2 directions + { + for (int jj = 0; jj < height; jj++) { + //int val = img_normalized[ii + jj * width]; + int 
index = (width - ii - 1) + (height - jj - 1) * width; + if(index < img_array.length) { + int val = img_normalized[index]; + bitmap.setPixel(ii, jj, Color.rgb(val, val, val)); + } + } + } + + canvas.drawBitmap(bitmap, null, new RectF(0, 0, canvas_size, canvas_size), null); + + textureView.unlockCanvasAndPost(canvas); + + } + + } + + protected void showResultsInBottomSheet(List results) { + if (results != null && results.size() >= 3) { + Recognition recognition = results.get(0); + if (recognition != null) { + if (recognition.getTitle() != null) recognitionTextView.setText(recognition.getTitle()); + if (recognition.getConfidence() != null) + recognitionValueTextView.setText( + String.format("%.2f", (100 * recognition.getConfidence())) + "%"); + } + + Recognition recognition1 = results.get(1); + if (recognition1 != null) { + if (recognition1.getTitle() != null) recognition1TextView.setText(recognition1.getTitle()); + if (recognition1.getConfidence() != null) + recognition1ValueTextView.setText( + String.format("%.2f", (100 * recognition1.getConfidence())) + "%"); + } + + Recognition recognition2 = results.get(2); + if (recognition2 != null) { + if (recognition2.getTitle() != null) recognition2TextView.setText(recognition2.getTitle()); + if (recognition2.getConfidence() != null) + recognition2ValueTextView.setText( + String.format("%.2f", (100 * recognition2.getConfidence())) + "%"); + } + } + } + + protected void showFrameInfo(String frameInfo) { + frameValueTextView.setText(frameInfo); + } + + protected void showCropInfo(String cropInfo) { + cropValueTextView.setText(cropInfo); + } + + protected void showCameraResolution(String cameraInfo) { + cameraResolutionTextView.setText(cameraInfo); + } + + protected void showRotationInfo(String rotation) { + rotationTextView.setText(rotation); + } + + protected void showInference(String inferenceTime) { + inferenceTimeTextView.setText(inferenceTime); + } + + protected Model getModel() { + return model; + } + + private void 
setModel(Model model) { + if (this.model != model) { + LOGGER.d("Updating model: " + model); + this.model = model; + onInferenceConfigurationChanged(); + } + } + + protected Device getDevice() { + return device; + } + + private void setDevice(Device device) { + if (this.device != device) { + LOGGER.d("Updating device: " + device); + this.device = device; + final boolean threadsEnabled = device == Device.CPU; + plusImageView.setEnabled(threadsEnabled); + minusImageView.setEnabled(threadsEnabled); + threadsTextView.setText(threadsEnabled ? String.valueOf(numThreads) : "N/A"); + onInferenceConfigurationChanged(); + } + } + + protected int getNumThreads() { + return numThreads; + } + + private void setNumThreads(int numThreads) { + if (this.numThreads != numThreads) { + LOGGER.d("Updating numThreads: " + numThreads); + this.numThreads = numThreads; + onInferenceConfigurationChanged(); + } + } + + protected abstract void processImage(); + + protected abstract void onPreviewSizeChosen(final Size size, final int rotation); + + protected abstract int getLayoutId(); + + protected abstract Size getDesiredPreviewFrameSize(); + + protected abstract void onInferenceConfigurationChanged(); + + @Override + public void onClick(View v) { + if (v.getId() == R.id.plus) { + String threads = threadsTextView.getText().toString().trim(); + int numThreads = Integer.parseInt(threads); + if (numThreads >= 9) return; + setNumThreads(++numThreads); + threadsTextView.setText(String.valueOf(numThreads)); + } else if (v.getId() == R.id.minus) { + String threads = threadsTextView.getText().toString().trim(); + int numThreads = Integer.parseInt(threads); + if (numThreads == 1) { + return; + } + setNumThreads(--numThreads); + threadsTextView.setText(String.valueOf(numThreads)); + } + } + + @Override + public void onItemSelected(AdapterView parent, View view, int pos, long id) { + if (parent == modelSpinner) { + setModel(Model.valueOf(parent.getItemAtPosition(pos).toString().toUpperCase())); + } 
else if (parent == deviceSpinner) { + setDevice(Device.valueOf(parent.getItemAtPosition(pos).toString())); + } + } + + @Override + public void onNothingSelected(AdapterView parent) { + // Do nothing. + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraConnectionFragment.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraConnectionFragment.java new file mode 100644 index 0000000000000000000000000000000000000000..13e5c0dc341a86b1ddd66c4b562e0bf767641b42 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/CameraConnectionFragment.java @@ -0,0 +1,575 @@ +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.tensorflow.lite.examples.classification; + +import android.annotation.SuppressLint; +import android.app.Activity; +import android.app.AlertDialog; +import android.app.Dialog; +import android.app.DialogFragment; +import android.app.Fragment; +import android.content.Context; +import android.content.DialogInterface; +import android.content.res.Configuration; +import android.graphics.ImageFormat; +import android.graphics.Matrix; +import android.graphics.RectF; +import android.graphics.SurfaceTexture; +import android.hardware.camera2.CameraAccessException; +import android.hardware.camera2.CameraCaptureSession; +import android.hardware.camera2.CameraCharacteristics; +import android.hardware.camera2.CameraDevice; +import android.hardware.camera2.CameraManager; +import android.hardware.camera2.CaptureRequest; +import android.hardware.camera2.CaptureResult; +import android.hardware.camera2.TotalCaptureResult; +import android.hardware.camera2.params.StreamConfigurationMap; +import android.media.ImageReader; +import android.media.ImageReader.OnImageAvailableListener; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.text.TextUtils; +import android.util.Size; +import android.util.SparseIntArray; +import android.view.LayoutInflater; +import android.view.Surface; +import android.view.TextureView; +import android.view.View; +import android.view.ViewGroup; +import android.widget.Toast; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Comparator; +import java.util.List; +import java.util.concurrent.Semaphore; +import java.util.concurrent.TimeUnit; +import org.tensorflow.lite.examples.classification.customview.AutoFitTextureView; +import org.tensorflow.lite.examples.classification.env.Logger; + +/** + * Camera Connection Fragment that captures images from camera. + * + *

<p>Instantiated by newInstance.</p>

+ */ +@SuppressWarnings("FragmentNotInstantiable") +public class CameraConnectionFragment extends Fragment { + private static final Logger LOGGER = new Logger(); + + /** + * The camera preview size will be chosen to be the smallest frame by pixel size capable of + * containing a DESIRED_SIZE x DESIRED_SIZE square. + */ + private static final int MINIMUM_PREVIEW_SIZE = 320; + + /** Conversion from screen rotation to JPEG orientation. */ + private static final SparseIntArray ORIENTATIONS = new SparseIntArray(); + + private static final String FRAGMENT_DIALOG = "dialog"; + + static { + ORIENTATIONS.append(Surface.ROTATION_0, 90); + ORIENTATIONS.append(Surface.ROTATION_90, 0); + ORIENTATIONS.append(Surface.ROTATION_180, 270); + ORIENTATIONS.append(Surface.ROTATION_270, 180); + } + + /** A {@link Semaphore} to prevent the app from exiting before closing the camera. */ + private final Semaphore cameraOpenCloseLock = new Semaphore(1); + /** A {@link OnImageAvailableListener} to receive frames as they are available. */ + private final OnImageAvailableListener imageListener; + /** The input size in pixels desired by TensorFlow (width and height of a square bitmap). */ + private final Size inputSize; + /** The layout identifier to inflate for this Fragment. */ + private final int layout; + + private final ConnectionCallback cameraConnectionCallback; + private final CameraCaptureSession.CaptureCallback captureCallback = + new CameraCaptureSession.CaptureCallback() { + @Override + public void onCaptureProgressed( + final CameraCaptureSession session, + final CaptureRequest request, + final CaptureResult partialResult) {} + + @Override + public void onCaptureCompleted( + final CameraCaptureSession session, + final CaptureRequest request, + final TotalCaptureResult result) {} + }; + /** ID of the current {@link CameraDevice}. */ + private String cameraId; + /** An {@link AutoFitTextureView} for camera preview. 
*/ + private AutoFitTextureView textureView; + /** A {@link CameraCaptureSession } for camera preview. */ + private CameraCaptureSession captureSession; + /** A reference to the opened {@link CameraDevice}. */ + private CameraDevice cameraDevice; + /** The rotation in degrees of the camera sensor from the display. */ + private Integer sensorOrientation; + /** The {@link Size} of camera preview. */ + private Size previewSize; + /** An additional thread for running tasks that shouldn't block the UI. */ + private HandlerThread backgroundThread; + /** A {@link Handler} for running tasks in the background. */ + private Handler backgroundHandler; + /** + * {@link TextureView.SurfaceTextureListener} handles several lifecycle events on a {@link + * TextureView}. + */ + private final TextureView.SurfaceTextureListener surfaceTextureListener = + new TextureView.SurfaceTextureListener() { + @Override + public void onSurfaceTextureAvailable( + final SurfaceTexture texture, final int width, final int height) { + openCamera(width, height); + } + + @Override + public void onSurfaceTextureSizeChanged( + final SurfaceTexture texture, final int width, final int height) { + configureTransform(width, height); + } + + @Override + public boolean onSurfaceTextureDestroyed(final SurfaceTexture texture) { + return true; + } + + @Override + public void onSurfaceTextureUpdated(final SurfaceTexture texture) {} + }; + /** An {@link ImageReader} that handles preview frame capture. */ + private ImageReader previewReader; + /** {@link CaptureRequest.Builder} for the camera preview */ + private CaptureRequest.Builder previewRequestBuilder; + /** {@link CaptureRequest} generated by {@link #previewRequestBuilder} */ + private CaptureRequest previewRequest; + /** {@link CameraDevice.StateCallback} is called when {@link CameraDevice} changes its state. 
*/ + private final CameraDevice.StateCallback stateCallback = + new CameraDevice.StateCallback() { + @Override + public void onOpened(final CameraDevice cd) { + // This method is called when the camera is opened. We start camera preview here. + cameraOpenCloseLock.release(); + cameraDevice = cd; + createCameraPreviewSession(); + } + + @Override + public void onDisconnected(final CameraDevice cd) { + cameraOpenCloseLock.release(); + cd.close(); + cameraDevice = null; + } + + @Override + public void onError(final CameraDevice cd, final int error) { + cameraOpenCloseLock.release(); + cd.close(); + cameraDevice = null; + final Activity activity = getActivity(); + if (null != activity) { + activity.finish(); + } + } + }; + + @SuppressLint("ValidFragment") + private CameraConnectionFragment( + final ConnectionCallback connectionCallback, + final OnImageAvailableListener imageListener, + final int layout, + final Size inputSize) { + this.cameraConnectionCallback = connectionCallback; + this.imageListener = imageListener; + this.layout = layout; + this.inputSize = inputSize; + } + + /** + * Given {@code choices} of {@code Size}s supported by a camera, chooses the smallest one whose + * width and height are at least as large as the minimum of both, or an exact match if possible. 
+ * + * @param choices The list of sizes that the camera supports for the intended output class + * @param width The minimum desired width + * @param height The minimum desired height + * @return The optimal {@code Size}, or an arbitrary one if none were big enough + */ + protected static Size chooseOptimalSize(final Size[] choices, final int width, final int height) { + final int minSize = Math.max(Math.min(width, height), MINIMUM_PREVIEW_SIZE); + final Size desiredSize = new Size(width, height); + + // Collect the supported resolutions that are at least as big as the preview Surface + boolean exactSizeFound = false; + final List bigEnough = new ArrayList(); + final List tooSmall = new ArrayList(); + for (final Size option : choices) { + if (option.equals(desiredSize)) { + // Set the size but don't return yet so that remaining sizes will still be logged. + exactSizeFound = true; + } + + if (option.getHeight() >= minSize && option.getWidth() >= minSize) { + bigEnough.add(option); + } else { + tooSmall.add(option); + } + } + + LOGGER.i("Desired size: " + desiredSize + ", min size: " + minSize + "x" + minSize); + LOGGER.i("Valid preview sizes: [" + TextUtils.join(", ", bigEnough) + "]"); + LOGGER.i("Rejected preview sizes: [" + TextUtils.join(", ", tooSmall) + "]"); + + if (exactSizeFound) { + LOGGER.i("Exact size match found."); + return desiredSize; + } + + // Pick the smallest of those, assuming we found any + if (bigEnough.size() > 0) { + final Size chosenSize = Collections.min(bigEnough, new CompareSizesByArea()); + LOGGER.i("Chosen size: " + chosenSize.getWidth() + "x" + chosenSize.getHeight()); + return chosenSize; + } else { + LOGGER.e("Couldn't find any suitable preview size"); + return choices[0]; + } + } + + public static CameraConnectionFragment newInstance( + final ConnectionCallback callback, + final OnImageAvailableListener imageListener, + final int layout, + final Size inputSize) { + return new CameraConnectionFragment(callback, imageListener, 
layout, inputSize); + } + + /** + * Shows a {@link Toast} on the UI thread. + * + * @param text The message to show + */ + private void showToast(final String text) { + final Activity activity = getActivity(); + if (activity != null) { + activity.runOnUiThread( + new Runnable() { + @Override + public void run() { + Toast.makeText(activity, text, Toast.LENGTH_SHORT).show(); + } + }); + } + } + + @Override + public View onCreateView( + final LayoutInflater inflater, final ViewGroup container, final Bundle savedInstanceState) { + return inflater.inflate(layout, container, false); + } + + @Override + public void onViewCreated(final View view, final Bundle savedInstanceState) { + textureView = (AutoFitTextureView) view.findViewById(R.id.texture); + } + + @Override + public void onActivityCreated(final Bundle savedInstanceState) { + super.onActivityCreated(savedInstanceState); + } + + @Override + public void onResume() { + super.onResume(); + startBackgroundThread(); + + // When the screen is turned off and turned back on, the SurfaceTexture is already + // available, and "onSurfaceTextureAvailable" will not be called. In that case, we can open + // a camera and start preview from here (otherwise, we wait until the surface is ready in + // the SurfaceTextureListener). + if (textureView.isAvailable()) { + openCamera(textureView.getWidth(), textureView.getHeight()); + } else { + textureView.setSurfaceTextureListener(surfaceTextureListener); + } + } + + @Override + public void onPause() { + closeCamera(); + stopBackgroundThread(); + super.onPause(); + } + + public void setCamera(String cameraId) { + this.cameraId = cameraId; + } + + /** Sets up member variables related to camera. 
*/ + private void setUpCameraOutputs() { + final Activity activity = getActivity(); + final CameraManager manager = (CameraManager) activity.getSystemService(Context.CAMERA_SERVICE); + try { + final CameraCharacteristics characteristics = manager.getCameraCharacteristics(cameraId); + + final StreamConfigurationMap map = + characteristics.get(CameraCharacteristics.SCALER_STREAM_CONFIGURATION_MAP); + + sensorOrientation = characteristics.get(CameraCharacteristics.SENSOR_ORIENTATION); + + // Danger, W.R.! Attempting to use too large a preview size could exceed the camera + // bus' bandwidth limitation, resulting in gorgeous previews but the storage of + // garbage capture data. + previewSize = + chooseOptimalSize( + map.getOutputSizes(SurfaceTexture.class), + inputSize.getWidth(), + inputSize.getHeight()); + + // We fit the aspect ratio of TextureView to the size of preview we picked. + final int orientation = getResources().getConfiguration().orientation; + if (orientation == Configuration.ORIENTATION_LANDSCAPE) { + textureView.setAspectRatio(previewSize.getWidth(), previewSize.getHeight()); + } else { + textureView.setAspectRatio(previewSize.getHeight(), previewSize.getWidth()); + } + } catch (final CameraAccessException e) { + LOGGER.e(e, "Exception!"); + } catch (final NullPointerException e) { + // Currently an NPE is thrown when the Camera2API is used but not supported on the + // device this code runs. + ErrorDialog.newInstance(getString(R.string.tfe_ic_camera_error)) + .show(getChildFragmentManager(), FRAGMENT_DIALOG); + throw new IllegalStateException(getString(R.string.tfe_ic_camera_error)); + } + + cameraConnectionCallback.onPreviewSizeChosen(previewSize, sensorOrientation); + } + + /** Opens the camera specified by {@link CameraConnectionFragment#cameraId}. 
*/ + private void openCamera(final int width, final int height) { + setUpCameraOutputs(); + configureTransform(width, height); + final Activity activity = getActivity(); + final CameraManager manager = (CameraManager) activity.getSystemService(Context.CAMERA_SERVICE); + try { + if (!cameraOpenCloseLock.tryAcquire(2500, TimeUnit.MILLISECONDS)) { + throw new RuntimeException("Time out waiting to lock camera opening."); + } + manager.openCamera(cameraId, stateCallback, backgroundHandler); + } catch (final CameraAccessException e) { + LOGGER.e(e, "Exception!"); + } catch (final InterruptedException e) { + throw new RuntimeException("Interrupted while trying to lock camera opening.", e); + } + } + + /** Closes the current {@link CameraDevice}. */ + private void closeCamera() { + try { + cameraOpenCloseLock.acquire(); + if (null != captureSession) { + captureSession.close(); + captureSession = null; + } + if (null != cameraDevice) { + cameraDevice.close(); + cameraDevice = null; + } + if (null != previewReader) { + previewReader.close(); + previewReader = null; + } + } catch (final InterruptedException e) { + throw new RuntimeException("Interrupted while trying to lock camera closing.", e); + } finally { + cameraOpenCloseLock.release(); + } + } + + /** Starts a background thread and its {@link Handler}. */ + private void startBackgroundThread() { + backgroundThread = new HandlerThread("ImageListener"); + backgroundThread.start(); + backgroundHandler = new Handler(backgroundThread.getLooper()); + } + + /** Stops the background thread and its {@link Handler}. */ + private void stopBackgroundThread() { + backgroundThread.quitSafely(); + try { + backgroundThread.join(); + backgroundThread = null; + backgroundHandler = null; + } catch (final InterruptedException e) { + LOGGER.e(e, "Exception!"); + } + } + + /** Creates a new {@link CameraCaptureSession} for camera preview. 
*/ + private void createCameraPreviewSession() { + try { + final SurfaceTexture texture = textureView.getSurfaceTexture(); + assert texture != null; + + // We configure the size of default buffer to be the size of camera preview we want. + texture.setDefaultBufferSize(previewSize.getWidth(), previewSize.getHeight()); + + // This is the output Surface we need to start preview. + final Surface surface = new Surface(texture); + + // We set up a CaptureRequest.Builder with the output Surface. + previewRequestBuilder = cameraDevice.createCaptureRequest(CameraDevice.TEMPLATE_PREVIEW); + previewRequestBuilder.addTarget(surface); + + LOGGER.i("Opening camera preview: " + previewSize.getWidth() + "x" + previewSize.getHeight()); + + // Create the reader for the preview frames. + previewReader = + ImageReader.newInstance( + previewSize.getWidth(), previewSize.getHeight(), ImageFormat.YUV_420_888, 2); + + previewReader.setOnImageAvailableListener(imageListener, backgroundHandler); + previewRequestBuilder.addTarget(previewReader.getSurface()); + + // Here, we create a CameraCaptureSession for camera preview. + cameraDevice.createCaptureSession( + Arrays.asList(surface, previewReader.getSurface()), + new CameraCaptureSession.StateCallback() { + + @Override + public void onConfigured(final CameraCaptureSession cameraCaptureSession) { + // The camera is already closed + if (null == cameraDevice) { + return; + } + + // When the session is ready, we start displaying the preview. + captureSession = cameraCaptureSession; + try { + // Auto focus should be continuous for camera preview. + previewRequestBuilder.set( + CaptureRequest.CONTROL_AF_MODE, + CaptureRequest.CONTROL_AF_MODE_CONTINUOUS_PICTURE); + // Flash is automatically enabled when necessary. + previewRequestBuilder.set( + CaptureRequest.CONTROL_AE_MODE, CaptureRequest.CONTROL_AE_MODE_ON_AUTO_FLASH); + + // Finally, we start displaying the camera preview. 
+ previewRequest = previewRequestBuilder.build(); + captureSession.setRepeatingRequest( + previewRequest, captureCallback, backgroundHandler); + } catch (final CameraAccessException e) { + LOGGER.e(e, "Exception!"); + } + } + + @Override + public void onConfigureFailed(final CameraCaptureSession cameraCaptureSession) { + showToast("Failed"); + } + }, + null); + } catch (final CameraAccessException e) { + LOGGER.e(e, "Exception!"); + } + } + + /** + * Configures the necessary {@link Matrix} transformation to `mTextureView`. This method should be + * called after the camera preview size is determined in setUpCameraOutputs and also the size of + * `mTextureView` is fixed. + * + * @param viewWidth The width of `mTextureView` + * @param viewHeight The height of `mTextureView` + */ + private void configureTransform(final int viewWidth, final int viewHeight) { + final Activity activity = getActivity(); + if (null == textureView || null == previewSize || null == activity) { + return; + } + final int rotation = activity.getWindowManager().getDefaultDisplay().getRotation(); + final Matrix matrix = new Matrix(); + final RectF viewRect = new RectF(0, 0, viewWidth, viewHeight); + final RectF bufferRect = new RectF(0, 0, previewSize.getHeight(), previewSize.getWidth()); + final float centerX = viewRect.centerX(); + final float centerY = viewRect.centerY(); + if (Surface.ROTATION_90 == rotation || Surface.ROTATION_270 == rotation) { + bufferRect.offset(centerX - bufferRect.centerX(), centerY - bufferRect.centerY()); + matrix.setRectToRect(viewRect, bufferRect, Matrix.ScaleToFit.FILL); + final float scale = + Math.max( + (float) viewHeight / previewSize.getHeight(), + (float) viewWidth / previewSize.getWidth()); + matrix.postScale(scale, scale, centerX, centerY); + matrix.postRotate(90 * (rotation - 2), centerX, centerY); + } else if (Surface.ROTATION_180 == rotation) { + matrix.postRotate(180, centerX, centerY); + } + textureView.setTransform(matrix); + } + + /** + * Callback for 
Activities to use to initialize their data once the selected preview size is + * known. + */ + public interface ConnectionCallback { + void onPreviewSizeChosen(Size size, int cameraRotation); + } + + /** Compares two {@code Size}s based on their areas. */ + static class CompareSizesByArea implements Comparator { + @Override + public int compare(final Size lhs, final Size rhs) { + // We cast here to ensure the multiplications won't overflow + return Long.signum( + (long) lhs.getWidth() * lhs.getHeight() - (long) rhs.getWidth() * rhs.getHeight()); + } + } + + /** Shows an error message dialog. */ + public static class ErrorDialog extends DialogFragment { + private static final String ARG_MESSAGE = "message"; + + public static ErrorDialog newInstance(final String message) { + final ErrorDialog dialog = new ErrorDialog(); + final Bundle args = new Bundle(); + args.putString(ARG_MESSAGE, message); + dialog.setArguments(args); + return dialog; + } + + @Override + public Dialog onCreateDialog(final Bundle savedInstanceState) { + final Activity activity = getActivity(); + return new AlertDialog.Builder(activity) + .setMessage(getArguments().getString(ARG_MESSAGE)) + .setPositiveButton( + android.R.string.ok, + new DialogInterface.OnClickListener() { + @Override + public void onClick(final DialogInterface dialogInterface, final int i) { + activity.finish(); + } + }) + .create(); + } + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/ClassifierActivity.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/ClassifierActivity.java new file mode 100644 index 0000000000000000000000000000000000000000..24b5d72fdb42d47e5d2c87e3f944b71105748c1b --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/ClassifierActivity.java @@ -0,0 +1,238 @@ +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.tensorflow.lite.examples.classification; + +import android.graphics.Bitmap; +import android.graphics.Bitmap.Config; +import android.graphics.Typeface; +import android.media.ImageReader.OnImageAvailableListener; +import android.os.SystemClock; +import android.util.Size; +import android.util.TypedValue; +import android.view.TextureView; +import android.view.ViewStub; +import android.widget.TextView; +import android.widget.Toast; +import java.io.IOException; +import java.util.List; +import java.util.ArrayList; + +import org.tensorflow.lite.examples.classification.customview.AutoFitTextureView; +import org.tensorflow.lite.examples.classification.env.BorderedText; +import org.tensorflow.lite.examples.classification.env.Logger; +import org.tensorflow.lite.examples.classification.tflite.Classifier; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Model; + +import android.widget.ImageView; +import android.graphics.Bitmap; +import android.graphics.BitmapFactory; +import android.graphics.Canvas; +import android.graphics.Color; +import android.graphics.Paint; +import android.graphics.Rect; +import android.graphics.RectF; +import android.graphics.PixelFormat; +import java.nio.ByteBuffer; + +public class ClassifierActivity extends CameraActivity implements OnImageAvailableListener { + private static final 
Logger LOGGER = new Logger(); + private static final Size DESIRED_PREVIEW_SIZE = new Size(640, 480); + private static final float TEXT_SIZE_DIP = 10; + private Bitmap rgbFrameBitmap = null; + private long lastProcessingTimeMs; + private Integer sensorOrientation; + private Classifier classifier; + private BorderedText borderedText; + /** Input image size of the model along x axis. */ + private int imageSizeX; + /** Input image size of the model along y axis. */ + private int imageSizeY; + + @Override + protected int getLayoutId() { + return R.layout.tfe_ic_camera_connection_fragment; + } + + @Override + protected Size getDesiredPreviewFrameSize() { + return DESIRED_PREVIEW_SIZE; + } + + @Override + public void onPreviewSizeChosen(final Size size, final int rotation) { + final float textSizePx = + TypedValue.applyDimension( + TypedValue.COMPLEX_UNIT_DIP, TEXT_SIZE_DIP, getResources().getDisplayMetrics()); + borderedText = new BorderedText(textSizePx); + borderedText.setTypeface(Typeface.MONOSPACE); + + recreateClassifier(getModel(), getDevice(), getNumThreads()); + if (classifier == null) { + LOGGER.e("No classifier on preview!"); + return; + } + + previewWidth = size.getWidth(); + previewHeight = size.getHeight(); + + sensorOrientation = rotation - getScreenOrientation(); + LOGGER.i("Camera orientation relative to screen canvas: %d", sensorOrientation); + + LOGGER.i("Initializing at size %dx%d", previewWidth, previewHeight); + rgbFrameBitmap = Bitmap.createBitmap(previewWidth, previewHeight, Config.ARGB_8888); + } + + @Override + protected void processImage() { + rgbFrameBitmap.setPixels(getRgbBytes(), 0, previewWidth, 0, 0, previewWidth, previewHeight); + final int cropSize = Math.min(previewWidth, previewHeight); + + runInBackground( + new Runnable() { + @Override + public void run() { + if (classifier != null) { + final long startTime = SystemClock.uptimeMillis(); + //final List results = + // classifier.recognizeImage(rgbFrameBitmap, sensorOrientation); + final 
List results = new ArrayList<>(); + + float[] img_array = classifier.recognizeImage(rgbFrameBitmap, sensorOrientation); + + + /* + float maxval = Float.NEGATIVE_INFINITY; + float minval = Float.POSITIVE_INFINITY; + for (float cur : img_array) { + maxval = Math.max(maxval, cur); + minval = Math.min(minval, cur); + } + float multiplier = 0; + if ((maxval - minval) > 0) multiplier = 255 / (maxval - minval); + + int[] img_normalized = new int[img_array.length]; + for (int i = 0; i < img_array.length; ++i) { + float val = (float) (multiplier * (img_array[i] - minval)); + img_normalized[i] = (int) val; + } + + + + TextureView textureView = findViewById(R.id.textureView3); + //AutoFitTextureView textureView = (AutoFitTextureView) findViewById(R.id.texture); + + if(textureView.isAvailable()) { + int width = imageSizeX; + int height = imageSizeY; + + Canvas canvas = textureView.lockCanvas(); + canvas.drawColor(Color.BLUE); + Paint paint = new Paint(); + paint.setStyle(Paint.Style.FILL); + paint.setARGB(255, 150, 150, 150); + + int canvas_size = Math.min(canvas.getWidth(), canvas.getHeight()); + + Bitmap bitmap = Bitmap.createBitmap(width, height, Bitmap.Config.RGB_565); + + for (int ii = 0; ii < width; ii++) //pass the screen pixels in 2 directions + { + for (int jj = 0; jj < height; jj++) { + //int val = img_normalized[ii + jj * width]; + int index = (width - ii - 1) + (height - jj - 1) * width; + if(index < img_array.length) { + int val = img_normalized[index]; + bitmap.setPixel(ii, jj, Color.rgb(val, val, val)); + } + } + } + + canvas.drawBitmap(bitmap, null, new RectF(0, 0, canvas_size, canvas_size), null); + + textureView.unlockCanvasAndPost(canvas); + + } + */ + + lastProcessingTimeMs = SystemClock.uptimeMillis() - startTime; + LOGGER.v("Detect: %s", results); + + runOnUiThread( + new Runnable() { + @Override + public void run() { + //showResultsInBottomSheet(results); + showResultsInTexture(img_array, imageSizeX, imageSizeY); + showFrameInfo(previewWidth + "x" + 
previewHeight); + showCropInfo(imageSizeX + "x" + imageSizeY); + showCameraResolution(cropSize + "x" + cropSize); + showRotationInfo(String.valueOf(sensorOrientation)); + showInference(lastProcessingTimeMs + "ms"); + } + }); + } + readyForNextImage(); + } + }); + } + + @Override + protected void onInferenceConfigurationChanged() { + if (rgbFrameBitmap == null) { + // Defer creation until we're getting camera frames. + return; + } + final Device device = getDevice(); + final Model model = getModel(); + final int numThreads = getNumThreads(); + runInBackground(() -> recreateClassifier(model, device, numThreads)); + } + + private void recreateClassifier(Model model, Device device, int numThreads) { + if (classifier != null) { + LOGGER.d("Closing classifier."); + classifier.close(); + classifier = null; + } + if (device == Device.GPU + && (model == Model.QUANTIZED_MOBILENET || model == Model.QUANTIZED_EFFICIENTNET)) { + LOGGER.d("Not creating classifier: GPU doesn't support quantized models."); + runOnUiThread( + () -> { + Toast.makeText(this, R.string.tfe_ic_gpu_quant_error, Toast.LENGTH_LONG).show(); + }); + return; + } + try { + LOGGER.d( + "Creating classifier (model=%s, device=%s, numThreads=%d)", model, device, numThreads); + classifier = Classifier.create(this, model, device, numThreads); + } catch (IOException | IllegalArgumentException e) { + LOGGER.e(e, "Failed to create classifier."); + runOnUiThread( + () -> { + Toast.makeText(this, e.getMessage(), Toast.LENGTH_LONG).show(); + }); + return; + } + + // Updates the input image size. 
+ imageSizeX = classifier.getImageSizeX(); + imageSizeY = classifier.getImageSizeY(); + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/LegacyCameraConnectionFragment.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/LegacyCameraConnectionFragment.java new file mode 100644 index 0000000000000000000000000000000000000000..760fe90375450c7b1356603c83fb37a68548ca13 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/LegacyCameraConnectionFragment.java @@ -0,0 +1,203 @@ +package org.tensorflow.lite.examples.classification; + +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import android.annotation.SuppressLint; +import android.app.Fragment; +import android.graphics.SurfaceTexture; +import android.hardware.Camera; +import android.hardware.Camera.CameraInfo; +import android.os.Bundle; +import android.os.Handler; +import android.os.HandlerThread; +import android.util.Size; +import android.util.SparseIntArray; +import android.view.LayoutInflater; +import android.view.Surface; +import android.view.TextureView; +import android.view.View; +import android.view.ViewGroup; +import java.io.IOException; +import java.util.List; +import org.tensorflow.lite.examples.classification.customview.AutoFitTextureView; +import org.tensorflow.lite.examples.classification.env.ImageUtils; +import org.tensorflow.lite.examples.classification.env.Logger; + +public class LegacyCameraConnectionFragment extends Fragment { + private static final Logger LOGGER = new Logger(); + /** Conversion from screen rotation to JPEG orientation. */ + private static final SparseIntArray ORIENTATIONS = new SparseIntArray(); + + static { + ORIENTATIONS.append(Surface.ROTATION_0, 90); + ORIENTATIONS.append(Surface.ROTATION_90, 0); + ORIENTATIONS.append(Surface.ROTATION_180, 270); + ORIENTATIONS.append(Surface.ROTATION_270, 180); + } + + private Camera camera; + private Camera.PreviewCallback imageListener; + private Size desiredSize; + /** The layout identifier to inflate for this Fragment. */ + private int layout; + /** An {@link AutoFitTextureView} for camera preview. */ + private AutoFitTextureView textureView; + /** + * {@link TextureView.SurfaceTextureListener} handles several lifecycle events on a {@link + * TextureView}. 
+ */ + private final TextureView.SurfaceTextureListener surfaceTextureListener = + new TextureView.SurfaceTextureListener() { + @Override + public void onSurfaceTextureAvailable( + final SurfaceTexture texture, final int width, final int height) { + + int index = getCameraId(); + camera = Camera.open(index); + + try { + Camera.Parameters parameters = camera.getParameters(); + List focusModes = parameters.getSupportedFocusModes(); + if (focusModes != null + && focusModes.contains(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE)) { + parameters.setFocusMode(Camera.Parameters.FOCUS_MODE_CONTINUOUS_PICTURE); + } + List cameraSizes = parameters.getSupportedPreviewSizes(); + Size[] sizes = new Size[cameraSizes.size()]; + int i = 0; + for (Camera.Size size : cameraSizes) { + sizes[i++] = new Size(size.width, size.height); + } + Size previewSize = + CameraConnectionFragment.chooseOptimalSize( + sizes, desiredSize.getWidth(), desiredSize.getHeight()); + parameters.setPreviewSize(previewSize.getWidth(), previewSize.getHeight()); + camera.setDisplayOrientation(90); + camera.setParameters(parameters); + camera.setPreviewTexture(texture); + } catch (IOException exception) { + camera.release(); + } + + camera.setPreviewCallbackWithBuffer(imageListener); + Camera.Size s = camera.getParameters().getPreviewSize(); + camera.addCallbackBuffer(new byte[ImageUtils.getYUVByteSize(s.height, s.width)]); + + textureView.setAspectRatio(s.height, s.width); + + camera.startPreview(); + } + + @Override + public void onSurfaceTextureSizeChanged( + final SurfaceTexture texture, final int width, final int height) {} + + @Override + public boolean onSurfaceTextureDestroyed(final SurfaceTexture texture) { + return true; + } + + @Override + public void onSurfaceTextureUpdated(final SurfaceTexture texture) {} + }; + /** An additional thread for running tasks that shouldn't block the UI. 
*/ + private HandlerThread backgroundThread; + + @SuppressLint("ValidFragment") + public LegacyCameraConnectionFragment( + final Camera.PreviewCallback imageListener, final int layout, final Size desiredSize) { + this.imageListener = imageListener; + this.layout = layout; + this.desiredSize = desiredSize; + } + + @Override + public View onCreateView( + final LayoutInflater inflater, final ViewGroup container, final Bundle savedInstanceState) { + return inflater.inflate(layout, container, false); + } + + @Override + public void onViewCreated(final View view, final Bundle savedInstanceState) { + textureView = (AutoFitTextureView) view.findViewById(R.id.texture); + } + + @Override + public void onActivityCreated(final Bundle savedInstanceState) { + super.onActivityCreated(savedInstanceState); + } + + @Override + public void onResume() { + super.onResume(); + startBackgroundThread(); + // When the screen is turned off and turned back on, the SurfaceTexture is already + // available, and "onSurfaceTextureAvailable" will not be called. In that case, we can open + // a camera and start preview from here (otherwise, we wait until the surface is ready in + // the SurfaceTextureListener). + + if (textureView.isAvailable()) { + if (camera != null) { + camera.startPreview(); + } + } else { + textureView.setSurfaceTextureListener(surfaceTextureListener); + } + } + + @Override + public void onPause() { + stopCamera(); + stopBackgroundThread(); + super.onPause(); + } + + /** Starts a background thread and its {@link Handler}. */ + private void startBackgroundThread() { + backgroundThread = new HandlerThread("CameraBackground"); + backgroundThread.start(); + } + + /** Stops the background thread and its {@link Handler}. 
*/ + private void stopBackgroundThread() { + backgroundThread.quitSafely(); + try { + backgroundThread.join(); + backgroundThread = null; + } catch (final InterruptedException e) { + LOGGER.e(e, "Exception!"); + } + } + + protected void stopCamera() { + if (camera != null) { + camera.stopPreview(); + camera.setPreviewCallback(null); + camera.release(); + camera = null; + } + } + + private int getCameraId() { + CameraInfo ci = new CameraInfo(); + for (int i = 0; i < Camera.getNumberOfCameras(); i++) { + Camera.getCameraInfo(i, ci); + if (ci.facing == CameraInfo.CAMERA_FACING_BACK) return i; + } + return -1; // No camera found + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/AutoFitTextureView.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/AutoFitTextureView.java new file mode 100644 index 0000000000000000000000000000000000000000..62e99ae70c2a7c4c60a776e7490742c5339e85f3 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/AutoFitTextureView.java @@ -0,0 +1,72 @@ +/* + * Copyright 2019 The TensorFlow Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.tensorflow.lite.examples.classification.customview; + +import android.content.Context; +import android.util.AttributeSet; +import android.view.TextureView; + +/** A {@link TextureView} that can be adjusted to a specified aspect ratio. */ +public class AutoFitTextureView extends TextureView { + private int ratioWidth = 0; + private int ratioHeight = 0; + + public AutoFitTextureView(final Context context) { + this(context, null); + } + + public AutoFitTextureView(final Context context, final AttributeSet attrs) { + this(context, attrs, 0); + } + + public AutoFitTextureView(final Context context, final AttributeSet attrs, final int defStyle) { + super(context, attrs, defStyle); + } + + /** + * Sets the aspect ratio for this view. The size of the view will be measured based on the ratio + * calculated from the parameters. Note that the actual sizes of parameters don't matter, that is, + * calling setAspectRatio(2, 3) and setAspectRatio(4, 6) make the same result. + * + * @param width Relative horizontal size + * @param height Relative vertical size + */ + public void setAspectRatio(final int width, final int height) { + if (width < 0 || height < 0) { + throw new IllegalArgumentException("Size cannot be negative."); + } + ratioWidth = width; + ratioHeight = height; + requestLayout(); + } + + @Override + protected void onMeasure(final int widthMeasureSpec, final int heightMeasureSpec) { + super.onMeasure(widthMeasureSpec, heightMeasureSpec); + final int width = MeasureSpec.getSize(widthMeasureSpec); + final int height = MeasureSpec.getSize(heightMeasureSpec); + if (0 == ratioWidth || 0 == ratioHeight) { + setMeasuredDimension(width, height); + } else { + if (width < height * ratioWidth / ratioHeight) { + setMeasuredDimension(width, width * ratioHeight / ratioWidth); + } else { + setMeasuredDimension(height * ratioWidth / ratioHeight, height); + } + } + } +} diff --git 
a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/OverlayView.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/OverlayView.java new file mode 100644 index 0000000000000000000000000000000000000000..dc302ac04f145c9a1673a2d7e630a94a05ab1b1a --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/OverlayView.java @@ -0,0 +1,48 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.customview; + +import android.content.Context; +import android.graphics.Canvas; +import android.util.AttributeSet; +import android.view.View; +import java.util.LinkedList; +import java.util.List; + +/** A simple View providing a render callback to other classes. 
*/ +public class OverlayView extends View { + private final List callbacks = new LinkedList(); + + public OverlayView(final Context context, final AttributeSet attrs) { + super(context, attrs); + } + + public void addCallback(final DrawCallback callback) { + callbacks.add(callback); + } + + @Override + public synchronized void draw(final Canvas canvas) { + for (final DrawCallback callback : callbacks) { + callback.drawCallback(canvas); + } + } + + /** Interface defining the callback for client classes. */ + public interface DrawCallback { + public void drawCallback(final Canvas canvas); + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/RecognitionScoreView.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/RecognitionScoreView.java new file mode 100644 index 0000000000000000000000000000000000000000..2c57f603f12200079c888793cfa40d9b10dabde3 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/RecognitionScoreView.java @@ -0,0 +1,67 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.customview; + +import android.content.Context; +import android.graphics.Canvas; +import android.graphics.Paint; +import android.util.AttributeSet; +import android.util.TypedValue; +import android.view.View; +import java.util.List; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; + +public class RecognitionScoreView extends View implements ResultsView { + private static final float TEXT_SIZE_DIP = 16; + private final float textSizePx; + private final Paint fgPaint; + private final Paint bgPaint; + private List results; + + public RecognitionScoreView(final Context context, final AttributeSet set) { + super(context, set); + + textSizePx = + TypedValue.applyDimension( + TypedValue.COMPLEX_UNIT_DIP, TEXT_SIZE_DIP, getResources().getDisplayMetrics()); + fgPaint = new Paint(); + fgPaint.setTextSize(textSizePx); + + bgPaint = new Paint(); + bgPaint.setColor(0xcc4285f4); + } + + @Override + public void setResults(final List results) { + this.results = results; + postInvalidate(); + } + + @Override + public void onDraw(final Canvas canvas) { + final int x = 10; + int y = (int) (fgPaint.getTextSize() * 1.5f); + + canvas.drawPaint(bgPaint); + + if (results != null) { + for (final Recognition recog : results) { + canvas.drawText(recog.getTitle() + ": " + recog.getConfidence(), x, y, fgPaint); + y += (int) (fgPaint.getTextSize() * 1.5f); + } + } + } +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/ResultsView.java b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/ResultsView.java new file mode 100644 index 0000000000000000000000000000000000000000..d055eb5f161a57fc439716efe6d49b7e45ef3fc7 --- /dev/null +++ 
b/live2diff/MiDaS/mobile/android/app/src/main/java/org/tensorflow/lite/examples/classification/customview/ResultsView.java @@ -0,0 +1,23 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.customview; + +import java.util.List; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Recognition; + +public interface ResultsView { + public void setResults(final List results); +} diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml new file mode 100644 index 0000000000000000000000000000000000000000..b1517edf496ef5800b97d046b92012a9f94a34d0 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable-v24/ic_launcher_foreground.xml @@ -0,0 +1,34 @@ + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/bottom_sheet_bg.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/bottom_sheet_bg.xml new file mode 100644 index 0000000000000000000000000000000000000000..70f4b24e35039e6bfc35989bcbe570a4bdc2ae07 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/bottom_sheet_bg.xml @@ -0,0 +1,9 @@ + + + + + + \ No newline at end of file diff --git 
a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_add.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_add.xml new file mode 100644 index 0000000000000000000000000000000000000000..757f4503314fb9e5837f68ac515f4487d9b5fc2c --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_add.xml @@ -0,0 +1,9 @@ + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_remove.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_remove.xml new file mode 100644 index 0000000000000000000000000000000000000000..a64b853e79137f0fd95f9d5fa6e0552cc255c7ae --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_baseline_remove.xml @@ -0,0 +1,9 @@ + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_launcher_background.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_launcher_background.xml new file mode 100644 index 0000000000000000000000000000000000000000..d5fccc538c179838bfdce779c26eebb4fa0b5ce9 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/ic_launcher_background.xml @@ -0,0 +1,170 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/rectangle.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/rectangle.xml new file mode 100644 index 0000000000000000000000000000000000000000..b8f5d3559c4e83072d5d73a3241d240aa68daccf --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/drawable/rectangle.xml @@ -0,0 +1,13 @@ + + + + + + \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_activity_camera.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_activity_camera.xml new file mode 100644 index 0000000000000000000000000000000000000000..f0e1dae7afa15cf4a832de708f345482a6dfeff6 --- /dev/null 
+++ b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_activity_camera.xml @@ -0,0 +1,56 @@ + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_camera_connection_fragment.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_camera_connection_fragment.xml new file mode 100644 index 0000000000000000000000000000000000000000..97e5e7c6df25da48977f9064a888fd3735e4986f --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_camera_connection_fragment.xml @@ -0,0 +1,32 @@ + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_layout_bottom_sheet.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_layout_bottom_sheet.xml new file mode 100644 index 0000000000000000000000000000000000000000..77a348af90e2ed995ff106cd209cbf304c6b9153 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/layout/tfe_ic_layout_bottom_sheet.xml @@ -0,0 +1,321 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml new file mode 100644 index 0000000000000000000000000000000000000000..0c2a915e91af65a077d2e01db4ca21acd42906f3 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml new file mode 100644 index 0000000000000000000000000000000000000000..0c2a915e91af65a077d2e01db4ca21acd42906f3 --- /dev/null +++ 
b/live2diff/MiDaS/mobile/android/app/src/main/res/mipmap-anydpi-v26/ic_launcher_round.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/values/colors.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/values/colors.xml new file mode 100644 index 0000000000000000000000000000000000000000..ed82bafb536474c6a88c996b439a2781f31f3d3e --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/values/colors.xml @@ -0,0 +1,8 @@ + + + #ffa800 + #ff6f00 + #425066 + + #66000000 + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/values/dimens.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/values/dimens.xml new file mode 100644 index 0000000000000000000000000000000000000000..5d3609029ca66b612c88b4f395e4e2e3cfc1f0e6 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/values/dimens.xml @@ -0,0 +1,5 @@ + + + 15dp + 8dp + \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/values/strings.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/values/strings.xml new file mode 100644 index 0000000000000000000000000000000000000000..7d763d85efc49879c8d3c0641484f5f472bfaca0 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/values/strings.xml @@ -0,0 +1,21 @@ + + Midas + This device doesn\'t support Camera2 API. + GPU does not yet supported quantized models. 
+ Model: + + Float_EfficientNet + + + + Device: + + GPU + CPU + NNAPI + + diff --git a/live2diff/MiDaS/mobile/android/app/src/main/res/values/styles.xml b/live2diff/MiDaS/mobile/android/app/src/main/res/values/styles.xml new file mode 100644 index 0000000000000000000000000000000000000000..ad09a13ec6b2de8920a7441c9992f3cc0eedcfda --- /dev/null +++ b/live2diff/MiDaS/mobile/android/app/src/main/res/values/styles.xml @@ -0,0 +1,11 @@ + + + + + + diff --git a/live2diff/MiDaS/mobile/android/build.gradle b/live2diff/MiDaS/mobile/android/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..14492756847191ca3beff4c2e012d378c4e44be6 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/build.gradle @@ -0,0 +1,27 @@ +// Top-level build file where you can add configuration options common to all sub-projects/modules. + +buildscript { + + repositories { + google() + jcenter() + } + dependencies { + classpath 'com.android.tools.build:gradle:4.0.0' + classpath 'de.undercouch:gradle-download-task:4.0.2' + // NOTE: Do not place your application dependencies here; they belong + // in the individual module build.gradle files + } +} + +allprojects { + repositories { + google() + jcenter() + } +} + +task clean(type: Delete) { + delete rootProject.buildDir +} + diff --git a/live2diff/MiDaS/mobile/android/gradle.properties b/live2diff/MiDaS/mobile/android/gradle.properties new file mode 100644 index 0000000000000000000000000000000000000000..9592636c07d9d5e6f61c0cfce1311d3e1ffcf34d --- /dev/null +++ b/live2diff/MiDaS/mobile/android/gradle.properties @@ -0,0 +1,15 @@ +# Project-wide Gradle settings. +# IDE (e.g. Android Studio) users: +# Gradle settings configured through the IDE *will override* +# any settings specified in this file. +# For more details on how to configure your build environment visit +# http://www.gradle.org/docs/current/userguide/build_environment.html +# Specifies the JVM arguments used for the daemon process. 
+# The setting is particularly useful for tweaking memory settings. +org.gradle.jvmargs=-Xmx1536m +# When configured, Gradle will run in incubating parallel mode. +# This option should only be used with decoupled projects. More details, visit +# http://www.gradle.org/docs/current/userguide/multi_project_builds.html#sec:decoupled_projects +# org.gradle.parallel=true +android.useAndroidX=true +android.enableJetifier=true diff --git a/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.jar b/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000000000000000000000000000000..f3d88b1c2faf2fc91d853cd5d4242b5547257070 Binary files /dev/null and b/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.properties b/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000000000000000000000000000000000..1b16c34a71cf212ed0cfb883d14d1b8511903eb2 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,5 @@ +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-6.1.1-bin.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists diff --git a/live2diff/MiDaS/mobile/android/gradlew b/live2diff/MiDaS/mobile/android/gradlew new file mode 100644 index 0000000000000000000000000000000000000000..2fe81a7d95e4f9ad2c9b2a046707d36ceb3980b3 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/gradlew @@ -0,0 +1,183 @@ +#!/usr/bin/env sh + +# +# Copyright 2015 the original author or authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +############################################################################## +## +## Gradle start up script for UN*X +## +############################################################################## + +# Attempt to set APP_HOME +# Resolve links: $0 may be a link +PRG="$0" +# Need this for relative symlinks. +while [ -h "$PRG" ] ; do + ls=`ls -ld "$PRG"` + link=`expr "$ls" : '.*-> \(.*\)$'` + if expr "$link" : '/.*' > /dev/null; then + PRG="$link" + else + PRG=`dirname "$PRG"`"/$link" + fi +done +SAVED="`pwd`" +cd "`dirname \"$PRG\"`/" >/dev/null +APP_HOME="`pwd -P`" +cd "$SAVED" >/dev/null + +APP_NAME="Gradle" +APP_BASE_NAME=`basename "$0"` + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD="maximum" + +warn () { + echo "$*" +} + +die () { + echo + echo "$*" + echo + exit 1 +} + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "`uname`" in + CYGWIN* ) + cygwin=true + ;; + Darwin* ) + darwin=true + ;; + MINGW* ) + msys=true + ;; + NONSTOP* ) + nonstop=true + ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD="$JAVA_HOME/jre/sh/java" + else + JAVACMD="$JAVA_HOME/bin/java" + fi + if [ ! 
-x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD="java" + which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." +fi + +# Increase the maximum file descriptors if we can. +if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then + MAX_FD_LIMIT=`ulimit -H -n` + if [ $? -eq 0 ] ; then + if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then + MAX_FD="$MAX_FD_LIMIT" + fi + ulimit -n $MAX_FD + if [ $? -ne 0 ] ; then + warn "Could not set maximum file descriptor limit: $MAX_FD" + fi + else + warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" + fi +fi + +# For Darwin, add options to specify how the application appears in the dock +if $darwin; then + GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" +fi + +# For Cygwin or MSYS, switch paths to Windows format before running java +if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then + APP_HOME=`cygpath --path --mixed "$APP_HOME"` + CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` + JAVACMD=`cygpath --unix "$JAVACMD"` + + # We build the pattern for arguments to be converted via cygpath + ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` + SEP="" + for dir in $ROOTDIRSRAW ; do + ROOTDIRS="$ROOTDIRS$SEP$dir" + SEP="|" + done + OURCYGPATTERN="(^($ROOTDIRS))" + # Add a user-defined pattern to the cygpath arguments + if [ "$GRADLE_CYGPATTERN" != "" ] ; then + OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" + fi + # Now convert the arguments - kludge to limit ourselves to /bin/sh + i=0 + for arg in "$@" ; do + CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` + CHECK2=`echo "$arg"|egrep -c 
"^-"` ### Determine if an option + + if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition + eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` + else + eval `echo args$i`="\"$arg\"" + fi + i=`expr $i + 1` + done + case $i in + 0) set -- ;; + 1) set -- "$args0" ;; + 2) set -- "$args0" "$args1" ;; + 3) set -- "$args0" "$args1" "$args2" ;; + 4) set -- "$args0" "$args1" "$args2" "$args3" ;; + 5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; + 6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; + 7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; + 8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; + 9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; + esac +fi + +# Escape application args +save () { + for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done + echo " " +} +APP_ARGS=`save "$@"` + +# Collect all arguments for the java command, following the shell quoting and substitution rules +eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" + +exec "$JAVACMD" "$@" diff --git a/live2diff/MiDaS/mobile/android/gradlew.bat b/live2diff/MiDaS/mobile/android/gradlew.bat new file mode 100644 index 0000000000000000000000000000000000000000..9618d8d9607cd91a0efb866bcac4810064ba6fac --- /dev/null +++ b/live2diff/MiDaS/mobile/android/gradlew.bat @@ -0,0 +1,100 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. 
+@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem + +@if "%DEBUG%" == "" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%" == "" set DIRNAME=. +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if "%ERRORLEVEL%" == "0" goto init + +echo. +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto init + +echo. +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% +echo. +echo Please set the JAVA_HOME variable in your environment to match the +echo location of your Java installation. + +goto fail + +:init +@rem Get command-line arguments, handling Windows variants + +if not "%OS%" == "Windows_NT" goto win9xME_args + +:win9xME_args +@rem Slurp the command line arguments. 
+set CMD_LINE_ARGS= +set _SKIP=2 + +:win9xME_args_slurp +if "x%~1" == "x" goto execute + +set CMD_LINE_ARGS=%* + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% + +:end +@rem End local scope for the variables with windows NT shell +if "%ERRORLEVEL%"=="0" goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 +exit /b 1 + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/live2diff/MiDaS/mobile/android/lib_support/build.gradle b/live2diff/MiDaS/mobile/android/lib_support/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..5d463975293264765a941795601cddb6cfc84f00 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/build.gradle @@ -0,0 +1,47 @@ +apply plugin: 'com.android.library' + +android { + compileSdkVersion 28 + buildToolsVersion "28.0.0" + + defaultConfig { + minSdkVersion 21 + targetSdkVersion 28 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + + aaptOptions { + noCompress "tflite" + } + + lintOptions { + checkReleaseBuilds false + // Or, if you prefer, you can continue to check for errors in release builds, + // but continue the build even when errors are found: + abortOnError false + } +} + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) + implementation project(":models") + implementation 'androidx.appcompat:appcompat:1.1.0' + + // Build off of nightly TensorFlow Lite + 
implementation('org.tensorflow:tensorflow-lite:0.0.0-nightly') { changing = true } + implementation('org.tensorflow:tensorflow-lite-gpu:0.0.0-nightly') { changing = true } + implementation('org.tensorflow:tensorflow-lite-support:0.0.0-nightly') { changing = true } + // Use local TensorFlow library + // implementation 'org.tensorflow:tensorflow-lite-local:0.0.0' +} diff --git a/live2diff/MiDaS/mobile/android/lib_support/proguard-rules.pro b/live2diff/MiDaS/mobile/android/lib_support/proguard-rules.pro new file mode 100644 index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/AndroidManifest.xml b/live2diff/MiDaS/mobile/android/lib_support/src/main/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..ebe3c56c60a9b67eec218d969aecfdf5311d7b49 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/AndroidManifest.xml @@ -0,0 +1,3 @@ + + diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java new file mode 100644 index 0000000000000000000000000000000000000000..24ec573e7d184e7d64118a723d6645fd92d6e6d9 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java @@ -0,0 +1,376 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import static java.lang.Math.min; + +import android.app.Activity; +import android.graphics.Bitmap; +import android.graphics.RectF; +import android.os.SystemClock; +import android.os.Trace; +import android.util.Log; +import android.view.TextureView; +import android.view.ViewStub; + +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.PriorityQueue; +import org.tensorflow.lite.DataType; +import org.tensorflow.lite.Interpreter; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.gpu.GpuDelegate; +import org.tensorflow.lite.nnapi.NnApiDelegate; +import org.tensorflow.lite.support.common.FileUtil; +import org.tensorflow.lite.support.common.TensorOperator; +import org.tensorflow.lite.support.common.TensorProcessor; +import org.tensorflow.lite.support.image.ImageProcessor; +import org.tensorflow.lite.support.image.TensorImage; +import org.tensorflow.lite.support.image.ops.ResizeOp; +import org.tensorflow.lite.support.image.ops.ResizeOp.ResizeMethod; +import org.tensorflow.lite.support.image.ops.ResizeWithCropOrPadOp; +import org.tensorflow.lite.support.image.ops.Rot90Op; +import org.tensorflow.lite.support.label.TensorLabel; +import org.tensorflow.lite.support.tensorbuffer.TensorBuffer; + +/** A classifier specialized to label images using TensorFlow Lite. */ +public abstract class Classifier { + public static final String TAG = "ClassifierWithSupport"; + + /** The model type used for classification. */ + public enum Model { + FLOAT_MOBILENET, + QUANTIZED_MOBILENET, + QUANTIZED_EFFICIENTNET, + FLOAT_EFFICIENTNET + } + + /** The runtime device type used for executing classification.
*/ + public enum Device { + CPU, + NNAPI, + GPU + } + + /** Number of results to show in the UI. */ + private static final int MAX_RESULTS = 3; + + /** The loaded TensorFlow Lite model. */ + + /** Image size along the x axis. */ + private final int imageSizeX; + + /** Image size along the y axis. */ + private final int imageSizeY; + + /** Optional GPU delegate for acceleration. */ + private GpuDelegate gpuDelegate = null; + + /** Optional NNAPI delegate for acceleration. */ + private NnApiDelegate nnApiDelegate = null; + + /** An instance of the driver class to run model inference with Tensorflow Lite. */ + protected Interpreter tflite; + + /** Options for configuring the Interpreter. */ + private final Interpreter.Options tfliteOptions = new Interpreter.Options(); + + /** Labels corresponding to the output of the vision model. */ + private final List labels; + + /** Input image TensorBuffer. */ + private TensorImage inputImageBuffer; + + /** Output probability TensorBuffer. */ + private final TensorBuffer outputProbabilityBuffer; + + /** Processor to apply post processing of the output probability. */ + private final TensorProcessor probabilityProcessor; + + /** + * Creates a classifier with the provided configuration. + * + * @param activity The current Activity. + * @param model The model to use for classification. + * @param device The device to use for classification. + * @param numThreads The number of threads to use for classification. + * @return A classifier with the desired configuration.
+ */ + public static Classifier create(Activity activity, Model model, Device device, int numThreads) + throws IOException { + if (model == Model.QUANTIZED_MOBILENET) { + return new ClassifierQuantizedMobileNet(activity, device, numThreads); + } else if (model == Model.FLOAT_MOBILENET) { + return new ClassifierFloatMobileNet(activity, device, numThreads); + } else if (model == Model.FLOAT_EFFICIENTNET) { + return new ClassifierFloatEfficientNet(activity, device, numThreads); + } else if (model == Model.QUANTIZED_EFFICIENTNET) { + return new ClassifierQuantizedEfficientNet(activity, device, numThreads); + } else { + throw new UnsupportedOperationException(); + } + } + + /** An immutable result returned by a Classifier describing what was recognized. */ + public static class Recognition { + /** + * A unique identifier for what has been recognized. Specific to the class, not the instance of + * the object. + */ + private final String id; + + /** Display name for the recognition. */ + private final String title; + + /** + * A sortable score for how good the recognition is relative to others. Higher should be better. + */ + private final Float confidence; + + /** Optional location within the source image for the location of the recognized object.
*/ + private RectF location; + + public Recognition( + final String id, final String title, final Float confidence, final RectF location) { + this.id = id; + this.title = title; + this.confidence = confidence; + this.location = location; + } + + public String getId() { + return id; + } + + public String getTitle() { + return title; + } + + public Float getConfidence() { + return confidence; + } + + public RectF getLocation() { + return new RectF(location); + } + + public void setLocation(RectF location) { + this.location = location; + } + + @Override + public String toString() { + String resultString = ""; + if (id != null) { + resultString += "[" + id + "] "; + } + + if (title != null) { + resultString += title + " "; + } + + if (confidence != null) { + resultString += String.format("(%.1f%%) ", confidence * 100.0f); + } + + if (location != null) { + resultString += location + " "; + } + + return resultString.trim(); + } + } + + /** Initializes a {@code Classifier}. */ + protected Classifier(Activity activity, Device device, int numThreads) throws IOException { + MappedByteBuffer tfliteModel = FileUtil.loadMappedFile(activity, getModelPath()); + switch (device) { + case NNAPI: + nnApiDelegate = new NnApiDelegate(); + tfliteOptions.addDelegate(nnApiDelegate); + break; + case GPU: + gpuDelegate = new GpuDelegate(); + tfliteOptions.addDelegate(gpuDelegate); + break; + case CPU: + break; + } + tfliteOptions.setNumThreads(numThreads); + tflite = new Interpreter(tfliteModel, tfliteOptions); + + // Loads labels out from the label file. + labels = FileUtil.loadLabels(activity, getLabelPath()); + + // Reads type and shape of input and output tensors, respectively.
+ int imageTensorIndex = 0; + int[] imageShape = tflite.getInputTensor(imageTensorIndex).shape(); // {1, height, width, 3} + if(imageShape[1] != imageShape[2]) { + imageSizeY = imageShape[2]; + imageSizeX = imageShape[3]; + } else { + imageSizeY = imageShape[1]; + imageSizeX = imageShape[2]; + } + DataType imageDataType = tflite.getInputTensor(imageTensorIndex).dataType(); + int probabilityTensorIndex = 0; + int[] probabilityShape = + tflite.getOutputTensor(probabilityTensorIndex).shape(); // {1, NUM_CLASSES} + DataType probabilityDataType = tflite.getOutputTensor(probabilityTensorIndex).dataType(); + + // Creates the input tensor. + inputImageBuffer = new TensorImage(imageDataType); + + // Creates the output tensor and its processor. + outputProbabilityBuffer = TensorBuffer.createFixedSize(probabilityShape, probabilityDataType); + + // Creates the post processor for the output probability. + probabilityProcessor = new TensorProcessor.Builder().add(getPostprocessNormalizeOp()).build(); + + Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); + } + + /** Runs inference and returns the raw model output as a float array. */ + //public List recognizeImage(final Bitmap bitmap, int sensorOrientation) { + public float[] recognizeImage(final Bitmap bitmap, int sensorOrientation) { + // Logs this method so that it can be analyzed with systrace. + Trace.beginSection("recognizeImage"); + + Trace.beginSection("loadImage"); + long startTimeForLoadImage = SystemClock.uptimeMillis(); + inputImageBuffer = loadImage(bitmap, sensorOrientation); + long endTimeForLoadImage = SystemClock.uptimeMillis(); + Trace.endSection(); + Log.v(TAG, "Timecost to load the image: " + (endTimeForLoadImage - startTimeForLoadImage)); + + // Runs the inference call.
+ Trace.beginSection("runInference"); + long startTimeForReference = SystemClock.uptimeMillis(); + tflite.run(inputImageBuffer.getBuffer(), outputProbabilityBuffer.getBuffer().rewind()); + long endTimeForReference = SystemClock.uptimeMillis(); + Trace.endSection(); + Log.v(TAG, "Timecost to run model inference: " + (endTimeForReference - startTimeForReference)); + + float[] img_array = outputProbabilityBuffer.getFloatArray(); + + // Gets the map of label and probability. + //Map labeledProbability = + // new TensorLabel(labels, probabilityProcessor.process(outputProbabilityBuffer)) + // .getMapWithFloatValue(); + Trace.endSection(); + + // Returns the raw output values; the top-k step is commented out. + return img_array;//getTopKProbability(labeledProbability); + } + + /** Closes the interpreter and model to release resources. */ + public void close() { + if (tflite != null) { + tflite.close(); + tflite = null; + } + if (gpuDelegate != null) { + gpuDelegate.close(); + gpuDelegate = null; + } + if (nnApiDelegate != null) { + nnApiDelegate.close(); + nnApiDelegate = null; + } + } + + /** Get the image size along the x axis. */ + public int getImageSizeX() { + return imageSizeX; + } + + /** Get the image size along the y axis. */ + public int getImageSizeY() { + return imageSizeY; + } + + /** Loads input image, and applies preprocessing. */ + private TensorImage loadImage(final Bitmap bitmap, int sensorOrientation) { + // Loads bitmap into a TensorImage. + inputImageBuffer.load(bitmap); + + // Creates processor for the TensorImage. + int cropSize = min(bitmap.getWidth(), bitmap.getHeight()); + int numRotation = sensorOrientation / 90; + // TODO(b/143564309): Fuse ops inside ImageProcessor. + ImageProcessor imageProcessor = + new ImageProcessor.Builder() + .add(new ResizeWithCropOrPadOp(cropSize, cropSize)) + // TODO(b/169379396): investigate the impact of the resize algorithm on accuracy.
+ // To get the same inference results as lib_task_api, which is built on top of the Task + // Library, use ResizeMethod.BILINEAR. + .add(new ResizeOp(imageSizeX, imageSizeY, ResizeMethod.NEAREST_NEIGHBOR)) + //.add(new ResizeOp(224, 224, ResizeMethod.NEAREST_NEIGHBOR)) + .add(new Rot90Op(numRotation)) + .add(getPreprocessNormalizeOp()) + .build(); + return imageProcessor.process(inputImageBuffer); + } + + /** Gets the top-k results. */ + private static List getTopKProbability(Map labelProb) { + // Find the best classifications. + PriorityQueue pq = + new PriorityQueue<>( + MAX_RESULTS, + new Comparator() { + @Override + public int compare(Recognition lhs, Recognition rhs) { + // Intentionally reversed to put high confidence at the head of the queue. + return Float.compare(rhs.getConfidence(), lhs.getConfidence()); + } + }); + + for (Map.Entry entry : labelProb.entrySet()) { + pq.add(new Recognition("" + entry.getKey(), entry.getKey(), entry.getValue(), null)); + } + + final ArrayList recognitions = new ArrayList<>(); + int recognitionsSize = min(pq.size(), MAX_RESULTS); + for (int i = 0; i < recognitionsSize; ++i) { + recognitions.add(pq.poll()); + } + return recognitions; + } + + /** Gets the name of the model file stored in Assets. */ + protected abstract String getModelPath(); + + /** Gets the name of the label file stored in Assets. */ + protected abstract String getLabelPath(); + + /** Gets the TensorOperator to normalize the input image in preprocessing. */ + protected abstract TensorOperator getPreprocessNormalizeOp(); + + /** + * Gets the TensorOperator to dequantize the output probability in post processing. + * + *

For quantized model, we need de-quantize the prediction with NormalizeOp (as they are all + * essentially linear transformation). For float model, de-quantize is not required. But to + * uniform the API, de-quantize is added to float model too. Mean and std are set to 0.0f and + * 1.0f, respectively. + */ + protected abstract TensorOperator getPostprocessNormalizeOp(); +} diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java new file mode 100644 index 0000000000000000000000000000000000000000..14dd027b26baefaedd979a8ac37f0bf984210ed4 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java @@ -0,0 +1,71 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.support.common.TensorOperator; +import org.tensorflow.lite.support.common.ops.NormalizeOp; + +/** This TensorFlowLite classifier works with the float EfficientNet model. */ +public class ClassifierFloatEfficientNet extends Classifier { + + private static final float IMAGE_MEAN = 115.0f; //127.0f; + private static final float IMAGE_STD = 58.0f; //128.0f; + + /** + * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f + * and 1.0f, respectively, to bypass the normalization. + */ + private static final float PROBABILITY_MEAN = 0.0f; + + private static final float PROBABILITY_STD = 1.0f; + + /** + * Initializes a {@code ClassifierFloatEfficientNet}. + * + * @param activity + */ + public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets.
+ //return "efficientnet-lite0-fp32.tflite"; + return "model_opt.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_without_background.txt"; + } + + @Override + protected TensorOperator getPreprocessNormalizeOp() { + return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); + } + + @Override + protected TensorOperator getPostprocessNormalizeOp() { + return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java new file mode 100644 index 0000000000000000000000000000000000000000..40519de07cf5e887773250a4609a832b6060d684 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java @@ -0,0 +1,72 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License.
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.support.common.TensorOperator; +import org.tensorflow.lite.support.common.ops.NormalizeOp; + +/** This TensorFlowLite classifier works with the float MobileNet model. */ +public class ClassifierFloatMobileNet extends Classifier { + + /** Float MobileNet requires additional normalization of the used input. */ + private static final float IMAGE_MEAN = 127.5f; + + private static final float IMAGE_STD = 127.5f; + + /** + * Float model does not need dequantization in the post-processing. Setting mean and std as 0.0f + * and 1.0f, respectively, to bypass the normalization. + */ + private static final float PROBABILITY_MEAN = 0.0f; + + private static final float PROBABILITY_STD = 1.0f; + + /** + * Initializes a {@code ClassifierFloatMobileNet}. + * + * @param activity + */ + public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets.
+ return "model_0.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels.txt"; + } + + @Override + protected TensorOperator getPreprocessNormalizeOp() { + return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); + } + + @Override + protected TensorOperator getPostprocessNormalizeOp() { + return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java new file mode 100644 index 0000000000000000000000000000000000000000..d0d62f58d18333b6360ec30a4c85c9f1d38955ce --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java @@ -0,0 +1,71 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.support.common.TensorOperator; +import org.tensorflow.lite.support.common.ops.NormalizeOp; + +/** This TensorFlow Lite classifier works with the quantized EfficientNet model.
*/ +public class ClassifierQuantizedEfficientNet extends Classifier { + + /** + * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to + * bypass the normalization. + */ + private static final float IMAGE_MEAN = 0.0f; + + private static final float IMAGE_STD = 1.0f; + + /** Quantized EfficientNet requires additional dequantization to the output probability. */ + private static final float PROBABILITY_MEAN = 0.0f; + + private static final float PROBABILITY_STD = 255.0f; + + /** + * Initializes a {@code ClassifierQuantizedEfficientNet}. + * + * @param activity + */ + public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets. + return "model_quant.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels_without_background.txt"; + } + + @Override + protected TensorOperator getPreprocessNormalizeOp() { + return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); + } + + @Override + protected TensorOperator getPostprocessNormalizeOp() { + return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java new file mode 100644 index 0000000000000000000000000000000000000000..94b06e3df659005c287733a8a37672863fdadd71 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java @@ -0,0 +1,72 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+ +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.support.common.TensorOperator; +import org.tensorflow.lite.support.common.ops.NormalizeOp; + +/** This TensorFlow Lite classifier works with the quantized MobileNet model. */ +public class ClassifierQuantizedMobileNet extends Classifier { + + /** + * The quantized model does not require normalization, thus set mean as 0.0f, and std as 1.0f to + * bypass the normalization. + */ + private static final float IMAGE_MEAN = 0.0f; + + private static final float IMAGE_STD = 1.0f; + + /** Quantized MobileNet requires additional dequantization to the output probability. */ + private static final float PROBABILITY_MEAN = 0.0f; + + private static final float PROBABILITY_STD = 255.0f; + + /** + * Initializes a {@code ClassifierQuantizedMobileNet}. + * + * @param activity + */ + public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets. 
+ return "model_quant_0.tflite"; + } + + @Override + protected String getLabelPath() { + return "labels.txt"; + } + + @Override + protected TensorOperator getPreprocessNormalizeOp() { + return new NormalizeOp(IMAGE_MEAN, IMAGE_STD); + } + + @Override + protected TensorOperator getPostprocessNormalizeOp() { + return new NormalizeOp(PROBABILITY_MEAN, PROBABILITY_STD); + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/build.gradle b/live2diff/MiDaS/mobile/android/lib_task_api/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..b5983986e3d56a77a41676b9195b0d0882b5fb96 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/build.gradle @@ -0,0 +1,47 @@ +apply plugin: 'com.android.library' + +android { + compileSdkVersion 28 + buildToolsVersion "28.0.0" + + defaultConfig { + minSdkVersion 21 + targetSdkVersion 28 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + compileOptions { + sourceCompatibility = '1.8' + targetCompatibility = '1.8' + } + aaptOptions { + noCompress "tflite" + } + + lintOptions { + checkReleaseBuilds false + // Or, if you prefer, you can continue to check for errors in release builds, + // but continue the build even when errors are found: + abortOnError false + } +} + +dependencies { + implementation fileTree(dir: 'libs', include: ['*.jar']) + implementation project(":models") + implementation 'androidx.appcompat:appcompat:1.1.0' + + // Build off of nightly TensorFlow Lite Task Library + implementation('org.tensorflow:tensorflow-lite-task-vision:0.0.0-nightly') { changing = true } + implementation('org.tensorflow:tensorflow-lite-metadata:0.0.0-nightly') { changing = true } +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/proguard-rules.pro 
b/live2diff/MiDaS/mobile/android/lib_task_api/proguard-rules.pro new file mode 100644 index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/AndroidManifest.xml b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..ebe3c56c60a9b67eec218d969aecfdf5311d7b49 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/AndroidManifest.xml @@ -0,0 +1,3 @@ + + diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java new file mode 100644 index 0000000000000000000000000000000000000000..45da52a0d0dfa203255e0f2d44901ee0618e739f --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java @@ -0,0 +1,278 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import static java.lang.Math.min; + +import android.app.Activity; +import android.graphics.Bitmap; +import android.graphics.Rect; +import android.graphics.RectF; +import android.os.SystemClock; +import android.os.Trace; +import android.util.Log; +import java.io.IOException; +import java.nio.MappedByteBuffer; +import java.util.ArrayList; +import java.util.List; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; +import org.tensorflow.lite.support.common.FileUtil; +import org.tensorflow.lite.support.image.TensorImage; +import org.tensorflow.lite.support.label.Category; +import org.tensorflow.lite.support.metadata.MetadataExtractor; +import org.tensorflow.lite.task.core.vision.ImageProcessingOptions; +import org.tensorflow.lite.task.core.vision.ImageProcessingOptions.Orientation; +import org.tensorflow.lite.task.vision.classifier.Classifications; +import org.tensorflow.lite.task.vision.classifier.ImageClassifier; +import org.tensorflow.lite.task.vision.classifier.ImageClassifier.ImageClassifierOptions; + +/** A classifier specialized to label images using TensorFlow Lite. */ +public abstract class Classifier { + public static final String TAG = "ClassifierWithTaskApi"; + + /** The model type used for classification. */ + public enum Model { + FLOAT_MOBILENET, + QUANTIZED_MOBILENET, + FLOAT_EFFICIENTNET, + QUANTIZED_EFFICIENTNET + } + + /** The runtime device type used for executing classification. */ + public enum Device { + CPU, + NNAPI, + GPU + } + + /** Number of results to show in the UI. */ + private static final int MAX_RESULTS = 3; + + /** Image size along the x axis. */ + private final int imageSizeX; + + /** Image size along the y axis. */ + private final int imageSizeY; + /** An instance of the driver class to run model inference with Tensorflow Lite. 
*/ + protected final ImageClassifier imageClassifier; + + /** + * Creates a classifier with the provided configuration. + * + * @param activity The current Activity. + * @param model The model to use for classification. + * @param device The device to use for classification. + * @param numThreads The number of threads to use for classification. + * @return A classifier with the desired configuration. + */ + public static Classifier create(Activity activity, Model model, Device device, int numThreads) + throws IOException { + if (model == Model.QUANTIZED_MOBILENET) { + return new ClassifierQuantizedMobileNet(activity, device, numThreads); + } else if (model == Model.FLOAT_MOBILENET) { + return new ClassifierFloatMobileNet(activity, device, numThreads); + } else if (model == Model.FLOAT_EFFICIENTNET) { + return new ClassifierFloatEfficientNet(activity, device, numThreads); + } else if (model == Model.QUANTIZED_EFFICIENTNET) { + return new ClassifierQuantizedEfficientNet(activity, device, numThreads); + } else { + throw new UnsupportedOperationException(); + } + } + + /** An immutable result returned by a Classifier describing what was recognized. */ + public static class Recognition { + /** + * A unique identifier for what has been recognized. Specific to the class, not the instance of + * the object. + */ + private final String id; + + /** Display name for the recognition. */ + private final String title; + + /** + * A sortable score for how good the recognition is relative to others. Higher should be better. + */ + private final Float confidence; + + /** Optional location within the source image for the location of the recognized object. 
*/ + private RectF location; + + public Recognition( + final String id, final String title, final Float confidence, final RectF location) { + this.id = id; + this.title = title; + this.confidence = confidence; + this.location = location; + } + + public String getId() { + return id; + } + + public String getTitle() { + return title; + } + + public Float getConfidence() { + return confidence; + } + + public RectF getLocation() { + return new RectF(location); + } + + public void setLocation(RectF location) { + this.location = location; + } + + @Override + public String toString() { + String resultString = ""; + if (id != null) { + resultString += "[" + id + "] "; + } + + if (title != null) { + resultString += title + " "; + } + + if (confidence != null) { + resultString += String.format("(%.1f%%) ", confidence * 100.0f); + } + + if (location != null) { + resultString += location + " "; + } + + return resultString.trim(); + } + } + + /** Initializes a {@code Classifier}. */ + protected Classifier(Activity activity, Device device, int numThreads) throws IOException { + if (device != Device.CPU || numThreads != 1) { + throw new IllegalArgumentException( + "Manipulating the hardware accelerators and numbers of threads is not allowed in the Task" + + " library currently. Only CPU + single thread is allowed."); + } + + // Create the ImageClassifier instance. + ImageClassifierOptions options = + ImageClassifierOptions.builder().setMaxResults(MAX_RESULTS).build(); + imageClassifier = ImageClassifier.createFromFileAndOptions(activity, getModelPath(), options); + Log.d(TAG, "Created a Tensorflow Lite Image Classifier."); + + // Get the input image size information of the underlying tflite model. + MappedByteBuffer tfliteModel = FileUtil.loadMappedFile(activity, getModelPath()); + MetadataExtractor metadataExtractor = new MetadataExtractor(tfliteModel); + // Image shape is in the format of {1, height, width, 3}. 
+ int[] imageShape = metadataExtractor.getInputTensorShape(/*inputIndex=*/ 0); + imageSizeY = imageShape[1]; + imageSizeX = imageShape[2]; + } + + /** Runs inference and returns the classification results. */ + public List<Recognition> recognizeImage(final Bitmap bitmap, int sensorOrientation) { + // Logs this method so that it can be analyzed with systrace. + Trace.beginSection("recognizeImage"); + + TensorImage inputImage = TensorImage.fromBitmap(bitmap); + int width = bitmap.getWidth(); + int height = bitmap.getHeight(); + int cropSize = min(width, height); + // TODO(b/169379396): investigate the impact of the resize algorithm on accuracy. + // Task Library resizes the images using bilinear interpolation, which is slightly different from + // the nearest neighbor sampling algorithm used in lib_support. See + // https://github.com/tensorflow/examples/blob/0ef3d93e2af95d325c70ef3bcbbd6844d0631e07/lite/examples/image_classification/android/lib_support/src/main/java/org/tensorflow/lite/examples/classification/tflite/Classifier.java#L310. + ImageProcessingOptions imageOptions = + ImageProcessingOptions.builder() + .setOrientation(getOrientation(sensorOrientation)) + // Set the ROI to the center of the image. + .setRoi( + new Rect( + /*left=*/ (width - cropSize) / 2, + /*top=*/ (height - cropSize) / 2, + /*right=*/ (width + cropSize) / 2, + /*bottom=*/ (height + cropSize) / 2)) + .build(); + + // Runs the inference call. + Trace.beginSection("runInference"); + long startTimeForReference = SystemClock.uptimeMillis(); + List<Classifications> results = imageClassifier.classify(inputImage, imageOptions); + long endTimeForReference = SystemClock.uptimeMillis(); + Trace.endSection(); + Log.v(TAG, "Timecost to run model inference: " + (endTimeForReference - startTimeForReference)); + + Trace.endSection(); + + return getRecognitions(results); + } + + /** Closes the interpreter and model to release resources.
*/ + public void close() { + if (imageClassifier != null) { + imageClassifier.close(); + } + } + + /** Get the image size along the x axis. */ + public int getImageSizeX() { + return imageSizeX; + } + + /** Get the image size along the y axis. */ + public int getImageSizeY() { + return imageSizeY; + } + + /** + * Converts a list of {@link Classifications} objects into a list of {@link Recognition} objects + * to match the interface of other inference methods, such as using the TFLite + * Support Library. + */ + private static List<Recognition> getRecognitions(List<Classifications> classifications) { + + final ArrayList<Recognition> recognitions = new ArrayList<>(); + // All the demo models are single head models. Get the first Classifications in the results. + for (Category category : classifications.get(0).getCategories()) { + recognitions.add( + new Recognition( + "" + category.getLabel(), category.getLabel(), category.getScore(), null)); + } + return recognitions; + } + + /* Convert the camera orientation in degree into {@link ImageProcessingOptions#Orientation}.*/ + private static Orientation getOrientation(int cameraOrientation) { + switch (cameraOrientation / 90) { + case 3: + return Orientation.BOTTOM_LEFT; + case 2: + return Orientation.BOTTOM_RIGHT; + case 1: + return Orientation.TOP_RIGHT; + default: + return Orientation.TOP_LEFT; + } + } + + /** Gets the name of the model file stored in Assets.
*/ + protected abstract String getModelPath(); +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java new file mode 100644 index 0000000000000000000000000000000000000000..250794cc12d0e603aa47502322dc646d50689848 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatEfficientNet.java @@ -0,0 +1,45 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; + +/** This TensorFlowLite classifier works with the float EfficientNet model. */ +public class ClassifierFloatEfficientNet extends Classifier { + + /** + * Initializes a {@code ClassifierFloatEfficientNet}.
+ * + * @param device a {@link Device} object to configure the hardware accelerator + * @param numThreads the number of threads during the inference + * @throws IOException if the model is not loaded correctly + */ + public ClassifierFloatEfficientNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets. + //return "efficientnet-lite0-fp32.tflite"; + return "model.tflite"; + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java new file mode 100644 index 0000000000000000000000000000000000000000..0707de98de41395eaf3ddcfd74d6e36229a63760 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierFloatMobileNet.java @@ -0,0 +1,43 @@ +/* Copyright 2019 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; + +/** This TensorFlowLite classifier works with the float MobileNet model. */ +public class ClassifierFloatMobileNet extends Classifier { + /** + * Initializes a {@code ClassifierFloatMobileNet}. + * + * @param device a {@link Device} object to configure the hardware accelerator + * @param numThreads the number of threads during the inference + * @throws IOException if the model is not loaded correctly + */ + public ClassifierFloatMobileNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets. + return "mobilenet_v1_1.0_224.tflite"; + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java new file mode 100644 index 0000000000000000000000000000000000000000..05ca4fa6c409d0274a396c9b26c3c39ca8a8194e --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedEfficientNet.java @@ -0,0 +1,43 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; + +/** This TensorFlow Lite classifier works with the quantized EfficientNet model. */ +public class ClassifierQuantizedEfficientNet extends Classifier { + + /** + * Initializes a {@code ClassifierQuantizedEfficientNet}. + * + * @param device a {@link Device} object to configure the hardware accelerator + * @param numThreads the number of threads during the inference + * @throws IOException if the model is not loaded correctly + */ + public ClassifierQuantizedEfficientNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets.
+ return "efficientnet-lite0-int8.tflite"; + } +} diff --git a/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java new file mode 100644 index 0000000000000000000000000000000000000000..978b08eeaf52a23eede437d61045db08d1dff163 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/lib_task_api/src/main/java/org/tensorflow/lite/examples/classification/tflite/ClassifierQuantizedMobileNet.java @@ -0,0 +1,44 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +package org.tensorflow.lite.examples.classification.tflite; + +import android.app.Activity; +import java.io.IOException; +import org.tensorflow.lite.examples.classification.tflite.Classifier.Device; + +/** This TensorFlow Lite classifier works with the quantized MobileNet model. */ +public class ClassifierQuantizedMobileNet extends Classifier { + + /** + * Initializes a {@code ClassifierQuantizedMobileNet}. 
+ * + * @param device a {@link Device} object to configure the hardware accelerator + * @param numThreads the number of threads during the inference + * @throws IOException if the model is not loaded correctly + */ + public ClassifierQuantizedMobileNet(Activity activity, Device device, int numThreads) + throws IOException { + super(activity, device, numThreads); + } + + @Override + protected String getModelPath() { + // you can download this file from + // see build.gradle for where to obtain this file. It should be auto + // downloaded into assets. + return "mobilenet_v1_1.0_224_quant.tflite"; + } +} diff --git a/live2diff/MiDaS/mobile/android/models/build.gradle b/live2diff/MiDaS/mobile/android/models/build.gradle new file mode 100644 index 0000000000000000000000000000000000000000..8d825707af20cbbead6c4599f075599148e3511c --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/build.gradle @@ -0,0 +1,40 @@ +apply plugin: 'com.android.library' +apply plugin: 'de.undercouch.download' + +android { + compileSdkVersion 28 + buildToolsVersion "28.0.0" + + defaultConfig { + minSdkVersion 21 + targetSdkVersion 28 + versionCode 1 + versionName "1.0" + + testInstrumentationRunner "androidx.test.runner.AndroidJUnitRunner" + + } + + buildTypes { + release { + minifyEnabled false + proguardFiles getDefaultProguardFile('proguard-android-optimize.txt'), 'proguard-rules.pro' + } + } + + aaptOptions { + noCompress "tflite" + } + + lintOptions { + checkReleaseBuilds false + // Or, if you prefer, you can continue to check for errors in release builds, + // but continue the build even when errors are found: + abortOnError false + } +} + +// Download default models; if you wish to use your own models then +// place them in the "assets" directory and comment out this line. 
+project.ext.ASSET_DIR = projectDir.toString() + '/src/main/assets' +apply from:'download.gradle' diff --git a/live2diff/MiDaS/mobile/android/models/download.gradle b/live2diff/MiDaS/mobile/android/models/download.gradle new file mode 100644 index 0000000000000000000000000000000000000000..0f9da676e1d037ecb9e4a2f67a1dfcf7679e9732 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/download.gradle @@ -0,0 +1,10 @@ +def modelFloatDownloadUrl = "https://github.com/intel-isl/MiDaS/releases/download/v2_1/model_opt.tflite" +def modelFloatFile = "model_opt.tflite" + +task downloadModelFloat(type: Download) { + src "${modelFloatDownloadUrl}" + dest project.ext.ASSET_DIR + "/${modelFloatFile}" + overwrite false +} + +preBuild.dependsOn downloadModelFloat diff --git a/live2diff/MiDaS/mobile/android/models/proguard-rules.pro b/live2diff/MiDaS/mobile/android/models/proguard-rules.pro new file mode 100644 index 0000000000000000000000000000000000000000..f1b424510da51fd82143bc74a0a801ae5a1e2fcd --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/proguard-rules.pro @@ -0,0 +1,21 @@ +# Add project specific ProGuard rules here. +# You can control the set of applied configuration files using the +# proguardFiles setting in build.gradle. +# +# For more details, see +# http://developer.android.com/guide/developing/tools/proguard.html + +# If your project uses WebView with JS, uncomment the following +# and specify the fully qualified class name to the JavaScript interface +# class: +#-keepclassmembers class fqcn.of.javascript.interface.for.webview { +# public *; +#} + +# Uncomment this to preserve the line number information for +# debugging stack traces. +#-keepattributes SourceFile,LineNumberTable + +# If you keep the line number information, uncomment this to +# hide the original source file name. 
+#-renamesourcefileattribute SourceFile diff --git a/live2diff/MiDaS/mobile/android/models/src/main/AndroidManifest.xml b/live2diff/MiDaS/mobile/android/models/src/main/AndroidManifest.xml new file mode 100644 index 0000000000000000000000000000000000000000..42951a56497c5f947efe4aea6a07462019fb152c --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/src/main/AndroidManifest.xml @@ -0,0 +1,3 @@ + + diff --git a/live2diff/MiDaS/mobile/android/models/src/main/assets/labels.txt b/live2diff/MiDaS/mobile/android/models/src/main/assets/labels.txt new file mode 100644 index 0000000000000000000000000000000000000000..fe811239d8e2989de19fecabb1ebb0c9dddac514 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/src/main/assets/labels.txt @@ -0,0 +1,1001 @@ +background +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan 
+drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby +koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd 
+Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog +Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack 
+bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp 
+harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt 
+sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap +shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff 
+coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom +scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/live2diff/MiDaS/mobile/android/models/src/main/assets/labels_without_background.txt b/live2diff/MiDaS/mobile/android/models/src/main/assets/labels_without_background.txt new file mode 100644 index 0000000000000000000000000000000000000000..f40829ed0fc318c673860fae4be6c48529da116e --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/src/main/assets/labels_without_background.txt @@ -0,0 +1,1000 @@ +tench +goldfish +great white shark +tiger shark +hammerhead +electric ray +stingray +cock +hen +ostrich +brambling +goldfinch +house finch +junco +indigo bunting +robin +bulbul +jay +magpie +chickadee +water ouzel +kite +bald eagle +vulture +great grey owl +European fire salamander +common newt +eft +spotted salamander +axolotl +bullfrog +tree frog +tailed frog +loggerhead +leatherback turtle +mud turtle +terrapin +box turtle +banded gecko +common iguana +American chameleon +whiptail +agama +frilled lizard +alligator lizard +Gila monster +green lizard +African chameleon +Komodo dragon +African crocodile +American alligator +triceratops +thunder snake +ringneck snake +hognose snake +green snake +king snake +garter snake +water snake +vine snake +night snake +boa constrictor +rock python +Indian cobra +green mamba +sea snake +horned viper +diamondback +sidewinder +trilobite +harvestman +scorpion +black and gold garden spider +barn spider +garden spider +black widow +tarantula +wolf spider +tick +centipede +black grouse +ptarmigan +ruffed grouse +prairie chicken +peacock +quail +partridge +African grey +macaw +sulphur-crested cockatoo +lorikeet +coucal +bee eater +hornbill +hummingbird +jacamar +toucan +drake +red-breasted merganser +goose +black swan +tusker +echidna +platypus +wallaby 
+koala +wombat +jellyfish +sea anemone +brain coral +flatworm +nematode +conch +snail +slug +sea slug +chiton +chambered nautilus +Dungeness crab +rock crab +fiddler crab +king crab +American lobster +spiny lobster +crayfish +hermit crab +isopod +white stork +black stork +spoonbill +flamingo +little blue heron +American egret +bittern +crane +limpkin +European gallinule +American coot +bustard +ruddy turnstone +red-backed sandpiper +redshank +dowitcher +oystercatcher +pelican +king penguin +albatross +grey whale +killer whale +dugong +sea lion +Chihuahua +Japanese spaniel +Maltese dog +Pekinese +Shih-Tzu +Blenheim spaniel +papillon +toy terrier +Rhodesian ridgeback +Afghan hound +basset +beagle +bloodhound +bluetick +black-and-tan coonhound +Walker hound +English foxhound +redbone +borzoi +Irish wolfhound +Italian greyhound +whippet +Ibizan hound +Norwegian elkhound +otterhound +Saluki +Scottish deerhound +Weimaraner +Staffordshire bullterrier +American Staffordshire terrier +Bedlington terrier +Border terrier +Kerry blue terrier +Irish terrier +Norfolk terrier +Norwich terrier +Yorkshire terrier +wire-haired fox terrier +Lakeland terrier +Sealyham terrier +Airedale +cairn +Australian terrier +Dandie Dinmont +Boston bull +miniature schnauzer +giant schnauzer +standard schnauzer +Scotch terrier +Tibetan terrier +silky terrier +soft-coated wheaten terrier +West Highland white terrier +Lhasa +flat-coated retriever +curly-coated retriever +golden retriever +Labrador retriever +Chesapeake Bay retriever +German short-haired pointer +vizsla +English setter +Irish setter +Gordon setter +Brittany spaniel +clumber +English springer +Welsh springer spaniel +cocker spaniel +Sussex spaniel +Irish water spaniel +kuvasz +schipperke +groenendael +malinois +briard +kelpie +komondor +Old English sheepdog +Shetland sheepdog +collie +Border collie +Bouvier des Flandres +Rottweiler +German shepherd +Doberman +miniature pinscher +Greater Swiss Mountain dog +Bernese mountain dog 
+Appenzeller +EntleBucher +boxer +bull mastiff +Tibetan mastiff +French bulldog +Great Dane +Saint Bernard +Eskimo dog +malamute +Siberian husky +dalmatian +affenpinscher +basenji +pug +Leonberg +Newfoundland +Great Pyrenees +Samoyed +Pomeranian +chow +keeshond +Brabancon griffon +Pembroke +Cardigan +toy poodle +miniature poodle +standard poodle +Mexican hairless +timber wolf +white wolf +red wolf +coyote +dingo +dhole +African hunting dog +hyena +red fox +kit fox +Arctic fox +grey fox +tabby +tiger cat +Persian cat +Siamese cat +Egyptian cat +cougar +lynx +leopard +snow leopard +jaguar +lion +tiger +cheetah +brown bear +American black bear +ice bear +sloth bear +mongoose +meerkat +tiger beetle +ladybug +ground beetle +long-horned beetle +leaf beetle +dung beetle +rhinoceros beetle +weevil +fly +bee +ant +grasshopper +cricket +walking stick +cockroach +mantis +cicada +leafhopper +lacewing +dragonfly +damselfly +admiral +ringlet +monarch +cabbage butterfly +sulphur butterfly +lycaenid +starfish +sea urchin +sea cucumber +wood rabbit +hare +Angora +hamster +porcupine +fox squirrel +marmot +beaver +guinea pig +sorrel +zebra +hog +wild boar +warthog +hippopotamus +ox +water buffalo +bison +ram +bighorn +ibex +hartebeest +impala +gazelle +Arabian camel +llama +weasel +mink +polecat +black-footed ferret +otter +skunk +badger +armadillo +three-toed sloth +orangutan +gorilla +chimpanzee +gibbon +siamang +guenon +patas +baboon +macaque +langur +colobus +proboscis monkey +marmoset +capuchin +howler monkey +titi +spider monkey +squirrel monkey +Madagascar cat +indri +Indian elephant +African elephant +lesser panda +giant panda +barracouta +eel +coho +rock beauty +anemone fish +sturgeon +gar +lionfish +puffer +abacus +abaya +academic gown +accordion +acoustic guitar +aircraft carrier +airliner +airship +altar +ambulance +amphibian +analog clock +apiary +apron +ashcan +assault rifle +backpack +bakery +balance beam +balloon +ballpoint +Band Aid +banjo +bannister +barbell +barber 
chair +barbershop +barn +barometer +barrel +barrow +baseball +basketball +bassinet +bassoon +bathing cap +bath towel +bathtub +beach wagon +beacon +beaker +bearskin +beer bottle +beer glass +bell cote +bib +bicycle-built-for-two +bikini +binder +binoculars +birdhouse +boathouse +bobsled +bolo tie +bonnet +bookcase +bookshop +bottlecap +bow +bow tie +brass +brassiere +breakwater +breastplate +broom +bucket +buckle +bulletproof vest +bullet train +butcher shop +cab +caldron +candle +cannon +canoe +can opener +cardigan +car mirror +carousel +carpenter's kit +carton +car wheel +cash machine +cassette +cassette player +castle +catamaran +CD player +cello +cellular telephone +chain +chainlink fence +chain mail +chain saw +chest +chiffonier +chime +china cabinet +Christmas stocking +church +cinema +cleaver +cliff dwelling +cloak +clog +cocktail shaker +coffee mug +coffeepot +coil +combination lock +computer keyboard +confectionery +container ship +convertible +corkscrew +cornet +cowboy boot +cowboy hat +cradle +crane +crash helmet +crate +crib +Crock Pot +croquet ball +crutch +cuirass +dam +desk +desktop computer +dial telephone +diaper +digital clock +digital watch +dining table +dishrag +dishwasher +disk brake +dock +dogsled +dome +doormat +drilling platform +drum +drumstick +dumbbell +Dutch oven +electric fan +electric guitar +electric locomotive +entertainment center +envelope +espresso maker +face powder +feather boa +file +fireboat +fire engine +fire screen +flagpole +flute +folding chair +football helmet +forklift +fountain +fountain pen +four-poster +freight car +French horn +frying pan +fur coat +garbage truck +gasmask +gas pump +goblet +go-kart +golf ball +golfcart +gondola +gong +gown +grand piano +greenhouse +grille +grocery store +guillotine +hair slide +hair spray +half track +hammer +hamper +hand blower +hand-held computer +handkerchief +hard disc +harmonica +harp +harvester +hatchet +holster +home theater +honeycomb +hook +hoopskirt +horizontal bar +horse 
cart +hourglass +iPod +iron +jack-o'-lantern +jean +jeep +jersey +jigsaw puzzle +jinrikisha +joystick +kimono +knee pad +knot +lab coat +ladle +lampshade +laptop +lawn mower +lens cap +letter opener +library +lifeboat +lighter +limousine +liner +lipstick +Loafer +lotion +loudspeaker +loupe +lumbermill +magnetic compass +mailbag +mailbox +maillot +maillot +manhole cover +maraca +marimba +mask +matchstick +maypole +maze +measuring cup +medicine chest +megalith +microphone +microwave +military uniform +milk can +minibus +miniskirt +minivan +missile +mitten +mixing bowl +mobile home +Model T +modem +monastery +monitor +moped +mortar +mortarboard +mosque +mosquito net +motor scooter +mountain bike +mountain tent +mouse +mousetrap +moving van +muzzle +nail +neck brace +necklace +nipple +notebook +obelisk +oboe +ocarina +odometer +oil filter +organ +oscilloscope +overskirt +oxcart +oxygen mask +packet +paddle +paddlewheel +padlock +paintbrush +pajama +palace +panpipe +paper towel +parachute +parallel bars +park bench +parking meter +passenger car +patio +pay-phone +pedestal +pencil box +pencil sharpener +perfume +Petri dish +photocopier +pick +pickelhaube +picket fence +pickup +pier +piggy bank +pill bottle +pillow +ping-pong ball +pinwheel +pirate +pitcher +plane +planetarium +plastic bag +plate rack +plow +plunger +Polaroid camera +pole +police van +poncho +pool table +pop bottle +pot +potter's wheel +power drill +prayer rug +printer +prison +projectile +projector +puck +punching bag +purse +quill +quilt +racer +racket +radiator +radio +radio telescope +rain barrel +recreational vehicle +reel +reflex camera +refrigerator +remote control +restaurant +revolver +rifle +rocking chair +rotisserie +rubber eraser +rugby ball +rule +running shoe +safe +safety pin +saltshaker +sandal +sarong +sax +scabbard +scale +school bus +schooner +scoreboard +screen +screw +screwdriver +seat belt +sewing machine +shield +shoe shop +shoji +shopping basket +shopping cart +shovel +shower cap 
+shower curtain +ski +ski mask +sleeping bag +slide rule +sliding door +slot +snorkel +snowmobile +snowplow +soap dispenser +soccer ball +sock +solar dish +sombrero +soup bowl +space bar +space heater +space shuttle +spatula +speedboat +spider web +spindle +sports car +spotlight +stage +steam locomotive +steel arch bridge +steel drum +stethoscope +stole +stone wall +stopwatch +stove +strainer +streetcar +stretcher +studio couch +stupa +submarine +suit +sundial +sunglass +sunglasses +sunscreen +suspension bridge +swab +sweatshirt +swimming trunks +swing +switch +syringe +table lamp +tank +tape player +teapot +teddy +television +tennis ball +thatch +theater curtain +thimble +thresher +throne +tile roof +toaster +tobacco shop +toilet seat +torch +totem pole +tow truck +toyshop +tractor +trailer truck +tray +trench coat +tricycle +trimaran +tripod +triumphal arch +trolleybus +trombone +tub +turnstile +typewriter keyboard +umbrella +unicycle +upright +vacuum +vase +vault +velvet +vending machine +vestment +viaduct +violin +volleyball +waffle iron +wall clock +wallet +wardrobe +warplane +washbasin +washer +water bottle +water jug +water tower +whiskey jug +whistle +wig +window screen +window shade +Windsor tie +wine bottle +wing +wok +wooden spoon +wool +worm fence +wreck +yawl +yurt +web site +comic book +crossword puzzle +street sign +traffic light +book jacket +menu +plate +guacamole +consomme +hot pot +trifle +ice cream +ice lolly +French loaf +bagel +pretzel +cheeseburger +hotdog +mashed potato +head cabbage +broccoli +cauliflower +zucchini +spaghetti squash +acorn squash +butternut squash +cucumber +artichoke +bell pepper +cardoon +mushroom +Granny Smith +strawberry +orange +lemon +fig +pineapple +banana +jackfruit +custard apple +pomegranate +hay +carbonara +chocolate sauce +dough +meat loaf +pizza +potpie +burrito +red wine +espresso +cup +eggnog +alp +bubble +cliff +coral reef +geyser +lakeside +promontory +sandbar +seashore +valley +volcano +ballplayer +groom 
+scuba diver +rapeseed +daisy +yellow lady's slipper +corn +acorn +hip +buckeye +coral fungus +agaric +gyromitra +stinkhorn +earthstar +hen-of-the-woods +bolete +ear +toilet tissue diff --git a/live2diff/MiDaS/mobile/android/models/src/main/assets/run_tflite.py b/live2diff/MiDaS/mobile/android/models/src/main/assets/run_tflite.py new file mode 100644 index 0000000000000000000000000000000000000000..4b8ebe235758d3d0f3d357c51ed54d78ac7eea8e --- /dev/null +++ b/live2diff/MiDaS/mobile/android/models/src/main/assets/run_tflite.py @@ -0,0 +1,75 @@ +# Flex ops are included in the nightly build of the TensorFlow Python package. You can use TFLite models containing Flex ops by the same Python API as normal TFLite models. The nightly TensorFlow build can be installed with this command: +# Flex ops will be added to the TensorFlow Python package's and the tflite_runtime package from version 2.3 for Linux and 2.4 for other environments. +# https://www.tensorflow.org/lite/guide/ops_select#running_the_model + +# You must use: tf-nightly +# pip install tf-nightly + +import os +import glob +import cv2 +import numpy as np + +import tensorflow as tf + +width=256 +height=256 +model_name="model.tflite" +#model_name="model_quant.tflite" +image_name="dog.jpg" + +# input +img = cv2.imread(image_name) +img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + +mean=[0.485, 0.456, 0.406] +std=[0.229, 0.224, 0.225] +img = (img - mean) / std + +img_resized = tf.image.resize(img, [width,height], method='bicubic', preserve_aspect_ratio=False) +#img_resized = tf.transpose(img_resized, [2, 0, 1]) +img_input = img_resized.numpy() +reshape_img = img_input.reshape(1,width,height,3) +tensor = tf.convert_to_tensor(reshape_img, dtype=tf.float32) + +# load model +print("Load model...") +interpreter = tf.lite.Interpreter(model_path=model_name) +print("Allocate tensor...") +interpreter.allocate_tensors() +print("Get input/output details...") +input_details = interpreter.get_input_details() +output_details = 
interpreter.get_output_details() +print("Get input shape...") +input_shape = input_details[0]['shape'] +print(input_shape) +print(input_details) +print(output_details) +#input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32) +print("Set input tensor...") +interpreter.set_tensor(input_details[0]['index'], tensor) + +print("invoke()...") +interpreter.invoke() + +# The function `get_tensor()` returns a copy of the tensor data. +# Use `tensor()` in order to get a pointer to the tensor. +print("get output tensor...") +output = interpreter.get_tensor(output_details[0]['index']) +#output = np.squeeze(output) +output = output.reshape(width, height) +#print(output) +prediction = np.array(output) +print("reshape prediction...") +prediction = prediction.reshape(width, height) + +# output file +#prediction = cv2.resize(prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC) +print(" Write image to: output.png") +depth_min = prediction.min() +depth_max = prediction.max() +img_out = (255 * (prediction - depth_min) / (depth_max - depth_min)).astype("uint8") +print("save output image...") +cv2.imwrite("output.png", img_out) + +print("finished") \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/android/settings.gradle b/live2diff/MiDaS/mobile/android/settings.gradle new file mode 100644 index 0000000000000000000000000000000000000000..e86d89d2483f92b7e778589011fad60fbba3a318 --- /dev/null +++ b/live2diff/MiDaS/mobile/android/settings.gradle @@ -0,0 +1,2 @@ +rootProject.name = 'TFLite Image Classification Demo App' +include ':app', ':lib_support', ':lib_task_api', ':models' \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/ios/.gitignore b/live2diff/MiDaS/mobile/ios/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..f1150e3379e4a38d31ca7bb46dc4f31d79f482c2 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/.gitignore @@ -0,0 +1,2 @@ +# ignore model file +#*.tflite diff --git 
a/live2diff/MiDaS/mobile/ios/LICENSE b/live2diff/MiDaS/mobile/ios/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..261eeb9e9f8b2b4b0d119366dda99c6fd7d35c64 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.pbxproj b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.pbxproj new file mode 100644 index 0000000000000000000000000000000000000000..4917371aa33a65fdfc66c02d914f05489c446430 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.pbxproj @@ -0,0 +1,538 @@ +// !$*UTF8*$! 
+{ + archiveVersion = 1; + classes = { + }; + objectVersion = 50; + objects = { + +/* Begin PBXBuildFile section */ + 0CDA8C85042ADF65D0787629 /* Pods_Midas.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = A1CE41C09920CCEC31985547 /* Pods_Midas.framework */; }; + 8402440123D9834600704ABD /* README.md in Resources */ = {isa = PBXBuildFile; fileRef = 8402440023D9834600704ABD /* README.md */; }; + 840ECB20238BAA2300C7D88A /* InfoCell.swift in Sources */ = {isa = PBXBuildFile; fileRef = 840ECB1F238BAA2300C7D88A /* InfoCell.swift */; }; + 840EDCFD2341DDD30017ED42 /* Launch Screen.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 840EDCFB2341DDD30017ED42 /* Launch Screen.storyboard */; }; + 840EDD022341DE380017ED42 /* Main.storyboard in Resources */ = {isa = PBXBuildFile; fileRef = 840EDD002341DE380017ED42 /* Main.storyboard */; }; + 842DDB6E2372A82000F6BB94 /* OverlayView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 842DDB6D2372A82000F6BB94 /* OverlayView.swift */; }; + 846499C2235DAB0D009CBBC7 /* ModelDataHandler.swift in Sources */ = {isa = PBXBuildFile; fileRef = 846499C1235DAB0D009CBBC7 /* ModelDataHandler.swift */; }; + 846BAF7623E7FE13006FC136 /* Constants.swift in Sources */ = {isa = PBXBuildFile; fileRef = 846BAF7523E7FE13006FC136 /* Constants.swift */; }; + 8474FEC92341D36E00377D34 /* PreviewView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8474FEC82341D36E00377D34 /* PreviewView.swift */; }; + 8474FECB2341D39800377D34 /* CameraFeedManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 8474FECA2341D39800377D34 /* CameraFeedManager.swift */; }; + 84952CB5236186BE0052C104 /* CVPixelBufferExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84952CB4236186BE0052C104 /* CVPixelBufferExtension.swift */; }; + 84952CB92361874A0052C104 /* TFLiteExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84952CB82361874A0052C104 /* TFLiteExtension.swift */; }; + 84B67CEF2326338300A11A08 /* AppDelegate.swift in 
Sources */ = {isa = PBXBuildFile; fileRef = 84B67CEE2326338300A11A08 /* AppDelegate.swift */; }; + 84B67CF12326338300A11A08 /* ViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84B67CF02326338300A11A08 /* ViewController.swift */; }; + 84B67CF62326338400A11A08 /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 84B67CF52326338400A11A08 /* Assets.xcassets */; }; + 84D6576D2387BB7E0048171E /* CGSizeExtension.swift in Sources */ = {isa = PBXBuildFile; fileRef = 84D6576C2387BB7E0048171E /* CGSizeExtension.swift */; }; + 84F232D5254C831E0011862E /* model_opt.tflite in Resources */ = {isa = PBXBuildFile; fileRef = 84F232D4254C831E0011862E /* model_opt.tflite */; }; + 84FCF5922387BD7900663812 /* tfl_logo.png in Resources */ = {isa = PBXBuildFile; fileRef = 84FCF5912387BD7900663812 /* tfl_logo.png */; }; +/* End PBXBuildFile section */ + +/* Begin PBXFileReference section */ + 8402440023D9834600704ABD /* README.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = net.daringfireball.markdown; path = README.md; sourceTree = ""; }; + 840ECB1F238BAA2300C7D88A /* InfoCell.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = InfoCell.swift; sourceTree = ""; }; + 840EDCFC2341DDD30017ED42 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = "Base.lproj/Launch Screen.storyboard"; sourceTree = ""; }; + 840EDD012341DE380017ED42 /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.storyboard; name = Base; path = Base.lproj/Main.storyboard; sourceTree = ""; }; + 842DDB6D2372A82000F6BB94 /* OverlayView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = OverlayView.swift; sourceTree = ""; }; + 846499C1235DAB0D009CBBC7 /* ModelDataHandler.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ModelDataHandler.swift; sourceTree = ""; }; + 846BAF7523E7FE13006FC136 /* Constants.swift */ = {isa = 
PBXFileReference; lastKnownFileType = sourcecode.swift; path = Constants.swift; sourceTree = ""; }; + 8474FEC82341D36E00377D34 /* PreviewView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PreviewView.swift; sourceTree = ""; }; + 8474FECA2341D39800377D34 /* CameraFeedManager.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CameraFeedManager.swift; sourceTree = ""; }; + 84884291236FF0A30043FC4C /* download_models.sh */ = {isa = PBXFileReference; lastKnownFileType = text.script.sh; path = download_models.sh; sourceTree = ""; }; + 84952CB4236186BE0052C104 /* CVPixelBufferExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CVPixelBufferExtension.swift; sourceTree = ""; }; + 84952CB82361874A0052C104 /* TFLiteExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = TFLiteExtension.swift; sourceTree = ""; }; + 84B67CEB2326338300A11A08 /* Midas.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Midas.app; sourceTree = BUILT_PRODUCTS_DIR; }; + 84B67CEE2326338300A11A08 /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; + 84B67CF02326338300A11A08 /* ViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = ViewController.swift; sourceTree = ""; }; + 84B67CF52326338400A11A08 /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 84B67CFA2326338400A11A08 /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 84D6576C2387BB7E0048171E /* CGSizeExtension.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CGSizeExtension.swift; sourceTree = ""; }; + 84F232D4254C831E0011862E /* model_opt.tflite */ = {isa 
= PBXFileReference; lastKnownFileType = file; path = model_opt.tflite; sourceTree = ""; }; + 84FCF5912387BD7900663812 /* tfl_logo.png */ = {isa = PBXFileReference; lastKnownFileType = image.png; name = tfl_logo.png; path = Assets.xcassets/tfl_logo.png; sourceTree = ""; }; + A1CE41C09920CCEC31985547 /* Pods_Midas.framework */ = {isa = PBXFileReference; explicitFileType = wrapper.framework; includeInIndex = 0; path = Pods_Midas.framework; sourceTree = BUILT_PRODUCTS_DIR; }; + D2BFF06D0AE9137D332447F3 /* Pods-Midas.release.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Midas.release.xcconfig"; path = "Target Support Files/Pods-Midas/Pods-Midas.release.xcconfig"; sourceTree = ""; }; + FCA88463911267B1001A596F /* Pods-Midas.debug.xcconfig */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = text.xcconfig; name = "Pods-Midas.debug.xcconfig"; path = "Target Support Files/Pods-Midas/Pods-Midas.debug.xcconfig"; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 84B67CE82326338300A11A08 /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + 0CDA8C85042ADF65D0787629 /* Pods_Midas.framework in Frameworks */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 840ECB1E238BAA0D00C7D88A /* Cells */ = { + isa = PBXGroup; + children = ( + 840ECB1F238BAA2300C7D88A /* InfoCell.swift */, + ); + path = Cells; + sourceTree = ""; + }; + 842DDB6C2372A80E00F6BB94 /* Views */ = { + isa = PBXGroup; + children = ( + 842DDB6D2372A82000F6BB94 /* OverlayView.swift */, + ); + path = Views; + sourceTree = ""; + }; + 846499C0235DAAE7009CBBC7 /* ModelDataHandler */ = { + isa = PBXGroup; + children = ( + 846499C1235DAB0D009CBBC7 /* ModelDataHandler.swift */, + ); + path = ModelDataHandler; + sourceTree = ""; + }; + 8474FEC62341D2BE00377D34 /* 
ViewControllers */ = { + isa = PBXGroup; + children = ( + 84B67CF02326338300A11A08 /* ViewController.swift */, + ); + path = ViewControllers; + sourceTree = ""; + }; + 8474FEC72341D35800377D34 /* Camera Feed */ = { + isa = PBXGroup; + children = ( + 8474FEC82341D36E00377D34 /* PreviewView.swift */, + 8474FECA2341D39800377D34 /* CameraFeedManager.swift */, + ); + path = "Camera Feed"; + sourceTree = ""; + }; + 84884290236FF07F0043FC4C /* RunScripts */ = { + isa = PBXGroup; + children = ( + 84884291236FF0A30043FC4C /* download_models.sh */, + ); + path = RunScripts; + sourceTree = ""; + }; + 848842A22370180C0043FC4C /* Model */ = { + isa = PBXGroup; + children = ( + 84F232D4254C831E0011862E /* model_opt.tflite */, + ); + path = Model; + sourceTree = ""; + }; + 84952CB3236186A20052C104 /* Extensions */ = { + isa = PBXGroup; + children = ( + 84952CB4236186BE0052C104 /* CVPixelBufferExtension.swift */, + 84952CB82361874A0052C104 /* TFLiteExtension.swift */, + 84D6576C2387BB7E0048171E /* CGSizeExtension.swift */, + ); + path = Extensions; + sourceTree = ""; + }; + 84B67CE22326338300A11A08 = { + isa = PBXGroup; + children = ( + 8402440023D9834600704ABD /* README.md */, + 84884290236FF07F0043FC4C /* RunScripts */, + 84B67CED2326338300A11A08 /* Midas */, + 84B67CEC2326338300A11A08 /* Products */, + B4DFDCC28443B641BC36251D /* Pods */, + A3DA804B8D3F6891E3A02852 /* Frameworks */, + ); + sourceTree = ""; + }; + 84B67CEC2326338300A11A08 /* Products */ = { + isa = PBXGroup; + children = ( + 84B67CEB2326338300A11A08 /* Midas.app */, + ); + name = Products; + sourceTree = ""; + }; + 84B67CED2326338300A11A08 /* Midas */ = { + isa = PBXGroup; + children = ( + 840ECB1E238BAA0D00C7D88A /* Cells */, + 842DDB6C2372A80E00F6BB94 /* Views */, + 848842A22370180C0043FC4C /* Model */, + 84952CB3236186A20052C104 /* Extensions */, + 846499C0235DAAE7009CBBC7 /* ModelDataHandler */, + 8474FEC72341D35800377D34 /* Camera Feed */, + 8474FEC62341D2BE00377D34 /* ViewControllers */, + 
84B67D002326339000A11A08 /* Storyboards */, + 84B67CEE2326338300A11A08 /* AppDelegate.swift */, + 846BAF7523E7FE13006FC136 /* Constants.swift */, + 84B67CF52326338400A11A08 /* Assets.xcassets */, + 84FCF5912387BD7900663812 /* tfl_logo.png */, + 84B67CFA2326338400A11A08 /* Info.plist */, + ); + path = Midas; + sourceTree = ""; + }; + 84B67D002326339000A11A08 /* Storyboards */ = { + isa = PBXGroup; + children = ( + 840EDCFB2341DDD30017ED42 /* Launch Screen.storyboard */, + 840EDD002341DE380017ED42 /* Main.storyboard */, + ); + path = Storyboards; + sourceTree = ""; + }; + A3DA804B8D3F6891E3A02852 /* Frameworks */ = { + isa = PBXGroup; + children = ( + A1CE41C09920CCEC31985547 /* Pods_Midas.framework */, + ); + name = Frameworks; + sourceTree = ""; + }; + B4DFDCC28443B641BC36251D /* Pods */ = { + isa = PBXGroup; + children = ( + FCA88463911267B1001A596F /* Pods-Midas.debug.xcconfig */, + D2BFF06D0AE9137D332447F3 /* Pods-Midas.release.xcconfig */, + ); + path = Pods; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 84B67CEA2326338300A11A08 /* Midas */ = { + isa = PBXNativeTarget; + buildConfigurationList = 84B67CFD2326338400A11A08 /* Build configuration list for PBXNativeTarget "Midas" */; + buildPhases = ( + 14067F3CF309C9DB723C9F6F /* [CP] Check Pods Manifest.lock */, + 84884298237010B90043FC4C /* Download TensorFlow Lite model */, + 84B67CE72326338300A11A08 /* Sources */, + 84B67CE82326338300A11A08 /* Frameworks */, + 84B67CE92326338300A11A08 /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = Midas; + productName = Midas; + productReference = 84B67CEB2326338300A11A08 /* Midas.app */; + productType = "com.apple.product-type.application"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 84B67CE32326338300A11A08 /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1030; + LastUpgradeCheck = 1030; + ORGANIZATIONNAME = tensorflow; + 
TargetAttributes = { + 84B67CEA2326338300A11A08 = { + CreatedOnToolsVersion = 10.3; + }; + }; + }; + buildConfigurationList = 84B67CE62326338300A11A08 /* Build configuration list for PBXProject "Midas" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + ); + mainGroup = 84B67CE22326338300A11A08; + productRefGroup = 84B67CEC2326338300A11A08 /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 84B67CEA2326338300A11A08 /* Midas */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 84B67CE92326338300A11A08 /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 8402440123D9834600704ABD /* README.md in Resources */, + 84F232D5254C831E0011862E /* model_opt.tflite in Resources */, + 840EDD022341DE380017ED42 /* Main.storyboard in Resources */, + 840EDCFD2341DDD30017ED42 /* Launch Screen.storyboard in Resources */, + 84FCF5922387BD7900663812 /* tfl_logo.png in Resources */, + 84B67CF62326338400A11A08 /* Assets.xcassets in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXShellScriptBuildPhase section */ + 14067F3CF309C9DB723C9F6F /* [CP] Check Pods Manifest.lock */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; + outputFileListPaths = ( + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-Midas-checkManifestLockResult.txt", + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/sh; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. 
Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + showEnvVarsInLog = 0; + }; + 84884298237010B90043FC4C /* Download TensorFlow Lite model */ = { + isa = PBXShellScriptBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + inputFileListPaths = ( + ); + inputPaths = ( + ); + name = "Download TensorFlow Lite model"; + outputFileListPaths = ( + ); + outputPaths = ( + ); + runOnlyForDeploymentPostprocessing = 0; + shellPath = /bin/bash; + shellScript = "\"$SRCROOT/RunScripts/download_models.sh\"\n"; + }; +/* End PBXShellScriptBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 84B67CE72326338300A11A08 /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 842DDB6E2372A82000F6BB94 /* OverlayView.swift in Sources */, + 846BAF7623E7FE13006FC136 /* Constants.swift in Sources */, + 84952CB92361874A0052C104 /* TFLiteExtension.swift in Sources */, + 84D6576D2387BB7E0048171E /* CGSizeExtension.swift in Sources */, + 84B67CF12326338300A11A08 /* ViewController.swift in Sources */, + 84B67CEF2326338300A11A08 /* AppDelegate.swift in Sources */, + 8474FECB2341D39800377D34 /* CameraFeedManager.swift in Sources */, + 846499C2235DAB0D009CBBC7 /* ModelDataHandler.swift in Sources */, + 8474FEC92341D36E00377D34 /* PreviewView.swift in Sources */, + 84952CB5236186BE0052C104 /* CVPixelBufferExtension.swift in Sources */, + 840ECB20238BAA2300C7D88A /* InfoCell.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXVariantGroup section */ + 840EDCFB2341DDD30017ED42 /* Launch Screen.storyboard */ = { + isa = PBXVariantGroup; + children = ( + 840EDCFC2341DDD30017ED42 /* Base */, + ); + name = "Launch Screen.storyboard"; + sourceTree = ""; + }; + 840EDD002341DE380017ED42 /* Main.storyboard */ = { + isa 
= PBXVariantGroup; + children = ( + 840EDD012341DE380017ED42 /* Base */, + ); + name = Main.storyboard; + sourceTree = ""; + }; +/* End PBXVariantGroup section */ + +/* Begin XCBuildConfiguration section */ + 84B67CFB2326338400A11A08 /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + 
GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 12.4; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = iphoneos; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 84B67CFC2326338400A11A08 /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + CODE_SIGN_IDENTITY = "iPhone Developer"; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + 
GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + IPHONEOS_DEPLOYMENT_TARGET = 12.4; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = iphoneos; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + VALIDATE_PRODUCT = YES; + }; + name = Release; + }; + 84B67CFE2326338400A11A08 /* Debug */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = FCA88463911267B1001A596F /* Pods-Midas.debug.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = BV6M48J3RX; + INFOPLIST_FILE = Midas/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.midas.midas-tflite-npu"; + PRODUCT_NAME = Midas; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Debug; + }; + 84B67CFF2326338400A11A08 /* Release */ = { + isa = XCBuildConfiguration; + baseConfigurationReference = D2BFF06D0AE9137D332447F3 /* Pods-Midas.release.xcconfig */; + buildSettings = { + ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; + CODE_SIGN_IDENTITY = "iPhone Developer"; + CODE_SIGN_STYLE = Automatic; + DEVELOPMENT_TEAM = BV6M48J3RX; + INFOPLIST_FILE = Midas/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/Frameworks", + ); + PRODUCT_BUNDLE_IDENTIFIER = "org.midas.midas-tflite-npu"; + PRODUCT_NAME = Midas; + PROVISIONING_PROFILE_SPECIFIER = ""; + SWIFT_VERSION = 5.0; + TARGETED_DEVICE_FAMILY = "1,2"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 84B67CE62326338300A11A08 /* Build configuration list for PBXProject "Midas" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84B67CFB2326338400A11A08 /* Debug */, + 84B67CFC2326338400A11A08 /* Release */, + ); + defaultConfigurationIsVisible = 0; + 
defaultConfigurationName = Release; + }; + 84B67CFD2326338400A11A08 /* Build configuration list for PBXNativeTarget "Midas" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 84B67CFE2326338400A11A08 /* Debug */, + 84B67CFF2326338400A11A08 /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 84B67CE32326338300A11A08 /* Project object */; +} diff --git a/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000000000000000000000000000000000000..919434a6254f0e9651f402737811be6634a03e9c --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000000000000000000000000000000000000..18d981003d68d0546c4804ac2ff47dd97c6e7921 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate new file mode 100644 index 0000000000000000000000000000000000000000..1d20756ee57b79e9f9f886453bdb7997ca2ee2d4 Binary files /dev/null and b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/project.xcworkspace/xcuserdata/admin.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git 
a/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/xcuserdata/admin.xcuserdatad/xcschemes/xcschememanagement.plist b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/xcuserdata/admin.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 0000000000000000000000000000000000000000..6093f6160eedfdfc20e96396247a7dbc9247cc55 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas.xcodeproj/xcuserdata/admin.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + PoseNet.xcscheme_^#shared#^_ + + orderHint + 3 + + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas/AppDelegate.swift b/live2diff/MiDaS/mobile/ios/Midas/AppDelegate.swift new file mode 100644 index 0000000000000000000000000000000000000000..233f0291ab4f379067543bdad3cc198a2dc3ab0f --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/AppDelegate.swift @@ -0,0 +1,41 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import UIKit + +@UIApplicationMain +class AppDelegate: UIResponder, UIApplicationDelegate { + + var window: UIWindow? + + func application(_ application: UIApplication, didFinishLaunchingWithOptions launchOptions: [UIApplication.LaunchOptionsKey: Any]?) 
-> Bool { + return true + } + + func applicationWillResignActive(_ application: UIApplication) { + } + + func applicationDidEnterBackground(_ application: UIApplication) { + } + + func applicationWillEnterForeground(_ application: UIApplication) { + } + + func applicationDidBecomeActive(_ application: UIApplication) { + } + + func applicationWillTerminate(_ application: UIApplication) { + } +} + diff --git a/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/AppIcon.appiconset/Contents.json b/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 0000000000000000000000000000000000000000..65b74d7ef11fa59fafa829e681ac90906f3ac8b2 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1 @@ +{"images":[{"size":"60x60","expected-size":"180","filename":"180.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"40x40","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"60x60","expected-size":"120","filename":"120.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"57x57","expected-size":"57","filename":"57.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"1x"},{"size":"29x29","expected-size":"87","filename":"87.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"57x57","expected-size":"114","filename":"114.png","folder":"Assets.xcassets/AppIcon.appic
onset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"2x"},{"size":"20x20","expected-size":"60","filename":"60.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"iphone","scale":"3x"},{"size":"1024x1024","filename":"1024.png","expected-size":"1024","idiom":"ios-marketing","folder":"Assets.xcassets/AppIcon.appiconset/","scale":"1x"},{"size":"40x40","expected-size":"80","filename":"80.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"72x72","expected-size":"72","filename":"72.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"76x76","expected-size":"152","filename":"152.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"50x50","expected-size":"100","filename":"100.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"29x29","expected-size":"58","filename":"58.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"76x76","expected-size":"76","filename":"76.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"29x29","expected-size":"29","filename":"29.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"50x50","expected-size":"50","filename":"50.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"72x72","expected-size":"144","filename":"144.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"40x40","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"83.5x83.5","expected-size":"167","filename":"167.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"},{"size":"20x20","expected-size":"20","filename":"20.png","folder":
"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"1x"},{"size":"20x20","expected-size":"40","filename":"40.png","folder":"Assets.xcassets/AppIcon.appiconset/","idiom":"ipad","scale":"2x"}]} \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/Contents.json b/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/Contents.json new file mode 100644 index 0000000000000000000000000000000000000000..da4a164c918651cdd1e11dca5cc62c333f097601 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "version" : 1, + "author" : "xcode" + } +} \ No newline at end of file diff --git a/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/CameraFeedManager.swift b/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/CameraFeedManager.swift new file mode 100644 index 0000000000000000000000000000000000000000..48d65b88ee220e722fbad2570c8e879a431cd0f5 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/CameraFeedManager.swift @@ -0,0 +1,316 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import AVFoundation +import UIKit +import os + +// MARK: - CameraFeedManagerDelegate Declaration +@objc protocol CameraFeedManagerDelegate: class { + /// This method delivers the pixel buffer of the current frame seen by the device's camera. 
+ @objc optional func cameraFeedManager( + _ manager: CameraFeedManager, didOutput pixelBuffer: CVPixelBuffer + ) + + /// This method initimates that a session runtime error occured. + func cameraFeedManagerDidEncounterSessionRunTimeError(_ manager: CameraFeedManager) + + /// This method initimates that the session was interrupted. + func cameraFeedManager( + _ manager: CameraFeedManager, sessionWasInterrupted canResumeManually: Bool + ) + + /// This method initimates that the session interruption has ended. + func cameraFeedManagerDidEndSessionInterruption(_ manager: CameraFeedManager) + + /// This method initimates that there was an error in video configurtion. + func presentVideoConfigurationErrorAlert(_ manager: CameraFeedManager) + + /// This method initimates that the camera permissions have been denied. + func presentCameraPermissionsDeniedAlert(_ manager: CameraFeedManager) +} + +/// This enum holds the state of the camera initialization. +// MARK: - Camera Initialization State Enum +enum CameraConfiguration { + case success + case failed + case permissionDenied +} + +/// This class manages all camera related functionalities. +// MARK: - Camera Related Functionalies Manager +class CameraFeedManager: NSObject { + // MARK: Camera Related Instance Variables + private let session: AVCaptureSession = AVCaptureSession() + + private let previewView: PreviewView + private let sessionQueue = DispatchQueue(label: "sessionQueue") + private var cameraConfiguration: CameraConfiguration = .failed + private lazy var videoDataOutput = AVCaptureVideoDataOutput() + private var isSessionRunning = false + + // MARK: CameraFeedManagerDelegate + weak var delegate: CameraFeedManagerDelegate? 
+ + // MARK: Initializer + init(previewView: PreviewView) { + self.previewView = previewView + super.init() + + // Initializes the session + session.sessionPreset = .high + self.previewView.session = session + self.previewView.previewLayer.connection?.videoOrientation = .portrait + self.previewView.previewLayer.videoGravity = .resizeAspectFill + self.attemptToConfigureSession() + } + + // MARK: Session Start and End methods + + /// This method starts an AVCaptureSession based on whether the camera configuration was successful. + func checkCameraConfigurationAndStartSession() { + sessionQueue.async { + switch self.cameraConfiguration { + case .success: + self.addObservers() + self.startSession() + case .failed: + DispatchQueue.main.async { + self.delegate?.presentVideoConfigurationErrorAlert(self) + } + case .permissionDenied: + DispatchQueue.main.async { + self.delegate?.presentCameraPermissionsDeniedAlert(self) + } + } + } + } + + /// This method stops a running an AVCaptureSession. + func stopSession() { + self.removeObservers() + sessionQueue.async { + if self.session.isRunning { + self.session.stopRunning() + self.isSessionRunning = self.session.isRunning + } + } + + } + + /// This method resumes an interrupted AVCaptureSession. + func resumeInterruptedSession(withCompletion completion: @escaping (Bool) -> Void) { + sessionQueue.async { + self.startSession() + + DispatchQueue.main.async { + completion(self.isSessionRunning) + } + } + } + + /// This method starts the AVCaptureSession + private func startSession() { + self.session.startRunning() + self.isSessionRunning = self.session.isRunning + } + + // MARK: Session Configuration Methods. + /// This method requests for camera permissions and handles the configuration of the session and stores the result of configuration. 
+ private func attemptToConfigureSession() { + switch AVCaptureDevice.authorizationStatus(for: .video) { + case .authorized: + self.cameraConfiguration = .success + case .notDetermined: + self.sessionQueue.suspend() + self.requestCameraAccess(completion: { granted in + self.sessionQueue.resume() + }) + case .denied: + self.cameraConfiguration = .permissionDenied + default: + break + } + + self.sessionQueue.async { + self.configureSession() + } + } + + /// This method requests for camera permissions. + private func requestCameraAccess(completion: @escaping (Bool) -> Void) { + AVCaptureDevice.requestAccess(for: .video) { (granted) in + if !granted { + self.cameraConfiguration = .permissionDenied + } else { + self.cameraConfiguration = .success + } + completion(granted) + } + } + + /// This method handles all the steps to configure an AVCaptureSession. + private func configureSession() { + guard cameraConfiguration == .success else { + return + } + session.beginConfiguration() + + // Tries to add an AVCaptureDeviceInput. + guard addVideoDeviceInput() == true else { + self.session.commitConfiguration() + self.cameraConfiguration = .failed + return + } + + // Tries to add an AVCaptureVideoDataOutput. + guard addVideoDataOutput() else { + self.session.commitConfiguration() + self.cameraConfiguration = .failed + return + } + + session.commitConfiguration() + self.cameraConfiguration = .success + } + + /// This method tries to an AVCaptureDeviceInput to the current AVCaptureSession. + private func addVideoDeviceInput() -> Bool { + /// Tries to get the default back camera. 
+ guard + let camera = AVCaptureDevice.default(.builtInWideAngleCamera, for: .video, position: .back) + else { + fatalError("Cannot find camera") + } + + do { + let videoDeviceInput = try AVCaptureDeviceInput(device: camera) + if session.canAddInput(videoDeviceInput) { + session.addInput(videoDeviceInput) + return true + } else { + return false + } + } catch { + fatalError("Cannot create video device input") + } + } + + /// This method tries to an AVCaptureVideoDataOutput to the current AVCaptureSession. + private func addVideoDataOutput() -> Bool { + let sampleBufferQueue = DispatchQueue(label: "sampleBufferQueue") + videoDataOutput.setSampleBufferDelegate(self, queue: sampleBufferQueue) + videoDataOutput.alwaysDiscardsLateVideoFrames = true + videoDataOutput.videoSettings = [ + String(kCVPixelBufferPixelFormatTypeKey): kCMPixelFormat_32BGRA + ] + + if session.canAddOutput(videoDataOutput) { + session.addOutput(videoDataOutput) + videoDataOutput.connection(with: .video)?.videoOrientation = .portrait + return true + } + return false + } + + // MARK: Notification Observer Handling + private func addObservers() { + NotificationCenter.default.addObserver( + self, selector: #selector(CameraFeedManager.sessionRuntimeErrorOccured(notification:)), + name: NSNotification.Name.AVCaptureSessionRuntimeError, object: session) + NotificationCenter.default.addObserver( + self, selector: #selector(CameraFeedManager.sessionWasInterrupted(notification:)), + name: NSNotification.Name.AVCaptureSessionWasInterrupted, object: session) + NotificationCenter.default.addObserver( + self, selector: #selector(CameraFeedManager.sessionInterruptionEnded), + name: NSNotification.Name.AVCaptureSessionInterruptionEnded, object: session) + } + + private func removeObservers() { + NotificationCenter.default.removeObserver( + self, name: NSNotification.Name.AVCaptureSessionRuntimeError, object: session) + NotificationCenter.default.removeObserver( + self, name: 
NSNotification.Name.AVCaptureSessionWasInterrupted, object: session) + NotificationCenter.default.removeObserver( + self, name: NSNotification.Name.AVCaptureSessionInterruptionEnded, object: session) + } + + // MARK: Notification Observers + @objc func sessionWasInterrupted(notification: Notification) { + if let userInfoValue = notification.userInfo?[AVCaptureSessionInterruptionReasonKey] + as AnyObject?, + let reasonIntegerValue = userInfoValue.integerValue, + let reason = AVCaptureSession.InterruptionReason(rawValue: reasonIntegerValue) + { + os_log("Capture session was interrupted with reason: %s", type: .error, reason.rawValue) + + var canResumeManually = false + if reason == .videoDeviceInUseByAnotherClient { + canResumeManually = true + } else if reason == .videoDeviceNotAvailableWithMultipleForegroundApps { + canResumeManually = false + } + + delegate?.cameraFeedManager(self, sessionWasInterrupted: canResumeManually) + + } + } + + @objc func sessionInterruptionEnded(notification: Notification) { + delegate?.cameraFeedManagerDidEndSessionInterruption(self) + } + + @objc func sessionRuntimeErrorOccured(notification: Notification) { + guard let error = notification.userInfo?[AVCaptureSessionErrorKey] as? AVError else { + return + } + + os_log("Capture session runtime error: %s", type: .error, error.localizedDescription) + + if error.code == .mediaServicesWereReset { + sessionQueue.async { + if self.isSessionRunning { + self.startSession() + } else { + DispatchQueue.main.async { + self.delegate?.cameraFeedManagerDidEncounterSessionRunTimeError(self) + } + } + } + } else { + delegate?.cameraFeedManagerDidEncounterSessionRunTimeError(self) + } + } +} + +/// AVCaptureVideoDataOutputSampleBufferDelegate +extension CameraFeedManager: AVCaptureVideoDataOutputSampleBufferDelegate { + /// This method delegates the CVPixelBuffer of the frame seen by the camera currently. 
+ func captureOutput( + _ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, + from connection: AVCaptureConnection + ) { + + // Converts the CMSampleBuffer to a CVPixelBuffer. + let pixelBuffer: CVPixelBuffer? = CMSampleBufferGetImageBuffer(sampleBuffer) + + guard let imagePixelBuffer = pixelBuffer else { + return + } + + // Delegates the pixel buffer to the ViewController. + delegate?.cameraFeedManager?(self, didOutput: imagePixelBuffer) + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/PreviewView.swift b/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/PreviewView.swift new file mode 100644 index 0000000000000000000000000000000000000000..308c7ec54308af5c152ff6038670b26501a8e82c --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Camera Feed/PreviewView.swift @@ -0,0 +1,39 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import UIKit +import AVFoundation + + /// The camera frame is displayed on this view. +class PreviewView: UIView { + var previewLayer: AVCaptureVideoPreviewLayer { + guard let layer = layer as? AVCaptureVideoPreviewLayer else { + fatalError("Layer expected is of type VideoPreviewLayer") + } + return layer + } + + var session: AVCaptureSession? 
{ + get { + return previewLayer.session + } + set { + previewLayer.session = newValue + } + } + + override class var layerClass: AnyClass { + return AVCaptureVideoPreviewLayer.self + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Cells/InfoCell.swift b/live2diff/MiDaS/mobile/ios/Midas/Cells/InfoCell.swift new file mode 100644 index 0000000000000000000000000000000000000000..c6be64af5678541ec09fc367b03c80155876f0ba --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Cells/InfoCell.swift @@ -0,0 +1,21 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import UIKit + +/// Table cell for inference result in bottom view. +class InfoCell: UITableViewCell { + @IBOutlet weak var fieldNameLabel: UILabel! + @IBOutlet weak var infoLabel: UILabel! +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Constants.swift b/live2diff/MiDaS/mobile/ios/Midas/Constants.swift new file mode 100644 index 0000000000000000000000000000000000000000..b0789ee58a1ea373d441f05333d8ce8914adadb7 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Constants.swift @@ -0,0 +1,25 @@ +// Copyright 2020 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +enum Constants { + // MARK: - Constants related to the image processing + static let bgraPixel = (channels: 4, alphaComponent: 3, lastBgrComponent: 2) + static let rgbPixelChannels = 3 + static let maxRGBValue: Float32 = 255.0 + + // MARK: - Constants related to the model interperter + static let defaultThreadCount = 2 + static let defaultDelegate: Delegates = .CPU +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Extensions/CGSizeExtension.swift b/live2diff/MiDaS/mobile/ios/Midas/Extensions/CGSizeExtension.swift new file mode 100644 index 0000000000000000000000000000000000000000..031550ea0081963d18b5b83712854babaf7c0a34 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Extensions/CGSizeExtension.swift @@ -0,0 +1,45 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// ============================================================================= + +import Accelerate +import Foundation + +extension CGSize { + /// Returns `CGAfineTransform` to resize `self` to fit in destination size, keeping aspect ratio + /// of `self`. `self` image is resized to be inscribe to destination size and located in center of + /// destination. + /// + /// - Parameter toFitIn: destination size to be filled. + /// - Returns: `CGAffineTransform` to transform `self` image to `dest` image. + func transformKeepAspect(toFitIn dest: CGSize) -> CGAffineTransform { + let sourceRatio = self.height / self.width + let destRatio = dest.height / dest.width + + // Calculates ratio `self` to `dest`. + var ratio: CGFloat + var x: CGFloat = 0 + var y: CGFloat = 0 + if sourceRatio > destRatio { + // Source size is taller than destination. Resized to fit in destination height, and find + // horizontal starting point to be centered. + ratio = dest.height / self.height + x = (dest.width - self.width * ratio) / 2 + } else { + ratio = dest.width / self.width + y = (dest.height - self.height * ratio) / 2 + } + return CGAffineTransform(a: ratio, b: 0, c: 0, d: ratio, tx: x, ty: y) + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Extensions/CVPixelBufferExtension.swift b/live2diff/MiDaS/mobile/ios/Midas/Extensions/CVPixelBufferExtension.swift new file mode 100644 index 0000000000000000000000000000000000000000..4899c76562a546c513736fbf4556629b08d2c929 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Extensions/CVPixelBufferExtension.swift @@ -0,0 +1,172 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +import Accelerate +import Foundation + +extension CVPixelBuffer { + var size: CGSize { + return CGSize(width: CVPixelBufferGetWidth(self), height: CVPixelBufferGetHeight(self)) + } + + /// Returns a new `CVPixelBuffer` created by taking the self area and resizing it to the + /// specified target size. Aspect ratios of source image and destination image are expected to be + /// same. + /// + /// - Parameters: + /// - from: Source area of image to be cropped and resized. + /// - to: Size to scale the image to(i.e. image size used while training the model). + /// - Returns: The cropped and resized image of itself. + func resize(from source: CGRect, to size: CGSize) -> CVPixelBuffer? { + let rect = CGRect(origin: CGPoint(x: 0, y: 0), size: self.size) + guard rect.contains(source) else { + os_log("Resizing Error: source area is out of index", type: .error) + return nil + } + guard rect.size.width / rect.size.height - source.size.width / source.size.height < 1e-5 + else { + os_log( + "Resizing Error: source image ratio and destination image ratio is different", + type: .error) + return nil + } + + let inputImageRowBytes = CVPixelBufferGetBytesPerRow(self) + let imageChannels = 4 + + CVPixelBufferLockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) + defer { CVPixelBufferUnlockBaseAddress(self, CVPixelBufferLockFlags(rawValue: 0)) } + + // Finds the address of the upper leftmost pixel of the source area. 
+ guard + let inputBaseAddress = CVPixelBufferGetBaseAddress(self)?.advanced( + by: Int(source.minY) * inputImageRowBytes + Int(source.minX) * imageChannels) + else { + return nil + } + + // Crops given area as vImage Buffer. + var croppedImage = vImage_Buffer( + data: inputBaseAddress, height: UInt(source.height), width: UInt(source.width), + rowBytes: inputImageRowBytes) + + let resultRowBytes = Int(size.width) * imageChannels + guard let resultAddress = malloc(Int(size.height) * resultRowBytes) else { + return nil + } + + // Allocates a vacant vImage buffer for resized image. + var resizedImage = vImage_Buffer( + data: resultAddress, + height: UInt(size.height), width: UInt(size.width), + rowBytes: resultRowBytes + ) + + // Performs the scale operation on cropped image and stores it in result image buffer. + guard vImageScale_ARGB8888(&croppedImage, &resizedImage, nil, vImage_Flags(0)) == kvImageNoError + else { + return nil + } + + let releaseCallBack: CVPixelBufferReleaseBytesCallback = { mutablePointer, pointer in + if let pointer = pointer { + free(UnsafeMutableRawPointer(mutating: pointer)) + } + } + + var result: CVPixelBuffer? + + // Converts the thumbnail vImage buffer to CVPixelBuffer + let conversionStatus = CVPixelBufferCreateWithBytes( + nil, + Int(size.width), Int(size.height), + CVPixelBufferGetPixelFormatType(self), + resultAddress, + resultRowBytes, + releaseCallBack, + nil, + nil, + &result + ) + + guard conversionStatus == kCVReturnSuccess else { + free(resultAddress) + return nil + } + + return result + } + + /// Returns the RGB `Data` representation of the given image buffer. + /// + /// - Parameters: + /// - isModelQuantized: Whether the model is quantized (i.e. fixed point values rather than + /// floating point values). + /// - Returns: The RGB data representation of the image buffer or `nil` if the buffer could not be + /// converted. + func rgbData( + isModelQuantized: Bool + ) -> Data? 
{ + CVPixelBufferLockBaseAddress(self, .readOnly) + defer { CVPixelBufferUnlockBaseAddress(self, .readOnly) } + guard let sourceData = CVPixelBufferGetBaseAddress(self) else { + return nil + } + + let width = CVPixelBufferGetWidth(self) + let height = CVPixelBufferGetHeight(self) + let sourceBytesPerRow = CVPixelBufferGetBytesPerRow(self) + let destinationBytesPerRow = Constants.rgbPixelChannels * width + + // Assign input image to `sourceBuffer` to convert it. + var sourceBuffer = vImage_Buffer( + data: sourceData, + height: vImagePixelCount(height), + width: vImagePixelCount(width), + rowBytes: sourceBytesPerRow) + + // Make `destinationBuffer` and `destinationData` for its data to be assigned. + guard let destinationData = malloc(height * destinationBytesPerRow) else { + os_log("Error: out of memory", type: .error) + return nil + } + defer { free(destinationData) } + var destinationBuffer = vImage_Buffer( + data: destinationData, + height: vImagePixelCount(height), + width: vImagePixelCount(width), + rowBytes: destinationBytesPerRow) + + // Convert image type. + switch CVPixelBufferGetPixelFormatType(self) { + case kCVPixelFormatType_32BGRA: + vImageConvert_BGRA8888toRGB888(&sourceBuffer, &destinationBuffer, UInt32(kvImageNoFlags)) + case kCVPixelFormatType_32ARGB: + vImageConvert_BGRA8888toRGB888(&sourceBuffer, &destinationBuffer, UInt32(kvImageNoFlags)) + default: + os_log("The type of this image is not supported.", type: .error) + return nil + } + + // Make `Data` with converted image. 
+ let imageByteData = Data( + bytes: destinationBuffer.data, count: destinationBuffer.rowBytes * height) + + if isModelQuantized { return imageByteData } + + let imageBytes = [UInt8](imageByteData) + return Data(copyingBufferOf: imageBytes.map { Float($0) / Constants.maxRGBValue }) + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Extensions/TFLiteExtension.swift b/live2diff/MiDaS/mobile/ios/Midas/Extensions/TFLiteExtension.swift new file mode 100644 index 0000000000000000000000000000000000000000..63f7ced786e2b550391c77af534d1d3c431522c6 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Extensions/TFLiteExtension.swift @@ -0,0 +1,75 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// ============================================================================= + +import Accelerate +import CoreImage +import Foundation +import TensorFlowLite + +// MARK: - Data +extension Data { + /// Creates a new buffer by copying the buffer pointer of the given array. + /// + /// - Warning: The given array's element type `T` must be trivial in that it can be copied bit + /// for bit with no indirection or reference-counting operations; otherwise, reinterpreting + /// data from the resulting buffer has undefined behavior. + /// - Parameter array: An array with elements of type `T`. 
+ init(copyingBufferOf array: [T]) { + self = array.withUnsafeBufferPointer(Data.init) + } + + /// Convert a Data instance to Array representation. + func toArray(type: T.Type) -> [T] where T: AdditiveArithmetic { + var array = [T](repeating: T.zero, count: self.count / MemoryLayout.stride) + _ = array.withUnsafeMutableBytes { self.copyBytes(to: $0) } + return array + } +} + +// MARK: - Wrappers +/// Struct for handling multidimension `Data` in flat `Array`. +struct FlatArray { + private var array: [Element] + var dimensions: [Int] + + init(tensor: Tensor) { + dimensions = tensor.shape.dimensions + array = tensor.data.toArray(type: Element.self) + } + + private func flatIndex(_ index: [Int]) -> Int { + guard index.count == dimensions.count else { + fatalError("Invalid index: got \(index.count) index(es) for \(dimensions.count) index(es).") + } + + var result = 0 + for i in 0.. index[i] else { + fatalError("Invalid index: \(index[i]) is bigger than \(dimensions[i])") + } + result = dimensions[i] * result + index[i] + } + return result + } + + subscript(_ index: Int...) -> Element { + get { + return array[flatIndex(index)] + } + set(newValue) { + array[flatIndex(index)] = newValue + } + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Info.plist b/live2diff/MiDaS/mobile/ios/Midas/Info.plist new file mode 100644 index 0000000000000000000000000000000000000000..4330d9b33f31010549802febc6f6f2bc9fd9b950 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Info.plist @@ -0,0 +1,42 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + LSRequiresIPhoneOS + + NSCameraUsageDescription + This app will use camera to continuously estimate the depth map. 
+ UILaunchStoryboardName + LaunchScreen + UIMainStoryboardFile + Main + UIRequiredDeviceCapabilities + + armv7 + + UISupportedInterfaceOrientations + + UIInterfaceOrientationPortrait + + UISupportedInterfaceOrientations~ipad + + UIInterfaceOrientationPortrait + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas/ModelDataHandler/ModelDataHandler.swift b/live2diff/MiDaS/mobile/ios/Midas/ModelDataHandler/ModelDataHandler.swift new file mode 100644 index 0000000000000000000000000000000000000000..144cfe1fa3a65af5adcb572237f2bf9718e570ae --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/ModelDataHandler/ModelDataHandler.swift @@ -0,0 +1,464 @@ +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import Accelerate +import CoreImage +import Foundation +import TensorFlowLite +import UIKit + +/// This class handles all data preprocessing and makes calls to run inference on a given frame +/// by invoking the `Interpreter`. It then formats the inferences obtained. +class ModelDataHandler { + // MARK: - Private Properties + + /// TensorFlow Lite `Interpreter` object for performing inference on a given model. + private var interpreter: Interpreter + + /// TensorFlow lite `Tensor` of model input and output. 
+ private var inputTensor: Tensor + + //private var heatsTensor: Tensor + //private var offsetsTensor: Tensor + private var outputTensor: Tensor + // MARK: - Initialization + + /// A failable initializer for `ModelDataHandler`. A new instance is created if the model is + /// successfully loaded from the app's main bundle. Default `threadCount` is 2. + init( + threadCount: Int = Constants.defaultThreadCount, + delegate: Delegates = Constants.defaultDelegate + ) throws { + // Construct the path to the model file. + guard + let modelPath = Bundle.main.path( + forResource: Model.file.name, + ofType: Model.file.extension + ) + else { + fatalError("Failed to load the model file with name: \(Model.file.name).") + } + + // Specify the options for the `Interpreter`. + var options = Interpreter.Options() + options.threadCount = threadCount + + // Specify the delegates for the `Interpreter`. + var delegates: [Delegate]? + switch delegate { + case .Metal: + delegates = [MetalDelegate()] + case .CoreML: + if let coreMLDelegate = CoreMLDelegate() { + delegates = [coreMLDelegate] + } else { + delegates = nil + } + default: + delegates = nil + } + + // Create the `Interpreter`. + interpreter = try Interpreter(modelPath: modelPath, options: options, delegates: delegates) + + // Initialize input and output `Tensor`s. + // Allocate memory for the model's input `Tensor`s. + try interpreter.allocateTensors() + + // Get allocated input and output `Tensor`s. + inputTensor = try interpreter.input(at: 0) + outputTensor = try interpreter.output(at: 0) + //heatsTensor = try interpreter.output(at: 0) + //offsetsTensor = try interpreter.output(at: 1) + + /* + // Check if input and output `Tensor`s are in the expected formats. 
+    guard (inputTensor.dataType == .uInt8) == Model.isQuantized else {
+      fatalError("Unexpected Model: quantization is \(!Model.isQuantized)")
+    }
+
+    guard inputTensor.shape.dimensions[0] == Model.input.batchSize,
+      inputTensor.shape.dimensions[1] == Model.input.height,
+      inputTensor.shape.dimensions[2] == Model.input.width,
+      inputTensor.shape.dimensions[3] == Model.input.channelSize
+    else {
+      fatalError("Unexpected Model: input shape")
+    }
+
+
+    guard heatsTensor.shape.dimensions[0] == Model.output.batchSize,
+      heatsTensor.shape.dimensions[1] == Model.output.height,
+      heatsTensor.shape.dimensions[2] == Model.output.width,
+      heatsTensor.shape.dimensions[3] == Model.output.keypointSize
+    else {
+      fatalError("Unexpected Model: heat tensor")
+    }
+
+    guard offsetsTensor.shape.dimensions[0] == Model.output.batchSize,
+      offsetsTensor.shape.dimensions[1] == Model.output.height,
+      offsetsTensor.shape.dimensions[2] == Model.output.width,
+      offsetsTensor.shape.dimensions[3] == Model.output.offsetSize
+    else {
+      fatalError("Unexpected Model: offset tensor")
+    }
+     */
+
+  }
+
+  /// Runs the Midas model on a region of the given image.
+  ///
+  /// - Parameters:
+  ///   - on: Input image to run the model.
+  ///   - from: Range of input image to run the model.
+  ///   - to: Size of view to render the result. Currently unused: the raw output values are
+  ///     returned without being scaled to the view.
+  /// - Returns: The output tensor converted to `Float` values, the width and height declared in
+  ///   `Model.output`, and the time consumed by every step in milliseconds. `nil` if
+  ///   preprocessing fails, the output tensor type is unsupported, or the output holds fewer
+  ///   than `width * height` values.
+  func runMidas(on pixelbuffer: CVPixelBuffer, from source: CGRect, to dest: CGSize)
+    -> ([Float], Int, Int, Times)?
+  {
+    // All processing times below are in milliseconds.
+    let preprocessingStartTime = Date()
+    guard let data = preprocess(of: pixelbuffer, from: source) else {
+      os_log("Preprocessing failed", type: .error)
+      return nil
+    }
+    let preprocessingTime = Date().timeIntervalSince(preprocessingStartTime) * 1000
+
+    let inferenceStartTime = Date()
+    // NOTE(review): `inference(from:)` only logs interpreter errors, so after a failed run
+    // `outputTensor` still holds whatever was stored in it before.
+    inference(from: data)
+    let inferenceTime = Date().timeIntervalSince(inferenceStartTime) * 1000
+
+    // Converting the output tensor is the only postprocessing left, so that is what is timed.
+    let postprocessingStartTime = Date()
+    let results: [Float]
+    switch outputTensor.dataType {
+    case .uInt8:
+      guard let quantization = outputTensor.quantizationParameters else {
+        print("No results returned because the quantization values for the output tensor are nil.")
+        return nil
+      }
+      let quantizedResults = [UInt8](outputTensor.data)
+      // Dequantize: real = scale * (quantized - zeroPoint).
+      results = quantizedResults.map {
+        quantization.scale * Float(Int($0) - quantization.zeroPoint)
+      }
+    case .float32:
+      results = [Float32](unsafeData: outputTensor.data) ?? []
+    default:
+      print("Output tensor data type \(outputTensor.dataType) is unsupported for this example app.")
+      return nil
+    }
+
+    // The values describe the output tensor, so report the output dimensions and refuse to
+    // return a buffer that is too short to be indexed as a `width * height` map.
+    let width = Model.output.width
+    let height = Model.output.height
+    guard results.count >= width * height else {
+      os_log("Output tensor holds fewer values than the declared output size.", type: .error)
+      return nil
+    }
+    let postprocessingTime = Date().timeIntervalSince(postprocessingStartTime) * 1000
+
+    let times = Times(
+      preprocessing: preprocessingTime,
+      inference: inferenceTime,
+      postprocessing: postprocessingTime)
+
+    return (results, width, height, times)
+  }
+
+  // MARK: - Private functions to run model
+  /// Preprocesses given rectangle image to be `Data` of desired size by cropping and resizing it.
+  ///
+  /// - Parameters:
+  ///   - of: Input image to crop and resize.
+  ///   - from: Target area to be cropped and resized.
+  /// - Returns: The cropped and resized image. `nil` if it can not be processed.
+  private func preprocess(of pixelBuffer: CVPixelBuffer, from targetSquare: CGRect) -> Data? {
+    let sourcePixelFormat = CVPixelBufferGetPixelFormatType(pixelBuffer)
+    assert(sourcePixelFormat == kCVPixelFormatType_32BGRA)
+
+    // Resize `targetSquare` of input image to `modelSize`.
+    let modelSize = CGSize(width: Model.input.width, height: Model.input.height)
+    guard let thumbnail = pixelBuffer.resize(from: targetSquare, to: modelSize)
+    else {
+      return nil
+    }
+
+    // Remove the alpha component from the image buffer to get the initialized `Data`.
+    guard
+      let inputData = thumbnail.rgbData(
+        isModelQuantized: Model.isQuantized
+      )
+    else {
+      os_log("Failed to convert the image buffer to RGB data.", type: .error)
+      return nil
+    }
+
+    return inputData
+  }
+
+
+
+  /*
+  /// Postprocesses output `Tensor`s to `Result` with size of view to render the result.
+  ///
+  /// - Parameters:
+  ///   - to: Size of view to be displayed.
+  /// - Returns: Postprocessed `Result`. `nil` if it can not be processed.
+  private func postprocess(to viewSize: CGSize) -> Result? {
+    // MARK: Formats output tensors
+    // Convert `Tensor` to `FlatArray`. As Midas is not quantized, convert them to Float type
+    // `FlatArray`.
+    let heats = FlatArray(tensor: heatsTensor)
+    let offsets = FlatArray(tensor: offsetsTensor)
+
+    // MARK: Find position of each key point
+    // Finds the (row, col) locations of where the keypoints are most likely to be. The highest
+    // `heats[0, row, col, keypoint]` value, the more likely `keypoint` being located in (`row`,
+    // `col`).
+    let keypointPositions = (0..<Model.output.keypointSize).map { (keypoint) -> (Int, Int) in
+      var maxValue = heats[0, 0, 0, keypoint]
+      var maxRow = 0
+      var maxCol = 0
+      for row in 0..<Model.output.height {
+        for col in 0..<Model.output.width {
+          if heats[0, row, col, keypoint] > maxValue {
+            maxValue = heats[0, row, col, keypoint]
+            maxRow = row
+            maxCol = col
+          }
+        }
+      }
+      return (maxRow, maxCol)
+    }
+
+    // MARK: Calculates total confidence score
+    // Calculates total confidence score of each key position.
+ let totalScoreSum = keypointPositions.enumerated().reduce(0.0) { accumulator, elem -> Float32 in + accumulator + sigmoid(heats[0, elem.element.0, elem.element.1, elem.offset]) + } + let totalScore = totalScoreSum / Float32(Model.output.keypointSize) + + // MARK: Calculate key point position on model input + // Calculates `KeyPoint` coordination model input image with `offsets` adjustment. + let coords = keypointPositions.enumerated().map { index, elem -> (y: Float32, x: Float32) in + let (y, x) = elem + let yCoord = + Float32(y) / Float32(Model.output.height - 1) * Float32(Model.input.height) + + offsets[0, y, x, index] + let xCoord = + Float32(x) / Float32(Model.output.width - 1) * Float32(Model.input.width) + + offsets[0, y, x, index + Model.output.keypointSize] + return (y: yCoord, x: xCoord) + } + + // MARK: Transform key point position and make lines + // Make `Result` from `keypointPosition'. Each point is adjusted to `ViewSize` to be drawn. + var result = Result(dots: [], lines: [], score: totalScore) + var bodyPartToDotMap = [BodyPart: CGPoint]() + for (index, part) in BodyPart.allCases.enumerated() { + let position = CGPoint( + x: CGFloat(coords[index].x) * viewSize.width / CGFloat(Model.input.width), + y: CGFloat(coords[index].y) * viewSize.height / CGFloat(Model.input.height) + ) + bodyPartToDotMap[part] = position + result.dots.append(position) + } + + do { + try result.lines = BodyPart.lines.map { map throws -> Line in + guard let from = bodyPartToDotMap[map.from] else { + throw PostprocessError.missingBodyPart(of: map.from) + } + guard let to = bodyPartToDotMap[map.to] else { + throw PostprocessError.missingBodyPart(of: map.to) + } + return Line(from: from, to: to) + } + } catch PostprocessError.missingBodyPart(let missingPart) { + os_log("Postprocessing error: %s is missing.", type: .error, missingPart.rawValue) + return nil + } catch { + os_log("Postprocessing error: %s", type: .error, error.localizedDescription) + return nil + } + + return result 
+  }
+*/
+
+
+
+  /// Run inference with given `Data`
+  ///
+  /// Copies `data` into input tensor 0, invokes the interpreter and stores output tensor 0 in
+  /// `outputTensor`. Any thrown error is logged and swallowed; in that case `outputTensor` is
+  /// not reassigned.
+  ///
+  /// Parameter `from`: `Data` of input image to run model.
+  private func inference(from data: Data) {
+    // Copy the initialized `Data` to the input `Tensor`.
+    do {
+      try interpreter.copy(data, toInputAt: 0)
+
+      // Run inference by invoking the `Interpreter`.
+      try interpreter.invoke()
+
+      // Get the output `Tensor` to process the inference results.
+      outputTensor = try interpreter.output(at: 0)
+      //heatsTensor = try interpreter.output(at: 0)
+      //offsetsTensor = try interpreter.output(at: 1)
+
+
+    } catch let error {
+      os_log(
+        "Failed to invoke the interpreter with error: %s", type: .error,
+        error.localizedDescription)
+      return
+    }
+  }
+
+  /// Logistic function `1 / (1 + e^-x)`. Returns value within [0,1].
+  private func sigmoid(_ x: Float32) -> Float32 {
+    return (1.0 / (1.0 + exp(-x)))
+  }
+}
+
+// MARK: - Data types for inference result
+/// A body part together with its position and a score.
+struct KeyPoint {
+  var bodyPart: BodyPart = BodyPart.NOSE
+  var position: CGPoint = CGPoint()
+  var score: Float = 0.0
+}
+
+/// A line segment between two points.
+struct Line {
+  let from: CGPoint
+  let to: CGPoint
+}
+
+/// Time consumed by each processing step; `runMidas` fills these in milliseconds.
+struct Times {
+  var preprocessing: Double
+  var inference: Double
+  var postprocessing: Double
+}
+
+/// Dots, connecting lines and an overall score produced by the commented-out `postprocess`.
+struct Result {
+  var dots: [CGPoint]
+  var lines: [Line]
+  var score: Float
+}
+
+/// Body parts; the raw value is a human-readable name.
+enum BodyPart: String, CaseIterable {
+  case NOSE = "nose"
+  case LEFT_EYE = "left eye"
+  case RIGHT_EYE = "right eye"
+  case LEFT_EAR = "left ear"
+  case RIGHT_EAR = "right ear"
+  case LEFT_SHOULDER = "left shoulder"
+  case RIGHT_SHOULDER = "right shoulder"
+  case LEFT_ELBOW = "left elbow"
+  case RIGHT_ELBOW = "right elbow"
+  case LEFT_WRIST = "left wrist"
+  case RIGHT_WRIST = "right wrist"
+  case LEFT_HIP = "left hip"
+  case RIGHT_HIP = "right hip"
+  case LEFT_KNEE = "left knee"
+  case RIGHT_KNEE = "right knee"
+  case LEFT_ANKLE = "left ankle"
+  case RIGHT_ANKLE = "right ankle"
+
+  /// List of lines connecting each part.
+  static let lines = [
+    (from: BodyPart.LEFT_WRIST, to: BodyPart.LEFT_ELBOW),
+    (from: BodyPart.LEFT_ELBOW, to: BodyPart.LEFT_SHOULDER),
+    (from: BodyPart.LEFT_SHOULDER, to: BodyPart.RIGHT_SHOULDER),
+    (from: BodyPart.RIGHT_SHOULDER, to: BodyPart.RIGHT_ELBOW),
+    (from: BodyPart.RIGHT_ELBOW, to: BodyPart.RIGHT_WRIST),
+    (from: BodyPart.LEFT_SHOULDER, to: BodyPart.LEFT_HIP),
+    (from: BodyPart.LEFT_HIP, to: BodyPart.RIGHT_HIP),
+    (from: BodyPart.RIGHT_HIP, to: BodyPart.RIGHT_SHOULDER),
+    (from: BodyPart.LEFT_HIP, to: BodyPart.LEFT_KNEE),
+    (from: BodyPart.LEFT_KNEE, to: BodyPart.LEFT_ANKLE),
+    (from: BodyPart.RIGHT_HIP, to: BodyPart.RIGHT_KNEE),
+    (from: BodyPart.RIGHT_KNEE, to: BodyPart.RIGHT_ANKLE),
+  ]
+}
+
+// MARK: - Delegates Enum
+/// Selectable delegates; the raw value doubles as the segment index in the UI and
+/// `description` is the label shown for it.
+enum Delegates: Int, CaseIterable {
+  case CPU
+  case Metal
+  case CoreML
+
+  var description: String {
+    switch self {
+    case .CPU:
+      return "CPU"
+    case .Metal:
+      return "GPU"
+    case .CoreML:
+      return "NPU"
+    }
+  }
+}
+
+// MARK: - Custom Errors
+/// Error thrown by the commented-out `postprocess` when a body part has no position.
+enum PostprocessError: Error {
+  case missingBodyPart(of: BodyPart)
+}
+
+// MARK: - Information about the model file.
+typealias FileInfo = (name: String, extension: String)
+
+/// Static description of the bundled model: file name and declared tensor dimensions.
+enum Model {
+  static let file: FileInfo = (
+    name: "model_opt", extension: "tflite"
+  )
+
+  // Declared tensor dimensions: 256x256 with 3 input channels and 1 output channel.
+  static let input = (batchSize: 1, height: 256, width: 256, channelSize: 3)
+  static let output = (batchSize: 1, height: 256, width: 256, channelSize: 1)
+  static let isQuantized = false
+}
+
+
+extension Array {
+  /// Creates a new array from the bytes of the given unsafe data.
+  ///
+  /// - Warning: The array's `Element` type must be trivial in that it can be copied bit for bit
+  ///     with no indirection or reference-counting operations; otherwise, copying the raw bytes in
+  ///     the `unsafeData`'s buffer to a new array returns an unsafe copy.
+  /// - Note: Returns `nil` if `unsafeData.count` is not a multiple of
+  ///     `MemoryLayout<Element>.stride`.
+  /// - Parameter unsafeData: The data containing the bytes to turn into an array.
+ init?(unsafeData: Data) { + guard unsafeData.count % MemoryLayout.stride == 0 else { return nil } + #if swift(>=5.0) + self = unsafeData.withUnsafeBytes { .init($0.bindMemory(to: Element.self)) } + #else + self = unsafeData.withUnsafeBytes { + .init(UnsafeBufferPointer( + start: $0, + count: unsafeData.count / MemoryLayout.stride + )) + } + #endif // swift(>=5.0) + } +} diff --git a/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Launch Screen.storyboard b/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Launch Screen.storyboard new file mode 100644 index 0000000000000000000000000000000000000000..a04c79f554777863bd0dc8287bfd60704ce28bf2 --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Launch Screen.storyboard @@ -0,0 +1,48 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Main.storyboard b/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Main.storyboard new file mode 100644 index 0000000000000000000000000000000000000000..5f5623794bd35b9bb75efd7b7e249fd7357fdfbd --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/Storyboards/Base.lproj/Main.storyboard @@ -0,0 +1,236 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/live2diff/MiDaS/mobile/ios/Midas/ViewControllers/ViewController.swift b/live2diff/MiDaS/mobile/ios/Midas/ViewControllers/ViewController.swift new file mode 100644 index 0000000000000000000000000000000000000000..fbb51b5a303412c0bbd158d76d025cf88fee6f8f --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/Midas/ViewControllers/ViewController.swift @@ -0,0 +1,489 @@ 
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import AVFoundation
+import UIKit
+import os
+
+
+/// One 32-bit pixel. The field order (alpha, red, green, blue) matches the alpha-first bitmap
+/// layout used by `UIImage.init(pixels:width:height:)`.
+public struct PixelData {
+  var a: UInt8
+  var r: UInt8
+  var g: UInt8
+  var b: UInt8
+}
+
+extension UIImage {
+  /// Creates an image from a row-major array of pixels.
+  ///
+  /// - Parameters:
+  ///   - pixels: Pixel values; must contain exactly `width * height` elements.
+  ///   - width: Image width in pixels, greater than zero.
+  ///   - height: Image height in pixels, greater than zero.
+  /// - Returns: `nil` if the sizes are inconsistent or the bitmap cannot be created.
+  convenience init?(pixels: [PixelData], width: Int, height: Int) {
+    guard width > 0 && height > 0, pixels.count == width * height else { return nil }
+    // `MemoryLayout` needs its generic argument; each `PixelData` is four `UInt8` components.
+    let bytesPerPixel = MemoryLayout<PixelData>.size
+    var data = pixels
+    guard
+      let providerRef = CGDataProvider(
+        data: Data(bytes: &data, count: data.count * bytesPerPixel) as CFData)
+    else { return nil }
+    guard
+      let cgim = CGImage(
+        width: width,
+        height: height,
+        bitsPerComponent: 8,
+        bitsPerPixel: 32,
+        bytesPerRow: width * bytesPerPixel,
+        space: CGColorSpaceCreateDeviceRGB(),
+        bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.premultipliedFirst.rawValue),
+        provider: providerRef,
+        decode: nil,
+        shouldInterpolate: false,
+        intent: .defaultIntent)
+    else { return nil }
+    self.init(cgImage: cgim)
+  }
+}
+
+
+class ViewController: UIViewController {
+  // MARK: Storyboards Connections
+  @IBOutlet weak var previewView: PreviewView!
+
+  //@IBOutlet weak var overlayView: OverlayView!
+  @IBOutlet weak var overlayView: UIImageView!
+
+  // Image view that shows the rendered model output; added to `overlayView` on first use.
+  private var imageView : UIImageView = UIImageView(frame:CGRect(x:0, y:0, width:400, height:400))
+
+  private var imageViewInitialized: Bool = false
+
+  @IBOutlet weak var resumeButton: UIButton!
+  @IBOutlet weak var cameraUnavailableLabel: UILabel!
+
+  @IBOutlet weak var tableView: UITableView!
+
+  @IBOutlet weak var threadCountLabel: UILabel!
+  @IBOutlet weak var threadCountStepper: UIStepper!
+
+  @IBOutlet weak var delegatesControl: UISegmentedControl!
+
+  // MARK: ModelDataHandler traits
+  // Settings used when `ModelDataHandler` is re-created from the UI controls.
+  var threadCount: Int = Constants.defaultThreadCount
+  var delegate: Delegates = Constants.defaultDelegate
+
+  // MARK: Result Variables
+  // Inferenced data to render.
+  private var inferencedData: InferencedData?
+
+  // Minimum score to render the result.
+  private let minimumScore: Float = 0.5
+
+  // Smoothed inference time; updated in `runModel(on:)` after every frame.
+  private var avg_latency: Double = 0.0
+
+  // Relative location of `overlayView` to `previewView`.
+  private var overlayViewFrame: CGRect?
+
+  private var previewViewFrame: CGRect?
+
+  // MARK: Controllers that manage functionality
+  // Handles all the camera related functionality
+  private lazy var cameraCapture = CameraFeedManager(previewView: previewView)
+
+  // Handles all data preprocessing and makes calls to run inference.
+  private var modelDataHandler: ModelDataHandler?
+
+  // MARK: View Handling Methods
+  /// Creates the model handler, wires up the delegates and initializes the controls.
+  override func viewDidLoad() {
+    super.viewDidLoad()
+
+    // The app cannot do anything useful without a model, so a load failure is fatal.
+    do {
+      modelDataHandler = try ModelDataHandler()
+    } catch let error {
+      fatalError(error.localizedDescription)
+    }
+
+    cameraCapture.delegate = self
+    tableView.delegate = self
+    tableView.dataSource = self
+
+    // MARK: UI Initialization
+    // Setup thread count stepper with white color.
+    // https://forums.developer.apple.com/thread/121495
+    threadCountStepper.setDecrementImage(
+      threadCountStepper.decrementImage(for: .normal), for: .normal)
+    threadCountStepper.setIncrementImage(
+      threadCountStepper.incrementImage(for: .normal), for: .normal)
+    // Setup initial stepper value and its label.
+    threadCountStepper.value = Double(Constants.defaultThreadCount)
+    threadCountLabel.text = Constants.defaultThreadCount.description
+
+    // Setup segmented controller's color.
+    delegatesControl.setTitleTextAttributes(
+      [NSAttributedString.Key.foregroundColor: UIColor.lightGray],
+      for: .normal)
+    delegatesControl.setTitleTextAttributes(
+      [NSAttributedString.Key.foregroundColor: UIColor.black],
+      for: .selected)
+    // Remove existing segments to initialize it with `Delegates` entries.
+    delegatesControl.removeAllSegments()
+    Delegates.allCases.forEach { delegate in
+      delegatesControl.insertSegment(
+        withTitle: delegate.description,
+        at: delegate.rawValue,
+        animated: false)
+    }
+    delegatesControl.selectedSegmentIndex = 0
+  }
+
+  /// Starts the camera session whenever the view is about to be shown.
+  override func viewWillAppear(_ animated: Bool) {
+    super.viewWillAppear(animated)
+
+    cameraCapture.checkCameraConfigurationAndStartSession()
+  }
+
+  /// Stops the camera session when the view goes away.
+  override func viewWillDisappear(_ animated: Bool) {
+    // UIKit expects overrides of the appearance callbacks to call `super`.
+    super.viewWillDisappear(animated)
+
+    cameraCapture.stopSession()
+  }
+
+  /// Caches the current frames so that `runModel(on:)` can read them.
+  override func viewDidLayoutSubviews() {
+    super.viewDidLayoutSubviews()
+
+    overlayViewFrame = overlayView.frame
+    previewViewFrame = previewView.frame
+  }
+
+  // MARK: Button Actions
+  /// Re-creates the model handler with the thread count selected on the stepper.
+  @IBAction func didChangeThreadCount(_ sender: UIStepper) {
+    let changedCount = Int(sender.value)
+    // Nothing to do when the stepper reports the value that is already shown.
+    if threadCountLabel.text == changedCount.description {
+      return
+    }
+
+    do {
+      modelDataHandler = try ModelDataHandler(threadCount: changedCount, delegate: delegate)
+    } catch let error {
+      fatalError(error.localizedDescription)
+    }
+    threadCount = changedCount
+    threadCountLabel.text = changedCount.description
+    os_log("Thread count is changed to: %d", threadCount)
+  }
+
+  /// Re-creates the model handler with the delegate selected on the segmented control.
+  @IBAction func didChangeDelegate(_ sender: UISegmentedControl) {
+    guard let changedDelegate = Delegates(rawValue: delegatesControl.selectedSegmentIndex) else {
+      fatalError("Unexpected value from delegates segmented controller.")
+    }
+    do {
+      modelDataHandler = try ModelDataHandler(threadCount: threadCount, delegate: changedDelegate)
+    } catch let error {
+      fatalError(error.localizedDescription)
+    }
+    delegate = changedDelegate
+    os_log("Delegate is changed to: %s", delegate.description)
+  }
+
+  /// Tries to resume an interrupted camera session and updates the UI with the outcome.
+  @IBAction func didTapResumeButton(_ sender: Any) {
+    cameraCapture.resumeInterruptedSession { complete in
+
+      if complete {
+        self.resumeButton.isHidden = true
+        self.cameraUnavailableLabel.isHidden = true
+      } else {
+        self.presentUnableToResumeSessionAlert()
+      }
+    }
+  }
+
+  /// Shows an alert telling the user that the session could not be resumed.
+  func presentUnableToResumeSessionAlert() {
+    let alert = UIAlertController(
+      title: "Unable to Resume Session",
+      message: "There was an error while attempting to resume session.",
+      preferredStyle: .alert
+    )
+    alert.addAction(UIAlertAction(title: "OK", style: .default, handler: nil))
+
+    self.present(alert, animated: true)
+  }
+}
+
+// MARK: - CameraFeedManagerDelegate Methods
+extension ViewController: CameraFeedManagerDelegate {
+  /// Forwards every camera frame to the model.
+  func cameraFeedManager(_ manager: CameraFeedManager, didOutput pixelBuffer: CVPixelBuffer) {
+    runModel(on: pixelBuffer)
+  }
+
+  // MARK: Session Handling Alerts
+  func cameraFeedManagerDidEncounterSessionRunTimeError(_ manager: CameraFeedManager) {
+    // Handles session run time error by updating the UI and providing a button if session can be
+    // manually resumed.
+    self.resumeButton.isHidden = false
+  }
+
+  func cameraFeedManager(
+    _ manager: CameraFeedManager, sessionWasInterrupted canResumeManually: Bool
+  ) {
+    // Updates the UI when session is interrupted.
+    if canResumeManually {
+      self.resumeButton.isHidden = false
+    } else {
+      self.cameraUnavailableLabel.isHidden = false
+    }
+  }
+
+  func cameraFeedManagerDidEndSessionInterruption(_ manager: CameraFeedManager) {
+    // Updates UI once session interruption has ended.
+    self.cameraUnavailableLabel.isHidden = true
+    self.resumeButton.isHidden = true
+  }
+
+  /// Shows an alert telling the user that the camera could not be configured.
+  func presentVideoConfigurationErrorAlert(_ manager: CameraFeedManager) {
+    let alertController = UIAlertController(
+      title: "Configuration Failed", message: "Configuration of camera has failed.",
+      preferredStyle: .alert)
+    let okAction = UIAlertAction(title: "OK", style: .cancel, handler: nil)
+    alertController.addAction(okAction)
+
+    present(alertController, animated: true, completion: nil)
+  }
+
+  /// Shows an alert with a shortcut to the Settings app when camera access was denied.
+  func presentCameraPermissionsDeniedAlert(_ manager: CameraFeedManager) {
+    let alertController = UIAlertController(
+      title: "Camera Permissions Denied",
+      message:
+        "Camera permissions have been denied for this app. You can change this by going to Settings",
+      preferredStyle: .alert)
+
+    let cancelAction = UIAlertAction(title: "Cancel", style: .cancel, handler: nil)
+    let settingsAction = UIAlertAction(title: "Settings", style: .default) { action in
+      if let url = URL.init(string: UIApplication.openSettingsURLString) {
+        UIApplication.shared.open(url, options: [:], completionHandler: nil)
+      }
+    }
+
+    alertController.addAction(cancelAction)
+    alertController.addAction(settingsAction)
+
+    present(alertController, animated: true, completion: nil)
+  }
+
+  /// Runs the model on one camera frame and shows the output as a grayscale image.
+  ///
+  /// The output values are rescaled linearly so that the smallest value maps to 0 and the
+  /// largest to 255. The rescaling is done before hopping to the main queue, which only
+  /// receives the finished pixel array.
+  ///
+  /// - Parameter pixelBuffer: Camera frame to run the model on.
+  @objc func runModel(on pixelBuffer: CVPixelBuffer) {
+    guard let overlayViewFrame = overlayViewFrame, let previewViewFrame = previewViewFrame
+    else {
+      return
+    }
+    // To put `overlayView` area as model input, transform `overlayViewFrame` following transform
+    // from `previewView` to `pixelBuffer`. `previewView` area is transformed to fit in
+    // `pixelBuffer`, because `pixelBuffer` as a camera output is resized to fill `previewView`.
+    // https://developer.apple.com/documentation/avfoundation/avlayervideogravity/1385607-resizeaspectfill
+    let modelInputRange = overlayViewFrame.applying(
+      previewViewFrame.size.transformKeepAspect(toFitIn: pixelBuffer.size))
+
+    // Run Midas model.
+    guard
+      let (result, width, height, times) = self.modelDataHandler?.runMidas(
+        on: pixelBuffer,
+        from: modelInputRange,
+        to: overlayViewFrame.size)
+    else {
+      os_log("Cannot get inference result.", type: .error)
+      return
+    }
+
+    // Smooth the inference time: the first sample seeds the average, later samples are blended
+    // in with a weight of 0.1.
+    if avg_latency == 0 {
+      avg_latency = times.inference
+    } else {
+      avg_latency = times.inference * 0.1 + avg_latency * 0.9
+    }
+
+    // Update `inferencedData` to render data in `tableView`. `score` carries the smoothed
+    // inference time.
+    inferencedData = InferencedData(score: Float(avg_latency), times: times)
+
+    // The pixel array is filled by index, so the result must cover the whole image.
+    let pixelCount = width * height
+    guard pixelCount > 0, result.count >= pixelCount else {
+      os_log("Inference result is smaller than the reported image size.", type: .error)
+      return
+    }
+
+    let minValue: Float = result.min() ?? 0
+    let maxValue: Float = result.max() ?? 0
+    let valueRange = maxValue - minValue
+    // A constant output has no range to stretch; keep the multiplier neutral in that case.
+    let multiplier: Float = valueRange > 0 ? 255 / valueRange : 1.0
+
+    let pixels: [PixelData] = result.prefix(pixelCount).map { value in
+      let scaled = (value - minValue) * multiplier
+      // `UInt8(_:)` traps on NaN and on values outside 0...255, so clamp before converting.
+      let gray: UInt8 = scaled.isFinite ? UInt8(min(max(scaled, 0), 255)) : 0
+      return PixelData(a: 255, r: gray, g: gray, b: gray)
+    }
+
+    // Draw result.
+    DispatchQueue.main.async {
+      self.tableView.reloadData()
+
+      self.imageView.image = UIImage(pixels: pixels, width: width, height: height)
+
+      // The image view is added to the overlay the first time a frame is available.
+      if !self.imageViewInitialized {
+        self.imageViewInitialized = true
+        self.overlayView.addSubview(self.imageView)
+        self.overlayView.setNeedsDisplay()
+      }
+    }
+  }
+/*
+  func drawResult(of result: Result) {
+    self.overlayView.dots = result.dots
+
    self.overlayView.lines = result.lines
+    self.overlayView.setNeedsDisplay()
+  }
+
+  func clearResult() {
+    self.overlayView.clear()
+    self.overlayView.setNeedsDisplay()
+  }
+ */
+
+}
+
+
+// MARK: - TableViewDelegate, TableViewDataSource Methods
+extension ViewController: UITableViewDelegate, UITableViewDataSource {
+  /// One table section per `InferenceSections` case.
+  func numberOfSections(in tableView: UITableView) -> Int {
+    return InferenceSections.allCases.count
+  }
+
+  /// Number of rows of a section; 0 for an unknown section index.
+  func tableView(_ tableView: UITableView, numberOfRowsInSection section: Int) -> Int {
+    guard let section = InferenceSections(rawValue: section) else {
+      return 0
+    }
+
+    return section.subcaseCount
+  }
+
+  /// Fills an `InfoCell` with the field name and formatted value for the row. The dequeued cell
+  /// is returned unchanged when the section or row is unknown or no data is available yet.
+  func tableView(_ tableView: UITableView, cellForRowAt indexPath: IndexPath) -> UITableViewCell {
+    let cell = tableView.dequeueReusableCell(withIdentifier: "InfoCell") as! InfoCell
+    guard let section = InferenceSections(rawValue: indexPath.section) else {
+      return cell
+    }
+    guard let data = inferencedData else { return cell }
+
+    var fieldName: String
+    var info: String
+
+    switch section {
+    case .Score:
+      fieldName = section.description
+      info = String(format: "%.3f", data.score)
+    case .Time:
+      guard let row = ProcessingTimes(rawValue: indexPath.row) else {
+        return cell
+      }
+      var time: Double
+      switch row {
+      case .InferenceTime:
+        time = data.times.inference
+      }
+      fieldName = row.description
+      info = String(format: "%.2fms", time)
+    }
+
+    cell.fieldNameLabel.text = fieldName
+    cell.infoLabel.text = info
+
+    return cell
+  }
+
+  /// Row height; the last row of a section uses the separator height plus the bottom spacing.
+  func tableView(_ tableView: UITableView, heightForRowAt indexPath: IndexPath) -> CGFloat {
+    guard let section = InferenceSections(rawValue: indexPath.section) else {
+      return 0
+    }
+
+    var height = Traits.normalCellHeight
+    if indexPath.row == section.subcaseCount - 1 {
+      height = Traits.separatorCellHeight + Traits.bottomSpacing
+    }
+    return height
+  }
+
+}
+
+// MARK: - Private enums
+/// UI constraint values
+fileprivate enum Traits {
+  static let normalCellHeight: CGFloat = 35.0
+  static let separatorCellHeight: CGFloat = 25.0
+  static let bottomSpacing: CGFloat = 30.0
+}
+
+/// Values shown in the info table: a single number for the first section plus the step times.
+fileprivate struct InferencedData {
+  var score: Float
+  var times: Times
+}
+
+/// Type of sections in Info Cell
+fileprivate enum InferenceSections: Int, CaseIterable {
+  case Score
+  case Time
+
+  /// Title shown for the section.
+  var description: String {
+    switch self {
+    case .Score:
+      return "Average"
+    case .Time:
+      return "Processing Time"
+    }
+  }
+
+  /// Number of rows in the section.
+  var subcaseCount: Int {
+    switch self {
+    case .Score:
+      return 1
+    case .Time:
+      return ProcessingTimes.allCases.count
+    }
+  }
+}
+
+/// Type of processing times in Time section in Info Cell
+fileprivate enum ProcessingTimes: Int, CaseIterable {
+  case InferenceTime
+
+  var description: String {
+    switch self {
+    case .InferenceTime:
+      return "Inference Time"
+    }
+  }
+}
diff --git a/live2diff/MiDaS/mobile/ios/Midas/Views/OverlayView.swift b/live2diff/MiDaS/mobile/ios/Midas/Views/OverlayView.swift
new file mode 100644
index 0000000000000000000000000000000000000000..3b53910b57563b6a195fd53321fa2a24ebaf3d3f
--- /dev/null
+++ b/live2diff/MiDaS/mobile/ios/Midas/Views/OverlayView.swift
@@ -0,0 +1,63 @@
+// Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+import UIKit
+
+/// UIView for rendering inference output.
+class OverlayView: UIView {
+
+  var dots = [CGPoint]()
+  var lines = [Line]()
+
+  /// Draws every stored dot first and every stored line afterwards.
+  override func draw(_ rect: CGRect) {
+    dots.forEach { drawDot(of: $0) }
+    lines.forEach { drawLine(of: $0) }
+  }
+
+  /// Fills an oval centred on `dot` whose width and height are `Traits.dot.radius`.
+  func drawDot(of dot: CGPoint) {
+    let side = Traits.dot.radius
+    let origin = CGPoint(x: dot.x - side / 2, y: dot.y - side / 2)
+    let oval = UIBezierPath(ovalIn: CGRect(origin: origin, size: CGSize(width: side, height: side)))
+
+    Traits.dot.color.setFill()
+    oval.fill()
+  }
+
+  /// Strokes a closed path running from `line.from` to `line.to`.
+  func drawLine(of line: Line) {
+    let segment = UIBezierPath()
+    segment.move(to: line.from)
+    segment.addLine(to: line.to)
+    segment.close()
+
+    segment.lineWidth = Traits.line.width
+    Traits.line.color.setStroke()
+
+    segment.stroke()
+  }
+
+  /// Forgets all dots and lines; does not request a redraw by itself.
+  func clear() {
+    dots = []
+    lines = []
+  }
+}
+
+/// Drawing attributes of the overlay.
+private enum Traits {
+  static let dot = (radius: CGFloat(5), color: UIColor.orange)
+  static let line = (width: CGFloat(1.0), color: UIColor.orange)
+}
diff --git a/live2diff/MiDaS/mobile/ios/Podfile b/live2diff/MiDaS/mobile/ios/Podfile
new file mode 100644
index 0000000000000000000000000000000000000000..5e9461fc96dbbe3c22ca6bbf2bfd7df3981b9462
--- /dev/null
+++ b/live2diff/MiDaS/mobile/ios/Podfile
@@ -0,0 +1,12 @@
+# Uncomment the next line to define a global platform for your project
+ platform :ios, '12.0'
+
+target 'Midas' do
+  # Comment the next line if you're not using Swift and don't want to use dynamic frameworks
+  use_frameworks!
+ + # Pods for Midas + pod 'TensorFlowLiteSwift', '~> 0.0.1-nightly' + pod 'TensorFlowLiteSwift/CoreML', '~> 0.0.1-nightly' + pod 'TensorFlowLiteSwift/Metal', '~> 0.0.1-nightly' +end diff --git a/live2diff/MiDaS/mobile/ios/README.md b/live2diff/MiDaS/mobile/ios/README.md new file mode 100644 index 0000000000000000000000000000000000000000..f430fd038f0f92da3f87e78b0cc86021debb151e --- /dev/null +++ b/live2diff/MiDaS/mobile/ios/README.md @@ -0,0 +1,105 @@ +# Tensorflow Lite MiDaS iOS Example + +### Requirements + +- XCode 11.0 or above +- iOS 12.0 or above, [iOS 14 breaks the NPU Delegate](https://github.com/tensorflow/tensorflow/issues/43339) +- TensorFlow 2.4.0, TensorFlowLiteSwift -> 0.0.1-nightly + +## Quick Start with a MiDaS Example + +MiDaS is a neural network to compute depth from a single image. It uses TensorFlowLiteSwift / C++ libraries on iOS. The code is written in Swift. + +Paper: https://arxiv.org/abs/1907.01341 + +> Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +> René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + +### Install TensorFlow + +Set default python version to python3: + +``` +echo 'export PATH=/usr/local/opt/python/libexec/bin:$PATH' >> ~/.zshenv +echo 'alias python=python3' >> ~/.zshenv +echo 'alias pip=pip3' >> ~/.zshenv +``` + +Install TensorFlow + +```shell +pip install tensorflow +``` + +### Install TensorFlowLiteSwift via Cocoapods + +Set required TensorFlowLiteSwift version in the file (`0.0.1-nightly` is recommended): https://github.com/AlexeyAB/midas_tf_ios/blob/main/Podfile#L9 + +Install: brew, ruby, cocoapods + +``` +ruby -e "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/master/install)" +brew install mc rbenv ruby-build +sudo gem install cocoapods +``` + + +The TensorFlowLiteSwift library is available in [Cocoapods](https://cocoapods.org/), to integrate it to our project, we can run in the root directory of the project: + +```ruby +pod 
install
+```
+
+Now open the `Midas.xcworkspace` file in Xcode, select your iPhone device (Xcode->Product->Destination->iPhone) and launch it (cmd + R). If everything works well, you should see a real-time depth map from your camera.
+
+### Model
+
+The TensorFlow Lite (TFLite) model `model_opt.tflite` is in the folder `/Midas/Model`
+
+
+To use another model, you should convert it from a TensorFlow saved-model to a TFLite model (so that it can be deployed):
+
+```python
+saved_model_export_dir = "./saved_model"
+converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_export_dir)
+tflite_model = converter.convert()
+open("model.tflite", "wb").write(tflite_model)
+```
+
+### Setup Xcode
+
+* Open the `.xcworkspace` directory in Xcode
+
+* Click on your ProjectName (top-left corner) -> change the Bundle Identifier to `com.midas.tflite-npu` or something similar (it must be unique)
+
+* Select your Developer Team (you should be signed in with your Apple ID)
+
+* Connect your iPhone (if you want to run it on a real device instead of the simulator), select your iPhone device (Xcode->Product->Destination->iPhone)
+
+* In Xcode, click: Product -> Run
+
+* On your iPhone, go to: Settings -> General -> Device Management (or Profiles) -> Apple Development -> Trust Apple Development
+
+----
+
+Original repository: https://github.com/intel-isl/MiDaS
+
+
+### Examples:
+
+| ![photo_2020-09-27_17-43-20](https://user-images.githubusercontent.com/4096485/94367804-9610de80-00e9-11eb-8a23-8b32a6f52d41.jpg) | ![photo_2020-09-27_17-49-22](https://user-images.githubusercontent.com/4096485/94367974-7201cd00-00ea-11eb-8e0a-68eb9ea10f63.jpg) | ![photo_2020-09-27_17-52-30](https://user-images.githubusercontent.com/4096485/94367976-729a6380-00ea-11eb-8ce0-39d3e26dd550.jpg) | ![photo_2020-09-27_17-43-21](https://user-images.githubusercontent.com/4096485/94367807-97420b80-00e9-11eb-9dcd-848ad9e89e03.jpg) |
+|---|---|---|---|
+
+## LICENSE
+
+THIS SOFTWARE IS PROVIDED BY THE
COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
diff --git a/live2diff/MiDaS/mobile/ios/RunScripts/download_models.sh b/live2diff/MiDaS/mobile/ios/RunScripts/download_models.sh
new file mode 100644
index 0000000000000000000000000000000000000000..03a4bcd0ec7339bab050cc4cbbed598786de0399
--- /dev/null
+++ b/live2diff/MiDaS/mobile/ios/RunScripts/download_models.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# Download TF Lite model from the internet if it does not exist.
+#
+# Exits with status 1 when the download fails, so that a broken or partial file is never
+# mistaken for the model on the next run.
+
+TFLITE_MODEL="model_opt.tflite"
+TFLITE_FILE="Midas/Model/${TFLITE_MODEL}"
+MODEL_SRC="https://github.com/intel-isl/MiDaS/releases/download/v2/${TFLITE_MODEL}"
+
+if test -f "${TFLITE_FILE}"; then
+    echo "INFO: TF Lite model already exists. Skip downloading and use the local model."
+else
+    # --fail makes curl return an error on HTTP failures instead of saving the error page.
+    if ! curl --fail --location --create-dirs -o "${TFLITE_FILE}" "${MODEL_SRC}"; then
+        echo "ERROR: Failed to download TensorFlow Lite model from ${MODEL_SRC}." >&2
+        rm -f "${TFLITE_FILE}"
+        exit 1
+    fi
+    echo "INFO: Downloaded TensorFlow Lite model to ${TFLITE_FILE}."
+fi + diff --git a/live2diff/MiDaS/onnxtools.py b/live2diff/MiDaS/onnxtools.py new file mode 100644 index 0000000000000000000000000000000000000000..a06e32ba048aba988ca0bb641f02ea16527f4fc4 --- /dev/null +++ b/live2diff/MiDaS/onnxtools.py @@ -0,0 +1,53 @@ +import cv2 +import torch +import utils +from torchvision.transforms import Compose +from midas.dpt_depth import DPTDepthModel +from midas.transforms import Resize, NormalizeImage, PrepareForNet + + +def compose2(f1, f2): + return lambda x: f2(f1(x)) + + +model_params = ( + {"name": "dpt_large-midas", "path": "weights/dpt_large-midas-2f21e586.pt", "backbone": "vitl16_384"}, + {"name": "dpt_hybrid-midas", "path": "weights/dpt_hybrid-midas-501f0c75.pt", "backbone": "vitb_rn50_384"} +) + +for model_param in model_params: + model_path = model_param["path"] + device = torch.device("cpu") + model = DPTDepthModel( + path=model_path, + backbone=model_param["backbone"], + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + + resize_image = Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=False, + ensure_multiple_of=32, + resize_method="upper_bound", + image_interpolation_method=cv2.INTER_CUBIC, + ) + + transform = Compose( + [ + resize_image, + normalization, + PrepareForNet() + ] + ) + model.eval() + + img = utils.read_image("input/dog.jpg") + img_input = transform({"image": img})["image"] + shaped = img_input.reshape(1, 3, net_h, net_w) + torch.onnx.export(model, torch.rand(1, 3, 384, 384, dtype=torch.float), "weights/" + model_param["name"] + ".onnx", + export_params=True) diff --git a/live2diff/MiDaS/output/.placeholder b/live2diff/MiDaS/output/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/live2diff/MiDaS/ros/LICENSE b/live2diff/MiDaS/ros/LICENSE new file mode 100644 index 
0000000000000000000000000000000000000000..6606ec028d1c629986e7019fe3564f5b4bfe425d --- /dev/null +++ b/live2diff/MiDaS/ros/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Alexey + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/live2diff/MiDaS/ros/README.md b/live2diff/MiDaS/ros/README.md new file mode 100644 index 0000000000000000000000000000000000000000..7da9666ca41afd4bf7dfb694b82917f3b3d274e7 --- /dev/null +++ b/live2diff/MiDaS/ros/README.md @@ -0,0 +1,131 @@ +# MiDaS for ROS1 by using LibTorch in C++ + +### Requirements + +- Ubuntu 17.10 / 18.04 / 20.04, Debian Stretch +- ROS Melodic for Ubuntu (17.10 / 18.04) / Debian Stretch, ROS Noetic for Ubuntu 20.04 +- C++11 +- LibTorch >= 1.6 + +## Quick Start with a MiDaS Example + +MiDaS is a neural network to compute depth from a single image. 
+ +* input from `image_topic`: `sensor_msgs/Image` - `RGB8` image with any shape +* output to `midas_topic`: `sensor_msgs/Image` - `TYPE_32FC1` inverse relative depth maps in range [0 - 255] with original size and channels=1 + +### Install Dependencies + +* install ROS Melodic for Ubuntu 17.10 / 18.04: +```bash +wget https://raw.githubusercontent.com/intel-isl/MiDaS/master/ros/additions/install_ros_melodic_ubuntu_17_18.sh +bash ./install_ros_melodic_ubuntu_17_18.sh +``` + +or Noetic for Ubuntu 20.04: + +```bash +wget https://raw.githubusercontent.com/intel-isl/MiDaS/master/ros/additions/install_ros_noetic_ubuntu_20.sh +bash ./install_ros_noetic_ubuntu_20.sh +``` + + +* install LibTorch 1.7 with CUDA 11.0: + +On **Jetson (ARM)**: +```bash +wget https://nvidia.box.com/shared/static/wa34qwrwtk9njtyarwt5nvo6imenfy26.whl -O torch-1.7.0-cp36-cp36m-linux_aarch64.whl +sudo apt-get install python3-pip libopenblas-base libopenmpi-dev +pip3 install Cython +pip3 install numpy torch-1.7.0-cp36-cp36m-linux_aarch64.whl +``` +Or compile LibTorch from source: https://github.com/pytorch/pytorch#from-source + +On **Linux (x86_64)**: +```bash +cd ~/ +wget https://download.pytorch.org/libtorch/cu110/libtorch-cxx11-abi-shared-with-deps-1.7.0%2Bcu110.zip +unzip libtorch-cxx11-abi-shared-with-deps-1.7.0+cu110.zip +``` + +* create symlink for OpenCV: + +```bash +sudo ln -s /usr/include/opencv4 /usr/include/opencv +``` + +* download and install MiDaS: + +```bash +source ~/.bashrc +cd ~/ +mkdir catkin_ws +cd catkin_ws +git clone https://github.com/intel-isl/MiDaS +mkdir src +cp -r MiDaS/ros/* src + +chmod +x src/additions/*.sh +chmod +x src/*.sh +chmod +x src/midas_cpp/scripts/*.py +cp src/additions/do_catkin_make.sh ./do_catkin_make.sh +./do_catkin_make.sh +./src/additions/downloads.sh +``` + +### Usage + +* run only `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh` + +#### Test + +* Test - capture video and show result in the window: + * place any `test.mp4` video file to the directory
`~/catkin_ws/src/` + * run `midas` node: `~/catkin_ws/src/launch_midas_cpp.sh` + * run test nodes in another terminal: `cd ~/catkin_ws/src && ./run_talker_listener_test.sh` and wait 30 seconds + + (to use Python 2, run command `sed -i 's/python3/python2/' ~/catkin_ws/src/midas_cpp/scripts/*.py` ) + +## Mobile version of MiDaS - Monocular Depth Estimation + +### Accuracy + +* Old small model - ResNet50 default-decoder 384x384 +* New small model - EfficientNet-Lite3 small-decoder 256x256 + +**Zero-shot error** (lower is better): + +| Model | DIW WHDR | Eth3d AbsRel | Sintel AbsRel | Kitti δ>1.25 | NyuDepthV2 δ>1.25 | TUM δ>1.25 | +|---|---|---|---|---|---|---| +| Old small model 384x384 | **0.1248** | 0.1550 | **0.3300** | **21.81** | 15.73 | 17.00 | +| New small model 256x256 | 0.1344 | **0.1344** | 0.3370 | 29.27 | **13.43** | **14.53** | +| Relative improvement, % | -8 % | **+13 %** | -2 % | -34 % | **+15 %** | **+15 %** | + +None of the Train/Valid/Test subsets of these datasets (DIW, Eth3d, Sintel, Kitti, NyuDepthV2, TUM) were involved in training or fine-tuning. + +### Inference speed (FPS) on NVIDIA GPU + +Inference speed excluding pre- and post-processing, batch=1, **Frames Per Second** (higher is better): + +| Model | Jetson Nano, FPS | RTX 2080Ti, FPS | +|---|---|---| +| Old small model 384x384 | 1.6 | 117 | +| New small model 256x256 | 8.1 | 232 | +| SpeedUp, X times | **5x** | **2x** | + +### Citation + +This repository contains code to compute depth from a single image.
It accompanies our [paper](https://arxiv.org/abs/1907.01341v3): + +>Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer +René Ranftl, Katrin Lasinger, David Hafner, Konrad Schindler, Vladlen Koltun + +Please cite our paper if you use this code or any of the models: +``` +@article{Ranftl2020, + author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, + title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, + year = {2020}, +} +``` \ No newline at end of file diff --git a/live2diff/MiDaS/ros/additions/do_catkin_make.sh b/live2diff/MiDaS/ros/additions/do_catkin_make.sh new file mode 100644 index 0000000000000000000000000000000000000000..0d416fc00282aab146326bbba12a9274e1ba29b8 --- /dev/null +++ b/live2diff/MiDaS/ros/additions/do_catkin_make.sh @@ -0,0 +1,5 @@ +mkdir src +catkin_make +source devel/setup.bash +echo $ROS_PACKAGE_PATH +chmod +x ./devel/setup.bash diff --git a/live2diff/MiDaS/ros/additions/downloads.sh b/live2diff/MiDaS/ros/additions/downloads.sh new file mode 100644 index 0000000000000000000000000000000000000000..fd4b1736595f0852d397df306f955d8d24a772ce --- /dev/null +++ b/live2diff/MiDaS/ros/additions/downloads.sh @@ -0,0 +1,5 @@ +mkdir ~/.ros +wget https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-small-traced.pt +cp ./model-small-traced.pt ~/.ros/model-small-traced.pt + + diff --git a/live2diff/MiDaS/ros/additions/install_ros_melodic_ubuntu_17_18.sh b/live2diff/MiDaS/ros/additions/install_ros_melodic_ubuntu_17_18.sh new file mode 100644 index 0000000000000000000000000000000000000000..b868112631e9d9bc7bccb601407dfc857b8a99d5 --- /dev/null +++ b/live2diff/MiDaS/ros/additions/install_ros_melodic_ubuntu_17_18.sh @@ -0,0 +1,34 @@ +#@title { display-mode: "code" } + +#from http://wiki.ros.org/indigo/Installation/Ubuntu 
+ +#1.2 Setup sources.list +sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' + +# 1.3 Setup keys +sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 +sudo apt-key adv --keyserver 'hkp://ha.pool.sks-keyservers.net:80' --recv-key 421C365BD9FF1F717815A3895523BAEEB01FA116 + +curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - + +# 1.4 Installation +sudo apt-get update +sudo apt-get upgrade + +# Desktop-Full Install: +sudo apt-get install ros-melodic-desktop-full + +printf "\nsource /opt/ros/melodic/setup.bash\n" >> ~/.bashrc + +# 1.5 Initialize rosdep +sudo rosdep init +rosdep update + + +# 1.7 Getting rosinstall (python) +sudo apt-get install python-rosinstall +sudo apt-get install python-catkin-tools +sudo apt-get install python-rospy +sudo apt-get install python-rosdep +sudo apt-get install python-roscd +sudo apt-get install python-pip \ No newline at end of file diff --git a/live2diff/MiDaS/ros/additions/install_ros_noetic_ubuntu_20.sh b/live2diff/MiDaS/ros/additions/install_ros_noetic_ubuntu_20.sh new file mode 100644 index 0000000000000000000000000000000000000000..d73ea1a3d92359819167d735a92d2a650b9bc245 --- /dev/null +++ b/live2diff/MiDaS/ros/additions/install_ros_noetic_ubuntu_20.sh @@ -0,0 +1,33 @@ +#@title { display-mode: "code" } + +#from http://wiki.ros.org/indigo/Installation/Ubuntu + +#1.2 Setup sources.list +sudo sh -c 'echo "deb http://packages.ros.org/ros/ubuntu $(lsb_release -sc) main" > /etc/apt/sources.list.d/ros-latest.list' + +# 1.3 Setup keys +sudo apt-key adv --keyserver 'hkp://keyserver.ubuntu.com:80' --recv-key C1CF6E31E6BADE8868B172B4F42ED6FBAB17C654 + +curl -sSL 'http://keyserver.ubuntu.com/pks/lookup?op=get&search=0xC1CF6E31E6BADE8868B172B4F42ED6FBAB17C654' | sudo apt-key add - + +# 1.4 Installation +sudo apt-get update +sudo 
apt-get upgrade + +# Desktop-Full Install: +sudo apt-get install ros-noetic-desktop-full + +printf "\nsource /opt/ros/noetic/setup.bash\n" >> ~/.bashrc + +# 1.5 Initialize rosdep +sudo rosdep init +rosdep update + + +# 1.7 Getting rosinstall (python) +sudo apt-get install python3-rosinstall +sudo apt-get install python3-catkin-tools +sudo apt-get install python3-rospy +sudo apt-get install python3-rosdep +sudo apt-get install python3-roscd +sudo apt-get install python3-pip \ No newline at end of file diff --git a/live2diff/MiDaS/ros/additions/make_package_cpp.sh b/live2diff/MiDaS/ros/additions/make_package_cpp.sh new file mode 100644 index 0000000000000000000000000000000000000000..d0ef6073a9c9ce40744e1c81d557c1c68255b95e --- /dev/null +++ b/live2diff/MiDaS/ros/additions/make_package_cpp.sh @@ -0,0 +1,16 @@ +cd ~/catkin_ws/src +catkin_create_pkg midas_cpp std_msgs roscpp cv_bridge sensor_msgs image_transport +cd ~/catkin_ws +catkin_make + +chmod +x ~/catkin_ws/devel/setup.bash +printf "\nsource ~/catkin_ws/devel/setup.bash" >> ~/.bashrc +source ~/catkin_ws/devel/setup.bash + + +sudo rosdep init +rosdep update +#rospack depends1 midas_cpp +roscd midas_cpp +#cat package.xml +#rospack depends midas_cpp \ No newline at end of file diff --git a/live2diff/MiDaS/ros/launch_midas_cpp.sh b/live2diff/MiDaS/ros/launch_midas_cpp.sh new file mode 100644 index 0000000000000000000000000000000000000000..5a0d1583fffdc49216c625dfd07af2ae3b01a7a0 --- /dev/null +++ b/live2diff/MiDaS/ros/launch_midas_cpp.sh @@ -0,0 +1,2 @@ +source ~/catkin_ws/devel/setup.bash +roslaunch midas_cpp midas_cpp.launch model_name:="model-small-traced.pt" input_topic:="image_topic" output_topic:="midas_topic" out_orig_size:="true" \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/CMakeLists.txt b/live2diff/MiDaS/ros/midas_cpp/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..885341691d217f9c4c8fcb1e4ff568d87788c7b8 --- /dev/null +++ 
b/live2diff/MiDaS/ros/midas_cpp/CMakeLists.txt @@ -0,0 +1,189 @@ +cmake_minimum_required(VERSION 3.0.2) +project(midas_cpp) + +## Compile as C++11, supported in ROS Kinetic and newer +# add_compile_options(-std=c++11) + +## Find catkin macros and libraries +## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) +## is used, also find other catkin packages +find_package(catkin REQUIRED COMPONENTS + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs +) + +## System dependencies are found with CMake's conventions +# find_package(Boost REQUIRED COMPONENTS system) + +list(APPEND CMAKE_PREFIX_PATH "~/libtorch") +list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python3.6/dist-packages/torch/lib") +list(APPEND CMAKE_PREFIX_PATH "/usr/local/lib/python2.7/dist-packages/torch/lib") + +if(NOT EXISTS "~/libtorch") + if (EXISTS "/usr/local/lib/python3.6/dist-packages/torch") + include_directories(/usr/local/include) + include_directories(/usr/local/lib/python3.6/dist-packages/torch/include/torch/csrc/api/include) + include_directories(/usr/local/lib/python3.6/dist-packages/torch/include) + + link_directories(/usr/local/lib) + link_directories(/usr/local/lib/python3.6/dist-packages/torch/lib) + + set(CMAKE_PREFIX_PATH /usr/local/lib/python3.6/dist-packages/torch) + set(Boost_USE_MULTITHREADED ON) + set(Torch_DIR /usr/local/lib/python3.6/dist-packages/torch) + + elseif (EXISTS "/usr/local/lib/python2.7/dist-packages/torch") + + include_directories(/usr/local/include) + include_directories(/usr/local/lib/python2.7/dist-packages/torch/include/torch/csrc/api/include) + include_directories(/usr/local/lib/python2.7/dist-packages/torch/include) + + link_directories(/usr/local/lib) + link_directories(/usr/local/lib/python2.7/dist-packages/torch/lib) + + set(CMAKE_PREFIX_PATH /usr/local/lib/python2.7/dist-packages/torch) + set(Boost_USE_MULTITHREADED ON) + set(Torch_DIR /usr/local/lib/python2.7/dist-packages/torch) + endif() +endif() + + + 
+find_package(Torch REQUIRED) +find_package(OpenCV REQUIRED) +include_directories( ${OpenCV_INCLUDE_DIRS} ) + +add_executable(midas_cpp src/main.cpp) +target_link_libraries(midas_cpp "${TORCH_LIBRARIES}" "${OpenCV_LIBS} ${catkin_LIBRARIES}") +set_property(TARGET midas_cpp PROPERTY CXX_STANDARD 14) + + + +################################### +## catkin specific configuration ## +################################### +## The catkin_package macro generates cmake config files for your package +## Declare things to be passed to dependent projects +## INCLUDE_DIRS: uncomment this if your package contains header files +## LIBRARIES: libraries you create in this project that dependent projects also need +## CATKIN_DEPENDS: catkin_packages dependent projects also need +## DEPENDS: system dependencies of this project that dependent projects also need +catkin_package( +# INCLUDE_DIRS include +# LIBRARIES midas_cpp +# CATKIN_DEPENDS cv_bridge image_transport roscpp sensor_msgs std_msgs +# DEPENDS system_lib +) + +########### +## Build ## +########### + +## Specify additional locations of header files +## Your package locations should be listed before other locations +include_directories( +# include + ${catkin_INCLUDE_DIRS} +) + +## Declare a C++ library +# add_library(${PROJECT_NAME} +# src/${PROJECT_NAME}/midas_cpp.cpp +# ) + +## Add cmake target dependencies of the library +## as an example, code may need to be generated before libraries +## either from message generation or dynamic reconfigure +# add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Declare a C++ executable +## With catkin_make all packages are built within a single CMake context +## The recommended prefix ensures that target names across packages don't collide +# add_executable(${PROJECT_NAME}_node src/midas_cpp_node.cpp) + +## Rename C++ executable without prefix +## The above recommended prefix causes long target names, the following renames the +## target 
back to the shorter version for ease of user use +## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" +# set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") + +## Add cmake target dependencies of the executable +## same as for the library above +# add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) + +## Specify libraries to link a library or executable target against +# target_link_libraries(${PROJECT_NAME}_node +# ${catkin_LIBRARIES} +# ) + +############# +## Install ## +############# + +# all install targets should use catkin DESTINATION variables +# See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html + +## Mark executable scripts (Python etc.) for installation +## in contrast to setup.py, you can choose the destination +# catkin_install_python(PROGRAMS +# scripts/my_python_script +# DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark executables for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_executables.html +# install(TARGETS ${PROJECT_NAME}_node +# RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +# ) + +## Mark libraries for installation +## See http://docs.ros.org/melodic/api/catkin/html/howto/format1/building_libraries.html +# install(TARGETS ${PROJECT_NAME} +# ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} +# RUNTIME DESTINATION ${CATKIN_GLOBAL_BIN_DESTINATION} +# ) + +## Mark cpp header files for installation +# install(DIRECTORY include/${PROJECT_NAME}/ +# DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} +# FILES_MATCHING PATTERN "*.h" +# PATTERN ".svn" EXCLUDE +# ) + +## Mark other files for installation (e.g. launch and bag files, etc.) 
+# install(FILES +# # myfile1 +# # myfile2 +# DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} +# ) + +############# +## Testing ## +############# + +## Add gtest based cpp test target and link libraries +# catkin_add_gtest(${PROJECT_NAME}-test test/test_midas_cpp.cpp) +# if(TARGET ${PROJECT_NAME}-test) +# target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) +# endif() + +## Add folders to be run by python nosetests +# catkin_add_nosetests(test) + +install(TARGETS ${PROJECT_NAME} + ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} + RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} +) + +add_custom_command( + TARGET midas_cpp POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_BINARY_DIR}/midas_cpp + ${CMAKE_SOURCE_DIR}/midas_cpp +) \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/launch/midas_cpp.launch b/live2diff/MiDaS/ros/midas_cpp/launch/midas_cpp.launch new file mode 100644 index 0000000000000000000000000000000000000000..88e86f42f668e76ad4976ec6794a8cb0f20cac65 --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/launch/midas_cpp.launch @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/launch/midas_talker_listener.launch b/live2diff/MiDaS/ros/midas_cpp/launch/midas_talker_listener.launch new file mode 100644 index 0000000000000000000000000000000000000000..8817a4f4933c56986fe0edc0886b2fded3d3406d --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/launch/midas_talker_listener.launch @@ -0,0 +1,23 @@ + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/package.xml b/live2diff/MiDaS/ros/midas_cpp/package.xml new file mode 100644 index 0000000000000000000000000000000000000000..1b346fc18d2eeae1386dd821c496b3df4f7e9158 --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/package.xml @@ -0,0 +1,77 @@ + + + midas_cpp + 0.1.0 + The 
midas_cpp package + + Alexey Bochkovskiy + MIT + https://github.com/AlexeyAB/midas_ros + + + + + + + TODO + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + catkin + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + cv_bridge + image_transport + roscpp + rospy + sensor_msgs + std_msgs + + + + + + + + diff --git a/live2diff/MiDaS/ros/midas_cpp/scripts/listener.py b/live2diff/MiDaS/ros/midas_cpp/scripts/listener.py new file mode 100644 index 0000000000000000000000000000000000000000..6927ea7a83ac9309e5f883ee974a5dcfa8a2aa3b --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/scripts/listener.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +from __future__ import print_function + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +import numpy as np +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + +class video_show: + + def __init__(self): + self.show_output = rospy.get_param('~show_output', True) + self.save_output = rospy.get_param('~save_output', False) + self.output_video_file = rospy.get_param('~output_video_file','result.mp4') + # rospy.loginfo(f"Listener - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") + + self.bridge = CvBridge() + self.image_sub = rospy.Subscriber("midas_topic", Image, self.callback) + + def callback(self, data): + try: + cv_image = self.bridge.imgmsg_to_cv2(data) + except CvBridgeError as e: + print(e) + return + + if cv_image.size == 0: + return + + rospy.loginfo("Listener: Received new frame") + cv_image = cv_image.astype("uint8") + + if self.show_output==True: + cv2.imshow("video_show", cv_image) + cv2.waitKey(10) + + if self.save_output==True: + if self.video_writer_init==False: + fourcc = cv2.VideoWriter_fourcc(*'XVID') + self.out = 
cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) + + self.out.write(cv_image) + + + +def main(args): + rospy.init_node('listener', anonymous=True) + ic = video_show() + try: + rospy.spin() + except KeyboardInterrupt: + print("Shutting down") + cv2.destroyAllWindows() + +if __name__ == '__main__': + main(sys.argv) \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/scripts/listener_original.py b/live2diff/MiDaS/ros/midas_cpp/scripts/listener_original.py new file mode 100644 index 0000000000000000000000000000000000000000..20e235f6958d644b89383752ab18e9e2275f55e5 --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/scripts/listener_original.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 +from __future__ import print_function + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +import numpy as np +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + +class video_show: + + def __init__(self): + self.show_output = rospy.get_param('~show_output', True) + self.save_output = rospy.get_param('~save_output', False) + self.output_video_file = rospy.get_param('~output_video_file','result.mp4') + # rospy.loginfo(f"Listener original - params: show_output={self.show_output}, save_output={self.save_output}, output_video_file={self.output_video_file}") + + self.bridge = CvBridge() + self.image_sub = rospy.Subscriber("image_topic", Image, self.callback) + + def callback(self, data): + try: + cv_image = self.bridge.imgmsg_to_cv2(data) + except CvBridgeError as e: + print(e) + return + + if cv_image.size == 0: + return + + rospy.loginfo("Listener_original: Received new frame") + cv_image = cv_image.astype("uint8") + + if self.show_output==True: + cv2.imshow("video_show_orig", cv_image) + cv2.waitKey(10) + + if self.save_output==True: + if self.video_writer_init==False: + fourcc = cv2.VideoWriter_fourcc(*'XVID') + self.out = 
cv2.VideoWriter(self.output_video_file, fourcc, 25, (cv_image.shape[1], cv_image.shape[0])) + + self.out.write(cv_image) + + + +def main(args): + rospy.init_node('listener_original', anonymous=True) + ic = video_show() + try: + rospy.spin() + except KeyboardInterrupt: + print("Shutting down") + cv2.destroyAllWindows() + +if __name__ == '__main__': + main(sys.argv) \ No newline at end of file diff --git a/live2diff/MiDaS/ros/midas_cpp/scripts/talker.py b/live2diff/MiDaS/ros/midas_cpp/scripts/talker.py new file mode 100644 index 0000000000000000000000000000000000000000..8219cc8632484a2efd02984347c615efad6b78b2 --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/scripts/talker.py @@ -0,0 +1,53 @@ +#!/usr/bin/env python3 + + +import roslib +#roslib.load_manifest('my_package') +import sys +import rospy +import cv2 +from std_msgs.msg import String +from sensor_msgs.msg import Image +from cv_bridge import CvBridge, CvBridgeError + + +def talker(): + rospy.init_node('talker', anonymous=True) + + use_camera = rospy.get_param('~use_camera', False) + input_video_file = rospy.get_param('~input_video_file','test.mp4') + # rospy.loginfo(f"Talker - params: use_camera={use_camera}, input_video_file={input_video_file}") + + # rospy.loginfo("Talker: Trying to open a video stream") + if use_camera == True: + cap = cv2.VideoCapture(0) + else: + cap = cv2.VideoCapture(input_video_file) + + pub = rospy.Publisher('image_topic', Image, queue_size=1) + rate = rospy.Rate(30) # 30hz + bridge = CvBridge() + + while not rospy.is_shutdown(): + ret, cv_image = cap.read() + if ret==False: + print("Talker: Video is over") + rospy.loginfo("Video is over") + return + + try: + image = bridge.cv2_to_imgmsg(cv_image, "bgr8") + except CvBridgeError as e: + rospy.logerr("Talker: cv2image conversion failed: ", e) + print(e) + continue + + rospy.loginfo("Talker: Publishing frame") + pub.publish(image) + rate.sleep() + +if __name__ == '__main__': + try: + talker() + except rospy.ROSInterruptException: + pass 
diff --git a/live2diff/MiDaS/ros/midas_cpp/src/main.cpp b/live2diff/MiDaS/ros/midas_cpp/src/main.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e4fc72c6955f66af71c9cb1fc7a7b1f643129685 --- /dev/null +++ b/live2diff/MiDaS/ros/midas_cpp/src/main.cpp @@ -0,0 +1,285 @@ +#include +#include +#include +#include + +#include + +#include // One-stop header. + +#include +#include +#include +#include + +#include +#include + +// includes for OpenCV >= 3.x +#ifndef CV_VERSION_EPOCH +#include +#include +#include +#endif + +// OpenCV includes for OpenCV 2.x +#ifdef CV_VERSION_EPOCH +#include +#include +#include +#include +#endif + +static const std::string OPENCV_WINDOW = "Image window"; + +class Midas +{ + ros::NodeHandle nh_; + image_transport::ImageTransport it_; + image_transport::Subscriber image_sub_; + image_transport::Publisher image_pub_; + + torch::jit::script::Module module; + torch::Device device; + + auto ToTensor(cv::Mat img, bool show_output = false, bool unsqueeze = false, int unsqueeze_dim = 0) + { + //std::cout << "image shape: " << img.size() << std::endl; + at::Tensor tensor_image = torch::from_blob(img.data, { img.rows, img.cols, 3 }, at::kByte); + + if (unsqueeze) + { + tensor_image.unsqueeze_(unsqueeze_dim); + //std::cout << "tensors new shape: " << tensor_image.sizes() << std::endl; + } + + if (show_output) + { + std::cout << tensor_image.slice(2, 0, 1) << std::endl; + } + //std::cout << "tenor shape: " << tensor_image.sizes() << std::endl; + return tensor_image; + } + + auto ToInput(at::Tensor tensor_image) + { + // Create a vector of inputs. 
+ return std::vector{tensor_image}; + } + + auto ToCvImage(at::Tensor tensor, int cv_type = CV_8UC3) + { + int width = tensor.sizes()[0]; + int height = tensor.sizes()[1]; + try + { + cv::Mat output_mat; + if (cv_type == CV_8UC4 || cv_type == CV_8UC3 || cv_type == CV_8UC2 || cv_type == CV_8UC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + else if (cv_type == CV_32FC4 || cv_type == CV_32FC3 || cv_type == CV_32FC2 || cv_type == CV_32FC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + else if (cv_type == CV_64FC4 || cv_type == CV_64FC3 || cv_type == CV_64FC2 || cv_type == CV_64FC1) { + cv::Mat cv_image(cv::Size{ height, width }, cv_type, tensor.data_ptr()); + output_mat = cv_image; + } + + //show_image(output_mat, "converted image from tensor"); + return output_mat.clone(); + } + catch (const c10::Error& e) + { + std::cout << "an error has occured : " << e.msg() << std::endl; + } + return cv::Mat(height, width, CV_8UC3); + } + + std::string input_topic, output_topic, model_name; + bool out_orig_size; + int net_width, net_height; + torch::NoGradGuard guard; + at::Tensor mean, std; + at::Tensor output, tensor; + +public: + Midas() + : nh_(), it_(nh_), device(torch::Device(torch::kCPU)) + { + ros::param::param("~input_topic", input_topic, "image_topic"); + ros::param::param("~output_topic", output_topic, "midas_topic"); + ros::param::param("~model_name", model_name, "model-small-traced.pt"); + ros::param::param("~out_orig_size", out_orig_size, true); + ros::param::param("~net_width", net_width, 256); + ros::param::param("~net_height", net_height, 256); + + std::cout << ", input_topic = " << input_topic << + ", output_topic = " << output_topic << + ", model_name = " << model_name << + ", out_orig_size = " << out_orig_size << + ", net_width = " << net_width << + ", net_height = " << net_height << + std::endl; + + // Subscrive to input video feed and 
publish output video feed + image_sub_ = it_.subscribe(input_topic, 1, &Midas::imageCb, this); + image_pub_ = it_.advertise(output_topic, 1); + + std::cout << "Try to load torchscript model \n"; + + try { + // Deserialize the ScriptModule from a file using torch::jit::load(). + module = torch::jit::load(model_name); + } + catch (const c10::Error& e) { + std::cerr << "error loading the model\n"; + exit(0); + } + + std::cout << "ok\n"; + + try { + module.eval(); + torch::jit::getProfilingMode() = false; + torch::jit::setGraphExecutorOptimize(true); + + mean = torch::tensor({ 0.485, 0.456, 0.406 }); + std = torch::tensor({ 0.229, 0.224, 0.225 }); + + if (torch::hasCUDA()) { + std::cout << "cuda is available" << std::endl; + at::globalContext().setBenchmarkCuDNN(true); + device = torch::Device(torch::kCUDA); + module.to(device); + mean = mean.to(device); + std = std.to(device); + } + } + catch (const c10::Error& e) + { + std::cerr << " module initialization: " << e.msg() << std::endl; + } + } + + ~Midas() + { + } + + void imageCb(const sensor_msgs::ImageConstPtr& msg) + { + cv_bridge::CvImagePtr cv_ptr; + try + { + // sensor_msgs::Image to cv::Mat + cv_ptr = cv_bridge::toCvCopy(msg, sensor_msgs::image_encodings::RGB8); + } + catch (cv_bridge::Exception& e) + { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + + // pre-processing + auto tensor_cpu = ToTensor(cv_ptr->image); // OpenCV-image -> Libtorch-tensor + + try { + tensor = tensor_cpu.to(device); // move to device (CPU or GPU) + + tensor = tensor.toType(c10::kFloat); + tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW + tensor = tensor.unsqueeze(0); + tensor = at::upsample_bilinear2d(tensor, { net_height, net_width }, true); // resize + tensor = tensor.squeeze(0); + tensor = tensor.permute({ 1, 2, 0 }); // CHW -> HWC + + tensor = tensor.div(255).sub(mean).div(std); // normalization + tensor = tensor.permute({ 2, 0, 1 }); // HWC -> CHW + tensor.unsqueeze_(0); // CHW -> NCHW + } + catch (const 
c10::Error& e) + { + std::cerr << " pre-processing exception: " << e.msg() << std::endl; + return; + } + + auto input_to_net = ToInput(tensor); // input to the network + + // inference + output; + try { + output = module.forward(input_to_net).toTensor(); // run inference + } + catch (const c10::Error& e) + { + std::cerr << " module.forward() exception: " << e.msg() << std::endl; + return; + } + + output = output.detach().to(torch::kF32); + + // move to CPU temporary + at::Tensor output_tmp = output; + output_tmp = output_tmp.to(torch::kCPU); + + // normalization + float min_val = std::numeric_limits::max(); + float max_val = std::numeric_limits::min(); + + for (int i = 0; i < net_width * net_height; ++i) { + float val = output_tmp.data_ptr()[i]; + if (min_val > val) min_val = val; + if (max_val < val) max_val = val; + } + float range_val = max_val - min_val; + + output = output.sub(min_val).div(range_val).mul(255.0F).clamp(0, 255).to(torch::kF32); // .to(torch::kU8); + + // resize to the original size if required + if (out_orig_size) { + try { + output = at::upsample_bilinear2d(output.unsqueeze(0), { cv_ptr->image.size().height, cv_ptr->image.size().width }, true); + output = output.squeeze(0); + } + catch (const c10::Error& e) + { + std::cout << " upsample_bilinear2d() exception: " << e.msg() << std::endl; + return; + } + } + output = output.permute({ 1, 2, 0 }).to(torch::kCPU); + + int cv_type = CV_32FC1; // CV_8UC1; + auto cv_img = ToCvImage(output, cv_type); + + sensor_msgs::Image img_msg; + + try { + // cv::Mat -> sensor_msgs::Image + std_msgs::Header header; // empty header + header.seq = 0; // user defined counter + header.stamp = ros::Time::now();// time + //cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::MONO8, cv_img); + cv_bridge::CvImage img_bridge = cv_bridge::CvImage(header, sensor_msgs::image_encodings::TYPE_32FC1, cv_img); + + img_bridge.toImageMsg(img_msg); // cv_bridge -> sensor_msgs::Image + } + catch 
(cv_bridge::Exception& e) + { + ROS_ERROR("cv_bridge exception: %s", e.what()); + return; + } + + // Output modified video stream + image_pub_.publish(img_msg); + } +}; + +int main(int argc, char** argv) +{ + ros::init(argc, argv, "midas", ros::init_options::AnonymousName); + Midas ic; + ros::spin(); + return 0; +} \ No newline at end of file diff --git a/live2diff/MiDaS/ros/run_talker_listener_test.sh b/live2diff/MiDaS/ros/run_talker_listener_test.sh new file mode 100644 index 0000000000000000000000000000000000000000..a997c4261072d0d627598fe06a723fcc7522d347 --- /dev/null +++ b/live2diff/MiDaS/ros/run_talker_listener_test.sh @@ -0,0 +1,16 @@ +# place any test.mp4 file near with this file + +# roscore +# rosnode kill -a + +source ~/catkin_ws/devel/setup.bash + +roscore & +P1=$! +rosrun midas_cpp talker.py & +P2=$! +rosrun midas_cpp listener_original.py & +P3=$! +rosrun midas_cpp listener.py & +P4=$! +wait $P1 $P2 $P3 $P4 \ No newline at end of file diff --git a/live2diff/MiDaS/run.py b/live2diff/MiDaS/run.py new file mode 100644 index 0000000000000000000000000000000000000000..54c2b65d5e64f73450676e09283f95fffe03f87a --- /dev/null +++ b/live2diff/MiDaS/run.py @@ -0,0 +1,188 @@ +"""Compute depth maps for images in the input folder. +""" +import os +import glob +import torch +import utils +import cv2 +import argparse + +from torchvision.transforms import Compose +from midas.dpt_depth import DPTDepthModel +from midas.midas_net import MidasNet +from midas.midas_net_custom import MidasNet_small +from midas.transforms import Resize, NormalizeImage, PrepareForNet + + +def run(input_path, output_path, model_path, model_type="large", optimize=True): + """Run MonoDepthNN to compute depth maps. 
+ + Args: + input_path (str): path to input folder + output_path (str): path to output folder + model_path (str): path to saved model + """ + print("initialize") + + # select device + device = torch.device("cuda" if torch.cuda.is_available() else "cpu") + print("device: %s" % device) + + # load network + if model_type == "dpt_large": # DPT-Large + model = DPTDepthModel( + path=model_path, + backbone="vitl16_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode = "minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + elif model_type == "dpt_hybrid": #DPT-Hybrid + model = DPTDepthModel( + path=model_path, + backbone="vitb_rn50_384", + non_negative=True, + ) + net_w, net_h = 384, 384 + resize_mode="minimal" + normalization = NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) + elif model_type == "midas_v21": + model = MidasNet(model_path, non_negative=True) + net_w, net_h = 384, 384 + resize_mode="upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + elif model_type == "midas_v21_small": + model = MidasNet_small(model_path, features=64, backbone="efficientnet_lite3", exportable=True, non_negative=True, blocks={'expand': True}) + net_w, net_h = 256, 256 + resize_mode="upper_bound" + normalization = NormalizeImage( + mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225] + ) + else: + print(f"model_type '{model_type}' not implemented, use: --model_type large") + assert False + + transform = Compose( + [ + Resize( + net_w, + net_h, + resize_target=None, + keep_aspect_ratio=True, + ensure_multiple_of=32, + resize_method=resize_mode, + image_interpolation_method=cv2.INTER_CUBIC, + ), + normalization, + PrepareForNet(), + ] + ) + + model.eval() + + if optimize==True: + # rand_example = torch.rand(1, 3, net_h, net_w) + # model(rand_example) + # traced_script_module = torch.jit.trace(model, rand_example) + # model = traced_script_module + + if device == 
torch.device("cuda"): + model = model.to(memory_format=torch.channels_last) + model = model.half() + + model.to(device) + + # get input + img_names = glob.glob(os.path.join(input_path, "*")) + num_images = len(img_names) + + # create output folder + os.makedirs(output_path, exist_ok=True) + + print("start processing") + + for ind, img_name in enumerate(img_names): + + print(" processing {} ({}/{})".format(img_name, ind + 1, num_images)) + + # input + + img = utils.read_image(img_name) + img_input = transform({"image": img})["image"] + + # compute + with torch.no_grad(): + sample = torch.from_numpy(img_input).to(device).unsqueeze(0) + if optimize==True and device == torch.device("cuda"): + sample = sample.to(memory_format=torch.channels_last) + sample = sample.half() + prediction = model.forward(sample) + prediction = ( + torch.nn.functional.interpolate( + prediction.unsqueeze(1), + size=img.shape[:2], + mode="bicubic", + align_corners=False, + ) + .squeeze() + .cpu() + .numpy() + ) + + # output + filename = os.path.join( + output_path, os.path.splitext(os.path.basename(img_name))[0] + ) + utils.write_depth(filename, prediction, bits=2) + + print("finished") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + + parser.add_argument('-i', '--input_path', + default='input', + help='folder with input images' + ) + + parser.add_argument('-o', '--output_path', + default='output', + help='folder for output images' + ) + + parser.add_argument('-m', '--model_weights', + default=None, + help='path to the trained weights of model' + ) + + parser.add_argument('-t', '--model_type', + default='dpt_large', + help='model type: dpt_large, dpt_hybrid, midas_v21_large or midas_v21_small' + ) + + parser.add_argument('--optimize', dest='optimize', action='store_true') + parser.add_argument('--no-optimize', dest='optimize', action='store_false') + parser.set_defaults(optimize=True) + + args = parser.parse_args() + + default_models = { + "midas_v21_small": 
"weights/midas_v21_small-70d6b9c8.pt", + "midas_v21": "weights/midas_v21-f6b98070.pt", + "dpt_large": "weights/dpt_large-midas-2f21e586.pt", + "dpt_hybrid": "weights/dpt_hybrid-midas-501f0c75.pt", + } + + if args.model_weights is None: + args.model_weights = default_models[args.model_type] + + # set torch options + torch.backends.cudnn.enabled = True + torch.backends.cudnn.benchmark = True + + # compute depth maps + run(args.input_path, args.output_path, args.model_weights, args.model_type, args.optimize) diff --git a/live2diff/MiDaS/tf/README.md b/live2diff/MiDaS/tf/README.md new file mode 100644 index 0000000000000000000000000000000000000000..a613da6c3bb547415bd67423f2b53ad260b8c703 --- /dev/null +++ b/live2diff/MiDaS/tf/README.md @@ -0,0 +1,147 @@ +## Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer + +### TensorFlow inference using `.pb` and `.onnx` models + +1. [Run inference on TensorFlow-model by using TensorFlow](#run-inference-on-tensorflow-model-by-using-tensorFlow) + +2. [Run inference on ONNX-model by using TensorFlow](#run-inference-on-onnx-model-by-using-tensorflow) + +3. [Make ONNX model from downloaded Pytorch model file](#make-onnx-model-from-downloaded-pytorch-model-file) + + +### Run inference on TensorFlow-model by using TensorFlow + +1) Download the model weights [model-f6b98070.pb](https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pb) +and [model-small.pb](https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-small.pb) and place the +file in the `/tf/` folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install TensorFlow +pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0 +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. 
+ +2) Run the model: + + ```shell + python tf/run_pb.py + ``` + + Or run the small model: + + ```shell + python tf/run_pb.py --model_weights model-small.pb --model_type small + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + +### Run inference on ONNX-model by using ONNX-Runtime + +1) Download the model weights [model-f6b98070.onnx](https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.onnx) +and [model-small.onnx](https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-small.onnx) and place the +files in the `/tf/` folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install ONNX +pip install onnx==1.7.0 + +# install ONNX Runtime +pip install onnxruntime==1.5.2 +``` + +#### Usage + +1) Place one or more input images in the folder `tf/input`. + +2) Run the model: + + ```shell + python tf/run_onnx.py + ``` + + Or run the small model: + + ```shell + python tf/run_onnx.py --model_weights model-small.onnx --model_type small + ``` + +3) The resulting inverse depth maps are written to the `tf/output` folder. + + + +### Make ONNX model from downloaded PyTorch model file + +1) Download the model weights [model-f6b98070.pt](https://github.com/intel-isl/MiDaS/releases/download/v2_1/model-f6b98070.pt) and place the +file in the root folder. + +2) Set up dependencies: + +```shell +# install OpenCV +pip install --upgrade pip +pip install opencv-python + +# install PyTorch TorchVision +pip install -I torch==1.7.0 torchvision==0.8.0 + +# install TensorFlow +pip install -I grpcio tensorflow==2.3.0 tensorflow-addons==0.11.2 numpy==1.18.0 + +# install ONNX +pip install onnx==1.7.0 + +# install ONNX-TensorFlow +git clone https://github.com/onnx/onnx-tensorflow.git +cd onnx-tensorflow +git checkout 095b51b88e35c4001d70f15f80f31014b592b81e +pip install -e .
+``` + +#### Usage + +1) Run the converter: + + ```shell + python tf/make_onnx_model.py + ``` + +2) The resulting `model-f6b98070.onnx` file is written to the `/tf/` folder. + + +### Requirements + + The code was tested with Python 3.6.9, PyTorch 1.5.1, TensorFlow 2.2.0, TensorFlow-addons 0.8.3, ONNX 1.7.0, ONNX-TensorFlow (GitHub-master-17.07.2020) and OpenCV 4.3.0. + +### Citation + +Please cite our paper if you use this code or any of the models: +``` +@article{Ranftl2019, + author = {Ren\'{e} Ranftl and Katrin Lasinger and David Hafner and Konrad Schindler and Vladlen Koltun}, + title = {Towards Robust Monocular Depth Estimation: Mixing Datasets for Zero-shot Cross-dataset Transfer}, + journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence (TPAMI)}, + year = {2020}, +} +``` + +### License + +MIT License + + diff --git a/live2diff/MiDaS/tf/input/.placeholder b/live2diff/MiDaS/tf/input/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/live2diff/MiDaS/tf/make_onnx_model.py b/live2diff/MiDaS/tf/make_onnx_model.py new file mode 100644 index 0000000000000000000000000000000000000000..d14b0e4e1d2ea70fa315fd7ca7dfd72440a19376 --- /dev/null +++ b/live2diff/MiDaS/tf/make_onnx_model.py @@ -0,0 +1,112 @@ +"""Compute depth maps for images in the input folder. 
class MidasNet_preprocessing(MidasNet):
    """MidasNet variant that normalises its input inside ``forward``.

    The per-channel mean/std subtraction and division are part of the forward
    pass, so they are applied whenever the module is called.
    """

    def forward(self, x):
        """Normalise ``x`` per channel and run the parent network.

        Args:
            x (tensor): input image batch; the mean/std vectors are broadcast
                over dimension 1.

        Returns:
            tensor: result of ``MidasNet.forward`` on the normalised input
        """
        # Build the constants with the input's dtype/device so the arithmetic
        # also works when the input is not a CPU float32 tensor.
        mean = torch.tensor([0.485, 0.456, 0.406], dtype=x.dtype, device=x.device)
        std = torch.tensor([0.229, 0.224, 0.225], dtype=x.dtype, device=x.device)

        # Out-of-place on purpose: the previous in-place sub_/div_ silently
        # modified the caller's tensor, so reusing it normalised it twice.
        x = (x - mean[None, :, None, None]) / std[None, :, None, None]

        return MidasNet.forward(self, x)
def run(input_path, output_path, model_path, model_type="large"):
    """Compute depth maps for all files in a folder with an ONNX model.

    Every file matched by ``input_path/*`` is read with ``utils.read_image``,
    resized to the network resolution, passed through the ONNX session, resized
    back to the source resolution and written with ``utils.write_depth``
    (16-bit PNG plus PFM).

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder (created if missing)
        model_path (str): path to the ``.onnx`` model file
        model_type (str): ``"large"`` (384x384) or ``"small"`` (256x256)

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
    """
    print("initialize")

    # NOTE: this label is informational only; the session below is created
    # without an explicit provider list, so onnxruntime picks the device.
    device = "CUDA:0"
    print("device: %s" % device)

    # network resolution (width, height) per model type
    resolutions = {"large": (384, 384), "small": (256, 256)}
    if model_type not in resolutions:
        # Raise instead of `assert False`: asserts vanish under `python -O`,
        # which would leave net_w / net_h undefined further down.
        raise ValueError(
            f"model_type '{model_type}' not implemented, use: --model_type large"
        )
    net_w, net_h = resolutions[model_type]

    # load network
    print("loading model...")
    model = rt.InferenceSession(model_path)
    input_name = model.get_inputs()[0].name
    output_name = model.get_outputs()[0].name

    resize_image = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    prepare = PrepareForNet()

    def transform(sample):
        # resize first, then convert to the array layout PrepareForNet produces
        return prepare(resize_image(sample))

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    for ind, img_name in enumerate(img_names):

        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = utils.read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        batch = img_input.reshape(1, 3, net_h, net_w).astype(np.float32)
        output = model.run([output_name], {input_name: batch})[0]
        prediction = np.array(output).reshape(net_h, net_w)
        # cv2.resize expects (width, height)
        prediction = cv2.resize(
            prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC
        )

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        utils.write_depth(filename, prediction, bits=2)

    print("finished")
def run(input_path, output_path, model_path, model_type="large"):
    """Compute depth maps for all files in a folder with a frozen TensorFlow graph.

    Every file matched by ``input_path/*`` is read with ``utils.read_image``,
    resized to the network resolution, evaluated in a TF1-style session,
    resized back to the source resolution and written with
    ``utils.write_depth`` (16-bit PNG plus PFM).

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder (created if missing)
        model_path (str): path to the ``.pb`` graph file
        model_type (str): ``"large"`` (384x384) or ``"small"`` (256x256)

    Raises:
        ValueError: if ``model_type`` is not one of the supported values.
        SystemExit: with code -1 if a ``KeyError`` is raised while processing
            (reported as a missing input node or output layer).
    """
    print("initialize")

    # Cap the memory TensorFlow reserves per GPU so that runtime
    # initialization does not allocate all device memory.
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_virtual_device_configuration(
                    gpu,
                    [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4000)],
                )
        except RuntimeError as e:
            print(e)

    # network resolution (width, height) per model type
    resolutions = {"large": (384, 384), "small": (256, 256)}
    if model_type not in resolutions:
        # Raise instead of `assert False`: asserts vanish under `python -O`,
        # which would leave net_w / net_h undefined further down.
        raise ValueError(
            f"model_type '{model_type}' not implemented, use: --model_type large"
        )
    net_w, net_h = resolutions[model_type]

    # load network
    graph_def = tf.compat.v1.GraphDef()
    with tf.io.gfile.GFile(model_path, 'rb') as f:
        graph_def.ParseFromString(f.read())
        tf.import_graph_def(graph_def, name='')

    model_operations = tf.compat.v1.get_default_graph().get_operations()
    input_node = '0:0'
    # the output of the last operation in the graph is used as the prediction
    output_layer = model_operations[-1].name + ':0'
    print("Last layer name: ", output_layer)

    resize_image = Resize(
        net_w,
        net_h,
        resize_target=None,
        keep_aspect_ratio=False,
        ensure_multiple_of=32,
        resize_method="upper_bound",
        image_interpolation_method=cv2.INTER_CUBIC,
    )
    prepare = PrepareForNet()

    def transform(sample):
        # resize first, then convert to the array layout PrepareForNet produces
        return prepare(resize_image(sample))

    # get input
    img_names = glob.glob(os.path.join(input_path, "*"))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")

    with tf.compat.v1.Session() as sess:
        try:
            for ind, img_name in enumerate(img_names):

                print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

                # input
                img = utils.read_image(img_name)
                img_input = transform({"image": img})["image"]

                # compute
                prob_tensor = sess.graph.get_tensor_by_name(output_layer)
                prediction, = sess.run(prob_tensor, {input_node: [img_input]})
                prediction = prediction.reshape(net_h, net_w)
                # cv2.resize expects (width, height)
                prediction = cv2.resize(
                    prediction, (img.shape[1], img.shape[0]), interpolation=cv2.INTER_CUBIC
                )

                # output
                filename = os.path.join(
                    output_path, os.path.splitext(os.path.basename(img_name))[0]
                )
                utils.write_depth(filename, prediction, bits=2)

        except KeyError:
            # The old message had mismatched quotes and printed the literal
            # text "' + input_node + '" instead of the node name.
            print(
                f"Couldn't find input node: '{input_node}' "
                f"or output layer: '{output_layer}'."
            )
            # same exit status as before, without relying on the site-provided exit()
            raise SystemExit(-1)

    print("finished")
class Resize(object):
    """Resize a sample dict to a target size (width, height)."""

    def __init__(
        self,
        width,
        height,
        resize_target=True,
        keep_aspect_ratio=False,
        ensure_multiple_of=1,
        resize_method="lower_bound",
        image_interpolation_method=cv2.INTER_AREA,
    ):
        """Store the resize configuration.

        Args:
            width (int): desired output width
            height (int): desired output height
            resize_target (bool, optional):
                True: also resize "disparity", "depth" and "mask" when present.
                False: resize the image only.
                Defaults to True.
            keep_aspect_ratio (bool, optional):
                True: use one scale factor for both axes; the output may then
                differ from the requested width/height, depending on
                ``resize_method``.
                Defaults to False.
            ensure_multiple_of (int, optional):
                Output width and height are constrained to multiples of this
                value. Defaults to 1.
            resize_method (str, optional):
                "lower_bound": output is at least as large as the given size.
                "upper_bound": output is at most as large as the given size.
                "minimal": use the scale factor closest to 1.
                Defaults to "lower_bound".
            image_interpolation_method (int, optional): OpenCV interpolation
                flag used for the image. Defaults to ``cv2.INTER_AREA``.
        """
        self.__width = width
        self.__height = height

        self.__resize_target = resize_target
        self.__keep_aspect_ratio = keep_aspect_ratio
        self.__multiple_of = ensure_multiple_of
        self.__resize_method = resize_method
        self.__image_interpolation_method = image_interpolation_method

    def constrain_to_multiple_of(self, x, min_val=0, max_val=None):
        """Snap ``x`` to a multiple of ``ensure_multiple_of``.

        Rounds to the nearest multiple, falls back to rounding down when that
        exceeds ``max_val``, and to rounding up when the result is below
        ``min_val``.
        """
        step = self.__multiple_of
        y = (np.round(x / step) * step).astype(int)

        if max_val is not None and y > max_val:
            y = (np.floor(x / step) * step).astype(int)

        if y < min_val:
            y = (np.ceil(x / step) * step).astype(int)

        return y

    def get_size(self, width, height):
        """Return the output ``(width, height)`` for an input of the given size.

        Raises:
            ValueError: if ``resize_method`` is not a supported value.
        """
        # per-axis scale factors towards the requested size
        scale_height = self.__height / height
        scale_width = self.__width / width

        if self.__keep_aspect_ratio:
            # collapse both factors into one so the aspect ratio is preserved
            if self.__resize_method == "lower_bound":
                # larger factor: output covers the requested size
                scale = max(scale_width, scale_height)
            elif self.__resize_method == "upper_bound":
                # smaller factor: output fits inside the requested size
                scale = min(scale_width, scale_height)
            elif self.__resize_method == "minimal":
                # the factor that changes the image least
                if abs(1 - scale_width) < abs(1 - scale_height):
                    scale = scale_width
                else:
                    scale = scale_height
            else:
                raise ValueError(
                    f"resize_method {self.__resize_method} not implemented"
                )
            scale_width = scale_height = scale

        if self.__resize_method == "lower_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, min_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, min_val=self.__width
            )
        elif self.__resize_method == "upper_bound":
            new_height = self.constrain_to_multiple_of(
                scale_height * height, max_val=self.__height
            )
            new_width = self.constrain_to_multiple_of(
                scale_width * width, max_val=self.__width
            )
        elif self.__resize_method == "minimal":
            new_height = self.constrain_to_multiple_of(scale_height * height)
            new_width = self.constrain_to_multiple_of(scale_width * width)
        else:
            raise ValueError(f"resize_method {self.__resize_method} not implemented")

        return (new_width, new_height)

    def __call__(self, sample):
        """Resize ``sample["image"]`` (and targets if configured) in place.

        Returns:
            dict: the same ``sample`` object with resized entries
        """
        width, height = self.get_size(
            sample["image"].shape[1], sample["image"].shape[0]
        )

        # resize sample
        sample["image"] = cv2.resize(
            sample["image"],
            (width, height),
            interpolation=self.__image_interpolation_method,
        )

        if self.__resize_target:
            # targets use nearest-neighbour interpolation
            if "disparity" in sample:
                sample["disparity"] = cv2.resize(
                    sample["disparity"],
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )

            if "depth" in sample:
                sample["depth"] = cv2.resize(
                    sample["depth"], (width, height), interpolation=cv2.INTER_NEAREST
                )

            # Guarded like the other optional targets; an unconditional access
            # raised KeyError for samples that carry no mask.
            if "mask" in sample:
                sample["mask"] = cv2.resize(
                    sample["mask"].astype(np.float32),
                    (width, height),
                    interpolation=cv2.INTER_NEAREST,
                )
                sample["mask"] = sample["mask"].astype(bool)

        return sample
def write_pfm(path, image, scale=1):
    """Write a float32 image to a PFM file.
    Args:
        path (str): path to file
        image (array): float32 data of shape H x W x 3, H x W x 1 or H x W
        scale (int, optional): Scale. Defaults to 1. Written negated when the
            data is little-endian.
    Raises:
        Exception: if the dtype is not float32 or the shape is unsupported.
            The checks run before the file is opened, so invalid input does
            not leave an empty file behind.
    """
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif len(image.shape) == 2 or (
        len(image.shape) == 3 and image.shape[2] == 1
    ):  # greyscale
        color = False
    else:
        raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # rows are written in reversed (bottom-to-top) order
    image = np.flipud(image)

    # a negative scale in the header marks little-endian data
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(path, "wb") as file:
        # Parenthesised so that .encode() applies to both alternatives; before,
        # the colour branch passed a str to a binary file and raised TypeError.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
def write_depth(path, depth, bits=1):
    """Write depth map to pfm and png file.
    Args:
        path (str): filepath without extension
        depth (array): depth
        bits (int, optional): bytes per PNG sample; 1 writes uint8, 2 writes
            uint16. Any other value writes only the PFM file. Defaults to 1.
    """
    write_pfm(path + ".pfm", depth.astype(np.float32))

    depth_min = depth.min()
    depth_max = depth.max()

    # largest value representable with `bits` bytes per sample
    max_val = (2 ** (8 * bits)) - 1

    if depth_max - depth_min > np.finfo("float").eps:
        # stretch the value range to [0, max_val]
        out = max_val * (depth - depth_min) / (depth_max - depth_min)
    else:
        # Constant depth map: write an all-zero image. This used to be the
        # int 0, which has no .astype() and crashed in the branches below.
        out = np.zeros(depth.shape, dtype=depth.dtype)

    if bits == 1:
        cv2.imwrite(path + ".png", out.astype("uint8"))
    elif bits == 2:
        cv2.imwrite(path + ".png", out.astype("uint16"))

    return
def write_pfm(path, image, scale=1):
    """Write a float32 image to a PFM file.

    Args:
        path (str): path to file
        image (array): float32 data of shape H x W x 3, H x W x 1 or H x W
        scale (int, optional): Scale. Defaults to 1. Written negated when the
            data is little-endian.

    Raises:
        Exception: if the dtype is not float32 or the shape is unsupported.
            The checks run before the file is opened, so invalid input does
            not leave an empty file behind.
    """
    if image.dtype.name != "float32":
        raise Exception("Image dtype must be float32.")

    if len(image.shape) == 3 and image.shape[2] == 3:  # color image
        color = True
    elif len(image.shape) == 2 or (
        len(image.shape) == 3 and image.shape[2] == 1
    ):  # greyscale
        color = False
    else:
        raise Exception("Image must have H x W x 3, H x W x 1 or H x W dimensions.")

    # rows are written in reversed (bottom-to-top) order
    image = np.flipud(image)

    # a negative scale in the header marks little-endian data
    endian = image.dtype.byteorder
    if endian == "<" or (endian == "=" and sys.byteorder == "little"):
        scale = -scale

    with open(path, "wb") as file:
        # Parenthesised so that .encode() applies to both alternatives; before,
        # the colour branch passed a str to a binary file and raised TypeError.
        file.write(("PF\n" if color else "Pf\n").encode())
        file.write("%d %d\n".encode() % (image.shape[1], image.shape[0]))
        file.write("%f\n".encode() % scale)

        image.tofile(file)
+ + Args: + path (str): path to file + + Returns: + array: RGB image (0-1) + """ + img = cv2.imread(path) + + if img.ndim == 2: + img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) + + img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0 + + return img + + +def resize_image(img): + """Resize image and make it fit for network. + + Args: + img (array): image + + Returns: + tensor: data ready for network + """ + height_orig = img.shape[0] + width_orig = img.shape[1] + + if width_orig > height_orig: + scale = width_orig / 384 + else: + scale = height_orig / 384 + + height = (np.ceil(height_orig / scale / 32) * 32).astype(int) + width = (np.ceil(width_orig / scale / 32) * 32).astype(int) + + img_resized = cv2.resize(img, (width, height), interpolation=cv2.INTER_AREA) + + img_resized = ( + torch.from_numpy(np.transpose(img_resized, (2, 0, 1))).contiguous().float() + ) + img_resized = img_resized.unsqueeze(0) + + return img_resized + + +def resize_depth(depth, width, height): + """Resize depth map and bring to CPU (numpy). + + Args: + depth (tensor): depth + width (int): image width + height (int): image height + + Returns: + array: processed depth + """ + depth = torch.squeeze(depth[0, :, :, :]).to("cpu") + + depth_resized = cv2.resize( + depth.numpy(), (width, height), interpolation=cv2.INTER_CUBIC + ) + + return depth_resized + +def write_depth(path, depth, bits=1): + """Write depth map to pfm and png file. 
+ + Args: + path (str): filepath without extension + depth (array): depth + """ + write_pfm(path + ".pfm", depth.astype(np.float32)) + + depth_min = depth.min() + depth_max = depth.max() + + max_val = (2**(8*bits))-1 + + if depth_max - depth_min > np.finfo("float").eps: + out = max_val * (depth - depth_min) / (depth_max - depth_min) + else: + out = np.zeros(depth.shape, dtype=depth.type) + + if bits == 1: + cv2.imwrite(path + ".png", out.astype("uint8")) + elif bits == 2: + cv2.imwrite(path + ".png", out.astype("uint16")) + + return diff --git a/live2diff/MiDaS/weights/.placeholder b/live2diff/MiDaS/weights/.placeholder new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391