Lodor
committed on
Commit
·
206ce41
1
Parent(s):
a33c8f4
Initial commit
Browse files- .gitignore +124 -0
- .streamlit/config.toml +6 -0
- Dockerfile +9 -0
- README.md +1 -0
- app.py +80 -0
- assets/demo.jpg +0 -0
- docker-compose.yml +13 -0
- requirements.txt +6 -0
- src/__init__.py +0 -0
- src/models/__init__.py +0 -0
- src/models/backbones/__init__.py +10 -0
- src/models/backbones/mobilenetv2.py +199 -0
- src/models/backbones/wrapper.py +82 -0
- src/models/modnet.py +255 -0
- src/st_style.py +42 -0
- src/trainer.py +299 -0
- src/utils.py +107 -0
.gitignore
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Byte-compiled / optimized / DLL files
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*$py.class
|
| 5 |
+
|
| 6 |
+
# C extensions
|
| 7 |
+
*.so
|
| 8 |
+
|
| 9 |
+
# Distribution / packaging
|
| 10 |
+
.Python
|
| 11 |
+
build/
|
| 12 |
+
develop-eggs/
|
| 13 |
+
dist/
|
| 14 |
+
downloads/
|
| 15 |
+
eggs/
|
| 16 |
+
.eggs/
|
| 17 |
+
lib/
|
| 18 |
+
lib64/
|
| 19 |
+
parts/
|
| 20 |
+
sdist/
|
| 21 |
+
var/
|
| 22 |
+
wheels/
|
| 23 |
+
pip-wheel-metadata/
|
| 24 |
+
share/python-wheels/
|
| 25 |
+
*.egg-info/
|
| 26 |
+
.installed.cfg
|
| 27 |
+
*.egg
|
| 28 |
+
MANIFEST
|
| 29 |
+
|
| 30 |
+
# PyInstaller
|
| 31 |
+
# Usually these files are written by a python script from a template
|
| 32 |
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
| 33 |
+
*.manifest
|
| 34 |
+
*.spec
|
| 35 |
+
|
| 36 |
+
# Installer logs
|
| 37 |
+
pip-log.txt
|
| 38 |
+
pip-delete-this-directory.txt
|
| 39 |
+
|
| 40 |
+
# Unit test / coverage reports
|
| 41 |
+
htmlcov/
|
| 42 |
+
.tox/
|
| 43 |
+
.nox/
|
| 44 |
+
.coverage
|
| 45 |
+
.coverage.*
|
| 46 |
+
.cache
|
| 47 |
+
nosetests.xml
|
| 48 |
+
coverage.xml
|
| 49 |
+
*.cover
|
| 50 |
+
.hypothesis/
|
| 51 |
+
.pytest_cache/
|
| 52 |
+
|
| 53 |
+
# Translations
|
| 54 |
+
*.mo
|
| 55 |
+
*.pot
|
| 56 |
+
|
| 57 |
+
# Django stuff:
|
| 58 |
+
*.log
|
| 59 |
+
local_settings.py
|
| 60 |
+
db.sqlite3
|
| 61 |
+
db.sqlite3-journal
|
| 62 |
+
|
| 63 |
+
# Flask stuff:
|
| 64 |
+
instance/
|
| 65 |
+
.webassets-cache
|
| 66 |
+
|
| 67 |
+
# Scrapy stuff:
|
| 68 |
+
.scrapy
|
| 69 |
+
|
| 70 |
+
# Sphinx documentation
|
| 71 |
+
docs/_build/
|
| 72 |
+
|
| 73 |
+
# PyBuilder
|
| 74 |
+
target/
|
| 75 |
+
|
| 76 |
+
# Jupyter Notebook
|
| 77 |
+
.ipynb_checkpoints
|
| 78 |
+
|
| 79 |
+
# IPython
|
| 80 |
+
profile_default/
|
| 81 |
+
ipython_config.py
|
| 82 |
+
|
| 83 |
+
# pyenv
|
| 84 |
+
.python-version
|
| 85 |
+
|
| 86 |
+
# pipenv
|
| 87 |
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
| 88 |
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
| 89 |
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
| 90 |
+
# install all needed dependencies.
|
| 91 |
+
#Pipfile.lock
|
| 92 |
+
|
| 93 |
+
# celery beat schedule file
|
| 94 |
+
celerybeat-schedule
|
| 95 |
+
|
| 96 |
+
# SageMath parsed files
|
| 97 |
+
*.sage.py
|
| 98 |
+
|
| 99 |
+
# Environments
|
| 100 |
+
.env
|
| 101 |
+
.venv
|
| 102 |
+
env/
|
| 103 |
+
venv/
|
| 104 |
+
ENV/
|
| 105 |
+
env.bak/
|
| 106 |
+
venv.bak/
|
| 107 |
+
|
| 108 |
+
# Spyder project settings
|
| 109 |
+
.spyderproject
|
| 110 |
+
.spyproject
|
| 111 |
+
|
| 112 |
+
# Rope project settings
|
| 113 |
+
.ropeproject
|
| 114 |
+
|
| 115 |
+
# mkdocs documentation
|
| 116 |
+
/site
|
| 117 |
+
|
| 118 |
+
# mypy
|
| 119 |
+
.mypy_cache/
|
| 120 |
+
.dmypy.json
|
| 121 |
+
dmypy.json
|
| 122 |
+
|
| 123 |
+
# Pyre type checker
|
| 124 |
+
.pyre/
|
.streamlit/config.toml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[server]
|
| 2 |
+
maxUploadSize = 10
|
| 3 |
+
|
| 4 |
+
[theme]
|
| 5 |
+
base="light"
|
| 6 |
+
primaryColor="#0074ff"
|
Dockerfile
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
FROM pytorch/pytorch:latest
|
| 2 |
+
|
| 3 |
+
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
COPY . .
|
| 6 |
+
|
| 7 |
+
RUN pip install -r requirements.txt
|
| 8 |
+
|
| 9 |
+
CMD [ "streamlit", "run", "app.py" ]
|
README.md
CHANGED
|
@@ -5,6 +5,7 @@ colorFrom: green
|
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: streamlit
|
| 7 |
sdk_version: 1.2.0
|
|
|
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
---
|
|
|
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: streamlit
|
| 7 |
sdk_version: 1.2.0
|
| 8 |
+
python_version: 3.9.5
|
| 9 |
app_file: app.py
|
| 10 |
pinned: false
|
| 11 |
---
|
app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import streamlit as st
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime
|
| 4 |
+
from PIL import Image
|
| 5 |
+
from io import BytesIO
|
| 6 |
+
|
| 7 |
+
from src.utils import change_background, matte
|
| 8 |
+
from src.st_style import apply_prod_style
|
| 9 |
+
|
| 10 |
+
# apply_prod_style(st) # NOTE: Uncomment this for production!
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def image_download_button(pil_image, filename: str, fmt: str, label="Download"):
    """Render a Streamlit download button that serves ``pil_image`` as a file.

    Args:
        pil_image: Image object exposing ``mode``, ``convert`` and
            ``save(buffer, format=...)`` (a PIL image).
        filename: Name of the downloaded file, without extension.
        fmt: Output format, ``"jpg"`` or ``"png"`` (case sensitive).
        label: Text displayed on the button.

    Returns:
        The value returned by ``st.download_button``.

    Raises:
        ValueError: If ``fmt`` is not one of the supported formats.
    """
    # fmt -> (Pillow encoder name, MIME type sent to the browser)
    formats = {
        "jpg": ("JPEG", "image/jpeg"),
        "png": ("PNG", "image/png"),
    }
    if fmt not in formats:
        available = ", ".join(formats)
        raise ValueError(
            f"Unknown image format: {fmt!r} (Available: {available} - case sensitive)"
        )

    pil_format, mime = formats[fmt]

    # The JPEG encoder rejects alpha/palette modes, so flatten those to RGB first.
    if pil_format == "JPEG" and pil_image.mode in ("RGBA", "LA", "P"):
        pil_image = pil_image.convert("RGB")

    buf = BytesIO()
    pil_image.save(buf, format=pil_format)

    return st.download_button(
        label=label,
        data=buf.getvalue(),
        file_name=f"{filename}.{fmt}",
        mime=mime,
    )
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# ---- Page header -----------------------------------------------------------
st.title("AI Photo Background Removal")
st.image(Image.open("assets/demo.jpg"))
st.write(
    """
    You want to remove your photo background, but don't have the time and effort to learn photo editing skills?
    **This app will change or remove your photo background, in seconds.**
    """
)

uploaded_file = st.file_uploader(
    label="Upload your photo here",
    accept_multiple_files=False, type=["png", "jpg", "jpeg"],
)

# NOTE(review): nesting below was reconstructed from a flattened listing --
# confirm that the colour picker and submit button belong inside this branch.
if uploaded_file is not None:

    # The upload is known to exist here, so it is shown unconditionally.
    with st.expander("Original photo", expanded=True):
        st.image(uploaded_file)

    in_mode = st.selectbox("Choose background color", ["Transparent (PNG)", "White", "Black", "Green", "Red", "Blue"])
    in_submit = st.button("Submit")

    if in_submit:
        img_input = Image.open(uploaded_file)

        with st.spinner("AI is doing magic to your photo. Please wait..."):
            # Background fill colour for every selectable mode.
            hexmap = {
                "Transparent (PNG)": "#000000",
                "Black": "#000000",
                "White": "#FFFFFF",
                "Green": "#22EE22",
                "Red": "#EE2222",
                "Blue": "#2222EE",
            }
            # Alpha 0.0 is passed for the transparent mode, 1.0 for solid colours.
            alpha = 0.0 if in_mode == "Transparent (PNG)" else 1.0
            img_matte = matte(img_input)
            img_output = change_background(img_input, img_matte, background_alpha=alpha, background_hex=hexmap[in_mode])

        with st.expander("Success!", expanded=True):
            st.image(img_output)
            # Reuse the uploaded file's base name for the download.
            uploaded_name = os.path.splitext(uploaded_file.name)[0]
            image_download_button(
                pil_image=img_output,
                filename=uploaded_name,
                fmt="png"
            )
|
assets/demo.jpg
ADDED
|
docker-compose.yml
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
version: '3'
|
| 3 |
+
services:
|
| 4 |
+
st-remove-photo-background:
|
| 5 |
+
build: .
|
| 6 |
+
container_name: st-remove-photo-background
|
| 7 |
+
restart: unless-stopped
|
| 8 |
+
ports:
|
| 9 |
+
- 51001:8501
|
| 10 |
+
volumes:
|
| 11 |
+
- .:/app
|
| 12 |
+
environment:
|
| 13 |
+
- TZ=Asia/Jakarta
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
torch
|
| 2 |
+
torchvision
|
| 3 |
+
numpy
|
| 4 |
+
opencv-python-headless
|
| 5 |
+
matplotlib
|
| 6 |
+
streamlit
|
src/__init__.py
ADDED
|
File without changes
|
src/models/__init__.py
ADDED
|
File without changes
|
src/models/backbones/__init__.py
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .wrapper import *
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
#------------------------------------------------------------------------------
|
| 5 |
+
# Replaceable Backbones
|
| 6 |
+
#------------------------------------------------------------------------------
|
| 7 |
+
|
| 8 |
+
# Registry mapping a backbone architecture name to the class that builds it.
SUPPORTED_BACKBONES = {'mobilenetv2': MobileNetV2Backbone}
|
src/models/backbones/mobilenetv2.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
""" This file is adapted from https://github.com/thuyngch/Human-Segmentation-PyTorch"""
|
| 2 |
+
|
| 3 |
+
import math
|
| 4 |
+
import json
|
| 5 |
+
from functools import reduce
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from torch import nn
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
#------------------------------------------------------------------------------
|
| 12 |
+
# Useful functions
|
| 13 |
+
#------------------------------------------------------------------------------
|
| 14 |
+
|
| 15 |
+
def _make_divisible(v, divisor, min_value=None):
    """Round ``v`` to the nearest multiple of ``divisor``.

    The result is never smaller than ``min_value`` (which defaults to
    ``divisor``). If rounding lands more than 10% below ``v``, one extra
    ``divisor`` is added.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest_multiple = int(v + divisor / 2) // divisor * divisor
    rounded = max(lower_bound, nearest_multiple)
    # Rounding down must not lose more than 10% of the requested value.
    return rounded + divisor if rounded < 0.9 * v else rounded
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def conv_bn(inp, oup, stride):
    """Build a bias-free 3x3 conv (padding 1) followed by BatchNorm and ReLU6."""
    conv = nn.Conv2d(inp, oup, kernel_size=3, stride=stride, padding=1, bias=False)
    norm = nn.BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(conv, norm, activation)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def conv_1x1_bn(inp, oup):
    """Build a bias-free 1x1 conv (stride 1) followed by BatchNorm and ReLU6."""
    pointwise = nn.Conv2d(inp, oup, kernel_size=1, stride=1, padding=0, bias=False)
    norm = nn.BatchNorm2d(oup)
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(pointwise, norm, activation)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
#------------------------------------------------------------------------------
|
| 42 |
+
# Class of Inverted Residual block
|
| 43 |
+
#------------------------------------------------------------------------------
|
| 44 |
+
|
| 45 |
+
class InvertedResidual(nn.Module):
    """Inverted residual block: optional 1x1 expansion, 3x3 depthwise conv,
    then a linear 1x1 projection.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        stride: Stride of the depthwise convolution; must be 1 or 2.
        expansion: Channel multiplier for the hidden layer. When it is 1 the
            expansion convolution is omitted.
        dilation: Dilation of the depthwise convolution.

    A skip connection is added when ``stride == 1`` and ``inp == oup``.
    """

    def __init__(self, inp, oup, stride, expansion, dilation=1):
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]
        self.stride = stride

        hidden_dim = round(inp * expansion)
        self.use_res_connect = self.stride == 1 and inp == oup

        layers = []
        if expansion != 1:
            # pw: expand to the hidden width
            layers += [
                nn.Conv2d(inp, hidden_dim, 1, 1, 0, bias=False),
                nn.BatchNorm2d(hidden_dim),
                nn.ReLU6(inplace=True),
            ]
        layers += [
            # dw: padding follows dilation so a 3x3 kernel keeps the spatial
            # size at stride 1 (required for the residual add); with the
            # default dilation=1 this is the usual padding of 1.
            nn.Conv2d(hidden_dim, hidden_dim, 3, stride, dilation,
                      groups=hidden_dim, dilation=dilation, bias=False),
            nn.BatchNorm2d(hidden_dim),
            nn.ReLU6(inplace=True),
            # pw-linear: project to the output width, no activation
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        # Layer order (and therefore state-dict keys) matches the two-branch layout.
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the skip is enabled."""
        out = self.conv(x)
        if self.use_res_connect:
            return x + out
        return out
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
#------------------------------------------------------------------------------
|
| 87 |
+
# Class of MobileNetV2
|
| 88 |
+
#------------------------------------------------------------------------------
|
| 89 |
+
|
| 90 |
+
class MobileNetV2(nn.Module):
    """MobileNetV2 feature stack with an optional linear classification head.

    Args:
        in_channels: Number of channels of the input tensor.
        alpha: Width multiplier applied to the channel counts.
        expansion: Expansion factor used by every block except the first.
        num_classes: Size of the classifier output; ``None`` omits the
            classifier so ``forward`` returns the final feature map.
    """

    def __init__(self, in_channels, alpha=1.0, expansion=6, num_classes=1000):
        super().__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes

        # One row per group of blocks:
        # (expansion t, base channels c, repeats n, stride of first block s)
        block_groups = (
            (1, 16, 1, 1),
            (expansion, 24, 2, 2),
            (expansion, 32, 3, 2),
            (expansion, 64, 4, 2),
            (expansion, 96, 3, 1),
            (expansion, 160, 3, 2),
            (expansion, 320, 1, 1),
        )

        # Stem width; the head width is only scaled when alpha > 1.0.
        width = _make_divisible(32 * alpha, 8)
        if alpha > 1.0:
            self.last_channel = _make_divisible(1280 * alpha, 8)
        else:
            self.last_channel = 1280

        stages = [conv_bn(self.in_channels, width, 2)]

        # Only the first block of each group uses the group's stride.
        for t, c, n, s in block_groups:
            group_width = _make_divisible(int(c * alpha), 8)
            for repeat in range(n):
                block_stride = s if repeat == 0 else 1
                stages.append(InvertedResidual(width, group_width, block_stride, expansion=t))
                width = group_width

        stages.append(conv_1x1_bn(width, self.last_channel))
        self.features = nn.Sequential(*stages)

        if self.num_classes is not None:
            self.classifier = nn.Sequential(
                nn.Dropout(0.2),
                nn.Linear(self.last_channel, num_classes),
            )

        self._init_weights()

    def forward(self, x):
        """Run every feature layer in order, then the classifier if present."""
        x = self.features(x)

        if self.num_classes is not None:
            # Average over the two spatial axes before the linear head.
            x = x.mean(dim=(2, 3))
            x = self.classifier(x)

        return x

    def _load_pretrained_model(self, pretrained_file):
        """Load matching entries of a checkpoint; report the ones that are skipped."""
        pretrain_dict = torch.load(pretrained_file, map_location='cpu')
        state_dict = self.state_dict()
        print("[MobileNetV2] Loading pretrained model...")
        matched = {}
        for key, value in pretrain_dict.items():
            if key not in state_dict:
                print(key, "is ignored")
                continue
            matched[key] = value
        state_dict.update(matched)
        self.load_state_dict(state_dict)

    def _init_weights(self):
        """Initialise conv, batch-norm and linear parameters in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                kernel_h, kernel_w = module.kernel_size
                fan = kernel_h * kernel_w * module.out_channels
                module.weight.data.normal_(0, math.sqrt(2. / fan))
                if module.bias is not None:
                    module.bias.data.zero_()
            elif isinstance(module, nn.BatchNorm2d):
                module.weight.data.fill_(1)
                module.bias.data.zero_()
            elif isinstance(module, nn.Linear):
                module.weight.data.normal_(0, 0.01)
                module.bias.data.zero_()
|
src/models/backbones/wrapper.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
from functools import reduce
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
|
| 7 |
+
from .mobilenetv2 import MobileNetV2
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class BaseBackbone(nn.Module):
    """ Abstract parent for the interchangeable backbone models.

    Subclasses are expected to set ``model`` and ``enc_channels`` and to
    override ``forward`` and ``load_pretrained_ckpt``.
    """

    def __init__(self, in_channels):
        super().__init__()
        # Placeholders filled in by concrete backbones.
        self.model = None
        self.enc_channels = []
        self.in_channels = in_channels

    def forward(self, x):
        """Must be provided by the subclass."""
        raise NotImplementedError()

    def load_pretrained_ckpt(self):
        """Must be provided by the subclass."""
        raise NotImplementedError()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
class MobileNetV2Backbone(BaseBackbone):
    """ MobileNetV2 Backbone

    Wraps a classifier-free ``MobileNetV2`` and returns the feature maps
    captured after layers 1, 3, 6, 13 and 18 of its ``features`` stack.
    """

    def __init__(self, in_channels):
        super(MobileNetV2Backbone, self).__init__(in_channels)

        self.model = MobileNetV2(self.in_channels, alpha=1.0, expansion=6, num_classes=None)
        # Channel counts advertised for the five returned feature maps.
        self.enc_channels = [16, 24, 32, 96, 1280]

    def forward(self, x):
        """Return ``[enc2x, enc4x, enc8x, enc16x, enc32x]`` for input ``x``."""
        # Layers are addressed by constant index rather than through a loop
        # or reduce(); the layout is kept as written.
        # layers 0-1
        x = self.model.features[0](x)
        x = self.model.features[1](x)
        enc2x = x

        # layers 2-3
        x = self.model.features[2](x)
        x = self.model.features[3](x)
        enc4x = x

        # layers 4-6
        x = self.model.features[4](x)
        x = self.model.features[5](x)
        x = self.model.features[6](x)
        enc8x = x

        # layers 7-13
        x = self.model.features[7](x)
        x = self.model.features[8](x)
        x = self.model.features[9](x)
        x = self.model.features[10](x)
        x = self.model.features[11](x)
        x = self.model.features[12](x)
        x = self.model.features[13](x)
        enc16x = x

        # layers 14-18
        x = self.model.features[14](x)
        x = self.model.features[15](x)
        x = self.model.features[16](x)
        x = self.model.features[17](x)
        x = self.model.features[18](x)
        enc32x = x
        return [enc2x, enc4x, enc8x, enc16x, enc32x]

    def load_pretrained_ckpt(self):
        """Load the pretrained MobileNetV2 weights from ``./pretrained``.

        Raises:
            FileNotFoundError: If the checkpoint file does not exist. Raising
                (instead of calling ``exit()``) lets the host application
                decide how to react rather than terminating the process.
        """
        # the pre-trained model is provided by https://github.com/thuyngch/Human-Segmentation-PyTorch
        ckpt_path = './pretrained/mobilenetv2_human_seg.ckpt'
        if not os.path.exists(ckpt_path):
            raise FileNotFoundError(
                f'cannot find the pretrained mobilenetv2 backbone: {ckpt_path}'
            )

        # Deserialize onto the CPU so the file also loads on machines without
        # the device it was saved from; load_state_dict copies the values
        # into the existing parameters.
        ckpt = torch.load(ckpt_path, map_location='cpu')
        self.model.load_state_dict(ckpt)
|
src/models/modnet.py
ADDED
|
@@ -0,0 +1,255 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import torch
|
| 2 |
+
import torch.nn as nn
|
| 3 |
+
import torch.nn.functional as F
|
| 4 |
+
|
| 5 |
+
from .backbones import SUPPORTED_BACKBONES
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
#------------------------------------------------------------------------------
|
| 9 |
+
# MODNet Basic Modules
|
| 10 |
+
#------------------------------------------------------------------------------
|
| 11 |
+
|
| 12 |
+
class IBNorm(nn.Module):
    """ Normalise the first half of the channels with BatchNorm and the
    remaining channels with InstanceNorm, then concatenate the two parts.
    """

    def __init__(self, in_channels):
        super().__init__()
        # With an odd channel count the instance-norm part gets the extra channel.
        self.bnorm_channels = int(in_channels / 2)
        self.inorm_channels = in_channels - self.bnorm_channels

        self.bnorm = nn.BatchNorm2d(self.bnorm_channels, affine=True)
        self.inorm = nn.InstanceNorm2d(self.inorm_channels, affine=False)

    def forward(self, x):
        split_at = self.bnorm_channels
        batch_part = self.bnorm(x[:, :split_at, ...].contiguous())
        instance_part = self.inorm(x[:, split_at:, ...].contiguous())
        return torch.cat((batch_part, instance_part), 1)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
class Conv2dIBNormRelu(nn.Module):
    """ A Conv2d optionally followed by IBNorm and/or an in-place ReLU.

    The convolution arguments are forwarded to ``nn.Conv2d`` unchanged;
    ``with_ibn`` and ``with_relu`` switch the two trailing layers on or off.
    """

    def __init__(self, in_channels, out_channels, kernel_size,
                 stride=1, padding=0, dilation=1, groups=1, bias=True,
                 with_ibn=True, with_relu=True):
        super().__init__()

        conv = nn.Conv2d(
            in_channels, out_channels, kernel_size,
            stride=stride, padding=padding, dilation=dilation,
            groups=groups, bias=bias,
        )

        trailing = []
        if with_ibn:
            trailing.append(IBNorm(out_channels))
        if with_relu:
            trailing.append(nn.ReLU(inplace=True))

        self.layers = nn.Sequential(conv, *trailing)

    def forward(self, x):
        return self.layers(x)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
class SEBlock(nn.Module):
    """ Squeeze-and-Excitation block (https://arxiv.org/pdf/1709.01507.pdf).

    Global-average-pools the input, passes the pooled vector through two
    bias-free linear layers (ReLU, then Sigmoid) and scales the input by the
    resulting per-channel weights.
    """

    def __init__(self, in_channels, out_channels, reduction=1):
        super().__init__()
        squeezed = int(in_channels // reduction)
        self.pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Sequential(
            nn.Linear(in_channels, squeezed, bias=False),
            nn.ReLU(inplace=True),
            nn.Linear(squeezed, out_channels, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, x):
        batch, channels, _height, _width = x.size()
        pooled = self.pool(x).view(batch, channels)
        gates = self.fc(pooled).view(batch, channels, 1, 1)
        return x * gates.expand_as(x)
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
#------------------------------------------------------------------------------
|
| 81 |
+
# MODNet Branches
|
| 82 |
+
#------------------------------------------------------------------------------
|
| 83 |
+
|
| 84 |
+
class LRBranch(nn.Module):
    """ Low Resolution Branch of MODNet

    Runs the backbone, re-weights its last feature map with an SE block and
    upsamples it twice. Outside inference mode it also produces a sigmoid
    prediction from the upsampled features.
    """

    def __init__(self, backbone):
        super().__init__()

        channels = backbone.enc_channels
        ch_8x, ch_16x, ch_32x = channels[2], channels[3], channels[4]

        self.backbone = backbone
        self.se_block = SEBlock(ch_32x, ch_32x, reduction=4)
        self.conv_lr16x = Conv2dIBNormRelu(ch_32x, ch_16x, 5, stride=1, padding=2)
        self.conv_lr8x = Conv2dIBNormRelu(ch_16x, ch_8x, 5, stride=1, padding=2)
        self.conv_lr = Conv2dIBNormRelu(ch_8x, 1, kernel_size=3, stride=2, padding=1, with_ibn=False, with_relu=False)

    def forward(self, img, inference):
        """Return ``(pred_semantic, lr8x, [enc2x, enc4x])``.

        ``pred_semantic`` is ``None`` when ``inference`` is truthy.
        """

        def upsample2x(tensor):
            return F.interpolate(tensor, scale_factor=2, mode='bilinear', align_corners=False)

        features = self.backbone.forward(img)
        enc2x, enc4x, enc32x = features[0], features[1], features[4]

        lr16x = self.conv_lr16x(upsample2x(self.se_block(enc32x)))
        lr8x = self.conv_lr8x(upsample2x(lr16x))

        if inference:
            pred_semantic = None
        else:
            pred_semantic = torch.sigmoid(self.conv_lr(lr8x))

        return pred_semantic, lr8x, [enc2x, enc4x]
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
class HRBranch(nn.Module):
    """ High Resolution Branch of MODNet

    Combines the two shallowest encoder feature maps, downscaled copies of
    the image and the low-resolution branch output. Outside inference mode
    it also produces a sigmoid prediction at the input resolution.
    """

    def __init__(self, hr_channels, enc_channels):
        super().__init__()

        def conv1x1(c_in, c_out):
            return Conv2dIBNormRelu(c_in, c_out, 1, stride=1, padding=0)

        def conv3x3(c_in, c_out, stride=1):
            return Conv2dIBNormRelu(c_in, c_out, 3, stride=stride, padding=1)

        double = 2 * hr_channels

        self.tohr_enc2x = conv1x1(enc_channels[0], hr_channels)
        self.conv_enc2x = conv3x3(hr_channels + 3, hr_channels, stride=2)

        self.tohr_enc4x = conv1x1(enc_channels[1], hr_channels)
        self.conv_enc4x = conv3x3(double, double)

        self.conv_hr4x = nn.Sequential(
            conv3x3(3 * hr_channels + 3, double),
            conv3x3(double, double),
            conv3x3(double, hr_channels),
        )

        self.conv_hr2x = nn.Sequential(
            conv3x3(double, double),
            conv3x3(double, hr_channels),
            conv3x3(hr_channels, hr_channels),
            conv3x3(hr_channels, hr_channels),
        )

        self.conv_hr = nn.Sequential(
            conv3x3(hr_channels + 3, hr_channels),
            Conv2dIBNormRelu(hr_channels, 1, kernel_size=1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, enc2x, enc4x, lr8x, inference):
        """Return ``(pred_detail, hr2x)``; ``pred_detail`` is ``None`` when
        ``inference`` is truthy.
        """

        def rescale(tensor, factor):
            return F.interpolate(tensor, scale_factor=factor, mode='bilinear', align_corners=False)

        img2x = rescale(img, 1/2)
        img4x = rescale(img, 1/4)

        # Merge the half-resolution image with the projected enc2x features.
        enc2x = self.tohr_enc2x(enc2x)
        hr4x = self.conv_enc2x(torch.cat((img2x, enc2x), dim=1))

        # Add the projected enc4x features.
        enc4x = self.tohr_enc4x(enc4x)
        hr4x = self.conv_enc4x(torch.cat((hr4x, enc4x), dim=1))

        # Add the upsampled low-resolution features and the quarter-size image.
        lr4x = rescale(lr8x, 2)
        hr4x = self.conv_hr4x(torch.cat((hr4x, lr4x, img4x), dim=1))

        hr2x = self.conv_hr2x(torch.cat((rescale(hr4x, 2), enc2x), dim=1))

        if inference:
            pred_detail = None
        else:
            hr = self.conv_hr(torch.cat((rescale(hr2x, 2), img), dim=1))
            pred_detail = torch.sigmoid(hr)

        return pred_detail, hr2x
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
class FusionBranch(nn.Module):
    """ Fusion Branch of MODNet

    Upsamples the low-resolution features, concatenates them with the
    high-resolution features and the input image, and outputs a sigmoid matte.
    """

    def __init__(self, hr_channels, enc_channels):
        super().__init__()
        half = int(hr_channels / 2)

        self.conv_lr4x = Conv2dIBNormRelu(enc_channels[2], hr_channels, 5, stride=1, padding=2)

        self.conv_f2x = Conv2dIBNormRelu(2 * hr_channels, hr_channels, 3, stride=1, padding=1)
        self.conv_f = nn.Sequential(
            Conv2dIBNormRelu(hr_channels + 3, half, 3, stride=1, padding=1),
            Conv2dIBNormRelu(half, 1, 1, stride=1, padding=0, with_ibn=False, with_relu=False),
        )

    def forward(self, img, lr8x, hr2x):
        """Return the predicted matte for ``img``."""

        def upsample2x(tensor):
            return F.interpolate(tensor, scale_factor=2, mode='bilinear', align_corners=False)

        lr4x = self.conv_lr4x(upsample2x(lr8x))
        lr2x = upsample2x(lr4x)

        f2x = self.conv_f2x(torch.cat((lr2x, hr2x), dim=1))
        fused = self.conv_f(torch.cat((upsample2x(f2x), img), dim=1))

        return torch.sigmoid(fused)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
#------------------------------------------------------------------------------
|
| 201 |
+
# MODNet
|
| 202 |
+
#------------------------------------------------------------------------------
|
| 203 |
+
|
| 204 |
+
class MODNet(nn.Module):
    """ Architecture of MODNet

    Combines a backbone-driven low-resolution branch, a high-resolution
    branch and a fusion branch.
    """

    def __init__(self, in_channels=3, hr_channels=32, backbone_arch='mobilenetv2', backbone_pretrained=True):
        """
        Arguments:
            in_channels (int): passed to the backbone constructor
            hr_channels (int): channel width of the HR and fusion branches
            backbone_arch (str): key into `SUPPORTED_BACKBONES`
            backbone_pretrained (bool): when True, call the backbone's
                `load_pretrained_ckpt()` after weight initialisation
        """
        super().__init__()

        self.in_channels = in_channels
        self.hr_channels = hr_channels
        self.backbone_arch = backbone_arch
        self.backbone_pretrained = backbone_pretrained

        backbone_factory = SUPPORTED_BACKBONES[self.backbone_arch]
        self.backbone = backbone_factory(self.in_channels)

        self.lr_branch = LRBranch(self.backbone)
        self.hr_branch = HRBranch(self.hr_channels, self.backbone.enc_channels)
        self.f_branch = FusionBranch(self.hr_channels, self.backbone.enc_channels)

        # Initialise every conv / norm layer first; pretrained backbone
        # weights (if requested) are loaded afterwards so they are not
        # overwritten by this pass.
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                self._init_conv(layer)
            elif isinstance(layer, (nn.BatchNorm2d, nn.InstanceNorm2d)):
                self._init_norm(layer)

        if self.backbone_pretrained:
            self.backbone.load_pretrained_ckpt()

    def forward(self, img, inference):
        """
        Arguments:
            img (torch.Tensor): input image batch
            inference (bool): forwarded to the LR and HR branches

        Returns:
            tuple: (pred_semantic, pred_detail, pred_matte) as produced by
            the LR, HR and fusion branches respectively
        """
        pred_semantic, lr8x, encoder_feats = self.lr_branch(img, inference)
        enc2x, enc4x = encoder_feats
        pred_detail, hr2x = self.hr_branch(img, enc2x, enc4x, lr8x, inference)
        pred_matte = self.f_branch(img, lr8x, hr2x)

        return pred_semantic, pred_detail, pred_matte

    def freeze_norm(self):
        """ Put every BatchNorm2d / InstanceNorm2d sub-module into eval mode
        """
        frozen_types = (nn.BatchNorm2d, nn.InstanceNorm2d)
        for layer in self.modules():
            if isinstance(layer, frozen_types):
                layer.eval()

    def _init_conv(self, conv):
        """ Kaiming-uniform initialise a conv weight and zero its bias (if any)
        """
        nn.init.kaiming_uniform_(
            conv.weight, a=0, mode='fan_in', nonlinearity='relu')
        if conv.bias is None:
            return
        nn.init.constant_(conv.bias, 0)

    def _init_norm(self, norm):
        """ Reset a norm layer to weight 1 / bias 0; layers without a weight are left alone
        """
        if norm.weight is None:
            return
        nn.init.constant_(norm.weight, 1)
        nn.init.constant_(norm.bias, 0)
|
src/st_style.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CSS that paints Streamlit buttons red with white text in every
# interaction state (default, hover, active, focus).
# NOTE: `.css-1cpxqw2` looks like an auto-generated Streamlit class name and
# may change between Streamlit releases -- verify after upgrading.
button_style = """
<style>
div.stButton > button:first-child {
    background-color: rgb(255, 75, 75);
    color: rgb(255, 255, 255);
}
div.stButton > button:hover {
    background-color: rgb(255, 75, 75);
    color: rgb(255, 255, 255);
}
div.stButton > button:active {
    background-color: rgb(255, 75, 75);
    color: rgb(255, 255, 255);
}
div.stButton > button:focus {
    background-color: rgb(255, 75, 75);
    color: rgb(255, 255, 255);
}
.css-1cpxqw2:focus:not(:active) {
    background-color: rgb(255, 75, 75);
    border-color: rgb(255, 75, 75);
    color: rgb(255, 255, 255);
}
</style>
"""

# CSS that hides the Streamlit main menu, footer and header.
style = """
<style>
#MainMenu {
    visibility: hidden;
}
footer {
    visibility: hidden;
}
header {
    visibility: hidden;
}
</style>
"""
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def apply_prod_style(st):
    """Inject the module-level `style` CSS through `st.markdown`.

    Arguments:
        st: object exposing `markdown(body, unsafe_allow_html=...)`
            (the `streamlit` module in the visible caller imports)

    Returns:
        Whatever `st.markdown` returns.
    """
    # Raw HTML must be explicitly allowed, otherwise the <style> tag is escaped.
    rendered = st.markdown(style, unsafe_allow_html=True)
    return rendered
|
src/trainer.py
ADDED
|
@@ -0,0 +1,299 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import math
|
| 2 |
+
import scipy
|
| 3 |
+
import numpy as np
|
| 4 |
+
from scipy.ndimage import grey_dilation, grey_erosion
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
import torch.nn as nn
|
| 8 |
+
import torch.nn.functional as F
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
# Public API of this module: the two per-iteration training entry points.
__all__ = [
|
| 12 |
+
'supervised_training_iter',
|
| 13 |
+
'soc_adaptation_iter',
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# ----------------------------------------------------------------------------------
|
| 18 |
+
# Tool Classes/Functions
|
| 19 |
+
# ----------------------------------------------------------------------------------
|
| 20 |
+
|
| 21 |
+
class GaussianBlurLayer(nn.Module):
    """ Add Gaussian blur to a 4D tensor
    This layer takes a 4D tensor of {N, C, H, W} as input.
    The blur is applied to each of the C channels separately
    (depthwise convolution after reflection padding, so H and W are preserved).
    """

    def __init__(self, channels, kernel_size):
        """
        Arguments:
            channels (int): Channel count of the input tensor
            kernel_size (int): Size of the (square) blur kernel; must be odd

        Raises:
            ValueError: if `kernel_size` is even
        """

        super(GaussianBlurLayer, self).__init__()
        # Raise instead of `assert`: asserts are stripped under `python -O`.
        if kernel_size % 2 == 0:
            raise ValueError(
                '\'GaussianBlurLayer\' requires an odd kernel_size, got {0}'.format(kernel_size))
        self.channels = channels
        self.kernel_size = kernel_size

        self.op = nn.Sequential(
            # Pad by half the kernel so the convolution keeps the spatial size.
            nn.ReflectionPad2d(self.kernel_size // 2),
            # groups=channels -> one independent kernel per channel.
            nn.Conv2d(channels, channels, self.kernel_size,
                      stride=1, padding=0, bias=False, groups=channels)
        )

        self._init_kernel()

    def forward(self, x):
        """
        Arguments:
            x (torch.Tensor): input 4D tensor
        Returns:
            torch.Tensor: Blurred version of the input
        Raises:
            ValueError: if `x` is not 4D or its channel count differs from
                the one given at construction
        """

        # Raise rather than print + exit(): a library layer must not
        # terminate the host process on bad input.
        if len(x.shape) != 4:
            raise ValueError('\'GaussianBlurLayer\' requires a 4D tensor as input')
        if x.shape[1] != self.channels:
            raise ValueError(
                'In \'GaussianBlurLayer\', the required channel ({0}) is '
                'not the same as input ({1})'.format(self.channels, x.shape[1]))

        return self.op(x)

    def _init_kernel(self):
        """ Fill the conv weight with a 2D Gaussian derived from `kernel_size`
        """
        # Sigma is derived from the kernel size (this appears to be the same
        # rule OpenCV documents for its default sigma -- TODO confirm).
        sigma = 0.3 * ((self.kernel_size - 1) * 0.5 - 1) + 0.8

        # Filtering a centred unit impulse yields the Gaussian kernel itself.
        impulse = np.zeros((self.kernel_size, self.kernel_size))
        center = self.kernel_size // 2
        impulse[center, center] = 1
        kernel = scipy.ndimage.gaussian_filter(impulse, sigma)

        # The only parameter is the conv weight (bias is disabled); the
        # (k, k) kernel broadcasts over its (C, 1, k, k) shape.
        with torch.no_grad():
            for param in self.parameters():
                param.copy_(torch.from_numpy(kernel))
|
| 75 |
+
|
| 76 |
+
# ----------------------------------------------------------------------------------
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
# ----------------------------------------------------------------------------------
|
| 80 |
+
# MODNet Training Functions
|
| 81 |
+
# ----------------------------------------------------------------------------------
|
| 82 |
+
|
| 83 |
+
# Shared single-channel 3x3 Gaussian blur used by both training iterations.
blurer = GaussianBlurLayer(1, 3)
if torch.cuda.is_available():
    # Move to the GPU only when one exists, so that merely importing this
    # module no longer fails on CPU-only hosts.
    blurer = blurer.cuda()
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def supervised_training_iter(
        modnet, optimizer, image, trimap, gt_matte,
        semantic_scale=10.0, detail_scale=10.0, matte_scale=1.0):
    """ Supervised training iteration of MODNet
    Runs one optimisation step of MODNet on a labeled batch: forward pass,
    three losses, backward pass and `optimizer.step()`.

    Arguments:
        modnet (torch.nn.Module): instance of MODNet
        optimizer (torch.optim.Optimizer): optimizer for supervised training
        image (torch.Tensor): input RGB image
                              its pixel values should be normalized
        trimap (torch.Tensor): trimap used to calculate the losses
                               its pixel values can be 0, 0.5, or 1
                               (foreground=1, background=0, unknown=0.5)
        gt_matte (torch.Tensor): ground truth alpha matte
                                 its pixel values are between [0, 1]
        semantic_scale (float): scale of the semantic loss
                                NOTE: please adjust according to your dataset
        detail_scale (float): scale of the detail loss
                              NOTE: please adjust according to your dataset
        matte_scale (float): scale of the matte loss
                             NOTE: please adjust according to your dataset

    Returns:
        semantic_loss (torch.Tensor): loss of the semantic estimation [Low-Resolution (LR) Branch]
        detail_loss (torch.Tensor): loss of the detail prediction [High-Resolution (HR) Branch]
        matte_loss (torch.Tensor): loss of the semantic-detail fusion [Fusion Branch]

    Example:
        import torch
        from src.models.modnet import MODNet
        from src.trainer import supervised_training_iter

        bs = 16         # batch size
        lr = 0.01       # learn rate
        epochs = 40     # total epochs

        modnet = torch.nn.DataParallel(MODNet()).cuda()
        optimizer = torch.optim.SGD(modnet.parameters(), lr=lr, momentum=0.9)
        lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=int(0.25 * epochs), gamma=0.1)

        dataloader = CREATE_YOUR_DATALOADER(bs)     # NOTE: please finish this function

        for epoch in range(0, epochs):
            for idx, (image, trimap, gt_matte) in enumerate(dataloader):
                semantic_loss, detail_loss, matte_loss = \
                    supervised_training_iter(modnet, optimizer, image, trimap, gt_matte)
            lr_scheduler.step()
    """

    # Train mode + fresh gradients for this step.
    modnet.train()
    optimizer.zero_grad()

    pred_semantic, pred_detail, pred_matte = modnet(image, False)

    # True wherever the trimap is decided (not 0.5). In those pixels the
    # prediction is replaced by the trimap below, so only the unknown
    # (0.5) band contributes to the masked losses.
    decided = (trimap < 0.5) | (trimap > 0.5)

    # Semantic loss: MSE against the blurred 1/16-scale ground truth.
    coarse_gt = blurer(F.interpolate(gt_matte, scale_factor=1/16, mode='bilinear'))
    semantic_loss = semantic_scale * F.mse_loss(pred_semantic, coarse_gt)

    # Detail loss: L1 restricted to the unknown band.
    masked_pred_detail = torch.where(decided, trimap, pred_detail)
    masked_gt_detail = torch.where(decided, trimap, gt_matte)
    detail_loss = detail_scale * F.l1_loss(masked_pred_detail, masked_gt_detail)

    # Matte loss: plain + compositional L1, each with a 4x weighted term
    # computed on the trimap-substituted prediction.
    masked_pred_matte = torch.where(decided, trimap, pred_matte)
    alpha_term = F.l1_loss(pred_matte, gt_matte) \
        + 4.0 * F.l1_loss(masked_pred_matte, gt_matte)
    composition_term = F.l1_loss(image * pred_matte, image * gt_matte) \
        + 4.0 * F.l1_loss(image * masked_pred_matte, image * gt_matte)
    matte_loss = matte_scale * (alpha_term + composition_term)

    # Optimise the sum of the three scaled losses.
    total_loss = semantic_loss + detail_loss + matte_loss
    total_loss.backward()
    optimizer.step()

    # The individual (scaled) losses are returned for logging/testing.
    return semantic_loss, detail_loss, matte_loss
|
| 175 |
+
|
| 176 |
+
|
| 177 |
+
def _soc_boundary_mask(pred_fg):
    """ Return a float32 0/1 mask (on `pred_fg`'s device) marking where dilation and erosion of channel 0 differ
    """
    n, c, h, w = pred_fg.shape
    # Structuring element is ~5% of the mean side length; at least 1 so
    # that very small inputs do not produce an empty (size 0) element.
    side = max(1, int((h + w) / 2 * 0.05))

    np_pred_fg = pred_fg.detach().cpu().numpy()
    np_boundaries = np.zeros([n, c, h, w])
    for sdx in range(n):
        sample = np_pred_fg[sdx, 0, ...]
        dilated = grey_dilation(sample, size=(side, side))
        eroded = grey_erosion(sample, size=(side, side))
        # Basic indexing yields a view, so this writes into `np_boundaries`.
        np_boundaries[sdx, 0, ...][dilated != eroded] = 1

    return torch.from_numpy(np_boundaries).float().to(pred_fg.device)


def _masked_mean_l1(pred, target, mask):
    """ Return the batch mean of the per-sample L1 error averaged over `mask`
    """
    per_pixel = mask * F.l1_loss(pred, target, reduction='none')
    # A sample without any boundary pixel would otherwise give 0 / 0 = NaN
    # and poison the whole update; its numerator is 0, so it contributes 0.
    area = torch.sum(mask, dim=(1, 2, 3)).clamp(min=1)
    return torch.mean(torch.sum(per_pixel, dim=(1, 2, 3)) / area)


def soc_adaptation_iter(
        modnet, backup_modnet, optimizer, image,
        soc_semantic_scale=100.0, soc_detail_scale=1.0):
    """ Self-Supervised sub-objective consistency (SOC) adaptation iteration of MODNet
    This function fine-tunes MODNet for one iteration in an unlabeled dataset.
    Note that SOC can only fine-tune a converged MODNet, i.e., MODNet that has been
    trained in a labeled dataset.

    Arguments:
        modnet (torch.nn.Module): instance of MODNet, either bare or wrapped
                                  (e.g. in torch.nn.DataParallel)
        backup_modnet (torch.nn.Module): backup of the trained MODNet
        optimizer (torch.optim.Optimizer): optimizer for self-supervised SOC
        image (torch.Tensor): input RGB image
                              its pixel values should be normalized
        soc_semantic_scale (float): scale of the SOC semantic loss
                                    NOTE: please adjust according to your dataset
        soc_detail_scale (float): scale of the SOC detail loss
                                  NOTE: please adjust according to your dataset

    Returns:
        soc_semantic_loss (torch.Tensor): loss of the semantic SOC
        soc_detail_loss (torch.Tensor): loss of the detail SOC

    Example:
        import copy
        import torch
        from src.models.modnet import MODNet
        from src.trainer import soc_adaptation_iter

        bs = 1          # batch size
        lr = 0.00001    # learn rate
        epochs = 10     # total epochs

        modnet = torch.nn.DataParallel(MODNet()).cuda()
        modnet = LOAD_TRAINED_CKPT()    # NOTE: please finish this function

        optimizer = torch.optim.Adam(modnet.parameters(), lr=lr, betas=(0.9, 0.99))
        dataloader = CREATE_YOUR_DATALOADER(bs)     # NOTE: please finish this function

        for epoch in range(0, epochs):
            backup_modnet = copy.deepcopy(modnet)
            for idx, (image) in enumerate(dataloader):
                soc_semantic_loss, soc_detail_loss = \
                    soc_adaptation_iter(modnet, backup_modnet, optimizer, image)
    """

    # set the backup model to eval mode
    backup_modnet.eval()

    # set the main model to train mode and freeze its norm layers;
    # unwrap `.module` only when a wrapper such as DataParallel is present
    modnet.train()
    getattr(modnet, 'module', modnet).freeze_norm()

    # clear the optimizer
    optimizer.zero_grad()

    # forward the main model
    pred_semantic, pred_detail, pred_matte = modnet(image, False)

    # forward the backup model
    with torch.no_grad():
        _, pred_backup_detail, pred_backup_matte = backup_modnet(image, False)

    # calculate the boundary mask from `pred_matte` and `pred_semantic`
    pred_matte_fg = (pred_matte.detach() > 0.1).float()
    pred_semantic_fg = (pred_semantic.detach() > 0.1).float()
    pred_semantic_fg = F.interpolate(pred_semantic_fg, scale_factor=16, mode='bilinear')
    pred_fg = pred_matte_fg * pred_semantic_fg

    # built on the same device as the predictions instead of a hard-coded `.cuda()`
    boundaries = _soc_boundary_mask(pred_fg)

    # sub-objectives consistency between `pred_semantic` and `pred_matte`
    # generate pseudo ground truth for `pred_semantic`
    downsampled_pred_matte = blurer(F.interpolate(pred_matte, scale_factor=1/16, mode='bilinear'))
    pseudo_gt_semantic = downsampled_pred_matte.detach()
    pseudo_gt_semantic = pseudo_gt_semantic * (pseudo_gt_semantic > 0.01).float()

    # generate pseudo ground truth for `pred_matte`
    pseudo_gt_matte = pred_semantic.detach()
    pseudo_gt_matte = pseudo_gt_matte * (pseudo_gt_matte > 0.01).float()

    # calculate the SOC semantic loss
    soc_semantic_loss = F.mse_loss(pred_semantic, pseudo_gt_semantic) \
        + F.mse_loss(downsampled_pred_matte, pseudo_gt_matte)
    soc_semantic_loss = soc_semantic_scale * torch.mean(soc_semantic_loss)

    # NOTE: using the formulas in our paper to calculate the following losses has similar results
    # sub-objectives consistency between `pred_detail` and `pred_backup_detail` (on boundaries only)
    backup_detail_loss = _masked_mean_l1(pred_detail, pred_backup_detail, boundaries)

    # sub-objectives consistency between `pred_matte` and `pred_backup_matte` (on boundaries only)
    backup_matte_loss = _masked_mean_l1(pred_matte, pred_backup_matte, boundaries)

    soc_detail_loss = soc_detail_scale * (backup_detail_loss + backup_matte_loss)

    # calculate the final loss, backward the loss, and update the model
    loss = soc_semantic_loss + soc_detail_loss

    loss.backward()
    optimizer.step()

    return soc_semantic_loss, soc_detail_loss
|
| 298 |
+
|
| 299 |
+
# ----------------------------------------------------------------------------------
|
src/utils.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Credits to https://github.com/ZHKKKe/MODNet for the model.
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import numpy as np
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
import time
|
| 6 |
+
import os
|
| 7 |
+
from PIL import Image, ImageColor
|
| 8 |
+
from copy import deepcopy
|
| 9 |
+
|
| 10 |
+
import torch
|
| 11 |
+
import torch.nn as nn
|
| 12 |
+
import torch.nn.functional as F
|
| 13 |
+
import torchvision.transforms as transforms
|
| 14 |
+
|
| 15 |
+
from src.models.modnet import MODNet
|
| 16 |
+
from src.st_style import apply_prod_style
|
| 17 |
+
|
| 18 |
+
# apply(st)
|
| 19 |
+
|
| 20 |
+
# Path of the MODNet checkpoint that `matte()` passes to `torch.load`;
# being relative, it is resolved against the process working directory.
MODEL = "./assets/modnet_photographic_portrait_matting.ckpt"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def change_background(image, matte, background_alpha: float=1.0, background_hex: str="#000000"):
    """
    Composite `image` over a solid-colour background using `matte` as mask.

    image: PIL Image (converted to RGBA when it is in another mode)
    matte: PIL Image (grayscale, if 255 it is foreground); used as the paste
           mask, so it is expected to match the size of `image`
    background_alpha: float in [0, 1], opacity of the background colour;
                      values outside the range are clamped
    background_hex: string, any colour accepted by `ImageColor.getrgb`

    Returns a new RGBA PIL Image; `image` itself is not modified.
    """
    # `convert` already returns a new image and `paste` only reads its
    # source, so no defensive deep copy is required.
    img = image if image.mode == "RGBA" else image.convert("RGBA")

    # `getrgb` yields an (R, G, B, A) tuple for inputs such as "#RRGGBBAA";
    # keep only RGB so that appending our own alpha always gives 4 values.
    background_rgb = ImageColor.getrgb(background_hex)[:3]
    alpha = int(255 * min(max(background_alpha, 0.0), 1.0))

    background = Image.new("RGBA", img.size, color=background_rgb + (alpha,))
    background.paste(img, mask=matte)
    return background
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def matte(image):
    """
    Predict an alpha matte for `image` with the pre-trained MODNet.

    image: PIL Image or array-like accepted by `np.asarray`
           (grayscale, RGB or RGBA)

    Returns a PIL Image in mode 'L' with the same height/width as the input,
    holding the predicted matte scaled to 0..255.

    NOTE: the checkpoint is loaded from `MODEL` on every call; callers that
    invoke this repeatedly may want to cache the model.
    """
    # define hyper-parameters
    ref_size = 512

    # define image to tensor transform (maps pixel values to [-1, 1])
    im_transform = transforms.Compose(
        [
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ]
    )

    # create MODNet and load the pre-trained ckpt; the model is wrapped in
    # DataParallel before `load_state_dict`, exactly as the checkpoint is
    # loaded in the original code path
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    modnet = nn.DataParallel(MODNet(backbone_pretrained=False))
    weights = torch.load(MODEL, map_location=device)
    modnet.load_state_dict(weights)
    modnet = modnet.to(device)
    modnet.eval()

    # unify image channels to 3
    im = np.asarray(deepcopy(image))
    if len(im.shape) == 2:
        im = im[:, :, None]
    if im.shape[2] == 1:
        im = np.repeat(im, 3, axis=2)
    elif im.shape[2] == 4:
        im = im[:, :, 0:3]

    # convert image to PyTorch tensor and add mini-batch dim
    im = im_transform(Image.fromarray(im))
    im = im[None, :, :, :]

    # resize image for input: when both sides are below, or both above,
    # `ref_size`, scale so that the shorter side equals `ref_size`
    im_b, im_c, im_h, im_w = im.shape
    if max(im_h, im_w) < ref_size or min(im_h, im_w) > ref_size:
        if im_w >= im_h:
            im_rh = ref_size
            im_rw = int(im_w / im_h * ref_size)
        else:
            im_rw = ref_size
            im_rh = int(im_h / im_w * ref_size)
    else:
        im_rh = im_h
        im_rw = im_w

    # round down to a multiple of 32 (presumably required by the network's
    # downsampling -- TODO confirm), but never below 32: a side shorter
    # than 32 would otherwise round to 0 and make the resize fail
    im_rw = max(32, im_rw - im_rw % 32)
    im_rh = max(32, im_rh - im_rh % 32)
    im = F.interpolate(im, size=(im_rh, im_rw), mode='area')

    # inference; no autograd graph is needed for a forward-only pass
    with torch.no_grad():
        _, _, pred_matte = modnet(im.to(device), True)

    # resize back to the input resolution and convert to an 8-bit image
    pred_matte = F.interpolate(pred_matte, size=(im_h, im_w), mode='area')
    pred_matte = pred_matte[0][0].cpu().numpy()
    return Image.fromarray(((pred_matte * 255).astype('uint8')), mode='L')
|