first commit
- app.py +180 -0
- makefile +3 -0
- requirements.txt +57 -0
- src/README.md +239 -0
- src/__pycache__/neural_style_transfer.cpython-310.pyc +0 -0
- src/__pycache__/reconstruct_image_from_representation.cpython-310.pyc +0 -0
- src/commands.py +96 -0
- src/config.yaml +16 -0
- src/data/content.jpg +0 -0
- src/data/style.jpg +0 -0
- src/data/transfer/0000.jpg +0 -0
- src/data/transfer/0001.jpg +0 -0
- src/data/transfer/0002.jpg +0 -0
- src/data/transfer/0003.jpg +0 -0
- src/data/transfer/0004.jpg +0 -0
- src/data/transfer/0005.jpg +0 -0
- src/data/transfer/0006.jpg +0 -0
- src/data/transfer/0007.jpg +0 -0
- src/data/transfer/0008.jpg +0 -0
- src/data/transfer/0009.jpg +0 -0
- src/data/transfer/out.mp4 +0 -0
- src/environment.yml +13 -0
- src/models/definitions/__init__.py +0 -0
- src/models/definitions/__pycache__/__init__.cpython-310.pyc +0 -0
- src/models/definitions/__pycache__/vgg_nets.cpython-310.pyc +0 -0
- src/models/definitions/vgg_nets.py +241 -0
- src/neural_style_transfer.py +163 -0
- src/reconstruct_image_from_representation.py +108 -0
- src/utils/__init__.py +0 -0
- src/utils/__pycache__/__init__.cpython-310.pyc +0 -0
- src/utils/__pycache__/utils.cpython-310.pyc +0 -0
- src/utils/__pycache__/video_utils.cpython-310.pyc +0 -0
- src/utils/utils.py +282 -0
- src/utils/video_utils.py +38 -0
app.py
ADDED
@@ -0,0 +1,180 @@
import io
import streamlit as st
import numpy as np
from src.utils import utils
import PIL.Image as Image
from src.reconstruct_image_from_representation import reconstruct_image_from_representation
from src.neural_style_transfer import neural_style_transfer

st.set_page_config(
    page_title="Neural Style Transfer Video Generation of image reconstruction",
    page_icon="\u2712",
    layout="wide",
    initial_sidebar_state="expanded",
)

st.header("Neural Style Transfer Video Generation")

# Sidebar
st.sidebar.header("Neural Style Transfer Video Generation")
with st.sidebar.expander('About the app'):
    st.write("""
    Use this application to play with the Neural Style Transfer
    by generating video of optimizer
    """)

# Reconstruct or Transfer
with st.sidebar.container():
    st.sidebar.subheader("Reconstruct or Transfer")

    Type = st.sidebar.selectbox("Do you want to reconstruct or transfer",
                                ["Reconstruct", "Transfer"])
    utils.yamlSet('type', Type)

# Optimizer
with st.sidebar.container():
    st.sidebar.subheader("Optimizer")

    optimizer = st.sidebar.selectbox("Choose Optimizer", ["Adam", "LBFGS"])
    utils.yamlSet('optimizer', optimizer)

    iterations = st.sidebar.slider("Iterations", 10, 3000)
    utils.yamlSet('iterations', iterations)

    if optimizer == "Adam":
        learning_rate = st.sidebar.slider("Learning Rate (100\u03BB)", 0.01,
                                          90.0)
        utils.yamlSet('learning_rate', learning_rate)
        st.sidebar.write("\u03BB = ", learning_rate / 100.0)

# Reconstruction
if Type == "Reconstruct":
    with st.sidebar.container():
        st.sidebar.subheader("Reconstruction")
        reconstruct = st.sidebar.selectbox("Reconstruct which image",
                                           ('Content', 'Style'))
        utils.yamlSet('reconstruct', reconstruct)

    # Visualization
    with st.sidebar.container():
        st.sidebar.subheader("Visualization")
        visualize = st.sidebar.selectbox(
            "Do you want to visualize feature maps of reconstruct images",
            ("Yes", "No"))
        utils.yamlSet('visualize', visualize)

# Model
with st.sidebar.container():
    st.sidebar.subheader("Model")
    model = st.sidebar.selectbox("Choose Model",
                                 ("VGG16", "VGG16-Experimental"))
    utils.yamlSet('model', model)

# # use layer
# if model == "VGG19":
#     with st.sidebar.container():
#         st.sidebar.subheader("Layer Type")
#         use = st.sidebar.selectbox("Which type of layer you want to use",
#                                    ("convolution", "relu"))

# Init Image
if Type == "Transfer":
    with st.sidebar.container():
        st.sidebar.subheader("Init Image")
        initImage = st.sidebar.selectbox(
            "Init Image",
            ('Gaussian Noise Image', 'White Noise Image', 'Content', 'Style'))
        utils.yamlSet('initImage', initImage)

# Content Layer
with st.sidebar.container():
    st.sidebar.subheader("Content Layer")
    if model == "VGG16-Experimental":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_1', 'relu2_1', 'relu2_2', 'relu3_1',
                              'relu3_2', 'relu4_1', 'relu4_3', 'relu5_1'))
    elif model == "VGG16":
        contentLayer = st.sidebar.selectbox(
            "Content Layer", ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3'))
    utils.yamlSet('contentLayer', contentLayer)
    # elif model == "VGG19" and use == "relu":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'))
    # elif model == "VGG19" and use == "convolution":
    #     st.sidebar.selectbox("Content Layer",
    #                          ('conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
    #                           'conv5_1'))

# Height
with st.sidebar.container():
    st.sidebar.subheader("Height")
    height = st.sidebar.slider("Height", 100, 6000, 400)
    utils.yamlSet('height', height)

# Representation saving frequency
with st.sidebar.container():
    st.sidebar.subheader("Representation Saving Frequency")
    reprSavFreq = st.sidebar.slider(
        "After how many iterations you want to save representation for "
        "video generation", 1, 100)
    utils.yamlSet('reprSavFreq', reprSavFreq)

if Type == "Transfer":
    # Content Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        contentWeight = st.slider("Content Weight (1000\u03B1)", 0.01, 1000.0)
        utils.yamlSet('contentWeight', contentWeight)

    with col2:
        st.write("\u03B1 = ", contentWeight / 1000.0)

    # Style Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        styleWeight = st.slider("Style Weight (1000\u03B2)", 0.01, 1000.0)
        utils.yamlSet('styleWeight', styleWeight)

    with col2:
        st.write("\u03B2 = ", styleWeight / 1000.0)

    # Total Variation Weight
    col1, col2 = st.columns([0.85, 0.15])
    with col1:
        totalVariationWeight = st.slider("Total Variation Weight (1000\u03B3)",
                                         0.01, 1000.0)
        utils.yamlSet('totalVariationWeight', totalVariationWeight)

    with col2:
        st.write("\u03B3 = ", totalVariationWeight / 1000.0)

# File upload
col1, col2 = st.columns([0.5, 0.5])
with col1:
    contentImage = st.file_uploader('Choose Content Image', type=['jpg'])
    if contentImage:
        st.image(contentImage)
        contentNumpy = np.asarray(
            Image.open(io.BytesIO(contentImage.getvalue())))
        contentPath = utils.save_numpy_array_as_jpg(contentNumpy, "content")
        utils.yamlSet('contentPath', contentPath)

with col2:
    styleImage = st.file_uploader('Choose Style Image', type=['jpg'])
    if styleImage:
        st.image(styleImage)
        styleNumpy = np.asarray(Image.open(io.BytesIO(styleImage.getvalue())))
        stylePath = utils.save_numpy_array_as_jpg(styleNumpy, "style")
        utils.yamlSet("stylePath", stylePath)

submit = st.button("Submit")

if submit:
    utils.clearDir()
    if Type == "Reconstruct":
        reconstruct_image_from_representation()
    elif Type == "Transfer":
        neural_style_transfer()
        video_file = open("src/data/transfer/out.mp4", "rb")
        video_bytes = video_file.read()
        st.video(video_bytes)
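app.py persists every sidebar choice through `utils.yamlSet(...)` and the optimization code reads the values back with `utils.yamlGet(...)`. Those helpers live in `src/utils/utils.py`, whose listing below is cut off before them, so the following is only a rough sketch of the contract they appear to follow, assuming `src/config.yaml` (added in this commit) is the backing store:

```python
# Minimal sketch (assumption) of the yamlSet/yamlGet helpers that app.py relies on.
# The real implementations are in src/utils/utils.py, truncated further down this page.
import yaml

CONFIG_PATH = "src/config.yaml"  # config file added in this commit

def yamlSet(key, value):
    # Read the current config, update one key, and write the file back.
    with open(CONFIG_PATH) as f:
        config = yaml.safe_load(f) or {}
    config[key] = value
    with open(CONFIG_PATH, "w") as f:
        yaml.dump(config, f)

def yamlGet(key):
    # Return a single value previously stored by the Streamlit UI.
    with open(CONFIG_PATH) as f:
        return yaml.safe_load(f)[key]
```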
makefile
ADDED
@@ -0,0 +1,3 @@
format:
	yapf -i --recursive src/
	yapf -i app.py
requirements.txt
ADDED
@@ -0,0 +1,57 @@
altair==4.2.2
attrs==22.2.0
blinker==1.5
cachetools==5.3.0
certifi==2022.12.7
charset-normalizer==3.0.1
click==8.1.3
decorator==5.1.1
entrypoints==0.4
gitdb==4.0.10
GitPython==3.1.31
idna==3.4
importlib-metadata==6.0.0
Jinja2==3.1.2
jsonschema==4.17.3
markdown-it-py==2.2.0
MarkupSafe==2.1.2
mdurl==0.1.2
numpy==1.24.2
nvidia-cublas-cu11==11.10.3.66
nvidia-cuda-nvrtc-cu11==11.7.99
nvidia-cuda-runtime-cu11==11.7.99
nvidia-cudnn-cu11==8.5.0.96
opencv-python==4.7.0.72
packaging==23.0
pandas==1.5.3
Pillow==9.4.0
protobuf==3.20.3
pyarrow==11.0.0
pydeck==0.8.0
Pygments==2.14.0
Pympler==1.0.1
pyrsistent==0.19.3
python-dateutil==2.8.2
pytz==2022.7.1
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
requests==2.28.2
rich==13.3.1
semver==2.13.0
six==1.16.0
smmap==5.0.0
streamlit==1.19.0
toml==0.10.2
toolz==0.12.0
torch==1.13.1
torchaudio==0.13.1
torchvision==0.14.1
tornado==6.2
typing_extensions==4.5.0
tzdata==2022.7
tzlocal==4.2
urllib3==1.26.14
validators==0.20.0
watchdog==2.3.0
yapf==0.32.0
zipp==3.15.0
src/README.md
ADDED
@@ -0,0 +1,239 @@
## Neural Style Transfer (optimization method) :computer: + :art: = :heart:
This repo contains a concise PyTorch implementation of the original NST paper (:link: [Gatys et al.](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf)).

It's an accompanying repository for [this video series on YouTube](https://www.youtube.com/watch?v=S78LQebx6jo&list=PLBoQnSflObcmbfshq9oNs41vODgXG-608).

<p align="left">
<a href="https://www.youtube.com/watch?v=S78LQebx6jo" target="_blank"><img src="https://img.youtube.com/vi/S78LQebx6jo/0.jpg"
alt="NST Intro" width="480" height="360" border="10" /></a>
</p>

### What is NST algorithm?
The algorithm transfers style from one input image (the style image) onto another input image (the content image) using a CNN (usually VGG-16/19), and produces a composite, stylized image that keeps the content of the content image but takes on the style of the style image.

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="570"/>
<img src="data/examples/bridge/content_style.jpg" width="260"/>
</p>

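In loss terms, the composite above is found by directly optimizing the pixels of an output image so that a weighted sum of a content loss, a style loss and a total variation loss goes down. A minimal sketch of that objective (the weight names are illustrative; the actual weights used here live in `src/config.yaml`):

```python
# Sketch of the NST objective: the output image's pixels are optimized so that
# this weighted sum decreases (alpha, beta, gamma are hyperparameters).
def nst_objective(content_loss, style_loss, tv_loss, alpha, beta, gamma):
    return alpha * content_loss + beta * style_loss + gamma * tv_loss
```
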
### Why yet another NST repo?
It's the **cleanest and most concise** NST repo that I know of + it's written in **PyTorch!** :heart:

Most NST repos were written in TensorFlow (before it even had an L-BFGS optimizer) or in torch (an obsolete, Lua-based framework), and they are often overly complicated: they bundle multiple functionalities (video, static image, color transfer, etc.) into one repo and expose 100 parameters over the command line, out of which maybe 5 or 6 are actually used on a regular basis.

## Examples

Transferring style gives beautiful artistic results:

<p align="center">
<img src="data/examples/bridge/green_bridge_vg_starry_night_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/bridge/green_bridge_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">

<img src="data/examples/lion/lion_candy_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_edtaonisl_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/lion/lion_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
</p>

And here are some results coupled with their style:

<p align="center">
<img src="data/examples/figures/figures_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/ben_giles.jpg" width="267px">

<img src="data/examples/figures/figures_wave_crop_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/wave_crop.jpg" width="267px">

<img src="data/examples/figures/figures_vg_wheat_field_w_350_m_vgg19_cw_100000.0_sw_300000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_wheat_field_cropped.jpg" width="267px">

<img src="data/examples/figures/figures_vg_starry_night_w_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="400px">
<img src="data/style-images/vg_starry_night_resized.jpg" width="267px">
</p>

*Note: all of the stylized images were produced by me (using this repo); credits for the original image artists [are given below](#acknowledgements).*

### Content/Style tradeoff

Changing the style weight gives you less or more style on the final image, assuming you keep the content weight constant. <br/>
I did increments of 10 here for the style weight (1e1, 1e2, 1e3, 1e4), while keeping the content weight constant at 1e5, and I used a random image as the initialization image.

<p align="center">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_100.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="200px">
<img src="data/examples/style-tradeoff/figures_vg_starry_night_o_lbfgs_i_random_h_352_m_vgg19_cw_100000.0_sw_10000.0_tv_1.0_resized.jpg" width="200px">
</p>

### Impact of total variation (tv) loss

Rarely explained, the total variation loss, i.e. its corresponding weight, controls the smoothness of the image. <br/>
I also did increments of 10 here (1e1, 1e4, 1e5, 1e6) and I used the content image as the initialization image.

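For reference, the total variation of an image is just the summed absolute difference between neighbouring pixels, which is what the `TotalVariationLoss` module in `src/neural_style_transfer.py` computes:

```python
import torch

def total_variation(img):
    # img: (batch, channels, height, width); penalizes horizontal and vertical pixel jumps
    return (torch.sum(torch.abs(img[:, :, :, :-1] - img[:, :, :, 1:])) +
            torch.sum(torch.abs(img[:, :, :-1, :] - img[:, :, 1:, :])))
```
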
<p align="center">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_10000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_100000.0_resized.jpg" width="200px">
<img src="data/examples/tv-tradeoff/figures_candy_o_lbfgs_i_content_h_350_m_vgg19_cw_100000.0_sw_30000.0_tv_1000000.0_resized.jpg" width="200px">
</p>

### Optimization initialization

Starting from different initialization images (white noise, Gaussian noise, the content image, or the style image) leads to different results. <br/>
Empirically, the content image gives the best results, as also explored in [this research paper](https://arxiv.org/pdf/1602.07188.pdf). <br/>
Here you can see results for content, random, and style initialization, in that order (left to right):

<p align="center">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_random_h_500_m_vgg19_cw_100000.0_sw_1000.0_tv_1.0_resized.jpg" width="270px">
<img src="data/examples/init_methods/golden_gate_vg_la_cafe_o_lbfgs_i_style_h_500_m_vgg19_cw_100000.0_sw_10.0_tv_0.1_resized.jpg" width="270px">
</p>

You can also see that with style initialization some content from the artwork leaked directly into the output.

### Famous "Figure 3" reconstruction

Finally, if I hadn't included this portion you couldn't say that I've successfully reproduced the [original paper](https://www.cv-foundation.org/openaccess/content_cvpr_2016/papers/Gatys_Image_Style_Transfer_CVPR_2016_paper.pdf) (laughs in Python):

<p align="center">
<img src="data/examples/gatys_reconstruction/tubingen.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_shipwreck_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_200.0_tv_1.0_resized.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_starry-night_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">

<img src="data/examples/gatys_reconstruction/tubingen_the_scream_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_300.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_seated-nude_o_lbfgs_i_random_h_400_m_vgg19_cw_100000.0_sw_2000.0_tv_1.0.jpg" width="300px">
<img src="data/examples/gatys_reconstruction/tubingen_kandinsky_o_lbfgs_i_content_h_400_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="300px">
</p>

I haven't given it much effort; the results can be much nicer.

### Content reconstruction

If we only use the content (perceptual) loss and try to minimize that objective function, this is what we get (starting from noise):

<p align="center">
<img src="data/examples/content_reconstruction/0000.jpg" width="200px">
<img src="data/examples/content_reconstruction/0026.jpg" width="200px">
<img src="data/examples/content_reconstruction/0070.jpg" width="200px">
<img src="data/examples/content_reconstruction/0509.jpg" width="200px">
</p>

These are steps 0, 26, 70 and 509 of the L-BFGS numerical optimizer, using layer relu3_1 for the content representation.<br/>
Check out [this section](#reconstruct-image-from-representation) if you want to play with this.

### Style reconstruction

We can do the same thing for style (on the left is the original artwork "Candy"), starting from noise:

<p align="center">
<img src="data/examples/style_reconstruction/candy.jpg" width="200px">
<img src="data/examples/style_reconstruction/0045.jpg" width="200px">
<img src="data/examples/style_reconstruction/0129.jpg" width="200px">
<img src="data/examples/style_reconstruction/0510.jpg" width="200px">
</p>

These are steps 45, 129 and 510 of L-BFGS, using layers relu1_1, relu2_1, relu3_1, relu4_1 and relu5_1 for the style representation.

## Setup

1. Open Anaconda Prompt and navigate into the project directory: `cd path_to_repo`
2. Run `conda env create` (while in the project directory)
3. Run `activate pytorch-nst`

That's it! It should work out of the box, executing the environment.yml file which deals with dependencies.

-----

The PyTorch package will pull some version of CUDA with it, but it is highly recommended that you install system-wide CUDA beforehand, mostly because of GPU drivers. I also recommend using the Miniconda installer as a way to get conda on your system.

Follow through points 1 and 2 of [this setup](https://github.com/Petlja/PSIML/blob/master/docs/MachineSetup.md) and use the most up-to-date versions of Miniconda (Python 3.7) and CUDA/cuDNN.
(I recommend CUDA 10.1 as it is compatible with PyTorch 1.4, which is used in this repo, and the newest compatible cuDNN.)

## Usage

1. Copy content images to the default content image directory: `/data/content-images/`
2. Copy style images to the default style image directory: `/data/style-images/`
3. Run `python neural_style_transfer.py --content_img_name <content-img-name> --style_img_name <style-img-name>`

It's that easy. For more advanced usage take a look at the code; it's (hopefully) self-explanatory (if you speak Python ^^).

Or take a look at [this accompanying YouTube video](https://www.youtube.com/watch?v=XWMwdkaLFsI), which explains how to use this repo in greater detail.

Just run it! So that you can get something like this: :heart:
<p align="center">
<img src="data/examples/taj_mahal/taj_mahal_ben_giles_o_lbfgs_i_content_h_500_m_vgg19_cw_100000.0_sw_30000.0_tv_1.0.jpg" width="615px">
</p>

### Debugging/Experimenting

Q: L-BFGS can't run on my computer, it takes too much GPU VRAM?<br/>
A: Set Adam as your default and take a look at the code for the initial style/content/tv weights you should use as a starting point.

Q: The output image looks too much like the style image?<br/>
A: Decrease the style weight, or take a look at the table of weights (in neural_style_transfer.py) which I've included and which works.

Q: There is too much noise (the image is not smooth)?<br/>
A: Increase the total variation (tv) weight (usually by multiples of 10; again, the table is your friend here, or just experiment yourself).

### Reconstruct image from representation

I've also included a file that will help you better understand how the algorithm works and what the neural net sees.<br/>
What it does is that it allows you to visualize content **(feature maps)** and style representations **(Gram matrices)**.<br/>
It will also reconstruct either only style or only content using those representations and the corresponding model that produces them. <br/>

Just run this:<br/>
`reconstruct_image_from_representation.py --should_reconstruct_content <Bool> --should_visualize_representation <Bool>`
<br/><br/>
And that's it! `--should_visualize_representation`, if set to True, will visualize these for you,<br/>
while `--should_reconstruct_content` picks between style and content reconstruction.

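A style representation at a given layer is the Gram matrix of that layer's feature maps, i.e. the channel-by-channel correlations. A minimal sketch, close in spirit to the `gram_matrix` helper used throughout `src/` (the exact normalization there may differ):

```python
import torch

def gram_matrix(feature_maps):
    # feature_maps: (batch, channels, height, width) activations from one VGG layer
    b, ch, h, w = feature_maps.size()
    features = feature_maps.view(b, ch, h * w)
    gram = features.bmm(features.transpose(1, 2))  # (batch, channels, channels)
    return gram / (ch * h * w)
```
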
Here are some feature maps (relu1_1, VGG 19) as well as a Gram matrix (relu2_1, VGG 19) for Van Gogh's famous [starry night](https://en.wikipedia.org/wiki/The_Starry_Night):

<p align="center">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0005_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0046_resized.jpg" width="200px">
<img src="data/examples/fms_gram/fm_vgg19_relu1_1_0058_resized.jpg" width="200px">
<img src="data/examples/fms_gram/gram_vgg19_relu2_1_0001.jpg" width="200px">
</p>

No more dark magic.

## Acknowledgements

I found these repos useful (while developing this one):
* [fast_neural_style](https://github.com/pytorch/examples/tree/master/fast_neural_style) (PyTorch, feed-forward method)
* [neural-style-tf](https://github.com/cysmith/neural-style-tf/) (TensorFlow, optimization method)
* [neural-style](https://github.com/anishathalye/neural-style/) (TensorFlow, optimization method)

I found some of the content/style images I was using here:
* [style/artistic images](https://www.rawpixel.com/board/537381/vincent-van-gogh-free-original-public-domain-paintings?sort=curated&mode=shop&page=1)
* [awesome figures pic](https://www.pexels.com/photo/action-android-device-electronics-595804/)
* [awesome bridge pic](https://www.pexels.com/photo/gray-bridge-and-trees-814499/)

Other images are now already classics in the NST world.

## Citation

If you find this code useful for your research, please cite the following:

```
@misc{Gordić2020nst,
  author = {Gordić, Aleksa},
  title = {pytorch-neural-style-transfer},
  year = {2020},
  publisher = {GitHub},
  journal = {GitHub repository},
  howpublished = {\url{https://github.com/gordicaleksa/pytorch-neural-style-transfer}},
}
```

## Connect with me

If you'd love to have some more AI-related content in your life :nerd_face:, consider:
* Subscribing to my YouTube channel [The AI Epiphany](https://www.youtube.com/c/TheAiEpiphany) :bell:
* Following me on [LinkedIn](https://www.linkedin.com/in/aleksagordic/) and [Twitter](https://twitter.com/gordic_aleksa) :bulb:
* Following me on [Medium](https://gordicaleksa.medium.com/) :books: :heart:

## Licence

[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://github.com/gordicaleksa/pytorch-neural-style-transfer/blob/master/LICENCE)
src/__pycache__/neural_style_transfer.cpython-310.pyc
ADDED
Binary file (5.46 kB).
src/__pycache__/reconstruct_image_from_representation.cpython-310.pyc
ADDED
Binary file (3.34 kB).
src/commands.py
ADDED
@@ -0,0 +1,96 @@
from abc import ABC, abstractmethod
import os
import numpy as np
import utils
import torch


class Tuning(ABC):

    @abstractmethod
    def Image(self, image):
        pass


class TuningReconstruction(Tuning):

    def __init__(self, model, optimizer, target_representation,
                 content_feature_maps_index, style_feature_maps_indices):

        self.model = model
        self.optimizer = optimizer
        self.target_representation = target_representation
        self.content_feature_maps_index = content_feature_maps_index
        self.style_feature_maps_indices = style_feature_maps_indices

    def Image(self, image):

        # Finds the current representation
        set_of_feature_maps = self.model(image)
        if utils.yamlGet('reconstruct') == 'Content':
            current_representation = set_of_feature_maps[
                self.content_feature_maps_index].squeeze(axis=0)
        elif utils.yamlGet('reconstruct') == 'Style':
            current_representation = [
                utils.gram_matrix(fmaps)
                for i, fmaps in enumerate(set_of_feature_maps)
                if i in self.style_feature_maps_indices
            ]

        loss = 0.0

        if utils.yamlGet('reconstruct') == 'Content':
            loss = torch.nn.MSELoss(reduction='mean')(
                self.target_representation, current_representation)
        elif utils.yamlGet('reconstruct') == 'Style':
            for gram_gt, gram_hat in zip(self.target_representation,
                                         current_representation):
                loss += (1 / len(self.target_representation)) * \
                    torch.nn.MSELoss(
                        reduction='sum')(gram_gt[0], gram_hat[0])

        loss.backward()
        self.optimizer.step()
        self.optimizer.zero_grad()
        return loss.item(), current_representation


class Reconstruct(ABC):

    @abstractmethod
    def Visualize(self):
        pass


class ContentReconstruct(Reconstruct):
    """
    tcr -> target_content_representation
    """

    def __init__(self, feature_maps):
        self.fm = feature_maps
        self.tcr = self.fm['set_of_feature_maps'][
            self.fm['content_feature_maps_index_name'][0]].squeeze(axis=0)
        self.nfm = self.tcr.size()[0]

    def Visualize(self):
        for i in range(self.nfm):
            feature_map = self.tcr[i].to('cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # plt.imshow(feature_map)
            # plt.title(
            #     f'Feature map {i+1}/{num_of_feature_maps} from layer'
            #     f' {content_feature_maps_index_name[1]} '
            #     f'(model={config["model"]}) for'
            #     f' {config["content_img_name"]} image.'
            # )
            # plt.show()
            filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            utils.save_image(feature_map, os.path.join(dump_path, filename))


class StyleReconstruct(Reconstruct):
    pass


class Invoker:
    pass
src/config.yaml
ADDED
@@ -0,0 +1,16 @@
contentLayer: relu4_3
contentPath: src/data/content.jpg
contentWeight: 475.71
height: 400
initImage: Content
iterations: 10
learning_rate: 0.01
model: VGG16
optimizer: Adam
reconstruct: Content
reprSavFreq: 1
stylePath: src/data/style.jpg
styleWeight: 307.7
totalVariationWeight: 854.25
type: Transfer
visualize: 'Yes'
src/data/content.jpg
ADDED
src/data/style.jpg
ADDED
src/data/transfer/0000.jpg
ADDED
src/data/transfer/0001.jpg
ADDED
src/data/transfer/0002.jpg
ADDED
src/data/transfer/0003.jpg
ADDED
src/data/transfer/0004.jpg
ADDED
src/data/transfer/0005.jpg
ADDED
src/data/transfer/0006.jpg
ADDED
src/data/transfer/0007.jpg
ADDED
src/data/transfer/0008.jpg
ADDED
src/data/transfer/0009.jpg
ADDED
src/data/transfer/out.mp4
ADDED
Binary file (20.1 kB).
src/environment.yml
ADDED
@@ -0,0 +1,13 @@
name: pytorch-nst
channels:
  - defaults
  - pytorch
dependencies:
  - python=3.7.6
  - pip=20.0.2
  - matplotlib=3.1.3
  - pytorch==1.4.0
  - torchvision=0.5.0
  - pip:
    - numpy==1.18.1
    - opencv-python==4.2.0.32
src/models/definitions/__init__.py
ADDED
File without changes
src/models/definitions/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (191 Bytes).
src/models/definitions/__pycache__/vgg_nets.cpython-310.pyc
ADDED
Binary file (6.01 kB).
src/models/definitions/vgg_nets.py
ADDED
@@ -0,0 +1,241 @@
from collections import namedtuple
import torch
from torchvision import models
from src.utils import utils
"""
More detail about the VGG architecture (if you want to understand magic/hardcoded numbers) can be found here:

https://github.com/pytorch/vision/blob/3c254fb7af5f8af252c24e89949c54a3461ff0be/torchvision/models/vgg.py
"""


class Vgg16(torch.nn.Module):
    """Only those layers are exposed which have already proven to work nicely."""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = {'relu1_2': 1, 'relu2_2': 2,
                            'relu3_3': 3, 'relu4_3': 4}
        self.content_feature_maps_index = self.layer_names[
            utils.yamlGet('contentLayer')] - 1  # relu2_2
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        for x in range(4):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(4, 9):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(9, 16):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(16, 23):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        relu1_2 = x
        x = self.slice2(x)
        relu2_2 = x
        x = self.slice3(x)
        relu3_3 = x
        x = self.slice4(x)
        relu4_3 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names.keys())
        out = vgg_outputs(relu1_2, relu2_2, relu3_3, relu4_3)
        return out


class Vgg16Experimental(torch.nn.Module):
    """Everything exposed so you can play with different combinations for style and content representation"""

    def __init__(self, requires_grad=False, show_progress=False):
        super().__init__()
        vgg_pretrained_features = models.vgg16(pretrained=True,
                                               progress=show_progress).features
        self.layer_names = [
            'relu1_1', 'relu2_1', 'relu2_2', 'relu3_1', 'relu3_2', 'relu4_1',
            'relu4_3', 'relu5_1'
        ]
        self.content_feature_maps_index = 4
        self.style_feature_maps_indices = list(range(len(
            self.layer_names)))  # all layers used for style representation

        self.conv1_1 = vgg_pretrained_features[0]
        self.relu1_1 = vgg_pretrained_features[1]
        self.conv1_2 = vgg_pretrained_features[2]
        self.relu1_2 = vgg_pretrained_features[3]
        self.max_pooling1 = vgg_pretrained_features[4]
        self.conv2_1 = vgg_pretrained_features[5]
        self.relu2_1 = vgg_pretrained_features[6]
        self.conv2_2 = vgg_pretrained_features[7]
        self.relu2_2 = vgg_pretrained_features[8]
        self.max_pooling2 = vgg_pretrained_features[9]
        self.conv3_1 = vgg_pretrained_features[10]
        self.relu3_1 = vgg_pretrained_features[11]
        self.conv3_2 = vgg_pretrained_features[12]
        self.relu3_2 = vgg_pretrained_features[13]
        self.conv3_3 = vgg_pretrained_features[14]
        self.relu3_3 = vgg_pretrained_features[15]
        self.max_pooling3 = vgg_pretrained_features[16]
        self.conv4_1 = vgg_pretrained_features[17]
        self.relu4_1 = vgg_pretrained_features[18]
        self.conv4_2 = vgg_pretrained_features[19]
        self.relu4_2 = vgg_pretrained_features[20]
        self.conv4_3 = vgg_pretrained_features[21]
        self.relu4_3 = vgg_pretrained_features[22]
        self.max_pooling4 = vgg_pretrained_features[23]
        self.conv5_1 = vgg_pretrained_features[24]
        self.relu5_1 = vgg_pretrained_features[25]
        self.conv5_2 = vgg_pretrained_features[26]
        self.relu5_2 = vgg_pretrained_features[27]
        self.conv5_3 = vgg_pretrained_features[28]
        self.relu5_3 = vgg_pretrained_features[29]
        self.max_pooling5 = vgg_pretrained_features[30]
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.conv1_1(x)
        conv1_1 = x
        x = self.relu1_1(x)
        relu1_1 = x
        x = self.conv1_2(x)
        conv1_2 = x
        x = self.relu1_2(x)
        relu1_2 = x
        x = self.max_pooling1(x)
        x = self.conv2_1(x)
        conv2_1 = x
        x = self.relu2_1(x)
        relu2_1 = x
        x = self.conv2_2(x)
        conv2_2 = x
        x = self.relu2_2(x)
        relu2_2 = x
        x = self.max_pooling2(x)
        x = self.conv3_1(x)
        conv3_1 = x
        x = self.relu3_1(x)
        relu3_1 = x
        x = self.conv3_2(x)
        conv3_2 = x
        x = self.relu3_2(x)
        relu3_2 = x
        x = self.conv3_3(x)
        conv3_3 = x
        x = self.relu3_3(x)
        relu3_3 = x
        x = self.max_pooling3(x)
        x = self.conv4_1(x)
        conv4_1 = x
        x = self.relu4_1(x)
        relu4_1 = x
        x = self.conv4_2(x)
        conv4_2 = x
        x = self.relu4_2(x)
        relu4_2 = x
        x = self.conv4_3(x)
        conv4_3 = x
        x = self.relu4_3(x)
        relu4_3 = x
        x = self.max_pooling4(x)
        x = self.conv5_1(x)
        conv5_1 = x
        x = self.relu5_1(x)
        relu5_1 = x
        x = self.conv5_2(x)
        conv5_2 = x
        x = self.relu5_2(x)
        relu5_2 = x
        x = self.conv5_3(x)
        conv5_3 = x
        x = self.relu5_3(x)
        relu5_3 = x
        x = self.max_pooling5(x)
        # expose only the layers that you want to experiment with here
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(relu1_1, relu2_1, relu2_2, relu3_1, relu3_2, relu4_1,
                          relu4_3, relu5_1)

        return out


class Vgg19(torch.nn.Module):
    """
    Used in the original NST paper, only those layers are exposed which were used in the original paper

    'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1' were used for style representation
    'conv4_2' was used for content representation (although they did some experiments with conv2_2 and conv5_2)
    """

    def __init__(self,
                 requires_grad=False,
                 show_progress=False,
                 use_relu=True):
        super().__init__()
        vgg_pretrained_features = models.vgg19(pretrained=True,
                                               progress=show_progress).features
        if use_relu:  # use relu or, as in the original paper, conv layers
            self.layer_names = [
                'relu1_1', 'relu2_1', 'relu3_1', 'relu4_1', 'relu5_1'
            ]
            self.offset = 1
        else:
            self.layer_names = [
                'conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv4_2',
                'conv5_1'
            ]
            self.offset = 0
        self.content_feature_maps_index = 4  # conv4_2
        # all layers used for style representation except conv4_2
        self.style_feature_maps_indices = list(range(len(self.layer_names)))
        self.style_feature_maps_indices.remove(4)  # conv4_2

        self.slice1 = torch.nn.Sequential()
        self.slice2 = torch.nn.Sequential()
        self.slice3 = torch.nn.Sequential()
        self.slice4 = torch.nn.Sequential()
        self.slice5 = torch.nn.Sequential()
        self.slice6 = torch.nn.Sequential()
        for x in range(1 + self.offset):
            self.slice1.add_module(str(x), vgg_pretrained_features[x])
        for x in range(1 + self.offset, 6 + self.offset):
            self.slice2.add_module(str(x), vgg_pretrained_features[x])
        for x in range(6 + self.offset, 11 + self.offset):
            self.slice3.add_module(str(x), vgg_pretrained_features[x])
        for x in range(11 + self.offset, 20 + self.offset):
            self.slice4.add_module(str(x), vgg_pretrained_features[x])
        for x in range(20 + self.offset, 22):
            self.slice5.add_module(str(x), vgg_pretrained_features[x])
        for x in range(22, 29 + self.offset):
            self.slice6.add_module(str(x), vgg_pretrained_features[x])
        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    def forward(self, x):
        x = self.slice1(x)
        layer1_1 = x
        x = self.slice2(x)
        layer2_1 = x
        x = self.slice3(x)
        layer3_1 = x
        x = self.slice4(x)
        layer4_1 = x
        x = self.slice5(x)
        conv4_2 = x
        x = self.slice6(x)
        layer5_1 = x
        vgg_outputs = namedtuple("VggOutputs", self.layer_names)
        out = vgg_outputs(layer1_1, layer2_1, layer3_1, layer4_1, conv4_2,
                          layer5_1)
        return out
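Each model above returns a `namedtuple` of activations, so downstream code can address feature maps either by position or by field name. A minimal usage sketch for `Vgg16` (the random input tensor and printed shapes are illustrative assumptions, not code from this commit; the constructor also expects `src/config.yaml` to hold a valid `contentLayer`, since it calls `utils.yamlGet`):

```python
import torch
from src.models.definitions.vgg_nets import Vgg16

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Vgg16(requires_grad=False, show_progress=True).to(device).eval()

# Stand-in for a preprocessed content image of height 400 (batch, channels, H, W).
dummy = torch.rand(1, 3, 400, 400, device=device)
feature_maps = model(dummy)

print(feature_maps._fields)        # ('relu1_2', 'relu2_2', 'relu3_3', 'relu4_3')
print(feature_maps.relu4_3.shape)  # e.g. torch.Size([1, 512, 50, 50])
```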
src/neural_style_transfer.py
ADDED
@@ -0,0 +1,163 @@
import os
import src.utils.utils as utils
from src.utils.video_utils import create_video_from_intermediate_results
import torch
from torch import nn
from torch.optim import Adam, LBFGS
from torch.autograd import Variable


class ContentLoss(nn.Module):
    def __init__(self, target):
        super(ContentLoss, self).__init__()
        self.target = target.detach()

    def forward(self, current):
        return nn.MSELoss(reduction='mean')(self.target, current)


class StyleLoss(nn.Module):
    def __init__(self):
        super(StyleLoss, self).__init__()
        self.loss = 0.0

    def forward(self, x, y):
        for gram_gt, gram_hat in zip(x, y):
            self.loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])
        self.loss /= len(x)
        return self.loss


class Build(nn.Module):
    def __init__(
        self,
        config,
        target_content_representation,
        target_style_representation,
    ):
        super(Build, self).__init__()
        self.current_set_of_feature_maps = None
        self.current_content_representation = None
        self.current_style_representation = None
        self.config = config
        self.target_content_representation = target_content_representation
        self.target_style_representation = target_style_representation

    def forward(self, model, x):
        self.current_set_of_feature_maps = model(x)

        self.current_content_representation = self.current_set_of_feature_maps[
            self.config.content_feature_maps_index].squeeze(axis=0)
        self.current_style_representation = [
            utils.gram_matrix(x)
            for cnt, x in enumerate(self.current_set_of_feature_maps)
            if cnt in self.config.style_feature_maps_indices
        ]
        content_loss = ContentLoss(self.target_content_representation)(
            self.current_content_representation)
        style_loss = StyleLoss()(
            self.target_style_representation,
            self.current_style_representation)
        tv_loss = TotalVariationLoss(x)()

        return Loss()(content_loss, style_loss, tv_loss)


class TotalVariationLoss(nn.Module):
    def __init__(self, y):
        super(TotalVariationLoss, self).__init__()
        self.first = torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:]))
        self.second = torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :]))

    def forward(self):
        return self.first + self.second


class Loss(nn.Module):
    def __init__(self):
        super(Loss, self).__init__()

    def forward(self, x, y, z):
        return utils.yamlGet("contentWeight") * x + utils.yamlGet("styleWeight") * y + utils.yamlGet("totalVariationWeight") * z


def neural_style_transfer():

    dump_path = os.path.join(os.path.dirname(__file__), "data/transfer")
    config = utils.Config()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    content_img, style_img, init_img = utils.Images().getImages(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    output = list(utils.prepare_model(device))
    neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    content_img_set_of_feature_maps = neural_net(content_img)
    style_img_set_of_feature_maps = neural_net(style_img)

    target_content_representation = content_img_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    target_style_representation = [
        utils.gram_matrix(x)
        for cnt, x in enumerate(style_img_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        for cnt in range(utils.yamlGet("iterations")):

            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)

            total_loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')

        def closure():
            # build the weighted content + style + tv loss for the current image
            optimizer.zero_grad()
            total_loss = Build(config, target_content_representation,
                               target_style_representation)(neural_net,
                                                            optimizing_img)
            total_loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            return total_loss

        for cnt in range(utils.yamlGet("iterations")):
            optimizer.step(closure)

    create_video_from_intermediate_results(dump_path)


# some values of weights that worked for figures.jpg, vg_starry_night.jpg
# (starting point for finding good images)
# once you understand what each one does it gets really easy -> also see
# README.md

# lbfgs, content init -> (cw, sw, tv) = (1e5, 3e4, 1e0)
# lbfgs, style init -> (cw, sw, tv) = (1e5, 1e1, 1e-1)
# lbfgs, random init -> (cw, sw, tv) = (1e5, 1e3, 1e0)

# adam, content init -> (cw, sw, tv, lr) = (1e5, 1e5, 1e-1, 1e1)
# adam, style init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)
# adam, random init -> (cw, sw, tv, lr) = (1e5, 1e2, 1e-1, 1e1)

# original NST (Neural Style Transfer) algorithm (Gatys et al.)
# results_path = neural_style_transfer()
# create_video_from_intermediate_results(results_path)
src/reconstruct_image_from_representation.py
ADDED
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import src.utils.utils as utils
|
3 |
+
from src.utils.video_utils import create_video_from_intermediate_results
|
4 |
+
import torch
|
5 |
+
from torch.autograd import Variable
|
6 |
+
from torch.optim import Adam, LBFGS
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
|
10 |
+
def make_tuning_step(optimizer, config):
|
11 |
+
|
12 |
+
def tuning_step(optimizing_img):
|
13 |
+
|
14 |
+
config.current_set_of_feature_maps = config.neural_net(optimizing_img)
|
15 |
+
loss, config.current_representation = utils.getCurrentData(config)
|
16 |
+
loss.backward()
|
17 |
+
optimizer.step()
|
18 |
+
optimizer.zero_grad()
|
19 |
+
return loss.item(), config.current_representation
|
20 |
+
|
21 |
+
return tuning_step
|
22 |
+
|
23 |
+
|
24 |
+
def reconstruct_image_from_representation():
|
25 |
+
|
26 |
+
dump_path = os.path.join(os.path.dirname(__file__), "data/reconstruct")
|
27 |
+
config = utils.Config()
|
28 |
+
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    img, img_path = utils.getImageAndPath(device)
    white_noise_img = np.random.uniform(-90., 90.,
                                        img.shape).astype(np.float32)
    init_img = torch.from_numpy(white_noise_img).float().to(device)
    optimizing_img = Variable(init_img, requires_grad=True)

    # indices pick relevant feature maps (say conv4_1, relu1_1, etc.)
    output = list(utils.prepare_model(device))
    config.neural_net = output[0]
    content_feature_maps_index_name = output[1]
    style_feature_maps_indices_names = output[2]

    config.content_feature_maps_index = content_feature_maps_index_name[0]
    config.style_feature_maps_indices = style_feature_maps_indices_names[0]

    config.current_set_of_feature_maps = config.neural_net(img)

    config.target_content_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    config.target_style_representation = [
        utils.gram_matrix(fmaps)
        for i, fmaps in enumerate(config.current_set_of_feature_maps)
        if i in config.style_feature_maps_indices
    ]

    if utils.yamlGet('reconstruct') == "Content":
        config.target_representation = config.target_content_representation
        num_of_feature_maps = config.target_content_representation.size()[0]
        for i in range(num_of_feature_maps):
            feature_map = config.target_content_representation[i].to(
                'cpu').numpy()
            feature_map = np.uint8(utils.get_uint8_range(feature_map))
            # filename = f'fm_{config["model"]}_{content_feature_maps_index_name[1]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(feature_map, os.path.join(dump_path, filename))

    elif utils.yamlGet('reconstruct') == "Style":
        config.target_representation = config.target_style_representation
        num_of_gram_matrices = len(config.target_style_representation)
        for i in range(num_of_gram_matrices):
            Gram_matrix = config.target_style_representation[i].squeeze(
                axis=0).to('cpu').numpy()
            Gram_matrix = np.uint8(utils.get_uint8_range(Gram_matrix))
            # filename = f'gram_{config["model"]}_{style_feature_maps_indices_names[1][i]}_{str(i).zfill(config["img_format"][0])}{config["img_format"][1]}'
            # utils.save_image(Gram_matrix, os.path.join(dump_path, filename))

    if utils.yamlGet('optimizer') == 'Adam':
        optimizer = Adam((optimizing_img, ), lr=utils.yamlGet('learning_rate'))
        tuning_step = make_tuning_step(optimizer, config)
        # run for the configured number of steps ('iterations' in src/config.yaml)
        for it in range(utils.yamlGet('iterations')):
            tuning_step(optimizing_img)
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, it)

    elif utils.yamlGet('optimizer') == 'LBFGS':
        optimizer = LBFGS((optimizing_img, ),
                          max_iter=utils.yamlGet('iterations'),
                          line_search_fn='strong_wolfe')
        cnt = 0

        def closure():
            nonlocal cnt
            optimizer.zero_grad()  # clear gradients left over from the previous closure call
            loss = utils.getLBFGSReconstructLoss(config, optimizing_img)
            loss.backward()
            with torch.no_grad():
                utils.save_optimizing_image(optimizing_img, dump_path, cnt)
            cnt += 1
            return loss

        optimizer.step(closure)

    return dump_path


if __name__ == "__main__":

    # reconstruct style or content image purely from their representation
    results_path = reconstruct_image_from_representation()

    create_video_from_intermediate_results(results_path)

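The Adam branch above relies on make_tuning_step, which is defined earlier in src/reconstruct_image_from_representation.py and is not shown in this hunk. The following is only a hypothetical sketch of the pattern it presumably wraps, assuming it computes the content or style loss via utils.getCurrentData and applies one optimizer update per call; it is not the committed code.

def make_tuning_step_sketch(optimizer, config):
    def tuning_step(optimizing_img):
        optimizer.zero_grad()                                              # reset gradients from the previous step
        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        loss, _ = utils.getCurrentData(config)                             # content or style loss, per config.yaml
        loss.backward()                                                    # backprop into the pixels of optimizing_img
        optimizer.step()                                                   # one Adam update
        return loss
    return tuning_step
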
src/utils/__init__.py
ADDED
File without changes
src/utils/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (178 Bytes)

src/utils/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (8.67 kB)

src/utils/__pycache__/video_utils.cpython-310.pyc
ADDED
Binary file (1.07 kB)

src/utils/utils.py
ADDED
@@ -0,0 +1,282 @@
import cv2 as cv
import numpy as np
import torch
from torchvision import transforms
import os
import yaml
import PIL.Image as Image
from src.models.definitions.vgg_nets import Vgg16, Vgg19, Vgg16Experimental

IMAGENET_MEAN_255 = [123.675, 116.28, 103.53]
IMAGENET_STD_NEUTRAL = [1, 1, 1]


def load_image(img_path, target_shape=None):
    if not os.path.exists(img_path):
        raise Exception(f'Path does not exist: {img_path}')
    img = cv.imread(img_path)[:, :, ::-1]  # BGR -> RGB
    if target_shape is not None:  # resize section
        current_height, current_width = img.shape[:2]
        new_height = target_shape
        new_width = int(current_width * (new_height / current_height))
        img = cv.resize(img, (new_width, new_height),
                        interpolation=cv.INTER_CUBIC)

    # this needs to go after resizing - otherwise cv.resize will push values outside of the [0, 1] range
    img = img.astype(np.float32)  # convert from uint8 to float32
    img /= 255.0  # get to [0, 1] range
    return img


def getInitImage(content_img, style_img, device):

    if yamlGet("initImage") == 'White Noise Image':
        white_noise_img = np.random.uniform(
            -90., 90., content_img.shape).astype(np.float32)
        init_img = torch.from_numpy(white_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Gaussian Noise Image':
        gaussian_noise_img = np.random.normal(loc=0,
                                              scale=90.,
                                              size=content_img.shape).astype(
                                                  np.float32)
        init_img = torch.from_numpy(gaussian_noise_img).float().to(device)

    elif yamlGet("initImage") == 'Content':
        init_img = content_img

    else:
        # init image has same dimension as content image - this is a hard constraint
        # feature maps need to be of same size for content image and init image
        style_img_resized = prepare_img(style_img,
                                        np.asarray(content_img.shape[2:]),
                                        device)
        init_img = style_img_resized
    return init_img


def prepare_img(img_path, target_shape, device):
    img = load_image(img_path, target_shape=target_shape)

    # normalize using ImageNet's mean
    # [0, 255] range worked much better for me than [0, 1] range (even though PyTorch models were trained on the latter)
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.mul(255)),
        transforms.Normalize(mean=IMAGENET_MEAN_255, std=IMAGENET_STD_NEUTRAL)
    ])

    img = transform(img).to(device).unsqueeze(0)

    return img


def save_image(img, img_path):
    if len(img.shape) == 2:
        img = np.stack((img, ) * 3, axis=-1)
    # [:, :, ::-1] converts RGB into BGR (OpenCV constraint)
    cv.imwrite(img_path, img[:, :, ::-1])

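# Illustration (not part of the commit): prepare_img and save_image form a round
# trip -- pixels are scaled to [0, 255] and shifted by the ImageNet mean on the way
# in, so the mean has to be added back before an image is viewable again. The
# content path, height, and output name below are example values.
def _prepare_img_roundtrip_demo():
    device = torch.device('cpu')
    img = prepare_img('src/data/content.jpg', 400, device)       # (1, 3, 400, W), roughly [-124, 152]
    restored = img.squeeze(0).cpu().numpy().transpose(1, 2, 0)   # back to H x W x C
    restored = np.clip(restored + np.array(IMAGENET_MEAN_255), 0, 255).astype('uint8')
    save_image(restored, 'roundtrip.jpg')                         # written as a regular RGB image
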
def save_optimizing_image(optimizing_img, dump_path, img_id):
    img_format = (4, '.jpg')
    saving_freq = yamlGet('reprSavFreq')
    out_img = optimizing_img.squeeze(axis=0).to('cpu').detach().numpy()
    out_img = np.moveaxis(
        out_img, 0,
        2)  # swap channel from 1st to 3rd position: ch, _, _ -> _, _, ch

    if img_id == yamlGet('iterations') - 1 or \
            (saving_freq > 0 and img_id % saving_freq == 0):

        out_img_name = str(img_id).zfill(img_format[0]) + img_format[1] \
            if saving_freq != -1 else None
        dump_img = np.copy(out_img)
        dump_img += np.array(IMAGENET_MEAN_255).reshape((1, 1, 3))
        dump_img = np.clip(dump_img, 0, 255).astype('uint8')
        cv.imwrite(os.path.join(dump_path, out_img_name), dump_img[:, :, ::-1])
        print(f"{out_img_name} written to {dump_path}")

    # if should_display:
    #     plt.imshow(np.uint8(get_uint8_range(out_img)))
    #     plt.show()


def get_uint8_range(x):
    if isinstance(x, np.ndarray):
        x -= np.min(x)
        x /= np.max(x)
        x *= 255
        return x
    else:
        raise ValueError(f'Expected numpy array got {type(x)}')

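# Illustration (not part of the commit): which iterations save_optimizing_image
# actually writes, assuming example config values iterations=100, reprSavFreq=10.
def _saving_schedule_demo():
    iterations, saving_freq = 100, 10
    saved = [i for i in range(iterations)
             if i == iterations - 1 or (saving_freq > 0 and i % saving_freq == 0)]
    print(saved)   # [0, 10, 20, ..., 90, 99] -> files 0000.jpg, 0010.jpg, ..., 0099.jpg
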
def prepare_model(device):

    model = yamlGet('model')
    if model == 'VGG16':
        model = Vgg16(requires_grad=False, show_progress=True)
    elif model == 'VGG16-Experimental':
        model = Vgg16Experimental(requires_grad=False, show_progress=True)
    elif model == 'VGG19':
        model = Vgg19(requires_grad=False, show_progress=True)
    else:
        raise ValueError(f'{model} not supported.')

    content_feature_maps_index = model.content_feature_maps_index
    style_feature_maps_indices = model.style_feature_maps_indices
    layer_names = list(model.layer_names.keys())

    content_fms_index_name = (content_feature_maps_index,
                              layer_names[content_feature_maps_index])
    style_fms_indices_names = (style_feature_maps_indices, layer_names)
    return model.to(
        device).eval(), content_fms_index_name, style_fms_indices_names


def yamlSet(key, value):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    config[key] = value
    with open('src/config.yaml', 'w') as f:
        yaml.dump(config, f, default_flow_style=False)


def yamlGet(key):
    with open('src/config.yaml', 'r') as f:
        config = yaml.load(f, Loader=yaml.FullLoader)
    return config[key]


def save_numpy_array_as_jpg(array, name):
    image = Image.fromarray(array)
    image.save("src/data/" + str(name) + '.jpg')
    return "src/data/" + str(name) + '.jpg'

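# Illustration (not part of the commit): yamlSet/yamlGet persist settings to
# src/config.yaml, so separate modules read the same configuration. The keys used
# here ('reconstruct', 'model') also appear elsewhere in this file.
def _config_roundtrip_demo():
    yamlSet('reconstruct', 'Content')
    yamlSet('model', 'VGG19')
    assert yamlGet('reconstruct') == 'Content'
    net, content_idx_name, style_idx_names = prepare_model(torch.device('cpu'))
    print(content_idx_name)   # (index, layer name) of the content layer
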
def gram_matrix(x, should_normalize=True):
    (b, ch, h, w) = x.size()
    features = x.view(b, ch, w * h)
    features_t = features.transpose(1, 2)
    gram = features.bmm(features_t)
    if should_normalize:
        gram /= ch * h * w
    return gram


def total_variation(y):
    # anisotropic total variation over neighbouring pixels
    # (the committed function body was an empty `return`; this is the standard formulation)
    return torch.sum(torch.abs(y[:, :, :, :-1] - y[:, :, :, 1:])) + \
        torch.sum(torch.abs(y[:, :, :-1, :] - y[:, :, 1:, :]))


def getImageAndPath(device):

    if yamlGet('reconstruct') == 'Content':
        img_path = yamlGet('contentPath')
    elif yamlGet('reconstruct') == 'Style':
        img_path = yamlGet('stylePath')

    img = prepare_img(img_path, yamlGet('height'), device)

    return img, img_path

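# Illustration (not part of the commit): a quick shape check for gram_matrix.
# A (1, 64, 32, 32) feature map yields a (1, 64, 64) Gram matrix, normalized by ch*h*w.
def _gram_matrix_demo():
    fmap = torch.randn(1, 64, 32, 32)       # (batch, channels, height, width)
    print(gram_matrix(fmap).shape)          # torch.Size([1, 64, 64])
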
def getContentCurrentData(config):
    current_representation = config.current_set_of_feature_maps[
        config.content_feature_maps_index].squeeze(axis=0)
    loss = torch.nn.MSELoss(reduction='mean')(config.target_representation,
                                              current_representation)
    return loss, current_representation


def getStyleCurrentData(config):
    current_representation = [
        gram_matrix(x)
        for cnt, x in enumerate(config.current_set_of_feature_maps)
        if cnt in config.style_feature_maps_indices
    ]
    loss = 0.0
    for gram_gt, gram_hat in zip(config.target_style_representation,
                                 current_representation):
        loss += torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    loss /= len(config.target_style_representation)
    return loss, current_representation


def getCurrentData(config):
    if yamlGet('reconstruct') == 'Content':
        return getContentCurrentData(config)

    elif yamlGet('reconstruct') == 'Style':
        return getStyleCurrentData(config)


def getLBFGSReconstructLoss(config, optimizing_img):

    loss = 0.0

    if yamlGet('reconstruct') == 'Content':
        loss = torch.nn.MSELoss(reduction='mean')(
            config.target_content_representation,
            config.neural_net(optimizing_img)[
                config.content_feature_maps_index].squeeze(axis=0))

    else:
        config.current_set_of_feature_maps = config.neural_net(optimizing_img)
        current_style_representation = [
            gram_matrix(fmaps)
            for i, fmaps in enumerate(config.current_set_of_feature_maps)
            if i in config.style_feature_maps_indices
        ]
        for gram_gt, gram_hat in zip(config.target_style_representation,
                                     current_style_representation):

            loss += (1 / len(config.target_style_representation)) * \
                torch.nn.MSELoss(reduction='sum')(gram_gt[0], gram_hat[0])

    return loss

class Config:

    def __init__(self):
        self.target_representation = 0
        self.target_content_representation = 0
        self.target_style_representation = 0
        self.content_feature_maps_index = 0
        self.style_feature_maps_indices = 0
        self.current_set_of_feature_maps = 0
        self.current_representation = 0
        self.neural_net = 0


class Images:

    def getImages(self, device):

        return [
            self.__getContentImage(device),
            self.__getStyleImage(device),
            self.__getInitImage(device),
        ]

    def __getContentImage(self, device):
        return prepare_img(yamlGet('contentPath'), yamlGet('height'), device)

    def __getStyleImage(self, device):
        return prepare_img(yamlGet('stylePath'), yamlGet('height'), device)

    def __getInitImage(self, device):
        return getInitImage(self.__getContentImage(device),
                            self.__getStyleImage(device), device)


def clearDir():
    path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "data")
    reconstructPath = os.path.join(path, "reconstruct")
    transferPath = os.path.join(path, "transfer")
    for transfer_file in os.scandir(transferPath):
        os.remove(transfer_file)
    for reconstruct_file in os.scandir(reconstructPath):
        os.remove(reconstruct_file)
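Taken together, Images and clearDir are presumably what a transfer run calls first: wipe the output folders, then resolve the content, style, and initial images from src/config.yaml. A hypothetical wiring sketch (not code from this commit):

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clearDir()                                             # empty src/data/transfer and src/data/reconstruct
content_img, style_img, init_img = Images().getImages(device)
print(content_img.shape, style_img.shape, init_img.shape)
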
src/utils/video_utils.py
ADDED
@@ -0,0 +1,38 @@
import os
import subprocess
import shutil


def create_video_from_intermediate_results(results_path):
    #
    # change this depending on what you want to accomplish (modify out video
    # name, change fps and trim video)
    #
    img_format = (4, '.jpg')
    out_file_name = 'out.mp4'
    fps = 10
    first_frame = 0
    number_of_frames_to_process = len(os.listdir(results_path))
    ffmpeg = 'ffmpeg'
    if shutil.which(ffmpeg):  # if ffmpeg is in the system path
        # example: '%4d.png' for (4, '.png')
        img_name_format = '%' + str(img_format[0]) + 'd' + img_format[1]
        pattern = os.path.join(results_path, img_name_format)
        out_video_path = os.path.join(results_path, out_file_name)

        trim_video_command = [
            '-start_number',
            str(first_frame), '-vframes',
            str(number_of_frames_to_process)
        ]
        input_options = ['-r', str(fps), '-i', pattern]
        encoding_options = [
            '-c:v', 'libx264', '-crf', '25', '-pix_fmt', 'yuv420p',
            '-vf', "pad=ceil(iw/2)*2:ceil(ih/2)*2"
        ]
        subprocess.call([
            ffmpeg, *input_options, *trim_video_command, *encoding_options,
            out_video_path
        ])
    else:
        print(f'{ffmpeg} not found in the system path, aborting.')
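Typical use of this helper (illustration only, not part of the commit): point it at a directory of zero-padded frames such as src/data/transfer and it shells out to ffmpeg, producing out.mp4 next to the frames.

from src.utils.video_utils import create_video_from_intermediate_results

# Roughly equivalent to running, for N saved frames:
#   ffmpeg -r 10 -i src/data/transfer/%4d.jpg -start_number 0 -vframes N \
#          -c:v libx264 -crf 25 -pix_fmt yuv420p -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" \
#          src/data/transfer/out.mp4
create_video_from_intermediate_results('src/data/transfer')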