Spaces:
Runtime error
Runtime error
File size: 8,527 Bytes
7cbaeb9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 |
import collections.abc
import os
import os.path as osp
from torch import nn
import kornia.augmentation as K
import pydiffvg
import save_svg
import cv2
from ttf import font_string_to_svgs, normalize_letter_size
import torch
import numpy as np
def edict_2_dict(x):
    """Recursively rebuild nested ``dict``/``list`` containers as plain ones.

    Every ``dict`` instance (including ``dict`` subclasses) is rebuilt as a
    builtin ``dict`` and every ``list`` instance as a builtin ``list``. Any
    other value is returned as-is, so leaf objects are shared with the input.

    Args:
        x: Arbitrary object, typically a nested configuration structure.

    Returns:
        A structure with the same keys/values as ``x`` in which all dict and
        list containers are freshly created builtin containers.
    """
    if isinstance(x, dict):
        return {key: edict_2_dict(value) for key, value in x.items()}
    if isinstance(x, list):
        return [edict_2_dict(item) for item in x]
    # Tuples, sets and scalars are intentionally left untouched.
    return x
def check_and_create_dir(path):
    """Make sure the directory that will contain ``path`` exists.

    Only the directory component of ``path`` (``osp.split(path)[0]``) is
    created; the final path component is treated as a file name and is never
    created.

    Args:
        path: Path of a file that is about to be written.
    """
    pathdir = osp.split(path)[0]
    # A bare file name has an empty directory part, i.e. the current working
    # directory: there is nothing to create and os.makedirs("") would raise.
    if pathdir:
        # exist_ok=True replaces the isdir() pre-check, which could still fail
        # if the directory appeared between the check and the creation.
        os.makedirs(pathdir, exist_ok=True)
def update(d, u):
    """Recursively merge mapping ``u`` into ``d`` in place and return ``d``.

    Based on
    https://stackoverflow.com/questions/3232943/update-value-of-a-nested-dictionary-of-varying-depth

    Values of ``u`` that are mappings are merged key by key into the
    corresponding value of ``d``; all other values overwrite the entry in
    ``d``.

    Args:
        d: Mapping that is modified in place.
        u: Mapping holding the overriding values.

    Returns:
        ``d`` itself, after the merge.
    """
    for key, value in u.items():
        if isinstance(value, collections.abc.Mapping):
            current = d.get(key)
            if not isinstance(current, collections.abc.Mapping):
                # Key is missing, or holds a non-mapping value that a nested
                # override must replace; recursing into a scalar would raise.
                current = {}
            d[key] = update(current, value)
        else:
            d[key] = value
    return d
def preprocess(font, word, letter, level_of_cc=1):
    """Run the font-to-SVG preprocessing for ``word`` and, if needed, ``letter``.

    Calls ``font_string_to_svgs`` followed by ``normalize_letter_size`` for
    ``word``; when ``letter`` has more than one character the same two calls
    are repeated for ``letter``.

    Args:
        font: Font name; the font file is looked up at
            ``code/data/fonts/{font}.ttf``.
        word: The full word to process.
        letter: The character(s) of interest; processed separately only when
            it contains more than one character.
        level_of_cc: Multiplier applied to every per-character value of the
            table below. ``0`` disables the table, in which case
            ``target_control=None`` is passed on.
    """
    if level_of_cc == 0:
        target_cp = None
    else:
        # Per-character base values handed to font_string_to_svgs as
        # `target_control` (presumably target control-point counts -- confirm
        # against ttf.font_string_to_svgs).
        target_cp = {"A": 120, "B": 120, "C": 100, "D": 100,
                     "E": 120, "F": 120, "G": 120, "H": 120,
                     "I": 35, "J": 80, "K": 100, "L": 80,
                     "M": 100, "N": 100, "O": 100, "P": 120,
                     "Q": 120, "R": 130, "S": 110, "T": 90,
                     "U": 100, "V": 100, "W": 100, "X": 130,
                     "Y": 120, "Z": 120,
                     "a": 120, "b": 120, "c": 100, "d": 100,
                     "e": 120, "f": 120, "g": 120, "h": 120,
                     "i": 35, "j": 80, "k": 100, "l": 80,
                     "m": 100, "n": 100, "o": 100, "p": 120,
                     "q": 120, "r": 130, "s": 110, "t": 90,
                     "u": 100, "v": 100, "w": 100, "x": 130,
                     "y": 120, "z": 120
                     }
        target_cp = {k: v * level_of_cc for k, v in target_cp.items()}

    print(f"======= {font} =======")
    font_path = f"code/data/fonts/{font}.ttf"
    init_path = "code/data/init"
    subdivision_thresh = None

    font_string_to_svgs(init_path, font_path, word, target_control=target_cp,
                        subdivision_thresh=subdivision_thresh)
    normalize_letter_size(init_path, font_path, word)

    # Optimize two (or more) adjacent letters: they are processed together as
    # their own string in addition to the full word.
    if len(letter) > 1:
        font_string_to_svgs(init_path, font_path, letter, target_control=target_cp,
                            subdivision_thresh=subdivision_thresh)
        normalize_letter_size(init_path, font_path, letter)

    print("Done preprocess")
def get_data_augs(cut_size):
    """Build the augmentation pipeline applied in sequence.

    The pipeline is a random perspective warp (``distortion_scale=0.5``,
    applied with probability 0.7) followed by a random crop to
    ``cut_size`` x ``cut_size`` that reflect-pads when needed.

    Args:
        cut_size: Side length of the square crop.

    Returns:
        An ``nn.Sequential`` holding the two augmentation modules.
    """
    perspective = K.RandomPerspective(distortion_scale=0.5, p=0.7)
    crop = K.RandomCrop(size=(cut_size, cut_size), pad_if_needed=True, padding_mode='reflect', p=1.0)
    return nn.Sequential(perspective, crop)
'''pytorch adaptation of https://github.com/google/mipnerf'''
def learning_rate_decay(step,
                        lr_init,
                        lr_final,
                        max_steps,
                        lr_delay_steps=0,
                        lr_delay_mult=1):
    """Continuous learning rate decay function.

    The returned rate is lr_init when step=0 and lr_final when step=max_steps,
    and is log-linearly interpolated elsewhere (equivalent to exponential
    decay). Steps outside [0, max_steps] are clipped to that range.

    If lr_delay_steps>0 then the learning rate will be scaled by some smooth
    function of lr_delay_mult, such that the initial learning rate is
    lr_init*lr_delay_mult at the beginning of optimization but will be eased
    back to the normal learning rate when steps>lr_delay_steps.

    Args:
        step: int, the current optimization step.
        lr_init: float, the initial learning rate (must be > 0, its log is
            taken).
        lr_final: float, the final learning rate (must be > 0, its log is
            taken).
        max_steps: int, the number of steps during optimization (non-zero).
        lr_delay_steps: int, the number of steps to delay the full learning
            rate.
        lr_delay_mult: float, the multiplier on the rate when delaying it.

    Returns:
        lr: the learning rate for the current step 'step'.
    """
    if lr_delay_steps > 0:
        # A kind of reverse cosine decay: ramps from lr_delay_mult up to 1
        # along a quarter sine wave over the first lr_delay_steps steps.
        delay_progress = np.clip(step / lr_delay_steps, 0, 1)
        delay_rate = lr_delay_mult + (1 - lr_delay_mult) * np.sin(
            0.5 * np.pi * delay_progress)
    else:
        delay_rate = 1.
    t = np.clip(step / max_steps, 0, 1)
    # Linear interpolation in log space == geometric interpolation of rates.
    log_lerp = np.exp(np.log(lr_init) * (1 - t) + np.log(lr_final) * t)
    return delay_rate * log_lerp
def save_image(img, filename, gamma=1):
    """Write ``img`` to ``filename`` via ``pydiffvg.imwrite``.

    The parent directory of ``filename`` is created first if necessary. The
    image is detached and moved to the CPU before being written.

    Args:
        img: Image tensor (must support ``.detach().cpu()``).
        filename: Destination file path.
        gamma: Gamma value forwarded to ``pydiffvg.imwrite``.
    """
    check_and_create_dir(filename)
    pydiffvg.imwrite(img.detach().cpu(), filename, gamma=gamma)
def get_letter_ids(letter, word, shape_groups):
    """Return the shape ids of the first group paired with ``letter``.

    ``shape_groups`` and ``word`` are walked in lockstep, so the i-th group is
    taken to belong to the i-th character of ``word``. Pairing stops at the
    shorter of the two sequences, and only the first occurrence of ``letter``
    is considered.

    Args:
        letter: Single character to look for.
        word: String whose characters are paired with ``shape_groups``.
        shape_groups: Sequence of objects exposing a ``shape_ids`` attribute.

    Returns:
        ``shape_ids`` of the first matching group, or ``None`` when no paired
        character equals ``letter``.
    """
    for group, char in zip(shape_groups, word):
        if char == letter:
            return group.shape_ids
    # No match: callers must be prepared to handle None.
    return None
def combine_word(word, letter, font, experiment_dir):
    """Insert the optimized letter shapes back into the word SVG and save it.

    Loads ``./code/data/init/{font}_{word}_scaled.svg`` and
    ``{experiment_dir}/output-svg/output.svg``, rescales and translates the
    output shapes so that they fit the bounding box occupied by ``letter``
    inside the word, substitutes them for the word's original shapes and
    writes ``{experiment_dir}/{font}_{word}_{letter}.svg``.

    Args:
        word: The full word; its characters are matched against the shape
            groups of the word SVG.
        letter: The character(s) of ``word`` being replaced (iterated
            character by character).
        font: Font name used in the input/output file names.
        experiment_dir: Directory holding ``output-svg/output.svg``; the
            combined SVG is written here.

    Raises:
        ValueError: If a character of ``letter`` has no shape group in the
            word SVG.
    """
    word_svg_scaled = f"./code/data/init/{font}_{word}_scaled.svg"
    _, _, shapes_word, shape_groups_word = pydiffvg.svg_to_scene(word_svg_scaled)

    letter_ids = []
    for char in letter:
        ids = get_letter_ids(char, word, shape_groups_word)
        if ids is None:
            # Fail with a clear message instead of "NoneType is not iterable".
            raise ValueError(f"letter {char!r} has no shape group in word {word!r}")
        letter_ids += ids

    # Bounding box of the letter(s) inside the word: the target region.
    w_min, w_max, h_min, h_max = _bounding_box([shapes_word[ids] for ids in letter_ids])
    c_w = (w_max - w_min) / 2
    c_h = (h_max - h_min) / 2

    svg_result = os.path.join(experiment_dir, "output-svg", "output.svg")
    canvas_width, canvas_height, shapes, _ = pydiffvg.svg_to_scene(svg_result)

    # Bounding box of the optimized output shapes.
    out_w_min, out_w_max, out_h_min, out_h_max = _bounding_box(shapes)
    out_width = out_w_max - out_w_min
    out_height = out_h_max - out_h_min
    out_c_w = out_width / 2
    out_c_h = out_height / 2

    scale_canvas_w = (w_max - w_min) / out_width
    scale_canvas_h = (h_max - h_min) / out_height

    # Decide the fitting axis ONCE, before either scale is overwritten.
    # Re-testing the comparison afterwards could select the other branch when
    # the adjusted scales come out equal, reading a shift never computed.
    fit_to_height = bool(scale_canvas_h > scale_canvas_w)
    shift_w = 0
    shift_h = 0
    if fit_to_height:
        # Height is the binding dimension: scale the width by (almost) the
        # same factor, snapped to a whole number of units, and centre the
        # result horizontally inside the target box.
        wsize = int(out_width * scale_canvas_h)
        scale_canvas_w = wsize / out_width
        shift_w = -out_c_w * scale_canvas_w + c_w
    else:
        # Width is the binding dimension: same treatment on the vertical axis.
        hsize = int(out_height * scale_canvas_w)
        scale_canvas_h = hsize / out_height
        shift_h = -out_c_h * scale_canvas_h + c_h

    for path in shapes:
        # Scale about the origin, then move the scaled box's min corner onto
        # the target box's min corner (plus the centring shift on one axis).
        path.points[:, 0] = path.points[:, 0] * scale_canvas_w - out_w_min * scale_canvas_w + w_min + shift_w
        path.points[:, 1] = path.points[:, 1] * scale_canvas_h - out_h_min * scale_canvas_h + h_min + shift_h

    # The j-th output shape replaces the j-th collected letter shape.
    for j, shape_id in enumerate(letter_ids):
        shapes_word[shape_id] = shapes[j]

    save_svg.save_svg(
        f"{experiment_dir}/{font}_{word}_{letter}.svg", canvas_width, canvas_height, shapes_word,
        shape_groups_word)

    # Rasterized preview, currently disabled:
    # render = pydiffvg.RenderFunction.apply
    # scene_args = pydiffvg.RenderFunction.serialize_scene(canvas_width, canvas_height, shapes_word, shape_groups_word)
    # img = render(canvas_width, canvas_height, 2, 2, 0, None, *scene_args)
    # img = img[:, :, 3:4] * img[:, :, :3] + \
    #     torch.ones(img.shape[0], img.shape[1], 3, device="cuda") * (1 - img[:, :, 3:4])
    # img = img[:, :, :3]
    # save_image(img, f"{experiment_dir}/{font}_{word}_{letter}.png")


def _bounding_box(shapes):
    """Return ``(x_min, x_max, y_min, y_max)`` over the points of ``shapes``."""
    x_min = min(torch.min(shape.points[:, 0]) for shape in shapes)
    x_max = max(torch.max(shape.points[:, 0]) for shape in shapes)
    y_min = min(torch.min(shape.points[:, 1]) for shape in shapes)
    y_max = max(torch.max(shape.points[:, 1]) for shape in shapes)
    return x_min, x_max, y_min, y_max
def create_video(num_iter, experiment_dir, video_frame_freq):
    """Assemble saved iteration frames into ``{experiment_dir}/video.mp4``.

    Frames are read from ``{experiment_dir}/video-png/iter{ii:04d}.png`` for
    every iteration ``ii`` that is a multiple of ``video_frame_freq`` plus the
    last iteration, and encoded with the ``mp4v`` codec at 30 fps.

    Args:
        num_iter: Total number of iterations that were run.
        experiment_dir: Directory containing ``video-png`` and receiving
            ``video.mp4``.
        video_frame_freq: Interval, in iterations, between saved frames
            (must be non-zero).
    """
    frames = []
    for ii in range(num_iter):
        if ii % video_frame_freq == 0 or ii == num_iter - 1:
            filename = os.path.join(
                experiment_dir, "video-png", f"iter{ii:04d}.png")
            img = cv2.imread(filename)
            if img is None:
                # cv2.imread signals a missing/unreadable file by returning
                # None; such a frame cannot be written, so skip it.
                print(f"create_video: skipping unreadable frame {filename}")
                continue
            frames.append(img)

    video_name = os.path.join(experiment_dir, "video.mp4")
    check_and_create_dir(video_name)

    # The writer only accepts frames matching its declared size, so take the
    # size from the first frame; keep the former fixed 600x600 as fallback.
    if frames:
        height, width = frames[0].shape[:2]
        frame_size = (width, height)
    else:
        frame_size = (600, 600)

    out = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'mp4v'), 30.0, frame_size)
    try:
        for frame in frames:
            if (frame.shape[1], frame.shape[0]) != frame_size:
                # Bring stray frames to the common size rather than handing
                # the writer a frame of the wrong dimensions.
                frame = cv2.resize(frame, frame_size)
            out.write(frame)
    finally:
        # Always finalize the container, even if writing a frame fails.
        out.release()
|