import gradio as gr
import os
import argparse
from easydict import EasyDict as edict
import yaml
import os.path as osp
import random
import numpy.random as npr
import sys
# sys.path.append('./code')
sys.path.append('/home/user/app/code')
# set up diffvg
# os.system('git clone https://github.com/BachiLi/diffvg.git')
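# build the diffvg differentiable rasterizer from its git submodule and
# install it into the user site-packages (compiled once at Space startup)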
os.system('git submodule update --init')
os.chdir('diffvg')
os.system('git submodule update --init --recursive')
os.system('python setup.py install --user')
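# make the freshly built diffvg egg importable; the hard-coded path assumes
# Python 3.8 on the host machine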
sys.path.append("/home/user/.local/lib/python3.8/site-packages/diffvg-0.0.1-py3.8-linux-x86_64.egg")
os.chdir('/home/user/app')
import torch
from diffusers import StableDiffusionPipeline
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
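# load Stable Diffusion v1.5 once at startup; SDSLoss below uses it to score
# the rendered letters against the text prompt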
model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
from typing import Mapping
from tqdm import tqdm
from torch.optim.lr_scheduler import LambdaLR
import pydiffvg
import save_svg
from losses import SDSLoss, ToneLoss, ConformalLoss
from utils import (
edict_2_dict,
update,
check_and_create_dir,
get_data_augs,
save_image,
preprocess,
learning_rate_decay,
combine_word)
import warnings
TITLE="""
Word-To-Image: Morphing Arabic Text to a Visual Representation
"""
DESCRIPTION="""This demo builds on the [Word-As-Image for Semantic Typography](https://wordasimage.github.io/Word-As-Image-Page/) work to support Arabic fonts and morphing whole words into semantic concepts. It is part of an ongoing project with the [ARBML](https://arbml.github.io/website/) community."""
# DESCRIPTION += '\nThis demo is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License.'
if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
    DESCRIPTION += f'\nFor faster inference without waiting in queue, you can [duplicate the space](https://huggingface.co/spaces/{SPACE_ID}?duplicate=true) and upgrade to GPU in settings.'
warnings.filterwarnings("ignore")
pydiffvg.set_print_timing(False)
gamma = 1.0
def set_config(semantic_concept, word, prompt, font_name, num_steps):
cfg_d = edict()
cfg_d.config = "code/config/base.yaml"
cfg_d.experiment = "demo"
with open(cfg_d.config, 'r') as f:
cfg_full = yaml.load(f, Loader=yaml.FullLoader)
cfg_key = cfg_d.experiment
cfgs = [cfg_d]
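    # follow the chain of parent_config pointers; the configs are merged
    # root-first below so child settings override their parents' defaults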
while cfg_key:
cfgs.append(cfg_full[cfg_key])
cfg_key = cfgs[-1].get('parent_config', 'baseline')
cfg = edict()
for options in reversed(cfgs):
update(cfg, options)
del cfgs
cfg.semantic_concept = semantic_concept
cfg.word = word
cfg.optimized_letter = word
cfg.font = font_name
cfg.seed = 0
cfg.num_iter = num_steps
cfg.batch_size = 1
    if ' ' in cfg.word:
        raise gr.Error('Text must be a single word (spaces are not allowed).')
cfg.caption = prompt
cfg.log_dir = f"output/{cfg.experiment}_{cfg.word}"
    if cfg.optimized_letter not in cfg.word:
        raise gr.Error('The optimized letter must appear in the word.')
cfg.letter = f"{cfg.font}_{cfg.optimized_letter}_scaled"
cfg.target = f"code/data/init/{cfg.letter}"
# set experiment dir
signature = f"{cfg.letter}_concept_{cfg.semantic_concept}_seed_{cfg.seed}"
cfg.experiment_dir = \
osp.join(cfg.log_dir, cfg.font, signature)
configfile = osp.join(cfg.experiment_dir, 'config.yaml')
# create experiment dir and save config
check_and_create_dir(configfile)
    with open(configfile, 'w') as f:
yaml.dump(edict_2_dict(cfg), f)
    if cfg.seed is not None:
        random.seed(cfg.seed)
        npr.seed(cfg.seed)
        torch.manual_seed(cfg.seed)
        torch.backends.cudnn.benchmark = False
    else:
        raise ValueError('cfg.seed must be set for reproducible runs')
return cfg
def init_shapes(svg_path, trainable: Mapping[str, bool]):
svg = f'{svg_path}.svg'
canvas_width, canvas_height, shapes_init, shape_groups_init = pydiffvg.svg_to_scene(svg)
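    # parse the SVG into canvas dimensions plus differentiable path tensors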
parameters = edict()
# path points
if trainable.point:
parameters.point = []
for path in shapes_init:
path.points.requires_grad = True
parameters.point.append(path.points)
return shapes_init, shape_groups_init, parameters
def run_main_ex(word, semantic_concept, num_steps):
prompt = f"a {semantic_concept}. minimal flat 2d vector. lineal color. trending on artstation"
font_name = "ArefRuqaa"
    # example=1 makes run_main_app skip the intermediate yields, so the
    # single next() call below returns the final results
    return list(next(run_main_app(semantic_concept, word, prompt, font_name, num_steps, 1)))
def run_main_app(semantic_concept, word, prompt, font_name, num_steps, example=0):
cfg = set_config(semantic_concept, word, prompt, font_name, num_steps)
pydiffvg.set_use_gpu(torch.cuda.is_available())
print("preprocessing")
preprocess(cfg.font, cfg.word, cfg.optimized_letter, cfg.level_of_cc)
filename_init = os.path.join("code/data/init/", f"{cfg.font}_{cfg.word}_scaled.svg").replace(" ", "_")
if not example:
        yield gr.update(value=filename_init, visible=True), gr.update(visible=True, label=f'iters: 0 / {cfg.num_iter}'), gr.update(visible=False)
sds_loss = SDSLoss(cfg, device, model)
h, w = cfg.render_size, cfg.render_size
data_augs = get_data_augs(cfg.cut_size)
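    # augmentations applied to renders before the SDS loss (random
    # perspective/crop in the original Word-As-Image code)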
render = pydiffvg.RenderFunction.apply
# initialize shape
print('initializing shape')
shapes, shape_groups, parameters = init_shapes(svg_path=cfg.target, trainable=cfg.trainable)
scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
img_init = render(w, h, 2, 2, 0, None, *scene_args)
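    # alpha-composite the initial rendering over a white background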
img_init = img_init[:, :, 3:4] * img_init[:, :, :3] + \
torch.ones(img_init.shape[0], img_init.shape[1], 3, device=device) * (1 - img_init[:, :, 3:4])
img_init = img_init[:, :, :3]
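    # the tone loss penalizes drift from the blurred tone map of the
    # initial letter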
tone_loss = ToneLoss(cfg)
tone_loss.set_image_init(img_init)
num_iter = cfg.num_iter
pg = [{'params': parameters["point"], 'lr': cfg.lr_base["point"]}]
optim = torch.optim.Adam(pg, betas=(0.9, 0.9), eps=1e-6)
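    # the conformal loss keeps the angles of the glyph's triangulation close
    # to their initial values, preserving the letter's structure (per the
    # Word-As-Image paper)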
conformal_loss = ConformalLoss(parameters, device, cfg.optimized_letter, shape_groups)
lr_lambda = lambda step: learning_rate_decay(step, cfg.lr.lr_init, cfg.lr.lr_final, num_iter,
lr_delay_steps=cfg.lr.lr_delay_steps,
lr_delay_mult=cfg.lr.lr_delay_mult) / cfg.lr.lr_init
    scheduler = LambdaLR(optim, lr_lambda=lr_lambda, last_epoch=-1)  # effective lr = base lr * lr_lambda(step)
print("start training")
# training loop
t_range = tqdm(range(num_iter))
for step in t_range:
optim.zero_grad()
# render image
scene_args = pydiffvg.RenderFunction.serialize_scene(w, h, shapes, shape_groups)
img = render(w, h, 2, 2, step, None, *scene_args)
# compose image with white background
img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (
1 - img[:, :, 3:4])
img = img[:, :, :3]
filename = os.path.join(
cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
check_and_create_dir(filename)
save_svg.save_svg(filename, w, h, shapes, shape_groups)
if not example:
            yield gr.update(visible=True), gr.update(value=filename, label=f'iters: {step} / {num_iter}', visible=True), gr.update(visible=False)
x = img.unsqueeze(0).permute(0, 3, 1, 2) # HWC -> NCHW
x = x.repeat(cfg.batch_size, 1, 1, 1)
x_aug = data_augs.forward(x)
        # score distillation sampling (SDS) loss from the diffusion model
        loss = sds_loss(x_aug)
tone_loss_res = tone_loss(x, step)
loss = loss + tone_loss_res
        # angle-preserving (conformal) regularizer; 'angeles_w' is the weight
        # key used in the repo's base.yaml
        loss_angles = conformal_loss()
        loss_angles = cfg.loss.conformal.angeles_w * loss_angles
        loss = loss + loss_angles
loss.backward()
optim.step()
scheduler.step()
filename = os.path.join(
cfg.experiment_dir, "output-svg", "output.svg")
check_and_create_dir(filename)
save_svg.save_svg(
filename, w, h, shapes, shape_groups)
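    # merge the optimized letter(s) back into the full word and write the
    # final SVG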
combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir, device)
    image = os.path.join(cfg.experiment_dir, f"{cfg.font}_{cfg.word}_{cfg.optimized_letter}.svg")
    yield gr.update(value=filename_init, visible=True), gr.update(visible=True), gr.update(value=image, visible=True)
def change_prompt(concept, prompt_suffix):
if concept == "":
concept = "{concept}"
return f"a {concept}. {prompt_suffix}"
with gr.Blocks() as demo:
gr.HTML(TITLE)
gr.Markdown(DESCRIPTION)
with gr.Row():
with gr.Column():
word = gr.Text(
label='Text',
max_lines=1,
                placeholder='Enter text. For example: حصان'
            )
semantic_concept = gr.Text(
label='Concept',
max_lines=1,
                placeholder='Enter a semantic concept that you want your text to morph into (in English). For example: horse'
            )
prompt_suffix = gr.Text(
label='Prompt Suffix',
max_lines=1,
value="minimal flat 2d vector. lineal color. trending on artstation"
)
prompt = gr.Text(
label='Prompt',
max_lines=1,
value="a {concept}. minimal flat 2d vector. lineal color. trending on artstation."
)
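            # keep the full prompt in sync with the concept and suffix fields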
semantic_concept.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
prompt_suffix.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
num_steps = gr.Slider(label='Optimization Iterations',
minimum=0,
maximum=500,
step=10,
value=500)
            font_name = gr.Text(value="ArefRuqaa", visible=False, label="Font Name")
run = gr.Button('Generate')
with gr.Column():
result0 = gr.Image(type="filepath", label="Initial Word").style(height=333)
result1 = gr.Image(type="filepath", label="Optimization Process").style(height=333)
result2 = gr.Image(type="filepath", label="Final Result",visible=False).style(height=333)
with gr.Row():
# examples
examples = [
["قطة", "Cat", 500],
["كلب", "Dog", 500],
["حصان", "Horse", 500],
["أخطبوط", "Octopus", 500],
]
demo.queue(max_size=10, concurrency_count=2)
gr.Examples(examples=examples,
inputs=[
word,
semantic_concept,
num_steps
],
outputs=[
result0,
result1,
result2
],
fn=run_main_ex,
cache_examples=False)
# inputs
inputs = [
semantic_concept,
word,
prompt,
font_name,
num_steps
]
outputs = [
result0,
result1,
result2
]
run.click(fn=run_main_app, inputs=inputs, outputs=outputs, queue=True)
demo.launch(share=False)