Badr AlKhamissi committed on
Commit
725ab64
1 Parent(s): e53cddc

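added new features: user-settable seed and advanced loss-weight parameters, GIF export of the optimization process, and multi-word input (single-word check disabled)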

Browse files
Files changed (3) hide show
  1. app.py +74 -27
  2. code/config.py +2 -2
  3. requirements.txt +1 -0
app.py CHANGED
@@ -7,6 +7,7 @@ import os.path as osp
7
  import random
8
  import numpy.random as npr
9
  import sys
 
10
 
11
  # sys.path.append('./code')
12
 
@@ -29,7 +30,7 @@ from diffusers import StableDiffusionPipeline
29
 
30
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
31
 
32
- # model = None
33
  model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
34
 
35
  from typing import Mapping
@@ -56,7 +57,6 @@ TITLE="""<h1 style="font-size: 42px;" align="center">Word-To-Image: Morphing Ara
56
  DESCRIPTION="""This demo builds on the [Word-As-Image for Semantic Typography](https://wordasimage.github.io/Word-As-Image-Page/) work to support Arabic fonts and morphing whole words into semantic concepts. It is part of an ongoing project with the [ARBML](https://arbml.github.io/website/) community."""
57
 
58
  # DESCRIPTION += '\n<p>This demo is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"> Creative Commons Attribution-ShareAlike 4.0 International License</a>.</p>'
59
- # DESCRIPTION += """<br>For faster inference without waiting in queue, you can [![]()]()"""
60
  DESCRIPTION += '\n<p>For faster inference without waiting in queue, you can <a href="https://colab.research.google.com/drive/1wobOAsnLpkIzaRxG5yac8NcV7iCrlycP"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></p>'
61
 
62
  if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
@@ -74,7 +74,7 @@ pydiffvg.set_print_timing(False)
74
  gamma = 1.0
75
 
76
 
77
- def set_config(semantic_concept, word, prompt, font_name, num_steps):
78
 
79
  cfg_d = edict()
80
  cfg_d.config = "code/config/base.yaml"
@@ -95,16 +95,22 @@ def set_config(semantic_concept, word, prompt, font_name, num_steps):
95
  del cfgs
96
 
97
  cfg.semantic_concept = semantic_concept
 
98
  cfg.word = word
99
  cfg.optimized_letter = word
100
  cfg.font = font_name
101
- cfg.seed = 0
102
  cfg.num_iter = num_steps
103
  cfg.batch_size = 1
 
 
 
 
104
 
105
- if ' ' in cfg.word:
106
- raise gr.Error(f'should be only one word')
107
- cfg.caption = prompt
 
108
  cfg.log_dir = f"output/{cfg.experiment}_{cfg.word}"
109
  if cfg.optimized_letter in cfg.word:
110
  cfg.optimized_letter = cfg.optimized_letter
@@ -151,14 +157,14 @@ def init_shapes(svg_path, trainable: Mapping[str, bool]):
151
  return shapes_init, shape_groups_init, parameters
152
 
153
 
154
- def run_main_ex(word, semantic_concept, num_steps):
155
- prompt = f"a {semantic_concept}. minimal flat 2d vector. lineal color. trending on artstation"
156
  font_name = "ArefRuqaa"
157
- return list(next(run_main_app(semantic_concept, word, prompt, font_name, num_steps, 0)))
158
 
159
- def run_main_app(semantic_concept, word, prompt, font_name, num_steps, example=0):
160
 
161
- cfg = set_config(semantic_concept, word, prompt, font_name, num_steps)
162
 
163
  pydiffvg.set_use_gpu(torch.cuda.is_available())
164
 
@@ -204,6 +210,7 @@ def run_main_app(semantic_concept, word, prompt, font_name, num_steps, example=0
204
  print("start training")
205
  # training loop
206
  t_range = tqdm(range(num_iter))
 
207
  for step in t_range:
208
  optim.zero_grad()
209
 
@@ -215,9 +222,10 @@ def run_main_app(semantic_concept, word, prompt, font_name, num_steps, example=0
215
  img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (
216
  1 - img[:, :, 3:4])
217
  img = img[:, :, :3]
 
218
 
219
- filename = os.path.join(
220
- cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
221
  check_and_create_dir(filename)
222
  save_svg.save_svg(filename, w, h, shapes, shape_groups)
223
  if not example:
@@ -250,8 +258,10 @@ def run_main_app(semantic_concept, word, prompt, font_name, num_steps, example=0
250
 
251
  combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir, device)
252
 
253
- image = os.path.join(cfg.experiment_dir,f"{cfg.font}_{cfg.word}_{cfg.optimized_letter}.svg")
254
- yield gr.update(value=filename_init,visible=True),gr.update(visible=True),gr.update(value=image,visible=True)
 
 
255
 
256
 
257
  def change_prompt(concept, prompt_suffix):
@@ -294,6 +304,37 @@ with gr.Blocks() as demo:
294
  value="a {concept}. minimal flat 2d vector. lineal color. trending on artstation."
295
  )
296
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  semantic_concept.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
298
  prompt_suffix.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
299
 
@@ -301,7 +342,7 @@ with gr.Blocks() as demo:
301
  minimum=0,
302
  maximum=500,
303
  step=10,
304
- value=500)
305
 
306
  font_name = gr.Text(value=None,visible=False,label="Font Name")
307
 
@@ -314,25 +355,26 @@ with gr.Blocks() as demo:
314
  run = gr.Button('Generate')
315
 
316
  with gr.Column():
317
- result0 = gr.Image(type="filepath", label="Initial Word").style(height=170)
318
  result1 = gr.Image(type="filepath", label="Optimization Process").style(height=300)
319
- result2 = gr.Image(type="filepath", label="Final Result",visible=False).style(height=100)
320
 
321
 
322
  with gr.Row():
323
  # examples
324
  examples = [
325
- ["قطة", "Cat", 500],
326
- ["كلب", "Dog", 500],
327
- ["حصان", "Horse", 500],
328
- ["أخطبوط", "Octopus", 500],
329
  ]
330
- demo.queue(max_size=10, concurrency_count=2)
331
  gr.Examples(examples=examples,
332
  inputs=[
333
  word,
334
  semantic_concept,
335
- num_steps
 
336
  ],
337
  outputs=[
338
  result0,
@@ -347,9 +389,14 @@ with gr.Blocks() as demo:
347
  inputs = [
348
  semantic_concept,
349
  word,
350
- prompt,
351
  font_name,
352
- num_steps
 
 
 
 
 
353
  ]
354
 
355
  outputs = [
 
7
  import random
8
  import numpy.random as npr
9
  import sys
10
+ import imageio
11
 
12
  # sys.path.append('./code')
13
 
 
30
 
31
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
32
 
33
+ model = None
34
  model = StableDiffusionPipeline.from_pretrained("runwayml/stable-diffusion-v1-5").to(device)
35
 
36
  from typing import Mapping
 
57
  DESCRIPTION="""This demo builds on the [Word-As-Image for Semantic Typography](https://wordasimage.github.io/Word-As-Image-Page/) work to support Arabic fonts and morphing whole words into semantic concepts. It is part of an ongoing project with the [ARBML](https://arbml.github.io/website/) community."""
58
 
59
  # DESCRIPTION += '\n<p>This demo is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/"> Creative Commons Attribution-ShareAlike 4.0 International License</a>.</p>'
 
60
  DESCRIPTION += '\n<p>For faster inference without waiting in queue, you can <a href="https://colab.research.google.com/drive/1wobOAsnLpkIzaRxG5yac8NcV7iCrlycP"><img style="display: inline; margin-top: 0em; margin-bottom: 0em" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a></p>'
61
 
62
  if (SPACE_ID := os.getenv('SPACE_ID')) is not None:
 
74
  gamma = 1.0
75
 
76
 
77
+ def set_config(semantic_concept, word, prompt_suffix, font_name, num_steps, seed, dist_loss_weight, pixel_dist_kernel_blur, pixel_dist_sigma, angeles_w):
78
 
79
  cfg_d = edict()
80
  cfg_d.config = "code/config/base.yaml"
 
95
  del cfgs
96
 
97
  cfg.semantic_concept = semantic_concept
98
+ cfg.prompt_suffix = prompt_suffix
99
  cfg.word = word
100
  cfg.optimized_letter = word
101
  cfg.font = font_name
102
+ cfg.seed = seed
103
  cfg.num_iter = num_steps
104
  cfg.batch_size = 1
105
+ cfg.loss.tone.dist_loss_weight = dist_loss_weight
106
+ cfg.loss.tone.pixel_dist_kernel_blur = pixel_dist_kernel_blur
107
+ cfg.loss.tone.pixel_dist_sigma = pixel_dist_sigma
108
+ cfg.loss.conformal.angeles_w = angeles_w
109
 
110
+ # if ' ' in cfg.word:
111
+ # raise gr.Error(f'should be only one word')
112
+
113
+ cfg.caption = f"a {cfg.semantic_concept}. {cfg.prompt_suffix}"
114
  cfg.log_dir = f"output/{cfg.experiment}_{cfg.word}"
115
  if cfg.optimized_letter in cfg.word:
116
  cfg.optimized_letter = cfg.optimized_letter
 
157
  return shapes_init, shape_groups_init, parameters
158
 
159
 
160
+ def run_main_ex(word, semantic_concept, num_steps, seed):
161
+ prompt_suffix = "minimal flat 2d vector. lineal color. trending on artstation"
162
  font_name = "ArefRuqaa"
163
+ return list(next(run_main_app(semantic_concept, word, prompt_suffix, font_name, num_steps, seed, 100, 201, 30, 0.5, 0)))
164
 
165
+ def run_main_app(semantic_concept, word, prompt_suffix, font_name, num_steps, seed, dist_loss_weight, pixel_dist_kernel_blur, pixel_dist_sigma, angeles_w, example=0):
166
 
167
+ cfg = set_config(semantic_concept, word, prompt_suffix, font_name, num_steps, seed, dist_loss_weight, pixel_dist_kernel_blur, pixel_dist_sigma, angeles_w)
168
 
169
  pydiffvg.set_use_gpu(torch.cuda.is_available())
170
 
 
210
  print("start training")
211
  # training loop
212
  t_range = tqdm(range(num_iter))
213
+ gif_frames = []
214
  for step in t_range:
215
  optim.zero_grad()
216
 
 
222
  img = img[:, :, 3:4] * img[:, :, :3] + torch.ones(img.shape[0], img.shape[1], 3, device=device) * (
223
  1 - img[:, :, 3:4])
224
  img = img[:, :, :3]
225
+ gif_frames += [img]
226
 
227
+
228
+ filename = os.path.join(cfg.experiment_dir, "video-svg", f"iter{step:04d}.svg")
229
  check_and_create_dir(filename)
230
  save_svg.save_svg(filename, w, h, shapes, shape_groups)
231
  if not example:
 
258
 
259
  combine_word(cfg.word, cfg.optimized_letter, cfg.font, cfg.experiment_dir, device)
260
 
261
+ filename = os.path.join(cfg.experiment_dir, "final.gif")
262
+ imageio.mimsave(filename, gif_frames)
263
+
264
+ yield gr.update(value=filename_init,visible=True),gr.update(visible=False),gr.update(value=filename,visible=True)
265
 
266
 
267
  def change_prompt(concept, prompt_suffix):
 
304
  value="a {concept}. minimal flat 2d vector. lineal color. trending on artstation."
305
  )
306
 
307
+ with gr.Row():
308
+
309
+ with gr.Accordion("Advanced Parameters", open=False, visible=True):
310
+
311
+ seed = gr.Number(
312
+ label='Seed',
313
+ value=42
314
+ )
315
+
316
+ angeles_w = gr.Number(
317
+ label='ACAP Deformation Loss Weight',
318
+ value=0.5
319
+ )
320
+
321
+ dist_loss_weight = gr.Number(
322
+ label='Tone Loss: dist_loss_weight',
323
+ value=100
324
+ )
325
+
326
+ pixel_dist_kernel_blur = gr.Number(
327
+ label='Tone Loss: pixel_dist_kernel_blur',
328
+ value=201
329
+ )
330
+
331
+ pixel_dist_sigma = gr.Number(
332
+ label='Tone Loss: pixel_dist_sigma',
333
+ value=30
334
+ )
335
+
336
+
337
+
338
  semantic_concept.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
339
  prompt_suffix.change(change_prompt, [semantic_concept, prompt_suffix], prompt)
340
 
 
342
  minimum=0,
343
  maximum=500,
344
  step=10,
345
+ value=250)
346
 
347
  font_name = gr.Text(value=None,visible=False,label="Font Name")
348
 
 
355
  run = gr.Button('Generate')
356
 
357
  with gr.Column():
358
+ result0 = gr.Image(type="filepath", label="Initial Word").style(height=250)
359
  result1 = gr.Image(type="filepath", label="Optimization Process").style(height=300)
360
+ result2 = gr.Image(type="filepath", label="Final Result",visible=False).style(height=300)
361
 
362
 
363
  with gr.Row():
364
  # examples
365
  examples = [
366
+ ["قطة", "Cat", 250, 42],
367
+ ["كلب", "Dog", 250, 42],
368
+ ["حصان", "Horse", 250, 42],
369
+ ["أخطبوط", "Octopus", 250, 42],
370
  ]
371
+ demo.queue(max_size=10, concurrency_count=1)
372
  gr.Examples(examples=examples,
373
  inputs=[
374
  word,
375
  semantic_concept,
376
+ num_steps,
377
+ seed
378
  ],
379
  outputs=[
380
  result0,
 
389
  inputs = [
390
  semantic_concept,
391
  word,
392
+ prompt_suffix,
393
  font_name,
394
+ num_steps,
395
+ seed,
396
+ dist_loss_weight,
397
+ pixel_dist_kernel_blur,
398
+ pixel_dist_sigma,
399
+ angeles_w
400
  ]
401
 
402
  outputs = [
code/config.py CHANGED
@@ -40,8 +40,8 @@ def parse_args():
40
  cfg.font = args.font
41
  cfg.semantic_concept = args.semantic_concept
42
  cfg.word = cfg.semantic_concept if args.word == "none" else args.word
43
- if " " in cfg.word:
44
- raise ValueError(f'no spaces are allowed')
45
  if "jpeg" in args.semantic_concept:
46
  cfg.caption = args.semantic_concept
47
  else:
 
40
  cfg.font = args.font
41
  cfg.semantic_concept = args.semantic_concept
42
  cfg.word = cfg.semantic_concept if args.word == "none" else args.word
43
+ # if " " in cfg.word:
44
+ # raise ValueError(f'no spaces are allowed')
45
  if "jpeg" in args.semantic_concept:
46
  cfg.caption = args.semantic_concept
47
  else:
requirements.txt CHANGED
@@ -5,6 +5,7 @@ torchvision==0.13.1+cu113
5
  cmake
6
  numpy
7
  scikit-image
 
8
  ffmpeg
9
  svgwrite
10
  svgpathtools
 
5
  cmake
6
  numpy
7
  scikit-image
8
+ imageio
9
  ffmpeg
10
  svgwrite
11
  svgpathtools