gyrojeff committed on
Commit
eafaf77
1 Parent(s): 8f19579

fix: refine workflow, enforce minimum font size, tune params

Browse files
Files changed (1) hide show
  1. font_dataset/layout.py +196 -212
font_dataset/layout.py CHANGED
@@ -1,4 +1,4 @@
1
- __all__ = ["generate"]
2
 
3
 
4
  epislon = 1e-6
@@ -47,8 +47,8 @@ stroke_width_max_ratio = 0.25
47
  assert gray_ratio + color_ratio - 1 < epislon
48
 
49
  # clip size ratio
50
- clip_width_max_ratio = 0.7
51
- clip_width_min_ratio = 0.1
52
  clip_width_height_min_ratio = 0.75
53
  clip_width_height_max_ratio = 1.25
54
 
@@ -69,9 +69,9 @@ assert no_rotation_ratio + rotation_ratio - 1 < epislon
69
  # in degree
70
  rotation_max_angle = 30
71
 
72
- # ratio of dataset size for cjk
73
- cjk_ratio = 3
74
 
 
75
  cjk_distribution = {
76
  "ja": 0.3,
77
  "ko": 0.2,
@@ -83,17 +83,10 @@ cjk_distribution = {
83
 
84
  assert sum(cjk_distribution.values()) - 1 < epislon
85
 
86
- train_cnt = 100
87
- val_cnt = 10
88
- test_cnt = 30
89
-
90
- train_cnt_cjk = int(train_cnt * cjk_ratio)
91
- val_cnt_cjk = int(val_cnt * cjk_ratio)
92
- test_cnt_cjk = int(test_cnt * cjk_ratio)
93
-
94
 
95
  import math
96
  import random
 
97
  from PIL import Image, ImageDraw, ImageFont
98
  from .fontlabel import FontLabel
99
  from .font import DSFont
@@ -242,209 +235,200 @@ def RGB2RGBA(color):
242
  return color + (255,)
243
 
244
 
245
- def generate(
246
  img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
247
  ) -> tuple[Image.Image, FontLabel]:
248
- while True:
249
- try:
250
- im = Image.open(img_path)
251
- # crop image
252
- width, height = im.size
253
- clip_width = random.randint(
254
- int(width * clip_width_min_ratio), int(width * clip_width_max_ratio)
255
- )
256
- clip_height = random.randint(
257
- int(clip_width * clip_width_height_min_ratio),
258
- int(clip_width * clip_width_height_max_ratio),
259
- )
260
- if clip_height > height:
261
- clip_height = height
262
- clip_x = random.randint(0, width - clip_width)
263
- clip_y = random.randint(0, height - clip_height)
264
- im = im.crop((clip_x, clip_y, clip_x + clip_width, clip_y + clip_height))
265
-
266
- # language
267
- render_language = font.language
268
- if render_language == "CJK":
269
- render_language = random.choices(
270
- list(cjk_distribution.keys()), list(cjk_distribution.values())
271
- )[0]
272
-
273
- # text direction
274
- if random.random() < ltr_ratio:
275
- text_direction = "ltr"
276
- else:
277
- text_direction = "ttb"
278
 
279
- # text length
280
- if random.random() < short_ratio:
281
- text = corpus_manager.generate(short_condition, font, render_language)
282
- elif random.random() < median_ratio:
283
- text = corpus_manager.generate(median_condition, font, render_language)
284
- else:
285
- text = corpus_manager.generate(long_condition, font, render_language)
286
-
287
- # text color & stroke
288
- if random.random() < gray_ratio:
289
- text_color = random.randint(0, 255)
290
- text_color = (text_color, text_color, text_color)
291
- # no stroke in gray
292
- stroke_ratio = 0
293
- stroke_color = None
294
- im = im.convert("L")
295
- else:
296
- text_color = random_color()
297
- # whether use stroke
298
- if random.random() < pure_color_ratio:
299
- stroke_ratio = 0
300
- stroke_color = None
301
- else:
302
- stroke_ratio = random.random() * stroke_width_max_ratio
303
- stroke_color = random_color()
304
-
305
- # line spacing
306
- line_spacing_ratio = (
307
- random.random() * (line_spacing_max_ratio - line_spacing_min_ratio)
308
- + line_spacing_min_ratio
309
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
310
 
311
- # calculate render ratio
312
- render_calculation_stroke_width = int(
313
- stroke_ratio * render_calculation_size
314
- )
315
- render_calculation_line_spacing = int(
316
- line_spacing_ratio * render_calculation_size
317
- )
 
 
 
 
 
318
 
319
- pil_font = ImageFont.truetype(font.path, size=render_calculation_size)
320
- text_bbox = render_bbox(
321
- ImageDraw.Draw(im),
322
- (0, 0),
323
- text,
324
- font=pil_font,
325
- direction=text_direction,
326
- spacing=render_calculation_line_spacing,
327
- stroke_width=render_calculation_stroke_width,
328
- language=render_language,
329
- )
330
- (
331
- render_calculation_width_no_rotation,
332
- render_calculation_height_no_rotation,
333
- ) = (text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1])
334
- render_calculation_font_x_no_rotation = text_bbox[0]
335
- render_calculation_font_y_no_rotation = text_bbox[1]
336
-
337
- if random.random() < no_rotation_ratio:
338
- render_angle = 0
339
-
340
- render_calculation_width = render_calculation_width_no_rotation
341
- render_calculation_height = render_calculation_height_no_rotation
342
- else:
343
- render_angle = random.randint(-rotation_max_angle, rotation_max_angle)
344
-
345
- render_calculation_width = int(
346
- render_calculation_width_no_rotation
347
- * math.cos(math.radians(abs(render_angle)))
348
- + render_calculation_height_no_rotation
349
- * math.sin(math.radians(abs(render_angle)))
350
- )
351
- render_calculation_height = int(
352
- render_calculation_width_no_rotation
353
- * math.sin(math.radians(abs(render_angle)))
354
- + render_calculation_height_no_rotation
355
- * math.cos(math.radians(abs(render_angle)))
356
- )
357
-
358
- # calculate render size
359
- render_ratio = (
360
- random.random() * (text_longer_max_ratio - text_longer_min_ratio)
361
- + text_longer_min_ratio
362
- )
363
- if (
364
- render_calculation_width / render_calculation_height
365
- < clip_width / clip_height
366
- ):
367
- # height is the limit
368
- render_height = int(clip_height * render_ratio)
369
- render_width = int(
370
- render_calculation_width / render_calculation_height * render_height
371
- )
372
- else:
373
- # width is the limit
374
- render_width = int(clip_width * render_ratio)
375
- render_height = int(
376
- render_calculation_height / render_calculation_width * render_width
377
- )
378
-
379
- # calculate text size
380
- text_size = int(
381
- render_calculation_size * render_height / render_calculation_height
382
- )
383
- render_width_no_rotation = int(
384
- render_calculation_width_no_rotation
385
- / render_calculation_height
386
- * render_height
387
- )
388
- render_height_no_rotation = int(
389
- render_calculation_height_no_rotation
390
- / render_calculation_height
391
- * render_height
392
- )
393
- render_font_x_no_rotation = int(
394
- render_calculation_font_x_no_rotation
395
- / render_calculation_height
396
- * render_height
397
- )
398
- render_font_y_no_rotation = int(
399
- render_calculation_font_y_no_rotation
400
- / render_calculation_height
401
- * render_height
402
- )
403
- stroke_width = int(text_size * stroke_ratio)
404
- line_spacing = int(text_size * line_spacing_ratio)
405
 
406
- # calculate render position
407
- render_x = random.randint(0, clip_width - render_width)
408
- render_y = random.randint(0, clip_height - render_height)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
 
410
- font_image = Image.new(
411
- "RGBA",
412
- (render_width_no_rotation, render_height_no_rotation),
413
- (0, 0, 0, 0),
414
- )
415
- pil_font = ImageFont.truetype(font.path, size=text_size)
416
- render_text(
417
- ImageDraw.Draw(font_image),
418
- (-render_font_x_no_rotation, -render_font_y_no_rotation),
419
- text,
420
- font=pil_font,
421
- fill=RGB2RGBA(text_color),
422
- direction=text_direction,
423
- spacing=line_spacing,
424
- stroke_width=stroke_width,
425
- stroke_fill=RGB2RGBA(stroke_color),
426
- language=render_language,
427
- )
428
- if rotation_max_angle != 0:
429
- font_image = font_image.rotate(
430
- render_angle, expand=True, fillcolor=(0, 0, 0, 0)
431
- )
432
-
433
- im.paste(font_image, (render_x, render_y), font_image)
434
- return im, FontLabel(
435
- clip_width,
436
- clip_height,
437
- text,
438
- font,
439
- text_color,
440
- text_size,
441
- text_direction,
442
- stroke_width,
443
- stroke_color,
444
- line_spacing,
445
- render_language,
446
- (render_x, render_y, render_width, render_height),
447
- render_angle,
448
- )
449
- except Exception as e:
450
- print(e)
 
1
+ __all__ = ["generate_font_image"]
2
 
3
 
4
  epislon = 1e-6
 
47
  assert gray_ratio + color_ratio - 1 < epislon
48
 
49
  # clip size ratio
50
+ clip_width_max_ratio = 0.8
51
+ clip_width_min_ratio = 0.3
52
  clip_width_height_min_ratio = 0.75
53
  clip_width_height_max_ratio = 1.25
54
 
 
69
  # in degree
70
  rotation_max_angle = 30
71
 
72
+ text_size_min = 15
 
73
 
74
+ # sampling weights for the render language when the font language is "CJK"
75
  cjk_distribution = {
76
  "ja": 0.3,
77
  "ko": 0.2,
 
83
 
84
  assert sum(cjk_distribution.values()) - 1 < epislon
85
 
 
 
 
 
 
 
 
 
86
 
87
  import math
88
  import random
89
+ import traceback
90
  from PIL import Image, ImageDraw, ImageFont
91
  from .fontlabel import FontLabel
92
  from .font import DSFont
 
235
  return color + (255,)
236
 
237
 
238
+ def generate_font_image(
239
  img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
240
  ) -> tuple[Image.Image, FontLabel]:
241
+ im = Image.open(img_path)
242
+ # crop image
243
+ width, height = im.size
244
+ clip_width = random.randint(
245
+ int(width * clip_width_min_ratio), int(width * clip_width_max_ratio)
246
+ )
247
+ clip_height = random.randint(
248
+ int(clip_width * clip_width_height_min_ratio),
249
+ int(clip_width * clip_width_height_max_ratio),
250
+ )
251
+ if clip_height > height:
252
+ clip_height = height
253
+ clip_x = random.randint(0, width - clip_width)
254
+ clip_y = random.randint(0, height - clip_height)
255
+ im = im.crop((clip_x, clip_y, clip_x + clip_width, clip_y + clip_height))
256
+
257
+ # language
258
+ render_language = font.language
259
+ if render_language == "CJK":
260
+ render_language = random.choices(
261
+ list(cjk_distribution.keys()), list(cjk_distribution.values())
262
+ )[0]
263
+ elif render_language == "zh":
264
+ render_language = random.choice(["zh-Hans", "zh-Hant"])
265
+
266
+ # text direction
267
+ if random.random() < ltr_ratio:
268
+ text_direction = "ltr"
269
+ else:
270
+ text_direction = "ttb"
271
 
272
+ # text length
273
+ if random.random() < short_ratio:
274
+ text = corpus_manager.generate(short_condition, font, render_language)
275
+ elif random.random() < median_ratio:
276
+ text = corpus_manager.generate(median_condition, font, render_language)
277
+ else:
278
+ text = corpus_manager.generate(long_condition, font, render_language)
279
+
280
+ # text color & stroke
281
+ if random.random() < gray_ratio:
282
+ text_color = random.randint(0, 255)
283
+ text_color = (text_color, text_color, text_color)
284
+ # no stroke in gray
285
+ stroke_ratio = 0
286
+ stroke_color = None
287
+ im = im.convert("L")
288
+ else:
289
+ text_color = random_color()
290
+ # whether use stroke
291
+ if random.random() < pure_color_ratio:
292
+ stroke_ratio = 0
293
+ stroke_color = None
294
+ else:
295
+ stroke_ratio = random.random() * stroke_width_max_ratio
296
+ stroke_color = random_color()
297
+
298
+ # line spacing
299
+ line_spacing_ratio = (
300
+ random.random() * (line_spacing_max_ratio - line_spacing_min_ratio)
301
+ + line_spacing_min_ratio
302
+ )
303
+
304
+ # calculate render ratio
305
+ render_calculation_stroke_width = int(stroke_ratio * render_calculation_size)
306
+ render_calculation_line_spacing = int(line_spacing_ratio * render_calculation_size)
307
+
308
+ pil_font = ImageFont.truetype(font.path, size=render_calculation_size)
309
+ text_bbox = render_bbox(
310
+ ImageDraw.Draw(im),
311
+ (0, 0),
312
+ text,
313
+ font=pil_font,
314
+ direction=text_direction,
315
+ spacing=render_calculation_line_spacing,
316
+ stroke_width=render_calculation_stroke_width,
317
+ language=render_language,
318
+ )
319
+ (
320
+ render_calculation_width_no_rotation,
321
+ render_calculation_height_no_rotation,
322
+ ) = (text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1])
323
+ render_calculation_font_x_no_rotation = text_bbox[0]
324
+ render_calculation_font_y_no_rotation = text_bbox[1]
325
+
326
+ if random.random() < no_rotation_ratio:
327
+ render_angle = 0
328
+
329
+ render_calculation_width = render_calculation_width_no_rotation
330
+ render_calculation_height = render_calculation_height_no_rotation
331
+ else:
332
+ render_angle = random.randint(-rotation_max_angle, rotation_max_angle)
333
 
334
+ render_calculation_width = int(
335
+ render_calculation_width_no_rotation
336
+ * math.cos(math.radians(abs(render_angle)))
337
+ + render_calculation_height_no_rotation
338
+ * math.sin(math.radians(abs(render_angle)))
339
+ )
340
+ render_calculation_height = int(
341
+ render_calculation_width_no_rotation
342
+ * math.sin(math.radians(abs(render_angle)))
343
+ + render_calculation_height_no_rotation
344
+ * math.cos(math.radians(abs(render_angle)))
345
+ )
346
 
347
+ # calculate render size
348
+ render_ratio = (
349
+ random.random() * (text_longer_max_ratio - text_longer_min_ratio)
350
+ + text_longer_min_ratio
351
+ )
352
+ if render_calculation_width / render_calculation_height < clip_width / clip_height:
353
+ # height is the limit
354
+ render_height = int(clip_height * render_ratio)
355
+ render_width = int(
356
+ render_calculation_width / render_calculation_height * render_height
357
+ )
358
+ else:
359
+ # width is the limit
360
+ render_width = int(clip_width * render_ratio)
361
+ render_height = int(
362
+ render_calculation_height / render_calculation_width * render_width
363
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
364
 
365
+ # calculate text size
366
+ text_size = int(render_calculation_size * render_height / render_calculation_height)
367
+
368
+ if text_size < text_size_min:
369
+ raise ValueError("text size is too small")
370
+
371
+ render_width_no_rotation = int(
372
+ render_calculation_width_no_rotation / render_calculation_height * render_height
373
+ )
374
+ render_height_no_rotation = int(
375
+ render_calculation_height_no_rotation
376
+ / render_calculation_height
377
+ * render_height
378
+ )
379
+ render_font_x_no_rotation = int(
380
+ render_calculation_font_x_no_rotation
381
+ / render_calculation_height
382
+ * render_height
383
+ )
384
+ render_font_y_no_rotation = int(
385
+ render_calculation_font_y_no_rotation
386
+ / render_calculation_height
387
+ * render_height
388
+ )
389
+ stroke_width = int(text_size * stroke_ratio)
390
+ line_spacing = int(text_size * line_spacing_ratio)
391
+
392
+ # calculate render position
393
+ render_x = random.randint(0, clip_width - render_width)
394
+ render_y = random.randint(0, clip_height - render_height)
395
+
396
+ font_image = Image.new(
397
+ "RGBA",
398
+ (render_width_no_rotation, render_height_no_rotation),
399
+ (0, 0, 0, 0),
400
+ )
401
+ pil_font = ImageFont.truetype(font.path, size=text_size)
402
+ render_text(
403
+ ImageDraw.Draw(font_image),
404
+ (-render_font_x_no_rotation, -render_font_y_no_rotation),
405
+ text,
406
+ font=pil_font,
407
+ fill=RGB2RGBA(text_color),
408
+ direction=text_direction,
409
+ spacing=line_spacing,
410
+ stroke_width=stroke_width,
411
+ stroke_fill=RGB2RGBA(stroke_color),
412
+ language=render_language,
413
+ )
414
+ if rotation_max_angle != 0:
415
+ font_image = font_image.rotate(
416
+ render_angle, expand=True, fillcolor=(0, 0, 0, 0)
417
+ )
418
 
419
+ im.paste(font_image, (render_x, render_y), font_image)
420
+ return im, FontLabel(
421
+ clip_width,
422
+ clip_height,
423
+ text,
424
+ font,
425
+ text_color,
426
+ text_size,
427
+ text_direction,
428
+ stroke_width,
429
+ stroke_color,
430
+ line_spacing,
431
+ render_language,
432
+ (render_x, render_y, render_width, render_height),
433
+ render_angle,
434
+ )