fix: refine workflow, enforce minimum font size, tune params
Browse files- font_dataset/layout.py +196 -212
font_dataset/layout.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
__all__ = ["
|
2 |
|
3 |
|
4 |
epislon = 1e-6
|
@@ -47,8 +47,8 @@ stroke_width_max_ratio = 0.25
|
|
47 |
assert gray_ratio + color_ratio - 1 < epislon
|
48 |
|
49 |
# clip size ratio
|
50 |
-
clip_width_max_ratio = 0.
|
51 |
-
clip_width_min_ratio = 0.
|
52 |
clip_width_height_min_ratio = 0.75
|
53 |
clip_width_height_max_ratio = 1.25
|
54 |
|
@@ -69,9 +69,9 @@ assert no_rotation_ratio + rotation_ratio - 1 < epislon
|
|
69 |
# in degree
|
70 |
rotation_max_angle = 30
|
71 |
|
72 |
-
|
73 |
-
cjk_ratio = 3
|
74 |
|
|
|
75 |
cjk_distribution = {
|
76 |
"ja": 0.3,
|
77 |
"ko": 0.2,
|
@@ -83,17 +83,10 @@ cjk_distribution = {
|
|
83 |
|
84 |
assert sum(cjk_distribution.values()) - 1 < epislon
|
85 |
|
86 |
-
train_cnt = 100
|
87 |
-
val_cnt = 10
|
88 |
-
test_cnt = 30
|
89 |
-
|
90 |
-
train_cnt_cjk = int(train_cnt * cjk_ratio)
|
91 |
-
val_cnt_cjk = int(val_cnt * cjk_ratio)
|
92 |
-
test_cnt_cjk = int(test_cnt * cjk_ratio)
|
93 |
-
|
94 |
|
95 |
import math
|
96 |
import random
|
|
|
97 |
from PIL import Image, ImageDraw, ImageFont
|
98 |
from .fontlabel import FontLabel
|
99 |
from .font import DSFont
|
@@ -242,209 +235,200 @@ def RGB2RGBA(color):
|
|
242 |
return color + (255,)
|
243 |
|
244 |
|
245 |
-
def
|
246 |
img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
|
247 |
) -> tuple[Image.Image, FontLabel]:
|
248 |
-
|
249 |
-
|
250 |
-
|
251 |
-
|
252 |
-
|
253 |
-
|
254 |
-
|
255 |
-
|
256 |
-
|
257 |
-
|
258 |
-
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
-
|
269 |
-
|
270 |
-
|
271 |
-
|
272 |
-
|
273 |
-
|
274 |
-
|
275 |
-
|
276 |
-
|
277 |
-
|
278 |
|
279 |
-
|
280 |
-
|
281 |
-
|
282 |
-
|
283 |
-
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
290 |
-
|
291 |
-
|
292 |
-
|
293 |
-
|
294 |
-
|
295 |
-
|
296 |
-
|
297 |
-
|
298 |
-
|
299 |
-
|
300 |
-
|
301 |
-
|
302 |
-
|
303 |
-
|
304 |
-
|
305 |
-
|
306 |
-
|
307 |
-
|
308 |
-
|
309 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
310 |
|
311 |
-
|
312 |
-
|
313 |
-
|
314 |
-
|
315 |
-
|
316 |
-
|
317 |
-
|
|
|
|
|
|
|
|
|
|
|
318 |
|
319 |
-
|
320 |
-
|
321 |
-
|
322 |
-
|
323 |
-
|
324 |
-
|
325 |
-
|
326 |
-
|
327 |
-
|
328 |
-
|
329 |
-
|
330 |
-
|
331 |
-
|
332 |
-
|
333 |
-
|
334 |
-
|
335 |
-
|
336 |
-
|
337 |
-
if random.random() < no_rotation_ratio:
|
338 |
-
render_angle = 0
|
339 |
-
|
340 |
-
render_calculation_width = render_calculation_width_no_rotation
|
341 |
-
render_calculation_height = render_calculation_height_no_rotation
|
342 |
-
else:
|
343 |
-
render_angle = random.randint(-rotation_max_angle, rotation_max_angle)
|
344 |
-
|
345 |
-
render_calculation_width = int(
|
346 |
-
render_calculation_width_no_rotation
|
347 |
-
* math.cos(math.radians(abs(render_angle)))
|
348 |
-
+ render_calculation_height_no_rotation
|
349 |
-
* math.sin(math.radians(abs(render_angle)))
|
350 |
-
)
|
351 |
-
render_calculation_height = int(
|
352 |
-
render_calculation_width_no_rotation
|
353 |
-
* math.sin(math.radians(abs(render_angle)))
|
354 |
-
+ render_calculation_height_no_rotation
|
355 |
-
* math.cos(math.radians(abs(render_angle)))
|
356 |
-
)
|
357 |
-
|
358 |
-
# calculate render size
|
359 |
-
render_ratio = (
|
360 |
-
random.random() * (text_longer_max_ratio - text_longer_min_ratio)
|
361 |
-
+ text_longer_min_ratio
|
362 |
-
)
|
363 |
-
if (
|
364 |
-
render_calculation_width / render_calculation_height
|
365 |
-
< clip_width / clip_height
|
366 |
-
):
|
367 |
-
# height is the limit
|
368 |
-
render_height = int(clip_height * render_ratio)
|
369 |
-
render_width = int(
|
370 |
-
render_calculation_width / render_calculation_height * render_height
|
371 |
-
)
|
372 |
-
else:
|
373 |
-
# width is the limit
|
374 |
-
render_width = int(clip_width * render_ratio)
|
375 |
-
render_height = int(
|
376 |
-
render_calculation_height / render_calculation_width * render_width
|
377 |
-
)
|
378 |
-
|
379 |
-
# calculate text size
|
380 |
-
text_size = int(
|
381 |
-
render_calculation_size * render_height / render_calculation_height
|
382 |
-
)
|
383 |
-
render_width_no_rotation = int(
|
384 |
-
render_calculation_width_no_rotation
|
385 |
-
/ render_calculation_height
|
386 |
-
* render_height
|
387 |
-
)
|
388 |
-
render_height_no_rotation = int(
|
389 |
-
render_calculation_height_no_rotation
|
390 |
-
/ render_calculation_height
|
391 |
-
* render_height
|
392 |
-
)
|
393 |
-
render_font_x_no_rotation = int(
|
394 |
-
render_calculation_font_x_no_rotation
|
395 |
-
/ render_calculation_height
|
396 |
-
* render_height
|
397 |
-
)
|
398 |
-
render_font_y_no_rotation = int(
|
399 |
-
render_calculation_font_y_no_rotation
|
400 |
-
/ render_calculation_height
|
401 |
-
* render_height
|
402 |
-
)
|
403 |
-
stroke_width = int(text_size * stroke_ratio)
|
404 |
-
line_spacing = int(text_size * line_spacing_ratio)
|
405 |
|
406 |
-
|
407 |
-
|
408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
409 |
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
|
414 |
-
|
415 |
-
|
416 |
-
|
417 |
-
|
418 |
-
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
|
425 |
-
|
426 |
-
language=render_language,
|
427 |
-
)
|
428 |
-
if rotation_max_angle != 0:
|
429 |
-
font_image = font_image.rotate(
|
430 |
-
render_angle, expand=True, fillcolor=(0, 0, 0, 0)
|
431 |
-
)
|
432 |
-
|
433 |
-
im.paste(font_image, (render_x, render_y), font_image)
|
434 |
-
return im, FontLabel(
|
435 |
-
clip_width,
|
436 |
-
clip_height,
|
437 |
-
text,
|
438 |
-
font,
|
439 |
-
text_color,
|
440 |
-
text_size,
|
441 |
-
text_direction,
|
442 |
-
stroke_width,
|
443 |
-
stroke_color,
|
444 |
-
line_spacing,
|
445 |
-
render_language,
|
446 |
-
(render_x, render_y, render_width, render_height),
|
447 |
-
render_angle,
|
448 |
-
)
|
449 |
-
except Exception as e:
|
450 |
-
print(e)
|
|
|
1 |
+
__all__ = ["generate_font_image"]
|
2 |
|
3 |
|
4 |
epislon = 1e-6
|
|
|
47 |
assert gray_ratio + color_ratio - 1 < epislon
|
48 |
|
49 |
# clip size ratio
|
50 |
+
clip_width_max_ratio = 0.8
|
51 |
+
clip_width_min_ratio = 0.3
|
52 |
clip_width_height_min_ratio = 0.75
|
53 |
clip_width_height_max_ratio = 1.25
|
54 |
|
|
|
69 |
# in degree
|
70 |
rotation_max_angle = 30
|
71 |
|
72 |
+
text_size_min = 15
|
|
|
73 |
|
74 |
+
# ratio of dataset size for cjk
|
75 |
cjk_distribution = {
|
76 |
"ja": 0.3,
|
77 |
"ko": 0.2,
|
|
|
83 |
|
84 |
assert sum(cjk_distribution.values()) - 1 < epislon
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
|
87 |
import math
|
88 |
import random
|
89 |
+
import traceback
|
90 |
from PIL import Image, ImageDraw, ImageFont
|
91 |
from .fontlabel import FontLabel
|
92 |
from .font import DSFont
|
|
|
235 |
return color + (255,)
|
236 |
|
237 |
|
238 |
+
def generate_font_image(
    img_path: str, font: DSFont, corpus_manager: CorpusGeneratorManager
) -> tuple[Image.Image, FontLabel]:
    """Render random text in ``font`` onto a random crop of a background image.

    A random region of the image at ``img_path`` is cropped, text is requested
    from ``corpus_manager``, and the text is drawn with randomly chosen
    direction, colour, stroke, line spacing, rotation and size, then pasted
    onto the crop at a random position.

    Args:
        img_path: Path of the background image to crop.
        font: Font to render with; ``font.path`` and ``font.language`` are read.
        corpus_manager: Source of the text; its ``generate`` method is called
            with a length condition, the font and the render language.

    Returns:
        A tuple ``(image, label)`` where ``image`` is the cropped background
        with the text pasted on it and ``label`` is the ``FontLabel`` built
        from the sampled rendering parameters.

    Raises:
        ValueError: If the computed text size is below ``text_size_min``.
    """
    # crop image
    # Open inside a context manager so the underlying file is always released;
    # the cropped region is taken before the source image is closed.
    with Image.open(img_path) as background:
        width, height = background.size
        clip_width = random.randint(
            int(width * clip_width_min_ratio), int(width * clip_width_max_ratio)
        )
        clip_height = random.randint(
            int(clip_width * clip_width_height_min_ratio),
            int(clip_width * clip_width_height_max_ratio),
        )
        # The height is derived from the width, so it may exceed the image.
        if clip_height > height:
            clip_height = height
        clip_x = random.randint(0, width - clip_width)
        clip_y = random.randint(0, height - clip_height)
        im = background.crop(
            (clip_x, clip_y, clip_x + clip_width, clip_y + clip_height)
        )

    # language
    render_language = font.language
    if render_language == "CJK":
        render_language = random.choices(
            list(cjk_distribution.keys()), list(cjk_distribution.values())
        )[0]
    elif render_language == "zh":
        render_language = random.choice(["zh-Hans", "zh-Hant"])

    # text direction
    if random.random() < ltr_ratio:
        text_direction = "ltr"
    else:
        text_direction = "ttb"

    # text length
    # Draw once and compare against cumulative thresholds. Drawing a second
    # random number for the median branch would select it with probability
    # (1 - short_ratio) * median_ratio instead of median_ratio.
    length_draw = random.random()
    if length_draw < short_ratio:
        text = corpus_manager.generate(short_condition, font, render_language)
    elif length_draw < short_ratio + median_ratio:
        text = corpus_manager.generate(median_condition, font, render_language)
    else:
        text = corpus_manager.generate(long_condition, font, render_language)

    # text color & stroke
    if random.random() < gray_ratio:
        gray_level = random.randint(0, 255)
        text_color = (gray_level, gray_level, gray_level)
        # no stroke in gray
        stroke_ratio = 0
        stroke_color = None
        im = im.convert("L")
    else:
        text_color = random_color()
        # whether use stroke
        if random.random() < pure_color_ratio:
            stroke_ratio = 0
            stroke_color = None
        else:
            stroke_ratio = random.random() * stroke_width_max_ratio
            stroke_color = random_color()

    # line spacing
    line_spacing_ratio = (
        random.random() * (line_spacing_max_ratio - line_spacing_min_ratio)
        + line_spacing_min_ratio
    )

    # calculate render ratio
    # The text is first measured at the reference size
    # ``render_calculation_size``; the measurements are scaled to the final
    # size further down.
    render_calculation_stroke_width = int(stroke_ratio * render_calculation_size)
    render_calculation_line_spacing = int(line_spacing_ratio * render_calculation_size)

    pil_font = ImageFont.truetype(font.path, size=render_calculation_size)
    text_bbox = render_bbox(
        ImageDraw.Draw(im),
        (0, 0),
        text,
        font=pil_font,
        direction=text_direction,
        spacing=render_calculation_line_spacing,
        stroke_width=render_calculation_stroke_width,
        language=render_language,
    )
    (
        render_calculation_width_no_rotation,
        render_calculation_height_no_rotation,
    ) = (text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1])
    render_calculation_font_x_no_rotation = text_bbox[0]
    render_calculation_font_y_no_rotation = text_bbox[1]

    if random.random() < no_rotation_ratio:
        render_angle = 0

        render_calculation_width = render_calculation_width_no_rotation
        render_calculation_height = render_calculation_height_no_rotation
    else:
        render_angle = random.randint(-rotation_max_angle, rotation_max_angle)

        # Axis-aligned bounding box of the rotated text rectangle:
        # w' = w*cos(a) + h*sin(a), h' = w*sin(a) + h*cos(a).
        angle_cos = math.cos(math.radians(abs(render_angle)))
        angle_sin = math.sin(math.radians(abs(render_angle)))
        render_calculation_width = int(
            render_calculation_width_no_rotation * angle_cos
            + render_calculation_height_no_rotation * angle_sin
        )
        render_calculation_height = int(
            render_calculation_width_no_rotation * angle_sin
            + render_calculation_height_no_rotation * angle_cos
        )

    # calculate render size
    # ``render_ratio`` is the fraction of the clip's limiting dimension that
    # the rendered text occupies.
    render_ratio = (
        random.random() * (text_longer_max_ratio - text_longer_min_ratio)
        + text_longer_min_ratio
    )
    if render_calculation_width / render_calculation_height < clip_width / clip_height:
        # height is the limit
        render_height = int(clip_height * render_ratio)
        render_width = int(
            render_calculation_width / render_calculation_height * render_height
        )
    else:
        # width is the limit
        render_width = int(clip_width * render_ratio)
        render_height = int(
            render_calculation_height / render_calculation_width * render_width
        )

    # calculate text size
    text_size = int(render_calculation_size * render_height / render_calculation_height)

    if text_size < text_size_min:
        raise ValueError("text size is too small")

    # Scale the reference-size measurements to the final render size.
    render_width_no_rotation = int(
        render_calculation_width_no_rotation / render_calculation_height * render_height
    )
    render_height_no_rotation = int(
        render_calculation_height_no_rotation
        / render_calculation_height
        * render_height
    )
    render_font_x_no_rotation = int(
        render_calculation_font_x_no_rotation
        / render_calculation_height
        * render_height
    )
    render_font_y_no_rotation = int(
        render_calculation_font_y_no_rotation
        / render_calculation_height
        * render_height
    )
    stroke_width = int(text_size * stroke_ratio)
    line_spacing = int(text_size * line_spacing_ratio)

    # calculate render position
    render_x = random.randint(0, clip_width - render_width)
    render_y = random.randint(0, clip_height - render_height)

    # Draw the text on its own transparent layer so it can be rotated
    # independently of the background.
    font_image = Image.new(
        "RGBA",
        (render_width_no_rotation, render_height_no_rotation),
        (0, 0, 0, 0),
    )
    pil_font = ImageFont.truetype(font.path, size=text_size)
    render_text(
        ImageDraw.Draw(font_image),
        # Shift by the bbox origin so the text starts at the layer's corner.
        (-render_font_x_no_rotation, -render_font_y_no_rotation),
        text,
        font=pil_font,
        fill=RGB2RGBA(text_color),
        direction=text_direction,
        spacing=line_spacing,
        stroke_width=stroke_width,
        stroke_fill=RGB2RGBA(stroke_color),
        language=render_language,
    )
    # Only rotate when this sample actually drew a non-zero angle.
    if render_angle != 0:
        font_image = font_image.rotate(
            render_angle, expand=True, fillcolor=(0, 0, 0, 0)
        )

    # The text layer doubles as its own alpha mask.
    im.paste(font_image, (render_x, render_y), font_image)
    return im, FontLabel(
        clip_width,
        clip_height,
        text,
        font,
        text_color,
        text_size,
        text_direction,
        stroke_width,
        stroke_color,
        line_spacing,
        render_language,
        (render_x, render_y, render_width, render_height),
        render_angle,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|