|
import sys |
|
import traceback |
|
import pickle |
|
import os |
|
import concurrent.futures |
|
from tqdm import tqdm |
|
from font_dataset.font import load_fonts |
|
from font_dataset.layout import generate_font_image |
|
from font_dataset.text import CorpusGeneratorManager |
|
from font_dataset.background import background_image_generator |
|
|
|
|
|
# Weight applied to CJK fonts: the per-font sample counts below are multiplied
# by this factor to obtain the CJK-specific counts.
cjk_ratio = 3

# Base number of samples per font for each dataset split.
train_cnt = 100
val_cnt = 5
test_cnt = 30

# Per-font sample counts for CJK fonts (base count scaled by cjk_ratio).
train_cnt_cjk = int(train_cnt * cjk_ratio)
val_cnt_cjk = int(val_cnt * cjk_ratio)
test_cnt_cjk = int(test_cnt * cjk_ratio)

# Root directory of the dataset; created up front so later directory
# lookups do not fail on a fresh checkout.
dataset_path = "./dataset/font_img"
os.makedirs(dataset_path, exist_ok=True)
|
|
|
# load_fonts() yields the font collection plus a predicate that returns a
# truthy value for fonts that must be skipped.
fonts, exclusion_rule = load_fonts()

# Weighted number of usable fonts: an excluded font contributes nothing,
# a CJK font contributes cjk_ratio, any other font contributes 1.
cnt = 0
for font in fonts:
    if exclusion_rule(font):
        print(f"Excluded font: {font.path}")
        continue
    cnt += cjk_ratio if font.language == "CJK" else 1

# Expected number of images per split, given the weighted font count.
print("Total training images:", train_cnt * cnt)
print("Total validation images:", val_cnt * cnt)
print("Total testing images:", test_cnt * cnt)
|
|
|
def _count_split_files(split):
    """Return the number of entries in ``dataset_path/<split>``.

    A split directory that does not exist yet counts as 0 entries.
    """
    try:
        return len(os.listdir(os.path.join(dataset_path, split)))
    except FileNotFoundError:
        return 0


# Number of directory entries currently present for each split.
num_file_train = _count_split_files("train")
num_file_val = _count_split_files("val")
num_file_test = _count_split_files("test")

num_file_generated = num_file_train + num_file_val + num_file_test
# NOTE(review): the factor of 2 presumably reflects two files written per
# sample -- confirm against the generator script.
num_file_target = (train_cnt + val_cnt + test_cnt) * cnt * 2

print("Total files generated:", num_file_generated)
print("Total files target:", num_file_target)

# cnt is 0 when there are no fonts or every font is excluded; the target is
# then 0 as well, so report 0.00% instead of raising ZeroDivisionError.
if num_file_target:
    completed_percent = num_file_generated / num_file_target * 100
else:
    completed_percent = 0.0

print(f"{completed_percent:.2f}% completed")
|
|