from .utils import auto_run
from pathlib import Path


def iterfiles(root_dir: Path):
    """
    Return every file and directory under root_dir, recursively.
    This is not a pathlib implementation because pathlib's recursive glob
    does not follow symbolic links.
    """

    from glob import glob
    return [Path(p) for p in glob(f'{root_dir}/**', recursive=True)]


def main(root_dir: str, output_dir: str, use_link: bool = False, use_shrink: bool = False):
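    """
    Collect page images and annotation XML files under root_dir and convert
    them into train/test JSON files via tools/ndl_parser.py.

    Every directory containing both an img/ and an xml/ subdirectory is treated
    as one document: with use_link, its images are symlinked (or converted to
    JPEG) into output_dir; its single XML file and its img/ directory are then
    handed to the parser.
    """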
|
    import os
    import subprocess

    def call(*args):
        # Print the command line before executing it.
        print(*args)
        subprocess.call(args, stderr=None)
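
    # scale is forwarded to ndl_parser as --fx/--fy; annotations and images are
    # handled at half size when use_shrink is requested.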
    scale = 1.0
    if use_shrink:
        scale = 0.5

    os.makedirs(output_dir, exist_ok=True)
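
    # With use_link, flatten the per-document img/ directories into output_dir:
    # JPEGs are symlinked, other formats are converted to JPEG with OpenCV, and
    # file names are prefixed with the document directory name to avoid clashes.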
    if use_link:
        for path in iterfiles(root_dir):
            if path.is_dir() and (path / "img").exists() and (path / "xml").exists():
                for img_path in (path / "img").iterdir():
                    if img_path.is_file() and not str(img_path.name).startswith('._'):
                        if img_path.suffix == '.jpg':
                            call('ln', '-s', str(img_path.resolve()),
                                 str(Path(output_dir) / (path.name + "_" + img_path.name)))
                        else:
                            import cv2
                            img = cv2.imread(str(img_path.resolve()))
                            dst_path = (
                                Path(output_dir) / (path.name + "_" + img_path.name)).with_suffix('.jpg')
                            cv2.imwrite(str(dst_path), img)

        if use_shrink:
            call('python', './tools/shrink2.py', output_dir, output_dir + "_half")
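
    # Gather exactly one annotation XML and the matching img/ directory for each
    # document directory; macOS resource-fork files ("._*") are skipped.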
    xml_paths = []
    img_dirs = []

    for path in iterfiles(root_dir):
        if path.is_dir() and (path / "img").exists() and (path / "xml").exists():
            xml_path = [p for p in (
                path / "xml").iterdir() if p.suffix == ".xml" and not p.name.startswith('._')]
            assert len(xml_path) == 1
            xml_paths.append(str(xml_path[0]))
            img_dirs.append(str(path / "img"))
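
    # By default the train/test JSON files go under generated/; when the images
    # were flattened with use_link they are written next to the (optionally
    # half-size) image directory instead.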
    dataset_name = Path(root_dir).name
    train_json_path = f'generated/{dataset_name}_train.json'
    test_json_path = f'generated/{dataset_name}_test.json'
    if use_link:
        if use_shrink:
            train_json_path = str(Path(output_dir + "_half") / "train.json")
            test_json_path = str(Path(output_dir + "_half") / "test.json")
        else:
            train_json_path = str(Path(output_dir) / "train.json")
            test_json_path = str(Path(output_dir) / "test.json")
    print("train_json_path :", train_json_path)
    print("test_json_path :", test_json_path)
    cmd = ['python', '-m', 'tools.ndl_parser', '--xml_paths', *xml_paths, '--img_dirs', *img_dirs, '--add_prefix', 'True',
           '--fx', str(scale), '--fy', str(scale), '--train_json_path', train_json_path, '--test_json_path', test_json_path]
    cmd_len = sum(len(line) for line in cmd)
    if cmd_len > 2_000_000:
        xml_list_filename = 'tmp_xml_paths.list'
        img_list_filename = 'tmp_img_dirs.list'
        print('xml_paths or img_dirs is long.')
        print('export xml_paths and img_dirs to {} and {}'.format(
            xml_list_filename, img_list_filename))
        with open(xml_list_filename, 'w') as f:
            for path in xml_paths:
                f.write(path + '\n')
        with open(img_list_filename, 'w') as f:
            for path in img_dirs:
                f.write(path + '\n')
        call('python', './tools/ndl_parser.py', '--xml_list_path', xml_list_filename, '--img_list_path', img_list_filename, '--add_prefix',
             'True', '--fx', str(scale), '--fy', str(scale), '--train_json_path', train_json_path, '--test_json_path', test_json_path)
    else:
        call('python', './tools/ndl_parser.py', '--xml_paths', *xml_paths, '--img_dirs', *img_dirs, '--add_prefix', 'True',
             '--fx', str(scale), '--fy', str(scale), '--train_json_path', train_json_path, '--test_json_path', test_json_path)
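

# auto_run comes from .utils (not shown here); it is assumed to expose main()
# as a command-line entry point, mapping its parameters to CLI arguments.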
auto_run(main)