import json
import os
import xml.etree.ElementTree as ET

import pandas as pd
import yaml
from IPython.display import display
from PIL import Image


## Generate label file
class Manga():
    """In-memory view of one manga annotation XML file.

    The XML root is expected to contain a ``characters`` element and a
    ``pages`` element.  Every ``page`` carries ``index``, ``width`` and
    ``height`` attributes and holds ``frame``, ``face``, ``body`` and
    ``text`` children, each with ``id``, ``xmin``, ``xmax``, ``ymin`` and
    ``ymax`` attributes.

    Attributes:
        manga_name: File name of ``path`` with its first three and last four
            characters removed (presumably a 3-character prefix and the
            ``.xml`` suffix -- confirm against the annotation file naming).
        image_path: Callable ``page -> str`` building the page image path;
            ``page`` is the page index as a string, left-padded with zeros
            and cut to its last three characters.
        output_path: Callable ``(obj_id, ext) -> str`` building an output
            file path below ``output_path``.
        root: Root element of the parsed XML tree.
        characters: ``id`` attribute of every child of ``characters``.
        pages: ``{page_index: {kind: {element_id: attributes}}}`` for the
            kinds ``frame``, ``face``, ``body`` and ``text``.  After
            construction every attribute dict additionally holds integer
            ``xmin``/``xmax``/``ymin``/``ymax``, ``position`` (box centre),
            ``size`` (width, height) and ``page_id``.
        pages_size: ``{page_index: {"page_size": (width, height)}}``.
    """

    def __init__(self, path, image_path, output_path):
        """Parse the annotation file at ``path``.

        Args:
            path: Path to the XML file, using ``/`` as separator.
            image_path: Directory that contains one sub-directory of page
                images per manga.
            output_path: Directory used by ``self.output_path``.
        """
        manga_name = path.split("/")[-1][3:-4]
        print(manga_name)
        self.image_path = lambda page: f"{image_path}/{manga_name}/{('000'+page)[-3:]}.jpg"
        self.output_path = lambda obj_id, ext: f"{output_path}/{obj_id}.{ext}"
        self.manga_name = manga_name

        tree = ET.parse(path)
        self.root = tree.getroot()
        self.characters = [x.get('id') for x in self.root.find("characters")]

        self.pages = {}
        self.pages_size = {}
        for page in self.root.find("pages"):
            index = page.get('index')
            # NOTE: frame/face/body entries are the XML elements' own attrib
            # dicts (not copies), so the annotation loop below also modifies
            # the parsed tree.  Text entries are copies with the element
            # text added under "text".
            self.pages[index] = {
                "frame": {frame.attrib["id"]: frame.attrib for frame in page.findall("frame")},
                "face": {face.attrib["id"]: face.attrib for face in page.findall("face")},
                "body": {body.attrib["id"]: body.attrib for body in page.findall("body")},
                "text": {text.attrib["id"]: {**text.attrib, "text": text.text} for text in page.findall("text")},
            }
            self.pages_size[index] = {
                "page_size": (int(page.get("width")), int(page.get("height")))
            }

        # Convert the box coordinates to int and attach derived geometry.
        for page_id, _page, _elems, v in self._loop_over_elements():
            self._parse_int(v)
            v["position"] = self._calc_location(v)
            v["size"] = self._calc_wh(v)
            v["page_id"] = page_id

    def _loop_over_elements(self):
        """Yield ``(page_id, page, elems, v)`` for every annotated element.

        ``page`` is the per-page dict, ``elems`` the dict of one element
        kind on that page and ``v`` the attribute dict of a single element.
        """
        for page_id, page in self.pages.items():
            for elems in page.values():
                for v in elems.values():
                    yield (page_id, page, elems, v)

    @staticmethod
    def _parse_int(obj):
        """Convert the four box coordinates of ``obj`` to ``int`` in place."""
        for k in ["xmin", "xmax", "ymin", "ymax"]:
            obj[k] = int(obj[k])

    @staticmethod
    def _calc_location(obj):
        """Return the ``(x, y)`` centre of the bounding box of ``obj``."""
        return (0.5 * (obj["xmin"] + obj["xmax"]), 0.5 * (obj["ymin"] + obj["ymax"]))

    @staticmethod
    def _calc_wh(obj):
        """Return the ``(width, height)`` of the bounding box of ``obj``."""
        return (obj["xmax"] - obj["xmin"]), (obj["ymax"] - obj["ymin"])

    def _get_image(self, obj):
        """Return the page image cropped to the bounding box of ``obj``.

        ``obj`` must hold ``page_id`` and integer box coordinates, as set up
        by ``__init__``.
        """
        # The context manager releases the file handle deterministically;
        # crop() returns a separate image object.
        with Image.open(self.image_path(obj["page_id"])) as image:
            return image.crop((obj["xmin"], obj["ymin"], obj["xmax"], obj["ymax"]))


path = './annotations_ko'

# Class id -> element kind; also used for the "names" entry of the dataset
# YAML written at the end so the two can never get out of sync.
parts = {0: "frame", 1: "face", 2: "body", 3: "text"}

# Make sure the output directories exist before anything is written.
os.makedirs("./yaml_yolo/labels/", exist_ok=True)

for xml_file in os.listdir(path):
    if not xml_file.endswith('.xml'):
        continue
    xml_path = f"{path}/{xml_file}"
    print(xml_path)
    m = Manga(xml_path, './images/', "./json_features")

    for page_id, page in m.pages.items():
        orig_path = m.image_path(page_id)
        # "./images//<manga>/<page>.jpg" -> "<manga>_<page>.txt".  The double
        # slash comes from joining the image root './images/' with "/".
        new_path = "./yaml_yolo/labels/" + orig_path.replace("./images//", "").replace("/", "_").replace(".jpg", ".txt")
        w, h = m.pages_size[page_id]["page_size"]

        lines = []
        for part_id, part in parts.items():
            # One row per element: class id, box centre and box size, each
            # coordinate divided by the page width / height.
            line = [
                (part_id, v["position"][0]/w, v["position"][1]/h, v["size"][0]/w, v["size"][1]/h)
                for v in page[part].values()
            ]
            if line:
                lines.append("\n".join(" ".join(str(y) for y in x) for x in line))

        # Pages without any annotated element get no label file.
        if lines:
            lines = "\n".join(lines)
            print(orig_path, new_path)
            print(lines)
            with open(new_path, "wt", encoding="utf-8") as f:
                f.write(lines)

## Generate yaml file
cfg = {
    "path": "../datasets/manga109",
    "train": "images/train",
    "val": "images/train",
    "test": None,
    "names": dict(parts),
}
with open('./yaml_yolo/manga109.yaml', 'w', encoding="utf-8") as f:
    yaml.dump(cfg, f)