|
import xml.etree.ElementTree as ET |
|
import os |
|
from PIL import Image |
|
from IPython.display import display |
|
import json |
|
import pandas as pd |
|
import yaml |
|
|
|
|
|
|
|
|
|
class Manga():
    """Annotations of one manga title, loaded from an annotation XML file.

    Attributes:
        manga_name: Title derived from the XML file name.
        image_path: Callable ``page -> str`` building the path of a page image.
        output_path: Callable ``(obj_id, ext) -> str`` building an output path.
        root: Root element of the parsed XML tree; it is left unmodified.
        characters: ``id`` attribute of every child of ``<characters>``.
        pages: ``{page index: {tag: {element id: element dict}}}`` for the
            tags ``frame``, ``face``, ``body`` and ``text``. Each element dict
            holds the XML attributes with integer ``xmin``/``xmax``/``ymin``/
            ``ymax`` plus ``position`` (box centre), ``size`` (width, height)
            and ``page_id``. ``text`` elements also carry their ``text``.
        pages_size: ``{page index: {"page_size": (width, height)}}``.
    """

    def __init__(self, path, image_path, output_path):
        """Parse the annotation file at *path*.

        Args:
            path: ``/``-separated path of the annotation XML file.
            image_path: Directory containing one sub-directory of page images
                per manga title.
            output_path: Directory used by ``self.output_path`` to build paths.
        """
        # The title is the file name without its first three characters and
        # without the four-character extension.
        manga_name = path.split("/")[-1][3:-4]
        print(manga_name)
        # Page indices are strings; they are left-padded with zeros to three
        # characters to form the image file name.
        self.image_path = lambda page: f"{image_path}/{manga_name}/{('000'+page)[-3:]}.jpg"
        self.output_path = lambda obj_id, ext: f"{output_path}/{obj_id}.{ext}"
        self.manga_name = manga_name

        tree = ET.parse(path)
        self.root = tree.getroot()
        self.characters = [x.get('id') for x in self.root.find("characters")]

        self.pages = {}
        self.pages_size = {}
        for page in self.root.find("pages"):
            index = page.get('index')
            self.pages[index] = {
                "frame": self._copy_elements(page, "frame"),
                "face": self._copy_elements(page, "face"),
                "body": self._copy_elements(page, "body"),
                "text": {
                    text.attrib["id"]: {**text.attrib, "text": text.text}
                    for text in page.findall("text")
                },
            }
            self.pages_size[index] = {
                "page_size": (int(page.get("width")), int(page.get("height")))
            }

        # Enrich every element dict in place with numeric box data.
        for page_id, _, _, v in self._loop_over_elements():
            self._parse_int(v)
            v["position"] = self._calc_location(v)
            v["size"] = self._calc_wh(v)
            v["page_id"] = page_id

    @staticmethod
    def _copy_elements(page, tag):
        """Return ``{id: attribute dict}`` for the *tag* children of *page*.

        Each attribute dict is a copy, so enriching it later does not write
        non-string values back into the XML tree kept in ``self.root``.
        """
        return {
            elem.attrib["id"]: dict(elem.attrib)
            for elem in page.findall(tag)
        }

    def _loop_over_elements(self):
        """Yield ``(page_id, page, elems, v)`` for every element of every page."""
        for page_id, page in self.pages.items():
            for elems in page.values():
                for v in elems.values():
                    yield (page_id, page, elems, v)

    @staticmethod
    def _parse_int(obj):
        """Convert the four box coordinates of *obj* to ``int`` in place."""
        for k in ["xmin", "xmax", "ymin", "ymax"]:
            obj[k] = int(obj[k])

    @staticmethod
    def _calc_location(obj):
        """Return the ``(x, y)`` centre of the box described by *obj*."""
        return (0.5 * (obj["xmin"]+obj["xmax"]), 0.5 * (obj["ymin"]+obj["ymax"]))

    @staticmethod
    def _calc_wh(obj):
        """Return the ``(width, height)`` of the box described by *obj*."""
        return (obj["xmax"]-obj["xmin"]), (obj["ymax"]-obj["ymin"])

    def _get_image(self, obj):
        """Return the part of the page image covered by the box of *obj*.

        *obj* must provide ``page_id`` and the four box coordinates.
        """
        image_path = self.image_path(obj["page_id"])
        # The context manager releases the underlying file once the crop,
        # which is a separate image object, has been produced.
        with Image.open(image_path) as image:
            return image.crop((obj["xmin"], obj["ymin"], obj["xmax"], obj["ymax"]))
|
|
|
|
|
import json |
|
# Convert every annotation XML under ./annotations_ko into label text files:
# one .txt per page that has at least one annotated element, one row per
# element in the form "<class index> <x centre> <y centre> <width> <height>",
# with all four box values divided by the page width/height.
# NOTE(review): the indentation of this script was lost in the source; the
# nesting below (one label pass per XML file) is the reading that uses every
# parsed Manga - confirm against the original.
path = './annotations_ko'
labels_dir = "./yaml_yolo/labels/"

# open() does not create missing directories, so make sure the target exists.
os.makedirs(labels_dir, exist_ok=True)

# Class index -> annotation tag; the index is the first column of each row.
parts = {0:"frame", 1:"face", 2:"body", 3:"text"}

for xml_file in os.listdir(path):
    if not xml_file.endswith('.xml'):
        continue
    xml_path = f"{path}/{xml_file}"
    print(xml_path)
    m = Manga(xml_path, './images/', "./json_features")

    for page_id, page in m.pages.items():
        orig_path = m.image_path(page_id)
        # "./images//<manga>/<NNN>.jpg" -> "<manga>_<NNN>.txt"
        new_path = labels_dir + orig_path.replace("./images//","").replace("/","_").replace(".jpg",".txt")
        w, h = m.pages_size[page_id]["page_size"]

        # Rows are grouped by class index, in the order given by `parts`.
        rows = [
            (part_id, v["position"][0]/w, v["position"][1]/h, v["size"][0]/w, v["size"][1]/h)
            for part_id, part in parts.items()
            for v in page[part].values()
        ]
        if not rows:
            # Pages without annotations get no label file.
            continue

        lines = "\n".join(" ".join(str(y) for y in row) for row in rows)
        print(orig_path, new_path)
        print(lines)
        with open(new_path, "wt") as f:
            f.write(lines)
|
|
|
|
|
|
|
|
|
|
|
# Dataset description file: dataset root, image sub-directories per split and
# the class index -> class name table.
# NOTE(review): "val" points at the same directory as "train" - confirm this
# is intended.
cfg = dict(
    path="../datasets/manga109",
    train="images/train",
    val="images/train",
    test=None,
    names=dict(enumerate(("frame", "face", "body", "text"))),
)

with open('./yaml_yolo/manga109.yaml', 'w') as f:
    yaml.dump(cfg, f)
|
|
|
|