# yolov8_comicbook / convert.py
# jongkook90's picture
# Fixed bug
# 13ed643
import xml.etree.ElementTree as ET
import os
from PIL import Image
from IPython.display import display
import json
import pandas as pd
import yaml
## Generate label file
class Manga():
    """Parsed view of one XML annotation file for a single manga title.

    The XML root is expected to contain a ``characters`` element (children
    carrying an ``id`` attribute) and a ``pages`` element whose ``page``
    children carry ``index``, ``width`` and ``height`` attributes and hold
    ``frame``, ``face``, ``body`` and ``text`` elements with ``id``,
    ``xmin``, ``xmax``, ``ymin`` and ``ymax`` attributes.

    Attributes set by the constructor:
        manga_name: title derived from the annotation file name.
        image_path: callable ``page -> str`` giving the page image path.
        output_path: callable ``(obj_id, ext) -> str`` giving an output path.
        root: root element of the parsed XML tree (left unmodified).
        characters: list of the ``id`` attribute of every character entry.
        pages: ``{page index: {"frame"|"face"|"body"|"text": {id: element}}}``
            where each element is a dict of the XML attributes with the four
            box coordinates converted to ``int`` plus the derived keys
            ``position`` (box centre), ``size`` (width, height) and
            ``page_id``. Text elements also carry their content under
            ``"text"``.
        pages_size: ``{page index: {"page_size": (width, height)}}``.
    """

    def __init__(self, path, image_path, output_path):
        """Parse the annotation file at *path*.

        Args:
            path: ``/``-separated path to the XML file. The title is the
                file name without its first three characters and without
                its last four (the ``.xml`` extension).
            image_path: directory holding one sub-directory of page images
                per title.
            output_path: directory used by ``self.output_path``.
        """
        manga_name = path.split("/")[-1][3:-4]
        print(manga_name)
        # Page indices are left-padded with zeros to three characters.
        self.image_path = lambda page: f"{image_path}/{manga_name}/{('000'+page)[-3:]}.jpg"
        self.output_path = lambda obj_id, ext: f"{output_path}/{obj_id}.{ext}"
        self.manga_name = manga_name
        tree = ET.parse(path)
        self.root = tree.getroot()
        self.characters = [x.get('id') for x in self.root.find("characters")]

        self.pages = {}
        self.pages_size = {}
        # One traversal of <pages> fills both mappings.
        for page in self.root.find("pages"):
            index = page.get('index')
            self.pages[index] = {
                # Copy the attribute mappings: they are mutated below, and
                # mutating ``Element.attrib`` directly would write ints and
                # derived keys back into the parsed XML tree.
                "frame": {frame.attrib["id"]: dict(frame.attrib) for frame in page.findall("frame")},
                "face": {face.attrib["id"]: dict(face.attrib) for face in page.findall("face")},
                "body": {body.attrib["id"]: dict(body.attrib) for body in page.findall("body")},
                "text": {text.attrib["id"]: {**text.attrib, "text": text.text} for text in page.findall("text")},
            }
            self.pages_size[index] = {
                "page_size": (int(page.get("width")), int(page.get("height")))
            }

        # Normalise coordinates and attach derived geometry to every element.
        for page_id, _page, _elems, v in self._loop_over_elements():
            self._parse_int(v)
            v["position"] = self._calc_location(v)
            v["size"] = self._calc_wh(v)
            v["page_id"] = page_id

    def _loop_over_elements(self):
        """Yield ``(page_id, page, elems, element)`` for every annotated element."""
        for page_id, page in self.pages.items():
            for elems in page.values():
                for v in elems.values():
                    yield (page_id, page, elems, v)

    @staticmethod
    def _parse_int(obj):
        """Convert the four bounding-box coordinates of *obj* to ``int`` in place."""
        for k in ["xmin", "xmax", "ymin", "ymax"]:
            obj[k] = int(obj[k])

    @staticmethod
    def _calc_location(obj):
        """Return the ``(x, y)`` centre of the bounding box of *obj* as floats."""
        return (0.5 * (obj["xmin"] + obj["xmax"]), 0.5 * (obj["ymin"] + obj["ymax"]))

    @staticmethod
    def _calc_wh(obj):
        """Return the ``(width, height)`` of the bounding box of *obj*."""
        return (obj["xmax"] - obj["xmin"]), (obj["ymax"] - obj["ymin"])

    def _get_image(self, obj):
        """Return the region of the page image covered by the box of *obj*.

        The page image is located through ``self.image_path`` using
        ``obj["page_id"]``.
        """
        image_path = self.image_path(obj["page_id"])
        # The context manager closes the underlying file once the crop has
        # been taken. NOTE(review): relies on ``crop`` returning an
        # independent, already-loaded image (true for current Pillow).
        with Image.open(image_path) as image:
            return image.crop((obj["xmin"], obj["ymin"], obj["xmax"], obj["ymax"]))
import json
# Convert every XML annotation file under `path` into per-page label files.
# Each output line is "<class id> <x centre> <y centre> <width> <height>",
# with the four geometry values divided by the page width/height.
path = './annotations_ko'
labels_dir = "./yaml_yolo/labels/"
# open() does not create missing parent directories, so make sure the
# label directory exists before the first write.
os.makedirs(labels_dir, exist_ok=True)
# Class id -> annotation element type; the ids are written into the labels.
parts = {0:"frame", 1:"face", 2:"body", 3:"text"}
for xml_file in os.listdir(path):
    if not xml_file.endswith('.xml'):
        continue
    xml_path = f"{path}/{xml_file}"
    print(xml_path)
    m = Manga(xml_path, './images/', "./json_features")
    for page_id, page in m.pages.items():
        orig_path = m.image_path(page_id)
        # "./images//<title>/<page>.jpg" -> "<title>_<page>.txt". The double
        # slash comes from the trailing "/" of the image root passed to
        # Manga above.
        new_path = labels_dir + orig_path.replace("./images//","").replace("/","_").replace(".jpg",".txt")
        # The page size is the same for every part, so read it once per page.
        w, h = m.pages_size[page_id]["page_size"]
        rows = [
            (part_id, v["position"][0]/w, v["position"][1]/h, v["size"][0]/w, v["size"][1]/h)
            for part_id, part in parts.items()
            for v in page[part].values()
        ]
        if not rows:
            # Pages without any annotated element get no label file.
            continue
        lines = "\n".join(" ".join(str(y) for y in row) for row in rows)
        print(orig_path, new_path)
        print(lines)
        with open(new_path, "wt") as f:
            f.write(lines)
## Generate yaml file
# Dataset description written next to the label files. Class ids are the
# positions in `class_names`. Note that "val" points at the same directory
# as "train", and no test split is configured.
class_names = ("frame", "face", "body", "text")
cfg = dict(
    path="../datasets/manga109",
    train="images/train",
    val="images/train",
    test=None,
    names=dict(enumerate(class_names)),
)
with open('./yaml_yolo/manga109.yaml', 'w') as f:
    yaml.dump(cfg, f)