jongkook90 committed
Commit 13ed643 • 1 Parent(s): dfae973

removed suspected binary files

README.txt ADDED
@@ -0,0 +1 @@
+ https://github.com/ultralytics/ultralytics/commit/9d1e5567de48453f168013ff1032810bd95d39fe
convert.py ADDED
@@ -0,0 +1,113 @@
+ import xml.etree.ElementTree as ET
+ import os
+ from PIL import Image
+ from IPython.display import display
+ import json
+ import pandas as pd
+ import yaml
+
+
+ ## Generate label file
+
+ class Manga():
+     def __init__(self, path, image_path, output_path):
+
+         manga_name = path.split("/")[-1][3:-4]
+         print(manga_name)
+         self.image_path = lambda page: f"{image_path}/{manga_name}/{('000'+page)[-3:]}.jpg"
+         self.output_path = lambda obj_id, ext: f"{output_path}/{obj_id}.{ext}"
+         self.manga_name = manga_name
+
+         tree = ET.parse(path)
+         self.root = tree.getroot()
+         self.characters = [x.get('id') for x in self.root.find("characters")]
+         self.pages = {
+             page.get('index') :
+             {
+                 "frame": {frame.attrib["id"]: frame.attrib for frame in page.findall("frame")},
+                 "face": {face.attrib["id"]: face.attrib for face in page.findall("face")},
+                 "body": {body.attrib["id"]: body.attrib for body in page.findall("body")},
+                 "text": {text.attrib["id"]: {**text.attrib, "text":text.text} for text in page.findall("text")},
+             }
+             for page in self.root.find("pages")
+         }
+         self.pages_size = {
+             page.get('index') :
+             {
+                 "page_size": (int(page.get("width")), int(page.get("height")))
+             }
+             for page in self.root.find("pages")
+         }
+         for page_id, page, elems, v in self._loop_over_elements():
+             self._parse_int(v)
+             v["position"] = self._calc_location(v)
+             v["size"] = self._calc_wh(v)
+             v["page_id"] = page_id
+
+
+     def _loop_over_elements(self):
+         for page_id, page in self.pages.items():
+             for elems in page.values():
+                 for v in elems.values():
+                     yield (page_id, page, elems, v)
+     @staticmethod
+     def _parse_int(obj):
+         for k in ["xmin", "xmax", "ymin", "ymax"]:
+             obj[k] = int(obj[k])
+     @staticmethod
+     def _calc_location(obj):
+         return (0.5 * (obj["xmin"]+obj["xmax"]), 0.5 * (obj["ymin"]+obj["ymax"]))
+     @staticmethod
+     def _calc_wh(obj):
+         return (obj["xmax"]-obj["xmin"]) , (obj["ymax"]-obj["ymin"])
+
+     def _get_image(self, obj):
+         image_path = self.image_path(obj["page_id"])
+         image = Image.open(image_path)
+         trimmed_image = image.crop((obj["xmin"],obj["ymin"], obj["xmax"], obj["ymax"]))
+         return trimmed_image
+
+
+ import json
+ path = './annotations_ko'
+ for xml_file in os.listdir(path):
+     if xml_file.endswith('.xml'):
+         xml_path = f"{path}/{xml_file}"
+         print(xml_path)
+         m = Manga(xml_path, './images/', "./json_features")
+
+         parts = {0:"frame", 1:"face", 2:"body", 3:"text"}
+
+         for page_id in m.pages.keys():
+             lines = []
+             orig_path = m.image_path(page_id)
+             new_path = "./yaml_yolo/labels/" + orig_path.replace("./images//","").replace("/","_").replace(".jpg",".txt")
+             for part_id, part in parts.items():
+                 page = m.pages[page_id]
+                 w, h = m.pages_size[page_id]["page_size"]
+                 line = [(part_id, v["position"][0]/w, v["position"][1]/h, v["size"][0]/w, v["size"][1]/h) for k, v in page[part].items()]
+                 if len(line)>0:
+                     labels = "\n".join([" ".join(str(y) for y in x) for x in line])
+                     lines.append(labels)
+             if len(lines)>0:
+                 lines = "\n".join(lines)
+                 print(orig_path, new_path)
+                 print(lines)
+                 with open(new_path, "wt") as f:
+                     f.write(lines)
+
+
+
+ ## Generate yaml file
+
+ cfg = {
+     "path": "../datasets/manga109",
+     "train": "images/train",
+     "val": "images/train",
+     "test": None,
+     "names":{0:"frame", 1:"face", 2:"body", 3:"text"},
+ }
+
+ with open('./yaml_yolo/manga109.yaml', 'w') as f:
+     yaml.dump(cfg, f)
+
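For reference, a minimal sketch of what convert.py writes for a single annotated object, using hypothetical box and page values that are not taken from the dataset: each object becomes one label line of the form "class_id x_center y_center width height", with all four geometry values normalized by the page width and height.

# Hypothetical worked example of one YOLO label line as produced by convert.py.
# Class ids follow the parts mapping: 0 frame, 1 face, 2 body, 3 text.
xmin, ymin, xmax, ymax = 100, 200, 300, 500   # assumed face box, in pixels
page_w, page_h = 1654, 1170                   # assumed page size, in pixels
part_id = 1                                   # face
x_center = 0.5 * (xmin + xmax) / page_w
y_center = 0.5 * (ymin + ymax) / page_h
width = (xmax - xmin) / page_w
height = (ymax - ymin) / page_h
print(part_id, x_center, y_center, width, height)
# roughly: 1 0.121 0.299 0.121 0.256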
train.py ADDED
@@ -0,0 +1,14 @@
+ from ultralytics import YOLO
+
+ # Load a model
+ # model = YOLO("yolov8n.yaml")  # build a new model from scratch
+ # model = YOLO("yolov8n.pt")  # load a pretrained model (recommended for training)
+ model = YOLO("yolov8m.yaml")  # build a new model from scratch
+ model = YOLO("yolov8m.pt")  # load a pretrained model (recommended for training)
+
+ # Use the model
+ # model.train(data="coco128.yaml", epochs=3)  # train the model
+ model.train(data="./datasets/manga109/manga109.yaml", epochs=3)  # train the model
+ metrics = model.val()  # evaluate model performance on the validation set
+ # results = model("https://ultralytics.com/images/bus.jpg")  # predict on an image
+ path = model.export(format="onnx")  # export the model to ONNX format
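A brief, hedged sketch of how the resulting weights could be used for inference afterwards; the weights path assumes the save_dir recorded in train/args.yaml, and the image filename is purely illustrative.

from ultralytics import YOLO

# Load the trained detector (path assumed from the save_dir in train/args.yaml).
model = YOLO("runs/detect/train5/weights/best.pt")

# Predict on a single manga page (hypothetical filename).
results = model("page_001.jpg")

# Class ids match manga109.yaml: 0 frame, 1 face, 2 body, 3 text.
for box in results[0].boxes:
    print(int(box.cls), float(box.conf), box.xyxy.tolist())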
train/F1_curve.png ADDED
train/PR_curve.png ADDED
train/P_curve.png ADDED
train/R_curve.png ADDED
train/args.yaml ADDED
@@ -0,0 +1,97 @@
+ task: detect
+ mode: train
+ model: yolov8m.pt
+ data: ./datasets/manga109/manga109.yaml
+ epochs: 3
+ patience: 50
+ batch: 16
+ imgsz: 640
+ save: true
+ save_period: -1
+ cache: false
+ device: null
+ workers: 8
+ project: null
+ name: null
+ exist_ok: false
+ pretrained: true
+ optimizer: auto
+ verbose: true
+ seed: 0
+ deterministic: true
+ single_cls: false
+ rect: false
+ cos_lr: false
+ close_mosaic: 0
+ resume: false
+ amp: true
+ fraction: 1.0
+ profile: false
+ overlap_mask: true
+ mask_ratio: 4
+ dropout: 0.0
+ val: true
+ split: val
+ save_json: false
+ save_hybrid: false
+ conf: null
+ iou: 0.7
+ max_det: 300
+ half: false
+ dnn: false
+ plots: true
+ source: null
+ show: false
+ save_txt: false
+ save_conf: false
+ save_crop: false
+ show_labels: true
+ show_conf: true
+ vid_stride: 1
+ line_width: null
+ visualize: false
+ augment: false
+ agnostic_nms: false
+ classes: null
+ retina_masks: false
+ boxes: true
+ format: torchscript
+ keras: false
+ optimize: false
+ int8: false
+ dynamic: false
+ simplify: false
+ opset: null
+ workspace: 4
+ nms: false
+ lr0: 0.01
+ lrf: 0.01
+ momentum: 0.937
+ weight_decay: 0.0005
+ warmup_epochs: 3.0
+ warmup_momentum: 0.8
+ warmup_bias_lr: 0.1
+ box: 7.5
+ cls: 0.5
+ dfl: 1.5
+ pose: 12.0
+ kobj: 1.0
+ label_smoothing: 0.0
+ nbs: 64
+ hsv_h: 0.015
+ hsv_s: 0.7
+ hsv_v: 0.4
+ degrees: 0.0
+ translate: 0.1
+ scale: 0.5
+ shear: 0.0
+ perspective: 0.0
+ flipud: 0.0
+ fliplr: 0.5
+ mosaic: 1.0
+ mixup: 0.0
+ copy_paste: 0.0
+ cfg: null
+ v5loader: false
+ tracker: botsort.yaml
+ save_dir: runs/detect/train5
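Most of the entries above are Ultralytics defaults that YOLOv8 records at train time; only data and epochs were set explicitly in train.py. As a hedged sketch, individual settings can be overridden per run by passing them as keyword arguments to model.train() (the values below are illustrative, not the ones used in this commit).

from ultralytics import YOLO

model = YOLO("yolov8m.pt")
# Keyword arguments override the corresponding args.yaml entries.
# Example values only; this commit trained with epochs=3 and the defaults above.
model.train(
    data="./datasets/manga109/manga109.yaml",
    epochs=30,
    imgsz=640,
    batch=16,
    patience=50,
)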
train/confusion_matrix.png ADDED
train/confusion_matrix_normalized.png ADDED
weights/best.pt → train/events.out.tfevents.1687303014.jongkook90-desktop.37493.0 RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:10e3090dc0b887a2a795a69ce69441b4051fabec43e62d4a4b32d196cad19c6e
- size 6229166
+ oid sha256:8d43759de4ce7e130ce2d6ba60e04ee594cac8d9af20f1ff36080e303308ad64
+ size 250830
train/labels.jpg ADDED
train/labels_correlogram.jpg ADDED
train/results.csv ADDED
@@ -0,0 +1,4 @@
+ epoch, train/box_loss, train/cls_loss, train/dfl_loss, metrics/precision(B), metrics/recall(B), metrics/mAP50(B), metrics/mAP50-95(B), val/box_loss, val/cls_loss, val/dfl_loss, lr/pg0, lr/pg1, lr/pg2
+ 0, 0.96814, 0.85244, 1.1087, 0.86958, 0.8146, 0.88546, 0.63524, 0.91621, 0.63611, 1.0506, 0.00041588, 0.00041588, 0.00041588
+ 1, 0.87945, 0.62993, 1.0581, 0.89082, 0.84483, 0.90655, 0.66293, 0.88578, 0.56822, 1.0374, 0.00055781, 0.00055781, 0.00055781
+ 2, 0.83749, 0.57467, 1.0371, 0.90332, 0.86365, 0.9226, 0.68896, 0.82432, 0.51203, 1.0026, 0.00042473, 0.00042473, 0.00042473
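The header names in results.csv are padded with spaces, so a quick, hedged way to inspect the per-epoch metrics with pandas (already a dependency of convert.py) is to strip the column names first.

import pandas as pd

# Column names in this results.csv carry leading spaces; strip them before indexing.
df = pd.read_csv("train/results.csv")
df.columns = df.columns.str.strip()
print(df[["epoch", "metrics/mAP50(B)", "metrics/mAP50-95(B)"]])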
train/results.png ADDED
train/val_batch0_labels.jpg ADDED
train/val_batch0_pred.jpg ADDED
train/val_batch1_labels.jpg ADDED
train/val_batch1_pred.jpg ADDED
train/val_batch2_labels.jpg ADDED
train/val_batch2_pred.jpg ADDED
train/weights/best.onnx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a91eec11d8797c89f9db542f263c8d39263d249165964b5ef37beea8fe1ad3ba
+ size 103596340
train/weights/best.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e74ad5b1c48c437da0b78d2318d0a25314fcee635359a239d5c70bd2f8da0ce5
+ size 52011990
{weights → train/weights}/last.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e3a27fcc2c2394ab27786eed38b07fcbabfa06983554e430fc9dd5240f5aa80a
- size 6229166
+ oid sha256:9dc6c9c385a5db8e3b0a1756ca18da650d3677294740d909626ce7cf93d18486
+ size 52011990
train_log.txt CHANGED
@@ -1,25 +1,46 @@
- Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
- 1/3 2.56G 1.26 1.267 1.016 122 640: 100%|██████████| 533/533 [01:08<00:00, 7.75it/s]^
- Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:07<00:00, 3.93it/s]
- all 8519 114053 0.97 0.282 0.306 0.216
-
- Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
- 2/3 3.36G 1.001 0.7182 0.9132 154 640: 100%|██████████| 533/533 [00:59<00:00, 8.92it/s]
- Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:07<00:00, 3.93it/s] all 8519 114053 0.969 0.288 0.31 0.225
-
- Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
- 3/3 3.37G 0.9513 0.6483 0.9023 127 640: 100%|██████████| 533/533 [00:59<00:00, 8.93it/s]
- Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:21<00:00, 3.28it/s]
- all 8519 114053 0.973 0.294 0.323 0.233
- 3 epochs completed in 0.116 hours.
- Optimizer stripped from runs/detect/train3/weights/last.pt, 6.2MB Optimizer stripped from runs/detect/train3/weights/best.pt, 6.2MB
-
- Validating runs/detect/train3/weights/best.pt...
  Ultralytics YOLOv8.0.120 🚀 Python-3.9.7 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24265MiB)
- Model summary (fused): 168 layers, 3006428 parameters, 0 gradients
- Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:19<00:00, 3.37it/s]
- all 8519 114053 0.973 0.294 0.323 0.233
- frame 8519 36 1 0 0.000451 0.000198
- body 8519 403 1 0 0.0344 0.0144
- text 8519 113614 0.919 0.882 0.933 0.686
- Speed: 0.1ms preprocess, 0.4ms inference, 0.0ms loss, 1.0ms postprocess per image
+ Logging results to runs/detect/train5
+ Starting training for 3 epochs...
+
+ Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
+ 1/3 8.35G 0.9681 0.8524 1.109 449 640: 100%|██████████| 533/533 [03:36<00:00, 2.47it/s]
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:08<00:00, 3.89it/s]
+ all 8519 401038 0.87 0.815 0.885 0.635
+
+ Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
+ 2/3 9.16G 0.8795 0.6299 1.058 600 640: 100%|██████████| 533/533 [01:36<00:00, 5.54it/s]
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:08<00:00, 3.90it/s]
+ all 8519 401038 0.891 0.845 0.907 0.663
+
+ Epoch GPU_mem box_loss cls_loss dfl_loss Instances Size
+ 3/3 10.2G 0.8375 0.5747 1.037 590 640: 100%|██████████| 533/533 [01:34<00:00, 5.63it/s]
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [02:00<00:00, 2.21it/s]
+ all 8519 401038 0.903 0.864 0.923 0.689
+
+ 3 epochs completed in 0.191 hours.
+ Optimizer stripped from runs/detect/train5/weights/last.pt, 52.0MB
+ Optimizer stripped from runs/detect/train5/weights/best.pt, 52.0MB
+
+ Validating runs/detect/train5/weights/best.pt...
+ Ultralytics YOLOv8.0.120 🚀 Python-3.9.7 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24265MiB)
+ Model summary (fused): 218 layers, 25842076 parameters, 0 gradients
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 267/267 [01:56<00:00, 2.29it/s]
+ all 8519 401038 0.903 0.864 0.923 0.689
+ frame 8519 82124 0.944 0.972 0.989 0.942
+ face 8519 87174 0.903 0.77 0.873 0.467
+ body 8519 118126 0.852 0.811 0.885 0.642
+ text 8519 113614 0.915 0.9 0.944 0.704
+ Speed: 0.1ms preprocess, 2.1ms inference, 0.0ms loss, 0.8ms postprocess per image
+ Results saved to runs/detect/train5
  Ultralytics YOLOv8.0.120 🚀 Python-3.9.7 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24265MiB)
+ Model summary (fused): 218 layers, 25842076 parameters, 0 gradients
+ val: Scanning /media/jongkook90/Morpho DB/dataset-manga109-ko/yolov8-frame/datasets/manga109/labels/train.cache... 8155 images, 364 backgrounds, 0 corrupt: 100%|██████████| 8519/8519 [00:00<?, ?it/s]
+ Class Images Instances Box(P R mAP50 mAP50-95): 0%| | 2/533 [00:08<43:38, 4.93s/it]WARNING ⚠️ NMS time limit 1.300s exceeded
+ Class Images Instances Box(P R mAP50 mAP50-95): 1%| | 3/533 [00:14<47:58, 5.43s/it]WARNING ⚠️ NMS time limit 1.300s exceeded
+ Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 533/533 [02:06<00:00, 4.22it/s]
+ all 8519 401038 0.904 0.861 0.92 0.688
+ frame 8519 82124 0.944 0.969 0.986 0.941
+ face 8519 87174 0.903 0.769 0.87 0.467
+ body 8519 118126 0.852 0.809 0.882 0.641
+ text 8519 113614 0.916 0.897 0.941 0.704
+ Speed: 0.1ms preprocess, 4.1ms inference, 0.0ms loss, 1.5ms postprocess per image
+ Results saved to runs/detect/val3
weights/README.md DELETED
@@ -1,3 +0,0 @@
- yolov8 3epoch training
-
- TODO: data augmentation, train result check