ControlNet-v1-1-Annotators-cpu

Running

App Files Files Community

ControlNet-v1-1-Annotators-cpu / annotator /oneformer /detectron2 /data /datasets /pascal_voc.py

atatakun

Duplicate from atatakun/testapp2

18dd6ad about 1 year ago

raw

history blame

No virus

3.19 kB

	# -*- coding: utf-8 -*-
	# Copyright (c) Facebook, Inc. and its affiliates.

	import numpy as np
	import os
	import xml.etree.ElementTree as ET
	from typing import List, Tuple, Union

	from annotator.oneformer.detectron2.data import DatasetCatalog, MetadataCatalog
	from annotator.oneformer.detectron2.structures import BoxMode
	from annotator.oneformer.detectron2.utils.file_io import PathManager

	__all__ = ["load_voc_instances", "register_pascal_voc"]


	# Default class names used when registering a Pascal VOC split.
	# Order matters: load_voc_instances() uses a name's position in the
	# sequence as its "category_id".
	# fmt: off
	CLASS_NAMES = (
	    "aeroplane", "bicycle", "bird", "boat", "bottle",
	    "bus", "car", "cat", "chair", "cow",
	    "diningtable", "dog", "horse", "motorbike", "person",
	    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
	)
	# fmt: on


	def load_voc_instances(dirname: str, split: str, class_names: Union[List[str], Tuple[str, ...]]):
	    """
	    Load Pascal VOC detection annotations to Detectron2 format.

	    Args:
	        dirname: Contain "Annotations", "ImageSets", "JPEGImages"
	        split (str): one of "train", "test", "val", "trainval"
	        class_names: list or tuple of class names

	    Returns:
	        list[dict]: one dict per image id listed in
	        "<dirname>/ImageSets/Main/<split>.txt", with the keys "file_name",
	        "image_id", "height", "width" and "annotations". Each annotation is a
	        dict with "category_id" (index of the object's name in
	        `class_names`), "bbox" (``[xmin, ymin, xmax, ymax]`` as floats) and
	        "bbox_mode" (``BoxMode.XYXY_ABS``).

	    Raises:
	        ValueError: if an annotated object's name is not in `class_names`.
	    """
	    with PathManager.open(os.path.join(dirname, "ImageSets", "Main", split + ".txt")) as f:
	        # `np.str` was only an alias of the builtin `str` and was removed in
	        # NumPy 1.24, so use `str` directly. `ndmin=1` keeps a split file
	        # holding a single id from collapsing into a 0-d array, which cannot
	        # be iterated over below.
	        fileids = np.loadtxt(f, dtype=str, ndmin=1)

	    # Many small annotation files are read below, so resolve the annotation
	    # directory to a local path once up front.
	    annotation_dirname = PathManager.get_local_path(os.path.join(dirname, "Annotations/"))
	    dicts = []
	    for fileid in fileids:
	        anno_file = os.path.join(annotation_dirname, fileid + ".xml")
	        jpeg_file = os.path.join(dirname, "JPEGImages", fileid + ".jpg")

	        with PathManager.open(anno_file) as f:
	            tree = ET.parse(f)

	        r = {
	            "file_name": jpeg_file,
	            "image_id": fileid,
	            "height": int(tree.findall("./size/height")[0].text),
	            "width": int(tree.findall("./size/width")[0].text),
	        }
	        instances = []

	        for obj in tree.findall("object"):
	            cls = obj.find("name").text
	            # We include "difficult" samples in training.
	            # Based on limited experiments, they don't hurt accuracy.
	            # difficult = int(obj.find("difficult").text)
	            # if difficult == 1:
	            # continue
	            bbox = obj.find("bndbox")
	            bbox = [float(bbox.find(x).text) for x in ["xmin", "ymin", "xmax", "ymax"]]
	            # Original annotations are integers in the range [1, W or H]
	            # Assuming they mean 1-based pixel indices (inclusive),
	            # a box with annotation (xmin=1, xmax=W) covers the whole image.
	            # In coordinate space this is represented by (xmin=0, xmax=W)
	            bbox[0] -= 1.0
	            bbox[1] -= 1.0
	            instances.append(
	                {"category_id": class_names.index(cls), "bbox": bbox, "bbox_mode": BoxMode.XYXY_ABS}
	            )
	        r["annotations"] = instances
	        dicts.append(r)
	    return dicts


	def register_pascal_voc(name, dirname, split, year, class_names=CLASS_NAMES):
	    """
	    Register one Pascal VOC split under `name`.

	    Registers a loader with DatasetCatalog that calls
	    load_voc_instances(dirname, split, class_names) when invoked, then
	    stores thing_classes, dirname, year and split on the MetadataCatalog
	    entry for `name`.

	    Args:
	        name: key used for both DatasetCatalog and MetadataCatalog
	        dirname: dataset directory, passed through to load_voc_instances
	        split: split name, passed through to load_voc_instances
	        year: stored as-is in the metadata
	        class_names: class names; defaults to CLASS_NAMES
	    """

	    def _load_split():
	        # Deferred: annotations are only read when the catalog calls this.
	        return load_voc_instances(dirname, split, class_names)

	    DatasetCatalog.register(name, _load_split)

	    metadata = MetadataCatalog.get(name)
	    metadata.set(thing_classes=list(class_names), dirname=dirname, year=year, split=split)