| | import os |
| | import sys |
| | import cv2 |
| | import json |
| | import glob |
| | import argparse |
| | import subprocess |
| | from typing import List, Tuple, Dict, Any |
| |
|
| | import numpy as np |
| | from tqdm import tqdm |
| |
|
| |
|
| | |
def parse_args():
    """Parse CLI arguments for both master and (internal) worker invocations."""
    parser = argparse.ArgumentParser("OWLv2 detection on JPG folders (Top-K per image), multi-GPU.")
    # I/O locations and folder filtering.
    parser.add_argument("--input_dir", type=str, required=True, help="Root that contains subfolders of JPGs; if JPGs are directly under input_dir, it will be treated as a single set.")
    parser.add_argument("--startswith", type=str, default="", help="Filter folder name prefix (or input_dir basename if no subfolders).")
    parser.add_argument("--endswith", type=str, default="", help="Filter folder name ends (or input_dir basename if no subfolders).")
    parser.add_argument("--output_dir", type=str, required=True)
    # Sampling / detection limits.
    parser.add_argument("--frame_stride", type=int, default=1, help="Sample every N-th image within a folder.")
    parser.add_argument("--top_k", type=int, default=5)
    parser.add_argument("--max_frames", type=int, default=0, help="Max processed images per folder; 0 means no limit.")
    parser.add_argument("--num_workers", type=int, default=1, help="#GPUs/#workers")
    # Internal flags used when the master re-invokes this script per GPU.
    parser.add_argument("--worker_idx", type=int, default=-1, help="internal; >=0 means child worker")
    parser.add_argument("--shard_file", type=str, default="", help="internal; JSON with folder paths for this worker")
    # Model locations.
    parser.add_argument("--scenic_root", type=str, default="/gz-data/Memory/owlv2/big_vision")
    parser.add_argument("--checkpoint_path", type=str, default="/gz-data/Memory/owlv2/owl2-b16-960-st-ngrams-curated-ft-lvisbase-ens-cold-weight-05_209b65b")
    return parser.parse_args()
| |
|
| |
|
| | |
| | def _has_jpgs(path: str) -> bool: |
| | exts = ("*.jpg", "*.jpeg", "*.JPG", "*.JPEG") |
| | for pat in exts: |
| | if glob.glob(os.path.join(path, pat)): |
| | return True |
| | return False |
| |
|
| |
|
def iter_image_dirs(input_dir: str, startswith: str, endswith: str) -> List[str]:
    """
    Returns a list of directories to process.
    - If input_dir contains subfolders: return subfolders that contain JPGs
      and whose basename matches the startswith/endswith filters.
    - Else if input_dir itself contains JPGs and its basename matches the
      filters: return [input_dir].
    - Otherwise: return [].
    """
    input_dir = os.path.abspath(input_dir)
    subs = sorted([p for p in glob.glob(os.path.join(input_dir, "*")) if os.path.isdir(p)])

    dirs = [
        d for d in subs
        if os.path.basename(d).startswith(startswith)
        and os.path.basename(d).endswith(endswith)
        and _has_jpgs(d)
    ]
    if dirs:
        return dirs

    # Fallback: treat input_dir itself as a single image set.
    # BUG FIX: the original tested `os.path.basename(d).endswith(endswith)`
    # here, reusing the comprehension variable `d` — a NameError when `subs`
    # is empty, and the wrong directory's name otherwise. Test input_dir's
    # own basename for both filters instead.
    base = os.path.basename(os.path.normpath(input_dir))
    base_ok = base.startswith(startswith) and base.endswith(endswith)
    if base_ok and _has_jpgs(input_dir):
        return [input_dir]
    return []
| |
|
| |
|
def ensure_dir(p: str) -> None:
    """Create directory *p* (including parents); no-op if it already exists."""
    os.makedirs(p, exist_ok=True)
| |
|
| |
|
def draw_single_box(frame_bgr: np.ndarray, box: List[float], color=(0, 255, 0), thickness=2) -> np.ndarray:
    """Return a copy of *frame_bgr* with one [x1, y1, x2, y2] box drawn on it."""
    canvas = frame_bgr.copy()
    x1, y1, x2, y2 = (int(v) for v in box)
    cv2.rectangle(canvas, (x1, y1), (x2, y2), color, thickness)
    return canvas
| |
|
| |
|
def list_images_sorted(folder: str) -> List[str]:
    """Return the full paths of all JPEGs directly inside *folder*, sorted."""
    return sorted(
        path
        for pattern in ("*.jpg", "*.jpeg", "*.JPG", "*.JPEG")
        for path in glob.glob(os.path.join(folder, pattern))
    )
| |
|
| |
|
| | |
def worker_run(args, dir_paths: List[str]):
    """Run OWLv2 objectness detection over every image folder in *dir_paths*.

    For each folder, each processed frame's Top-K detections are drawn (one
    box per image copy) and saved to
    ``<output_dir>/<folder_name>/object_<rank>/<frame_name>``.

    Heavy ML imports happen inside this function (not at module top) so the
    master process never loads TF/JAX; each spawned worker pays the cost once.
    """
    import sys as _sys
    if args.scenic_root not in _sys.path:
        _sys.path.append(args.scenic_root)

    # Hide GPUs from TensorFlow so JAX gets all device memory.
    import tensorflow as tf
    tf.config.experimental.set_visible_devices([], "GPU")

    from scenic.projects.owl_vit import configs
    from scenic.projects.owl_vit import models
    import jax
    import functools
    import owlv2_helper as helper

    class OWLv2Objectness:
        # Wraps the scenic OWL-ViT module for class-agnostic detection:
        # no text queries — just the objectness head plus the box head.

        def __init__(self, top_k: int = 5):
            self.top_k = top_k
            self.config = configs.owl_v2_clip_b16.get_config(init_mode="canonical_checkpoint")
            # Override the canonical checkpoint when one was given on the CLI.
            if args.checkpoint_path:
                self.config.init_from.checkpoint_path = os.path.abspath(args.checkpoint_path)
                print("[OWLv2] checkpoint_path =", self.config.init_from.checkpoint_path)

            print("cp path:", self.config.init_from.checkpoint_path)

            self.module = models.TextZeroShotDetectionModule(
                body_configs=self.config.model.body,
                objectness_head_configs=self.config.model.objectness_head,
                normalize=self.config.model.normalize,
                box_bias=self.config.model.box_bias,
            )
            self.variables = self.module.load_variables(self.config.init_from.checkpoint_path)

            # jit-compile the three forward passes once; reused for every frame.
            self.image_embedder = jax.jit(
                functools.partial(self.module.apply, self.variables, train=False, method=self.module.image_embedder)
            )
            self.objectness_predictor = jax.jit(
                functools.partial(self.module.apply, self.variables, method=self.module.objectness_predictor)
            )
            self.box_predictor = jax.jit(
                functools.partial(self.module.apply, self.variables, method=self.module.box_predictor)
            )

        def detect(self, image_bgr: np.ndarray) -> List[Tuple[List[float], float]]:
            """Return (box, objectness_score) pairs for one BGR frame.

            Boxes are [x1, y1, x2, y2] in pixel coordinates of the input
            frame (assuming helper.rescale_detection_box maps the model's
            raw boxes back to image space — confirm against owlv2_helper).
            """
            image_rgb = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
            processed = helper.preprocess_images([image_rgb], self.config.dataset_configs.input_size)[0]
            feature_map = self.image_embedder(processed[None, ...])
            # Flatten the spatial grid into a token sequence for the heads.
            b, h, w, d = feature_map.shape
            image_features = feature_map.reshape(b, h * w, d)

            obj_logits = self.objectness_predictor(image_features)["objectness_logits"]
            raw_boxes = self.box_predictor(image_features=image_features, feature_map=feature_map)["pred_boxes"]

            # Drop the batch dimension and move to host numpy.
            obj = np.array(obj_logits[0], dtype=np.float32)
            raw_boxes = np.array(raw_boxes[0], dtype=np.float32)
            boxes = helper.rescale_detection_box(raw_boxes, image_rgb)

            if len(obj) == 0:
                return []

            # k-th largest logit serves as the score cutoff for Top-K.
            k = min(self.top_k, len(obj))
            thresh = np.partition(obj, -k)[-k]

            filtered: List[Tuple[List[float], float]] = []
            H, W = image_rgb.shape[:2]
            for box, score in zip(boxes, obj):
                if score < thresh:
                    continue
                # Reject degenerate sizes (helper-defined thresholds).
                if helper.too_small(box) or helper.too_large(box, image_rgb):
                    continue
                # Clamp coordinates into image bounds.
                x1, y1, x2, y2 = box
                x1 = max(0, min(float(x1), W - 1))
                y1 = max(0, min(float(y1), H - 1))
                x2 = max(0, min(float(x2), W - 1))
                y2 = max(0, min(float(y2), H - 1))
                filtered.append(([x1, y1, x2, y2], float(score)))

            # Overlap removal returns boxes only, so scores are re-attached
            # below by nearest-coordinate matching.
            kept_boxes = helper.remove_overlapping_bboxes([b for b, _ in filtered])

            def _match_score(bb: List[float]) -> float:
                # Reuse the score of the filtered box closest to bb (L1 distance).
                arr = np.array([b for b, _ in filtered], dtype=np.float32)
                idx = int(np.argmin(np.abs(arr - np.array(bb, dtype=np.float32)).sum(axis=1)))
                return filtered[idx][1]

            return [(bb, _match_score(bb)) for bb in kept_boxes]

    detector = OWLv2Objectness(top_k=args.top_k)

    for dpath in tqdm(dir_paths, desc=f"Worker{args.worker_idx}", unit="set"):
        stem = os.path.basename(os.path.normpath(dpath))
        images = list_images_sorted(dpath)
        if not images:
            print(f"[WARN][w{args.worker_idx}] No JPGs under: {dpath}")
            continue

        saved_cnt = 0
        pbar = tqdm(total=len(images), desc=f"{stem}[w{args.worker_idx}]", unit="img", leave=False)

        for idx, ipath in enumerate(images):
            pbar.update(1)
            # Temporal subsampling within the folder.
            if args.frame_stride > 1 and (idx % args.frame_stride) != 0:
                continue

            frame = cv2.imread(ipath, cv2.IMREAD_COLOR)
            if frame is None:
                print(f"[WARN][w{args.worker_idx}] Cannot read: {ipath}")
                continue

            boxes_scores = detector.detect(frame)
            if boxes_scores:
                boxes_scores = sorted(boxes_scores, key=lambda x: x[1], reverse=True)[:args.top_k]

            # One output subfolder per detection rank:
            #   <output_dir>/<stem>/object_<i>/<frame_name>
            fname = os.path.basename(ipath)
            for i, (box, score) in enumerate(boxes_scores):
                out_dir = os.path.join(args.output_dir, stem, f"object_{i}")
                ensure_dir(out_dir)
                vis = draw_single_box(frame, box, color=(0, 255, 0), thickness=2)
                cv2.imwrite(os.path.join(out_dir, fname), vis)

            # NOTE(review): saved_cnt counts processed frames, including frames
            # with zero detections — --max_frames limits processed, not saved.
            saved_cnt += 1
            if args.max_frames and saved_cnt >= args.max_frames:
                break

        pbar.close()
| |
|
| |
|
| | |
def main():
    """Entry point.

    Runs in one of two modes:
    - Worker mode (``--worker_idx >= 0``): load the folder list from
      ``--shard_file`` and process it with :func:`worker_run`.
    - Master mode: discover matching image folders, shard them round-robin
      across ``--num_workers`` subprocesses, pin each to one GPU via
      ``CUDA_VISIBLE_DEVICES``, and wait for completion.
    """
    args = parse_args()

    # --- Worker mode: process only the folders listed in the shard file. ---
    if args.worker_idx >= 0:
        if not args.shard_file or not os.path.exists(args.shard_file):
            raise RuntimeError("Worker requires --shard_file with JSON list of folder paths.")
        with open(args.shard_file, "r", encoding="utf-8") as f:
            dir_paths = json.load(f)
        worker_run(args, dir_paths)
        return

    # --- Master mode ---
    dir_paths = iter_image_dirs(args.input_dir, args.startswith, args.endswith)
    if not dir_paths:
        print(f"[INFO] No JPG folders (or JPGs) starting with '{args.startswith}' and ending with '{args.endswith}' under {args.input_dir}")
        return

    # Round-robin sharding of folders across workers.
    num_workers = max(1, int(args.num_workers))
    shards: List[List[str]] = [[] for _ in range(num_workers)]
    for i, d in enumerate(dir_paths):
        shards[i % num_workers].append(d)

    procs = []
    tmp_dir = os.path.join(args.output_dir, "_shards_tmp")
    ensure_dir(tmp_dir)

    for w in range(num_workers):
        shard_path = os.path.join(tmp_dir, f"shard_{w}.json")
        with open(shard_path, "w", encoding="utf-8") as f:
            json.dump(shards[w], f, ensure_ascii=False, indent=0)

        # Pin each worker to a single GPU.
        env = os.environ.copy()
        env["CUDA_VISIBLE_DEVICES"] = str(w)

        cmd = [
            sys.executable, __file__,
            "--input_dir", args.input_dir,
            "--startswith", args.startswith,
            # FIX: --endswith was previously not forwarded to workers.
            # Harmless today (workers read the shard file), but forwarded now
            # so the child sees the same configuration as the master.
            "--endswith", args.endswith,
            "--output_dir", args.output_dir,
            "--frame_stride", str(args.frame_stride),
            "--top_k", str(args.top_k),
            "--max_frames", str(args.max_frames),
            "--num_workers", str(num_workers),
            "--worker_idx", str(w),
            "--shard_file", shard_path,
            "--scenic_root", args.scenic_root,
            "--checkpoint_path", args.checkpoint_path,
        ]
        print(f"[Master] Launch worker {w}: GPU={env['CUDA_VISIBLE_DEVICES']} folders={len(shards[w])}")
        procs.append(subprocess.Popen(cmd, env=env))

    # Wait for all workers; any nonzero exit code makes rc nonzero.
    rc = 0
    for p in procs:
        p.wait()
        rc |= p.returncode

    if rc != 0:
        print("[Master] Some workers failed. Return code:", rc)
    else:
        print("[Master] All workers done. Output:", args.output_dir)
| |
|
| |
|
# Script entry point: dispatches to master or worker mode via main().
if __name__ == "__main__":
    main()