lang-ground / langground /localizer.py
JunJiaGuo's picture
Upload folder using huggingface_hub
dd5b9d2 verified
from collections import defaultdict
from pathlib import Path
import torch
from transformers import Owlv2Processor, Owlv2ForObjectDetection
from PIL import Image
from pathlib import Path
from ultralytics import YOLO
def build_localizer(model_name):
if model_name == "owl":
return OWL()
elif model_name == "yolo":
return YOLO11()
else:
raise ValueError(f"Unknown model name: {model_name}")
class OWL:
def __init__(self):
model_name = "google/owlv2-large-patch14-ensemble"
self.processor = Owlv2Processor.from_pretrained(model_name)
self.model = Owlv2ForObjectDetection.from_pretrained(model_name).to("cuda")
self.model.eval()
self.objects_f = Path(__file__).parent / "objs" / "owl.txt"
self.objects = [line.strip() for line in self.objects_f.open().readlines()]
self.device = "cuda"
def localize(self, image, threshold=0.5):
image = Image.fromarray(image)
final = defaultdict(list)
with torch.inference_mode():
inputs = self.processor(text=self.objects, images=[image], return_tensors="pt").to(self.device)
outputs = self.model(**inputs)
target_sizes = torch.Tensor([image.size[::-1]]).to(self.device)
result = self.processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=threshold)[0]
boxes, scores, labels = result["boxes"], result["scores"], result["labels"]
for box, score, label in zip(boxes, scores, labels):
final[self.objects[label]].append(box)
return final
class YOLO11:
def __init__(self):
model_name = "yolo11m.pt"
self.model = YOLO(model_name)
self.objects_f = Path(__file__).parent / "objs" / "yolo.txt"
self.objects = [line.strip() for line in self.objects_f.open().readlines()]
def localize(self, image, threshold=0.5):
result = self.model(image, conf=threshold)[0]
boxes = result.boxes
bbox_ids = boxes.cls.cpu().numpy().astype(int)
boxes_xyxy = boxes.xyxy.cpu().numpy()
final = defaultdict(list)
for label, box in zip(bbox_ids, boxes_xyxy):
final[self.objects[label]].append(box)
return final