# This model was built by adding custom data to the model at the link below:
# https://huggingface.co/TahaDouaji/detr-doc-table-detection
#
# Code example: run the detector on one image, draw every detection whose
# score passes the threshold (red box plus a "label: confidence" caption),
# and save the annotated image as a JPEG.
import os

import cv2
import torch
from PIL import Image, ImageDraw, ImageFont
from transformers import DetrImageProcessor, DetrForObjectDetection

model = DetrForObjectDetection.from_pretrained("lms7127/table_detr_10ep")
processor = DetrImageProcessor.from_pretrained("lms7127/table_detr_10ep")

# Caption font. The path points into the OpenCV package directory; that file
# may be absent in some installations, so fall back to Pillow's built-in font
# instead of crashing. Loaded once here because it is the same for every box.
font_path = os.path.join(cv2.__path__[0], 'qt', 'fonts', 'DejaVuSans.ttf')
font_size = 50
try:
    font = ImageFont.truetype(font_path, font_size)
except OSError:
    font = ImageFont.load_default()

# Image to process and where to write the annotated result.
image_path = '/path/to/image'
save_path = "/path/to/save"

# Normalise to RGB so the final JPEG save also works for inputs that carry
# an alpha channel or a palette (JPEG cannot store those modes).
img = Image.open(image_path).convert("RGB")

inputs = processor(images=img, return_tensors="pt")
with torch.no_grad():  # inference only: no gradient tracking needed
    outputs = model(**inputs)

# Convert the raw outputs (bounding boxes and class logits) into boxes in
# image coordinates, keeping only detections with score > 0.7.
# PIL reports (width, height) while target_sizes takes (height, width).
width, height = img.size
postprocessed_outputs = processor.post_process_object_detection(
    outputs,
    target_sizes=[(height, width)],
    threshold=0.7,
)
results = postprocessed_outputs[0]

draw = ImageDraw.Draw(img)
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    class_label = model.config.id2label[label.item()]
    confidence = round(score.item(), 3)

    # Bounding box of the detection.
    draw.rectangle(box, outline="red", width=5)

    # Caption. Measure it with the SAME font it is drawn with, otherwise the
    # red background does not match the rendered text. textbbox replaces
    # textsize, which no longer exists in Pillow >= 10.
    text = f"{class_label}: {confidence}"
    _, _, _, text_height = draw.textbbox((0, 0), text, font=font)

    # Place the caption just above the box; clamp at the top edge so it stays
    # inside the image when the box touches the top.
    text_location = (box[0], max(box[1] - text_height - 4, 0))
    draw.rectangle(draw.textbbox(text_location, text, font=font), fill="red")
    draw.text(text_location, text, fill="white", font=font)

img.save(save_path, "JPEG")