Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -503,9 +503,6 @@ import traceback
|
|
503 |
# iface.launch()
|
504 |
|
505 |
|
506 |
-
model_yolo = YOLO('yolov8l.pt')
|
507 |
-
|
508 |
-
history_manager = UserHistoryManager()
|
509 |
|
510 |
dog_breeds = ["Afghan_Hound", "African_Hunting_Dog", "Airedale", "American_Staffordshire_Terrier",
|
511 |
"Appenzeller", "Australian_Terrier", "Bedlington_Terrier", "Bernese_Mountain_Dog", "Bichon_Frise",
|
@@ -537,6 +534,8 @@ dog_breeds = ["Afghan_Hound", "African_Hunting_Dog", "Airedale", "American_Staff
|
|
537 |
|
538 |
device_mgr = DeviceManager()
|
539 |
|
|
|
|
|
540 |
class MultiHeadAttention(nn.Module):
|
541 |
|
542 |
def __init__(self, in_dim, num_heads=8):
|
@@ -597,15 +596,18 @@ num_classes = len(dog_breeds)
|
|
597 |
|
598 |
# Initialize base model
|
599 |
model = BaseModel(num_classes=num_classes)
|
600 |
-
|
601 |
# Load model path
|
602 |
model_path = '124_best_model_dog.pth'
|
603 |
-
checkpoint = torch.load(model_path, map_location=device_mgr.
|
604 |
|
605 |
# Load model state
|
606 |
model.load_state_dict(checkpoint['base_model'], strict=False)
|
607 |
model.eval()
|
608 |
|
|
|
|
|
|
|
609 |
# Image preprocessing function
|
610 |
def preprocess_image(image):
|
611 |
# If the image is numpy.ndarray turn into PIL.Image
|
@@ -621,74 +623,59 @@ def preprocess_image(image):
|
|
621 |
|
622 |
return transform(image).unsqueeze(0)
|
623 |
|
|
|
|
|
624 |
async def predict_single_dog(image):
|
625 |
-
"""
|
626 |
-
|
627 |
-
|
628 |
-
|
629 |
-
|
|
|
|
|
|
|
630 |
|
631 |
-
|
632 |
-
|
633 |
-
|
634 |
-
probs = F.softmax(logits, dim=1)
|
635 |
-
|
636 |
-
top5_prob, top5_idx = torch.topk(probs, k=5)
|
637 |
-
breeds = [dog_breeds[idx.item()] for idx in top5_idx[0]]
|
638 |
-
probabilities = [prob.item() for prob in top5_prob[0]]
|
639 |
-
|
640 |
-
sum_probs = sum(probabilities[:3])
|
641 |
-
relative_probs = [f"{(prob/sum_probs * 100):.2f}%" for prob in probabilities[:3]]
|
642 |
-
|
643 |
-
print("\nClassifier Predictions:")
|
644 |
-
for breed, prob in zip(breeds[:5], probabilities[:5]):
|
645 |
-
print(f"{breed}: {prob:.4f}")
|
646 |
-
|
647 |
-
return probabilities[0], breeds[:3], relative_probs
|
648 |
-
|
649 |
-
except RuntimeError as e:
|
650 |
-
if "out of memory" in str(e):
|
651 |
-
logger.warning("GPU memory exceeded, falling back to CPU")
|
652 |
-
device_mgr._current_device = torch.device('cpu')
|
653 |
-
return await predict_single_dog(image)
|
654 |
-
raise e
|
655 |
|
|
|
|
|
656 |
|
657 |
-
|
658 |
-
|
659 |
-
|
660 |
-
|
661 |
-
|
662 |
-
for box in results.boxes:
|
663 |
-
if box.cls == 16: # COCO dataset class for dog is 16
|
664 |
-
xyxy = box.xyxy[0].tolist()
|
665 |
-
confidence = box.conf.item()
|
666 |
-
boxes.append((xyxy, confidence))
|
667 |
-
|
668 |
-
if not boxes:
|
669 |
-
dogs.append((image, 1.0, [0, 0, image.width, image.height]))
|
670 |
-
else:
|
671 |
-
nms_boxes = non_max_suppression(boxes, iou_threshold)
|
672 |
-
|
673 |
-
for box, confidence in nms_boxes:
|
674 |
-
x1, y1, x2, y2 = box
|
675 |
-
w, h = x2 - x1, y2 - y1
|
676 |
-
x1 = max(0, x1 - w * 0.05)
|
677 |
-
y1 = max(0, y1 - h * 0.05)
|
678 |
-
x2 = min(image.width, x2 + w * 0.05)
|
679 |
-
y2 = min(image.height, y2 + h * 0.05)
|
680 |
-
cropped_image = image.crop((x1, y1, x2, y2))
|
681 |
-
dogs.append((cropped_image, confidence, [x1, y1, x2, y2]))
|
682 |
-
|
683 |
-
return dogs
|
684 |
|
685 |
-
|
686 |
-
|
687 |
-
|
688 |
-
|
689 |
-
|
690 |
-
|
691 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
692 |
|
693 |
|
694 |
def non_max_suppression(boxes, iou_threshold):
|
|
|
503 |
# iface.launch()
|
504 |
|
505 |
|
|
|
|
|
|
|
506 |
|
507 |
dog_breeds = ["Afghan_Hound", "African_Hunting_Dog", "Airedale", "American_Staffordshire_Terrier",
|
508 |
"Appenzeller", "Australian_Terrier", "Bedlington_Terrier", "Bernese_Mountain_Dog", "Bichon_Frise",
|
|
|
534 |
|
535 |
device_mgr = DeviceManager()
|
536 |
|
537 |
+
history_manager = UserHistoryManager()
|
538 |
+
|
539 |
class MultiHeadAttention(nn.Module):
|
540 |
|
541 |
def __init__(self, in_dim, num_heads=8):
|
|
|
596 |
|
597 |
# Build the breed classifier and hand it to the device manager for placement.
model = device_mgr.to_device(BaseModel(num_classes=num_classes))

# Checkpoint file holding the trained classifier weights.
model_path = '124_best_model_dog.pth'
checkpoint = torch.load(
    model_path,
    map_location=device_mgr.get_device(),
    weights_only=True,
)

# Restore the weights stored under the 'base_model' key. strict=False lets
# load_state_dict proceed even when checkpoint and model keys do not match
# exactly. The model is then switched to inference mode.
model.load_state_dict(checkpoint['base_model'], strict=False)
model.eval()

# YOLOv8 detector used to find dogs; also passed through the device manager.
model_yolo = device_mgr.to_device(YOLO('yolov8l.pt'))
|
610 |
+
|
611 |
# Image preprocessing function
|
612 |
def preprocess_image(image):
|
613 |
# If the image is numpy.ndarray turn into PIL.Image
|
|
|
623 |
|
624 |
return transform(image).unsqueeze(0)
|
625 |
|
626 |
+
|
627 |
+
@adaptive_gpu(duration=30)
async def predict_single_dog(image):
    """Classify the breed of a single dog image.

    The image is converted with ``preprocess_image``, passed through
    ``device_mgr.to_device`` and fed to the module-level ``model`` with
    gradient tracking disabled. The five highest softmax scores are printed
    to stdout; the three highest are returned.

    Args:
        image: Any input accepted by ``preprocess_image``.

    Returns:
        A tuple ``(top1_probability, top3_breeds, top3_relative)`` where
        ``top1_probability`` is the softmax probability of the best class,
        ``top3_breeds`` is the list of the three best breed names, and
        ``top3_relative`` holds those three probabilities renormalised to
        sum to 100 and formatted as ``"xx.xx%"`` strings.
    """
    batch = device_mgr.to_device(preprocess_image(image))

    with torch.no_grad():
        model_out = model(batch)
        # When the model returns a tuple, its first element is used as logits.
        if isinstance(model_out, tuple):
            logits = model_out[0]
        else:
            logits = model_out
        scores = F.softmax(logits, dim=1)
        best_scores, best_indices = torch.topk(scores, k=5)

    # Row 0 is the only row: preprocess_image adds a batch dimension of one.
    breeds = [dog_breeds[index.item()] for index in best_indices[0]]
    probabilities = [score.item() for score in best_scores[0]]

    # Express the top three as shares of their own combined probability.
    leading = probabilities[:3]
    sum_probs = sum(leading)
    relative_probs = [f"{(prob/sum_probs * 100):.2f}%" for prob in leading]

    print("\nClassifier Predictions:")
    for breed, prob in zip(breeds, probabilities):
        print(f"{breed}: {prob:.4f}")

    return probabilities[0], breeds[:3], relative_probs
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
650 |
|
651 |
+
@adaptive_gpu(duration=30)
async def detect_multiple_dogs(image, conf_threshold=0.3, iou_threshold=0.55):
    """Find every dog in an image and return one padded crop per dog.

    The image is run through ``model_yolo``; detections whose class equals 16
    are kept, filtered again with ``non_max_suppression`` and each surviving
    box is enlarged by 5% of its width/height on every side (clamped to the
    image bounds) before cropping.

    Args:
        image: Image object exposing ``width``, ``height`` and ``crop``
            (presumably a PIL image; confirm against callers).
        conf_threshold: Confidence threshold forwarded to the detector.
        iou_threshold: IoU threshold forwarded to the detector and to
            ``non_max_suppression``.

    Returns:
        A list of ``(cropped_image, confidence, [x1, y1, x2, y2])`` tuples.
        When no dog is detected the list holds a single entry with the
        original image, confidence ``1.0`` and the full-image box.
    """
    detection = model_yolo(image, conf=conf_threshold, iou=iou_threshold)[0]

    # COCO dataset class for dog is 16
    candidates = [
        (found.xyxy[0].tolist(), found.conf.item())
        for found in detection.boxes
        if found.cls == 16
    ]

    if not candidates:
        # Nothing detected: fall back to treating the whole image as one dog.
        return [(image, 1.0, [0, 0, image.width, image.height])]

    dogs = []
    for (left, top, right, bottom), score in non_max_suppression(candidates, iou_threshold):
        # Pad each side by 5% of the box size, without leaving the image.
        pad_x = (right - left) * 0.05
        pad_y = (bottom - top) * 0.05
        left = max(0, left - pad_x)
        top = max(0, top - pad_y)
        right = min(image.width, right + pad_x)
        bottom = min(image.height, bottom + pad_y)
        dogs.append((image.crop((left, top, right, bottom)), score, [left, top, right, bottom]))

    return dogs
|
679 |
|
680 |
|
681 |
def non_max_suppression(boxes, iou_threshold):
|