"""Gradio demo that detects salat postures in an image with Faster R-CNN.

The script builds a torchvision Faster R-CNN (ResNet-50 FPN) detector, loads
fine-tuned weights from a local checkpoint, and serves a small web UI that
draws every sufficiently confident detection on the uploaded image.
"""

import torch
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.transforms import functional as F
from PIL import Image, ImageDraw, ImageFont
import gradio as gr

# Force CPU so the demo also runs on machines without a GPU.
device = torch.device('cpu')

# Label id -> posture name. Id 0 is the detector's background class.
COCO_CLASSES = {
    0: "Background",
    1: "Stand",
    2: "Sit",
    3: "Ruku",
    4: "Sijdah",
}

# Detections whose score is not strictly above this value are not drawn.
SCORE_THRESHOLD = 0.5


def get_model(num_classes):
    """Build a Faster R-CNN ResNet-50 FPN detector with a fresh box head.

    Args:
        num_classes: Number of output classes, including background.

    Returns:
        The detection model with its box predictor replaced by a
        ``FastRCNNPredictor`` sized for ``num_classes``.
    """
    # NOTE(review): recent torchvision releases deprecate ``pretrained`` in
    # favour of ``weights``; the call is kept as-is so behaviour does not
    # change across installed versions -- confirm before migrating.
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False)
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model


# Size the head from the label map so the two cannot drift apart.
model = get_model(num_classes=len(COCO_CLASSES))
# NOTE(review): torch.load unpickles the file; only load checkpoints from a
# trusted source.
model.load_state_dict(
    torch.load("Salatfasterrcnn_resnet50_epoch_3.pth", map_location=device)
)
model.to(device)
model.eval()


def predict(image):
    """Run the detector on ``image`` and draw the confident detections.

    Args:
        image: A PIL image, or ``None`` when no image was supplied.

    Returns:
        An RGB copy of ``image`` with a red box and a ``"<class> (<score>)"``
        caption for every detection scoring above ``SCORE_THRESHOLD``, or
        ``None`` when ``image`` is ``None``.
    """
    # Submitting the form without an upload yields no image; bail out instead
    # of failing on ``None.convert``.
    if image is None:
        return None

    image = image.convert("RGB")
    image_tensor = F.to_tensor(image).unsqueeze(0).to(device)

    with torch.no_grad():
        prediction = model(image_tensor)

    # A single image was passed in, so only the first result is relevant.
    detections = prediction[0]
    # ``tolist`` yields plain Python numbers for the dict lookup and for PIL.
    boxes = detections["boxes"].cpu().tolist()
    labels = detections["labels"].cpu().tolist()
    scores = detections["scores"].cpu().tolist()

    draw = ImageDraw.Draw(image)
    for box, label, score in zip(boxes, labels, scores):
        if score <= SCORE_THRESHOLD:
            continue
        x_min, y_min, x_max, y_max = box
        class_name = COCO_CLASSES.get(label, "Unknown")
        draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=3)
        draw.text((x_min, y_min), f"{class_name} ({score:.2f})", fill="red")

    return image


# Gradio interface
demo = gr.Interface(
    fn=predict,
    inputs=gr.Image(type="pil"),
    outputs=gr.Image(type="pil"),
    title="Salat Posture Detection",
    description="Upload an image to detect salat postures (stand, sit, ruku, sijdah).",
)
demo.launch()