Anaglyph-Image-Generator / segmentation.py
kevinbjl's picture
Upload 14 files
29fa6f4 verified
# CS5330 Lab 4
# This file includes functions for image segmentation
import torch
import numpy as np
from torchvision import models, transforms
from PIL import Image
# Pretrained DeepLabV3 segmentation network with a ResNet-101 backbone.
# eval() returns the module itself, so the model is built and switched to
# inference mode (no dropout, frozen batch-norm statistics) in one expression.
model = models.segmentation.deeplabv3_resnet101(pretrained=True).eval()

# Preprocessing applied to every input image before it reaches the model:
#   1. ToTensor  - PIL image -> float tensor scaled to [0, 1], channels first
#   2. Normalize - per-channel standardization with the ImageNet mean/std
#                  that torchvision's pretrained models are documented to expect
preprocess = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)
# Extract the person out of the image
def extract_person(image, target_width=800):
    """Cut the person out of *image* and return it on a transparent background.

    The image is run through the module-level DeepLabV3 ``model``. Every
    pixel whose highest-scoring class is "person" is copied fully opaque;
    every other pixel becomes transparent black. The result is then resized
    to ``target_width`` pixels wide while keeping the aspect ratio.

    Args:
        image: A PIL image. It is converted to RGB before segmentation, so
            grayscale, palette and RGBA inputs are accepted as well.
        target_width: Width in pixels of the returned image. Defaults to 800.

    Returns:
        An RGBA ``PIL.Image.Image`` containing only the person pixels, or
        ``None`` when no pixel is classified as a person.
    """
    # Index of "person" in the category list used by torchvision's pretrained
    # segmentation models (the 20 Pascal VOC categories plus background),
    # not a raw COCO category id.
    person_class = 15

    # The normalization step has three channel statistics, so feed it exactly
    # three channels regardless of the mode the caller's image is in.
    rgb_image = image.convert("RGB")
    input_tensor = preprocess(rgb_image).unsqueeze(0)

    # Perform segmentation; gradients are not needed for inference.
    with torch.no_grad():
        output = model(input_tensor)['out'][0]
        output_predictions = output.argmax(0)

    # Boolean (height, width) mask: True where the predicted class is person.
    person_mask = (output_predictions == person_class).cpu().numpy()

    # Handle the case where there's no person detected
    if not person_mask.any():
        return None

    # Build the RGBA cut-out in one vectorized pass instead of a per-pixel
    # Python loop: person pixels keep their colour with alpha 255, everything
    # else is (0, 0, 0, 0).
    pixels = np.asarray(rgb_image)
    colour = np.where(person_mask[..., np.newaxis], pixels, 0).astype(np.uint8)
    alpha = np.where(person_mask, 255, 0).astype(np.uint8)
    extracted_person = Image.fromarray(np.dstack((colour, alpha)))

    # Resize for consistency: fixed width, height follows the aspect ratio.
    # Clamp to 1 so an extremely wide image cannot produce a zero height,
    # which resize() rejects.
    aspect_ratio = image.size[1] / image.size[0]
    target_height = max(1, int(target_width * aspect_ratio))
    extracted_person = extracted_person.resize((target_width, target_height))

    # Return the extracted person (RGBA, transparent background)
    return extracted_person
# ==========Test case==========
# input = Image.open('false_img/ManyPeople.jpg')
# image = extract_person(input)
# if image is None:
# print('Non-person')
# else:
# image.show()