3DFuse

Build error

App Files Files Community

3DFuse / lora_dataset.py

jyseo

first commit

d661b19 almost 2 years ago

raw

history blame

7.11 kB

	import random
	from pathlib import Path
	from typing import Dict, List, Optional, Tuple, Union

	from PIL import Image
	from torch import zeros_like
	from torch.utils.data import Dataset
	from torchvision import transforms
	import glob
	from lora_diffusion.preprocess_files import face_mask_google_mediapipe

	OBJECT_TEMPLATE = [
	"a photo of a {}",
	"a rendering of a {}",
	"a cropped photo of the {}",
	"the photo of a {}",
	"a photo of a clean {}",
	"a photo of a dirty {}",
	"a dark photo of the {}",
	"a photo of my {}",
	"a photo of the cool {}",
	"a close-up photo of a {}",
	"a bright photo of the {}",
	"a cropped photo of a {}",
	"a photo of the {}",
	"a good photo of the {}",
	"a photo of one {}",
	"a close-up photo of the {}",
	"a rendition of the {}",
	"a photo of the clean {}",
	"a rendition of a {}",
	"a photo of a nice {}",
	"a good photo of a {}",
	"a photo of the nice {}",
	"a photo of the small {}",
	"a photo of the weird {}",
	"a photo of the large {}",
	"a photo of a cool {}",
	"a photo of a small {}",
	]

	STYLE_TEMPLATE = [
	"a painting in the style of {}",
	"a rendering in the style of {}",
	"a cropped painting in the style of {}",
	"the painting in the style of {}",
	"a clean painting in the style of {}",
	"a dirty painting in the style of {}",
	"a dark painting in the style of {}",
	"a picture in the style of {}",
	"a cool painting in the style of {}",
	"a close-up painting in the style of {}",
	"a bright painting in the style of {}",
	"a cropped painting in the style of {}",
	"a good painting in the style of {}",
	"a close-up painting in the style of {}",
	"a rendition in the style of {}",
	"a nice painting in the style of {}",
	"a small painting in the style of {}",
	"a weird painting in the style of {}",
	"a large painting in the style of {}",
	]

	NULL_TEMPLATE = ["{}"]

	TEMPLATE_MAP = {
	"object": OBJECT_TEMPLATE,
	"style": STYLE_TEMPLATE,
	"null": NULL_TEMPLATE,
	}


	def _randomset(lis):
	ret = []
	for i in range(len(lis)):
	if random.random() < 0.5:
	ret.append(lis[i])
	return ret


	def _shuffle(lis):

	return random.sample(lis, len(lis))


	def _get_cutout_holes(
	height,
	width,
	min_holes=8,
	max_holes=32,
	min_height=16,
	max_height=128,
	min_width=16,
	max_width=128,
	):
	holes = []
	for _n in range(random.randint(min_holes, max_holes)):
	hole_height = random.randint(min_height, max_height)
	hole_width = random.randint(min_width, max_width)
	y1 = random.randint(0, height - hole_height)
	x1 = random.randint(0, width - hole_width)
	y2 = y1 + hole_height
	x2 = x1 + hole_width
	holes.append((x1, y1, x2, y2))
	return holes


	def _generate_random_mask(image):
	mask = zeros_like(image[:1])
	holes = _get_cutout_holes(mask.shape[1], mask.shape[2])
	for (x1, y1, x2, y2) in holes:
	mask[:, y1:y2, x1:x2] = 1.0
	if random.uniform(0, 1) < 0.25:
	mask.fill_(1.0)
	masked_image = image * (mask < 0.5)
	return mask, masked_image


	class PivotalTuningDatasetCapation(Dataset):
	"""
	A dataset to prepare the instance and class images with the prompts for fine-tuning the model.
	It pre-processes the images and the tokenizes prompts.
	"""

	def __init__(
	self,
	images,
	caption,
	tokenizer,
	token_map: Optional[dict] = None,
	use_template: Optional[str] = None,
	size=512,
	h_flip=True,
	color_jitter=False,
	resize=True,
	use_mask_captioned_data=False,
	use_face_segmentation_condition=False,
	train_inpainting=False,
	blur_amount: int = 70,
	):
	self.size = size
	self.tokenizer = tokenizer
	self.resize = resize
	self.train_inpainting = train_inpainting

	assert not (
	use_mask_captioned_data and use_template
	), "Can't use both mask caption data and template."

	# Prepare the instance images
	# self.instance_images_path = None
	self.images = images
	self.captions = [caption] * len(images)

	self.use_mask = use_face_segmentation_condition or use_mask_captioned_data
	self.use_mask_captioned_data = use_mask_captioned_data

	self.num_instance_images = len(self.images)
	self.token_map = token_map

	self.use_template = use_template
	if use_template is not None:
	self.templates = TEMPLATE_MAP[use_template]

	self._length = self.num_instance_images

	self.h_flip = h_flip
	self.image_transforms = transforms.Compose(
	[
	transforms.Resize(
	size, interpolation=transforms.InterpolationMode.BILINEAR
	)
	if resize
	else transforms.Lambda(lambda x: x),
	transforms.ColorJitter(0.1, 0.1)
	if color_jitter
	else transforms.Lambda(lambda x: x),
	transforms.CenterCrop(size),
	transforms.ToTensor(),
	transforms.Normalize([0.5], [0.5]),
	]
	)

	self.blur_amount = blur_amount

	def __len__(self):
	return self._length

	def __getitem__(self, index):

	example = {}
	instance_image = self.images[index % self.num_instance_images]
	if not instance_image.mode == "RGB":
	instance_image = instance_image.convert("RGB")
	example["instance_images"] = self.image_transforms(instance_image)

	if self.train_inpainting:
	(
	example["instance_masks"],
	example["instance_masked_images"],
	) = _generate_random_mask(example["instance_images"])

	if self.use_template:
	assert self.token_map is not None

	input_tok = list(self.token_map.values())[0]

	text = random.choice(self.templates).format(input_tok)

	else:
	text = self.captions[index % self.num_instance_images].strip()

	if self.token_map is not None:
	for token, value in self.token_map.items():
	text = text.replace(token, value)

	print(text)

	if self.use_mask:
	example["mask"] = (
	self.image_transforms(
	Image.open(self.mask_path[index % self.num_instance_images])
	)
	* 0.5
	+ 1.0
	)

	if self.h_flip and random.random() > 0.5:
	hflip = transforms.RandomHorizontalFlip(p=1)

	example["instance_images"] = hflip(example["instance_images"])
	if self.use_mask:
	example["mask"] = hflip(example["mask"])

	example["instance_prompt_ids"] = self.tokenizer(
	text,
	padding="do_not_pad",
	truncation=True,
	max_length=self.tokenizer.model_max_length,
	).input_ids

	return example