import os

import torch
from torch import fft
from tqdm import tqdm

import model.utils as utils
from model.vgg19 import VGG19
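
# Summary: synthesizes textures by matching Gram matrices of VGG19 feature maps between a texture exemplar
# and a noise-initialized output image, with an additional Fourier-spectrum constraint, optimized via L-BFGS.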
class TextureSynthesisCNN:
    def __init__(self, tex_exemplar_path):
        """
        tex_exemplar_path: path to the ideal texture image w.r.t. which we synthesize our textures
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tex_exemplar_name = os.path.splitext(os.path.basename(tex_exemplar_path))[0]

        # init VGGs
        vgg_exemplar = VGG19(freeze_weights=True)         # vgg to generate the ideal feature maps
        self.vgg_synthesis = VGG19(freeze_weights=False)  # vgg whose weights will be trained

        # calculate and save the gram matrices for the texture exemplar once (as this does not change)
        self.tex_exemplar_image = utils.load_image_tensor(tex_exemplar_path).to(self.device)  # image path -> image Tensor
        self.gram_matrices_ideal = vgg_exemplar(self.tex_exemplar_image).get_gram_matrices()

        # set up the initial random noise image which the network will optimize
        self.output_image = torch.sigmoid(torch.randn_like(self.tex_exemplar_image)).to(self.device)  # sigmoid keeps values between 0 and 1
        self.output_image.requires_grad = True  # True so that the random noise image can be optimized
        self.LBFGS = torch.optim.LBFGS([self.output_image])

        self.layer_weights = [10**9] * len(vgg_exemplar.output_layers)  # output layer weights as per the paper
        self.beta = 10**5  # beta as per the paper
        self.losses = []
def synthesize_texture(self, num_epochs=250, display_when_done=False): | |
""" | |
- Idea: Each time the optimizer starts off from a random noise image, the network optimizes/synthesizes | |
the original tex exemplar in a slightly different way - i.e. introduce variation in the synthesis. | |
- Can be called multiple times to generate different texture variations of the tex exemplar this model holds | |
- IMPT: resets the output_image to random noise each time this is called | |
""" | |
self.losses = [] | |
# reset output image to random noise | |
self.output_image = torch.sigmoid(torch.randn_like(self.tex_exemplar_image)).to(self.device) | |
self.output_image.requires_grad = True | |
self.LBFGS = torch.optim.LBFGS([self.output_image]) # update LBFGS to hold the new output image | |
synthesized_texture = self.optimize(num_epochs=num_epochs) | |
if display_when_done: | |
utils.display_image_tensor(synthesized_texture) | |
return synthesized_texture | |
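
    # Example (sketch): because output_image is re-initialized to fresh noise on every call, repeated calls
    # yield different variations of the same exemplar (the path below is hypothetical):
    #   model = TextureSynthesisCNN(tex_exemplar_path="textures/pebbles.png")
    #   variation_a = model.synthesize_texture(num_epochs=250)
    #   variation_b = model.synthesize_texture(num_epochs=250)  # a different synthesis of the same texture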

    def optimize(self, num_epochs=250):
        """
        Perform num_epochs steps of the L-BFGS algorithm.
        """
        progress_bar = tqdm(total=num_epochs, desc="Optimizing...")
        epoch_offset = len(self.losses)

        for epoch in range(num_epochs):
            # LBFGS expects the loss to be computed inside a closure; step() returns the loss from the
            # closure's first evaluation, so reuse it for logging instead of running an extra forward pass
            epoch_loss = self.LBFGS.step(self.LBFGS_closure).item()
            progress_bar.update(1)
            progress_bar.set_description(f"Loss @ Epoch {epoch_offset + epoch + 1} - {epoch_loss} ")
            self.losses.append(epoch_loss)

        return self.output_image.detach().cpu()

    def LBFGS_closure(self):
        """
        Closure for LBFGS: passes the current output_image through vgg_synthesis, computes the predicted
        gram matrices, and uses them to compute the loss for the network.
        Note: torch.optim.LBFGS may call this closure multiple times per step() to re-evaluate the loss.
        """
        self.LBFGS.zero_grad()
        loss = self.get_loss()
        loss.backward()
        return loss

    def get_loss(self):
        """
        CNN loss: generates the feature maps for the current synthesized output image and compares them against
        the ideal feature maps to compute a loss E_l at each layer l. The weighted E_l's are summed to give the
        total CNN loss.
        Spectrum loss: projects the synthesized texture onto the texture exemplar in the Fourier domain to
        enforce the spectrum constraint as per the paper.
        Overall loss = loss_cnn + beta * loss_spec
        """
        # calculate the spectrum constraint loss using the current output_image and tex_exemplar_image
        # - projects image I_hat (tex_synth) onto image I (tex_exemplar) and returns I_proj (equation as per the paper)
        I_hat = utils.get_grayscale(self.output_image)
        I_fourier = fft.fft2(utils.get_grayscale(self.tex_exemplar_image))
        I_hat_fourier = fft.fft2(I_hat)
        I_fourier_conj = torch.conj(I_fourier)
        epsilon = 10e-12  # epsilon to avoid division by zero and nan values
        I_proj = fft.ifft2((I_hat_fourier * I_fourier_conj) / (torch.abs(I_hat_fourier * I_fourier_conj) + epsilon) * I_fourier)
        loss_spec = (0.5 * (I_hat - I_proj) ** 2.).sum().real

        # get the gram matrices for the synthesized output_image by passing it through the second vgg network
        gram_matrices_pred = self.vgg_synthesis(self.output_image).get_gram_matrices()

        # calculate the cnn loss: w1*E1 + w2*E2 + ... + wl*El
        loss_cnn = 0.
        for i in range(len(self.layer_weights)):
            # E_l = w_l * ||G_ideal_l - G_pred_l||^2
            E = self.layer_weights[i] * ((self.gram_matrices_ideal[i] - gram_matrices_pred[i]) ** 2.).sum()
            loss_cnn += E

        return loss_cnn + (self.beta * loss_spec)

    def save_textures(self, output_dir="./results/", display_when_done=False):
        """
        Saves (and optionally displays) the current tex_exemplar_image and output_image tensors that this model
        holds into the results directory (creating it if it does not yet exist).
        """
        tex_exemplar = utils.save_image_tensor(self.tex_exemplar_image.cpu(),
                                               output_dir=output_dir,
                                               image_name=f"exemplar_{self.tex_exemplar_name}.png")
        tex_synth = utils.save_image_tensor(self.output_image.detach().cpu(),
                                            output_dir=output_dir,
                                            image_name=f"synth_{self.tex_exemplar_name}.png")
        if display_when_done:
            tex_exemplar.show()
            print()
            tex_synth.show()
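
# Example usage (a minimal sketch; the exemplar path below is a placeholder, adjust it to a real texture image):
if __name__ == "__main__":
    synthesizer = TextureSynthesisCNN(tex_exemplar_path="textures/pebbles.png")  # hypothetical path
    texture = synthesizer.synthesize_texture(num_epochs=250, display_when_done=False)
    synthesizer.save_textures(output_dir="./results/", display_when_done=True)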