Space status: Runtime error

Commit a4d851a ("initial commit")
Parent(s): cc7aecd

Files changed:
- .gitignore +1 -0
- app.py +31 -0
- images/blotchy_0025.png +0 -0
- images/blotchy_0027.png +0 -0
- images/cracked_0080.png +0 -0
- images/scenery.png +0 -0
- model/__init__.py +0 -0
- model/main.py +13 -0
- model/model.py +123 -0
- model/utils.py +66 -0
- model/vgg19.py +56 -0
- requirements.txt +7 -0
.gitignore
ADDED
@@ -0,0 +1 @@
.DS_Store
app.py
ADDED
@@ -0,0 +1,31 @@
import gradio as gr
import os
from model.model import TextureSynthesisCNN
from model.utils import convert_tensor_to_PIL_image


def image_mod(image):  # note: unused leftover helper, not wired into the interface below
    return image.rotate(45)


def synth_image(image):
    synthesizer = TextureSynthesisCNN(tex_exemplar_image=image)
    output_tensor = synthesizer.synthesize_texture(num_epochs=10)
    output_image = convert_tensor_to_PIL_image(output_tensor)
    return output_image


demo = gr.Interface(
    fn=synth_image,
    inputs=[gr.Image(type="numpy")],
    outputs=[gr.Image(type="pil")],
    flagging_options=["blurry", "incorrect"],
    examples=[
        os.path.join(os.path.dirname(__file__), "images/blotchy_0025.png"),
        os.path.join(os.path.dirname(__file__), "images/blotchy_0027.png"),
        os.path.join(os.path.dirname(__file__), "images/cracked_0080.png"),
        os.path.join(os.path.dirname(__file__), "images/scenery.png"),
    ],
)

if __name__ == "__main__":
    demo.launch()
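For a quick local check, the handler can be exercised without the Gradio UI by calling synth_image directly. A minimal sketch, assuming the dependencies from requirements.txt are installed and the bundled example images are present; the output filename is illustrative:

    # hypothetical smoke test: feed one bundled exemplar straight to the handler
    from skimage import io
    from app import synth_image

    exemplar = io.imread("images/blotchy_0025.png")  # numpy array, matching gr.Image(type="numpy")
    result = synth_image(exemplar)                   # PIL.Image, as declared by outputs=[gr.Image(type="pil")]
    result.save("synth_preview.png")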
images/blotchy_0025.png
ADDED

images/blotchy_0027.png
ADDED

images/cracked_0080.png
ADDED

images/scenery.png
ADDED

model/__init__.py
ADDED
File without changes
model/main.py
ADDED
@@ -0,0 +1,13 @@
from model.model import TextureSynthesisCNN
from skimage import io


def main():
    # the constructor takes an image array, not a path, so load the exemplar first
    synthesizer = TextureSynthesisCNN(tex_exemplar_image=io.imread("data/cracked_0063.png"))
    synthesizer.synthesize_texture(num_epochs=10)
    # synthesizer.optimize(num_epochs=500)  # can call this on an existing model object to continue optimization
    synthesizer.save_textures(output_dir="./results/",  # directory is created automatically if not found
                              display_when_done=True)  # saves exemplar and synth into the output_dir folder


if __name__ == '__main__':
    main()
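Since synthesize_texture resets the output image to fresh noise while optimize continues from the current state, a second pass can refine the same synthesis. A minimal sketch, reusing the exemplar path from the script above:

    # synthesize once from noise, then keep refining the same output image
    from skimage import io
    from model.model import TextureSynthesisCNN

    synthesizer = TextureSynthesisCNN(tex_exemplar_image=io.imread("data/cracked_0063.png"))
    first_pass = synthesizer.synthesize_texture(num_epochs=10)  # starts from fresh random noise
    refined = synthesizer.optimize(num_epochs=50)               # continues from where first_pass ended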
model/model.py
ADDED
@@ -0,0 +1,123 @@
import torch
from torch import fft
from model.vgg19 import VGG19
from tqdm import tqdm
import model.utils as utils


class TextureSynthesisCNN:
    def __init__(self, tex_exemplar_image):
        """
        tex_exemplar_image: ideal texture image w.r.t. which we are synthesizing our textures
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.tex_exemplar_name = "texture"  # default name used by save_textures (path-based naming no longer applies since the constructor takes an image, not a path)

        # init VGGs
        vgg_exemplar = VGG19(freeze_weights=True)  # vgg to generate ideal feature maps
        self.vgg_synthesis = VGG19(freeze_weights=False)  # vgg run on the synthesized image

        # calculate and save gram matrices for the texture exemplar once (as this does not change)
        self.tex_exemplar_image = utils.load_image_tensor(tex_exemplar_image).to(self.device)  # image array -> image Tensor
        self.gram_matrices_ideal = vgg_exemplar(self.tex_exemplar_image).get_gram_matrices()

        # set up the initial random noise image output which the network will optimize
        self.output_image = torch.sigmoid(torch.randn_like(self.tex_exemplar_image)).to(self.device)  # sigmoid to keep values b/w 0 and 1
        self.output_image.requires_grad = True  # so that the random noise image can be optimized

        self.LBFGS = torch.optim.LBFGS([self.output_image])
        self.layer_weights = [10**9] * len(vgg_exemplar.output_layers)  # output layer weights as per paper
        self.beta = 10**5  # beta as per paper
        self.losses = []

    def synthesize_texture(self, num_epochs=250, display_when_done=False):
        """
        - Idea: each time the optimizer starts off from a random noise image, the network synthesizes
          the tex exemplar in a slightly different way, i.e. it introduces variation into the synthesis.
        - Can be called multiple times to generate different texture variations of the tex exemplar this model holds.
        - IMPT: resets the output_image to random noise each time this is called.
        """
        self.losses = []

        # reset output image to random noise
        self.output_image = torch.sigmoid(torch.randn_like(self.tex_exemplar_image)).to(self.device)
        self.output_image.requires_grad = True
        self.LBFGS = torch.optim.LBFGS([self.output_image])  # update LBFGS to hold the new output image

        synthesized_texture = self.optimize(num_epochs=num_epochs)
        if display_when_done:
            utils.display_image_tensor(synthesized_texture)

        return synthesized_texture

    def optimize(self, num_epochs=250):
        """
        Performs num_epochs steps of the L-BFGS algorithm and returns the (detached) output image.
        """
        progress_bar = tqdm(total=num_epochs, desc="Optimizing...")
        epoch_offset = len(self.losses)

        for epoch in range(num_epochs):
            epoch_loss = self.get_loss().item()
            progress_bar.update(1)
            progress_bar.set_description(f"Loss @ Epoch {epoch_offset + epoch + 1} - {epoch_loss}")
            self.LBFGS.step(self.LBFGS_closure)  # the LBFGS optimizer expects the loss in the form of a closure function
            self.losses.append(epoch_loss)

        return self.output_image.detach().cpu()

    def LBFGS_closure(self):
        """
        Closure function for LBFGS which passes the current output_image through vgg_synthesis, computes the
        predicted gram matrices, and uses those to compute the loss for the network.
        """
        self.LBFGS.zero_grad()
        loss = self.get_loss()
        loss.backward()
        return loss

    def get_loss(self):
        """
        CNN loss: generates the feature maps for the current synthesized output image and compares them against the
        ideal feature maps to get a loss E_l at each layer l. The E_l's are summed to give the total CNN loss.
        Spectrum loss: projects the synthesized texture onto the exemplar to compute the spectrum constraint as per the paper.
        Overall loss = loss_cnn + beta * loss_spec
        """
        # calculate spectrum constraint loss using the current output_image and tex_exemplar_image
        # - projects image I_hat (tex synth) onto image I (tex exemplar) and returns I_proj (equation as per paper)
        I_hat = utils.get_grayscale(self.output_image)
        I_fourier = fft.fft2(utils.get_grayscale(self.tex_exemplar_image))
        I_hat_fourier = fft.fft2(I_hat)
        I_fourier_conj = torch.conj(I_fourier)
        epsilon = 10e-12  # small constant to avoid division by zero and NaN values
        I_proj = fft.ifft2((I_hat_fourier * I_fourier_conj) / (torch.abs(I_hat_fourier * I_fourier_conj) + epsilon) * I_fourier)
        loss_spec = (0.5 * (I_hat - I_proj) ** 2.).sum().real

        # get the gram matrices for the synthesized output_image by passing it through the second vgg network
        gram_matrices_pred = self.vgg_synthesis(self.output_image).get_gram_matrices()

        # calculate cnn loss
        loss_cnn = 0.  # (w1*E1 + w2*E2 + ... + wl*El)
        for i in range(len(self.layer_weights)):
            # E_l = w_l * ||G_ideal_l - G_pred_l||^2
            E = self.layer_weights[i] * ((self.gram_matrices_ideal[i] - gram_matrices_pred[i]) ** 2.).sum()
            loss_cnn += E

        return loss_cnn + (self.beta * loss_spec)

    def save_textures(self, output_dir="./results/", display_when_done=False):
        """
        Saves (and optionally displays) the current tex_exemplar_image and output_image tensors that this model
        holds into the results directory (creating it if it does not yet exist).
        """
        tex_exemplar = utils.save_image_tensor(self.tex_exemplar_image.cpu(),
                                               output_dir=output_dir,
                                               image_name=f"exemplar_{self.tex_exemplar_name}.png")
        tex_synth = utils.save_image_tensor(self.output_image.detach().cpu(),
                                            output_dir=output_dir,
                                            image_name=f"synth_{self.tex_exemplar_name}.png")
        if display_when_done:
            tex_exemplar.show()
            print()
            tex_synth.show()
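The spectrum term in get_loss keeps the phase of the normalized cross-spectrum while borrowing the exemplar's Fourier magnitude. A standalone sketch of the same computation for two grayscale tensors of equal size (the function name is illustrative, not part of the repo):

    import torch
    from torch import fft

    def spectrum_projection_loss(i_hat, i_ref, eps=10e-12):
        # project i_hat onto i_ref in the Fourier domain (phase from the
        # normalized cross-spectrum, magnitude from i_ref), then penalize
        # the distance between i_hat and its projection, as in get_loss
        f_hat = fft.fft2(i_hat)
        f_ref = fft.fft2(i_ref)
        cross = f_hat * torch.conj(f_ref)
        i_proj = fft.ifft2(cross / (torch.abs(cross) + eps) * f_ref)
        return (0.5 * (i_hat - i_proj) ** 2.).sum().real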
model/utils.py
ADDED
@@ -0,0 +1,66 @@
from PIL import Image
from torchvision import transforms
from skimage import io, transform, util
import numpy as np
import os

"""
Contains utility functions to work with images in tensor and jpg/png forms
"""


def load_image_tensor(image, path=""):
    """
    Returns the image as a PyTorch Tensor of shape (3, 256, 256).
    Values between 0 and 1.
    """
    img_size = (256, 256)
    # image = io.imread(path)
    cropped_image = util.crop(image, ((0, 0), (0, image.shape[1] - image.shape[0]), (0, 0)))  # crop to a square by trimming the right edge (assumes width >= height)
    resized_image = transform.resize(image=cropped_image, output_shape=img_size, anti_aliasing=True)
    to_tensor = transforms.Compose([transforms.ToTensor()])
    tensor = to_tensor(resized_image)
    # tensor = tensor.permute(1,2,0) # the model expects w, h, 3!
    return tensor.float()


def convert_tensor_to_PIL_image(image_tensor):
    output_image = image_tensor.numpy().transpose(1, 2, 0)  # (C, H, W) -> (H, W, C)
    output_image = np.clip(output_image, 0, 1) * 255
    output_image = output_image.astype(np.uint8)
    output_image = Image.fromarray(output_image)
    return output_image


def save_image_tensor(tensor, output_dir="./", image_name="output.png"):
    """
    Saves a 3D tensor as an image.
    """
    output_image = tensor.numpy().transpose(1, 2, 0)
    output_image = np.clip(output_image, 0, 1) * 255
    output_image = output_image.astype(np.uint8)
    output_image = Image.fromarray(output_image)

    if not os.path.exists(output_dir):
        os.makedirs(output_dir)  # makedirs (rather than mkdir) so nested output paths also work
    output_image.save(os.path.join(output_dir, image_name))

    return output_image


def display_image_tensor(tensor):
    """
    Displays the passed-in 3D image tensor
    """
    output_image = tensor.numpy().transpose(1, 2, 0)
    output_image = np.clip(output_image, 0, 1) * 255
    output_image = output_image.astype(np.uint8)
    output_image = Image.fromarray(output_image)
    output_image.show()


def get_grayscale(tensor):
    """
    Converts a 3D image tensor to greyscale
    """
    greyscale_transform = transforms.Grayscale()
    return greyscale_transform(tensor)
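A quick round trip through these helpers; the image path here is illustrative:

    from skimage import io
    from model.utils import load_image_tensor, convert_tensor_to_PIL_image

    img = io.imread("images/scenery.png")      # H x W x 3 uint8 array
    tensor = load_image_tensor(img)            # 3 x 256 x 256 float tensor, values in [0, 1]
    pil = convert_tensor_to_PIL_image(tensor)  # back to a PIL.Image for display or saving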
model/vgg19.py
ADDED
@@ -0,0 +1,56 @@
import torch
from torch import nn
from torchvision.models import VGG19_Weights, vgg19


class VGG19:
    """
    Custom version of VGG19 with the maxpool layers replaced with avgpool as per the paper
    """
    def __init__(self, freeze_weights):
        """
        freeze_weights: if True, the gradients for the VGG params are turned off
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model = vgg19(weights=VGG19_Weights.DEFAULT).to(device)

        # note: added one extra maxpool (layer 36) from the vgg... worked well, so it was kept in
        self.output_layers = [0, 4, 9, 18, 27, 36]  # vgg19 layers [conv layer 1, maxpool, ..., maxpool]
        for layer in self.output_layers[1:]:  # convert the maxpool layers to avgpool
            self.model.features[layer] = nn.AvgPool2d(kernel_size=2, stride=2)

        self.feature_maps = []
        for param in self.model.parameters():
            if freeze_weights:
                param.requires_grad = False
            else:
                param.requires_grad = True

    def __call__(self, x):
        """
        Takes in an image, passes it through the VGG, and captures the feature maps at each of the output layers.
        """
        self.feature_maps = []
        for index, layer in enumerate(self.model.features):
            x = layer(x)  # pass the image through the layer to get its feature maps
            if index in self.output_layers:
                self.feature_maps.append(x)
            if index == self.output_layers[-1]:
                # stop VGG execution once the feature maps from all the important layers have been captured
                break

        return self

    def get_gram_matrices(self):
        """
        Converts the feature maps captured by the call method into gram matrices.
        """
        gram_matrices = []
        for fm in self.feature_maps:
            n, x, y = fm.size()  # num filters n and filter dims x and y (assumes an unbatched C x H x W input)
            F = fm.reshape(n, x * y)  # reshape the filter bank into a 2D matrix before auto-correlating
            gram_mat = (F @ F.t()) / (4. * n * x * y)  # auto-correlation, normalized by the layer output dims
            gram_matrices.append(gram_mat)

        return gram_matrices
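Each gram matrix is square with side equal to the layer's channel count, independent of spatial size. A small sanity check under the same unbatched-input assumption the model code makes (torch~=2.0.0 accepts unbatched C x H x W inputs to conv layers):

    import torch
    from model.vgg19 import VGG19

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    vgg = VGG19(freeze_weights=True)
    fake_texture = torch.rand(3, 256, 256).to(device)  # unbatched C x H x W, as produced by load_image_tensor
    grams = vgg(fake_texture).get_gram_matrices()
    print([tuple(g.shape) for g in grams])             # one (n, n) matrix per output layer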
requirements.txt
ADDED
@@ -0,0 +1,7 @@
gradio
torch~=2.0.0
torchvision~=0.15.1
scikit-image~=0.20.0
tqdm~=4.64.1
numpy~=1.24.1
pillow~=9.4.0