Sophie98 committed on
Commit e4fb230
Parent(s): 79c6687

restructured code

Files changed (6)
  1. .gitignore +8 -0
  2. StyTR.py +1 -1
  3. app.py +72 -2
  4. segmentation.py +6 -8
  5. styleTransfer.py +108 -71
  6. test.py +0 -175
.gitignore ADDED
@@ -0,0 +1,8 @@
+ __pycache__/box_ops.cpython-37.pyc
+ __pycache__/function.cpython-37.pyc
+ __pycache__/misc.cpython-37.pyc
+ __pycache__/segmentation.cpython-37.pyc
+ __pycache__/styleTransfer.cpython-37.pyc
+ __pycache__/StyTR.cpython-37.pyc
+ __pycache__/transformer.cpython-37.pyc
+ __pycache__/ViT_helper.cpython-37.pyc
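Note: the new .gitignore pins each compiled file by name, so every future module needs its own entry. A directory-level pattern would cover them all; a minimal alternative (a suggestion, not part of this commit):

__pycache__/
*.pyc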
StyTR.py CHANGED
@@ -137,7 +137,7 @@ class MLP(nn.Module):
  class StyTrans(nn.Module):
  """ This is the style transform transformer module """

- def __init__(self,encoder,decoder,PatchEmbed, transformer,args):
+ def __init__(self,encoder,decoder,PatchEmbed, transformer):

  super().__init__()
  enc_layers = list(encoder.children())
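Dropping `args` means StyTrans can now be constructed without an argparse namespace. A minimal sketch of the new call site, mirroring the updated styleTransfer.py:

import torch.nn as nn
import transformer
import StyTR

vgg = nn.Sequential(*list(StyTR.vgg.children())[:44])  # truncated VGG encoder
decoder = StyTR.decoder                                # CNN decoder
embedding = StyTR.PatchEmbed()                         # patch embedding
Trans = transformer.Transformer()                      # style transformer
network = StyTR.StyTrans(vgg, decoder, embedding, Trans)  # no args parameter anymore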
app.py CHANGED
@@ -1,9 +1,78 @@
  import numpy as np
  import gradio as gr
  from segmentation import get_mask,replace_sofa
- from styleTransfer import resize_sofa,resize_style,create_styledSofa
+ from styleTransfer import create_styledSofa
  from PIL import Image

+ def resize_sofa(img):
+ """
+ This function adds padding to make the original image square and 640 by 640.
+ It also returns the original ratio of the image, so that it can be restored later.
+ Parameters:
+ img = original image
+ Return:
+ im1 = squared image
+ box = parameters to later crop the image to its original ratio
+ """
+ width, height = img.size
+ idx = np.argmin([width,height])
+ newsize = (640, 640) # parameters from test script
+
+ if idx==0:
+ img1 = Image.new(img.mode, (height, height), (255, 255, 255))
+ img1.paste(img, ((height-width)//2, 0))
+ box = ( newsize[0]*(1-width/height)//2,
+ 0,
+ newsize[0]-newsize[0]*(1-width/height)//2,
+ newsize[1])
+ else:
+ img1 = Image.new(img.mode, (width, width), (255, 255, 255))
+ img1.paste(img, (0, (width-height)//2))
+ box = (0,
+ newsize[1]*(1-height/width)//2,
+ newsize[0],
+ newsize[1]-newsize[1]*(1-height/width)//2)
+ im1 = img1.resize(newsize)
+ return im1,box
+
+ def resize_style(img):
+ """
+ This function generates a zoomed-out version of the style image and resizes it to a 640 by 640 square.
+ Parameters:
+ img = image containing the style/pattern
+ Return:
+ dst = a zoomed-out and resized version of the pattern
+ """
+ width, height = img.size
+ idx = np.argmin([width,height])
+
+ # Makes the image square by cropping
+ if idx==0:
+ top= (height-width)//2
+ bottom= height-(height-width)//2
+ left = 0
+ right= width
+ else:
+ left = (width-height)//2
+ right = width - (width-height)//2
+ top = 0
+ bottom = height
+ newsize = (640, 640) # parameters from test script
+ im1 = img.crop((left, top, right, bottom))
+
+ # Constructs a zoomed-out version
+ copies = 8
+ resize = (newsize[0]//copies,newsize[1]//copies)
+ dst = Image.new('RGB', (resize[0]*copies,resize[1]*copies))
+ im2 = im1.resize((resize))
+ for row in range(copies):
+ im2 = im2.transpose(Image.FLIP_LEFT_RIGHT)
+ for column in range(copies):
+ im2 = im2.transpose(Image.FLIP_TOP_BOTTOM)
+ dst.paste(im2, (resize[0]*row, resize[1]*column))
+ dst = dst.resize((newsize))
+ return dst
+
  def style_sofa(input_img: np.ndarray, style_img: np.ndarray):
  """
  Styles (all) the sofas in the image to the given style.
@@ -17,6 +86,7 @@ def style_sofa(input_img: np.ndarray, style_img: np.ndarray):
  """

  # preprocess input images to be (640,640) squares to fit requirements of the segmentation model
+ input_img,style_img = Image.fromarray(input_img),Image.fromarray(style_img)
  resized_img,box = resize_sofa(input_img)
  resized_style = resize_style(style_img)
  # generate mask for image
@@ -24,7 +94,7 @@ def style_sofa(input_img: np.ndarray, style_img: np.ndarray):
  styled_sofa = create_styledSofa(resized_img,resized_style)
  # postprocess the final image
  new_sofa = replace_sofa(resized_img,mask,styled_sofa)
- new_sofa = Image.fromarray(new_sofa).crop(box)
+ new_sofa = new_sofa.crop(box)
  return new_sofa

  image = gr.inputs.Image()
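The resize helpers now live in app.py and operate on PIL images; the gradio wrapper converts the numpy inputs first. A quick usage sketch, assuming two local test files ('sofa.jpg' and 'pattern.jpg' are placeholder names):

from PIL import Image
import numpy as np
from app import style_sofa  # normally invoked by the gradio interface

content = np.array(Image.open('sofa.jpg'))    # placeholder content photo
pattern = np.array(Image.open('pattern.jpg')) # placeholder style image
result = style_sofa(content, pattern)         # PIL image, cropped back to the input's ratio
result.save('styled_sofa.jpg')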
segmentation.py CHANGED
@@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
  from PIL import Image
  import segmentation_models as sm

- def get_mask(image):
+ def get_mask(image:Image) -> Image:
  """
  This function generates a mask of the image that highlights all the sofas in the image.
  This uses a pre-trained Unet model with a resnet50 backbone.
@@ -50,13 +50,11 @@ def get_mask(image):
  test_img = cv2.cvtColor(test_img, cv2.COLOR_RGB2BGR)
  test_img = np.expand_dims(test_img, axis=0)

- prediction = model.predict(preprocess_input(test_img)).round()
- print("generated mask")
+ prediction = model.predict(preprocess_input(np.array(test_img))).round()
  mask = Image.fromarray(prediction[...,0].squeeze()*255).convert("L")
- #mask.save("masks/sofa.jpg")
- return np.array(mask)
+ return mask

- def replace_sofa(image,mask,styled_sofa):
+ def replace_sofa(image:Image, mask:Image, styled_sofa:Image) -> Image:
  """
  This function replaces the original sofa in the image by the new styled sofa according
  to the mask.
@@ -68,7 +66,7 @@ def replace_sofa(image,mask,styled_sofa):
  Return:
  new_image = Image containing the styled sofa
  """
+ image,mask,styled_sofa = np.array(image),np.array(mask),np.array(styled_sofa)
- image = np.array(image)
  #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  styled_sofa = cv2.cvtColor(styled_sofa, cv2.COLOR_BGR2RGB)

@@ -77,7 +75,7 @@ def replace_sofa(image,mask,styled_sofa):
  image_bg = cv2.bitwise_and(image,image,mask = mask_inv)
  sofa_fg = cv2.bitwise_and(styled_sofa,styled_sofa,mask = mask)
  new_image = cv2.add(image_bg,sofa_fg)
- return new_image
+ return Image.fromarray(new_image)

  # image = cv2.imread('input/sofa.jpg')
  # mask = cv2.imread('masks/sofa.jpg')
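Both functions now take and return PIL images, keeping the numpy/cv2 conversions inside this module. A usage sketch under that assumption (file names are placeholders):

from PIL import Image
from segmentation import get_mask, replace_sofa

img = Image.open('sofa_640.jpg')        # placeholder 640x640 content image
styled = Image.open('sofa_styled.jpg')  # placeholder styled render of the same image
mask = get_mask(img)                    # "L"-mode mask, sofa pixels at 255
out = replace_sofa(img, mask, styled)   # background from img, sofa pixels from styled
out.save('result.jpg')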
styleTransfer.py CHANGED
@@ -1,77 +1,114 @@
  from PIL import Image
  import numpy as np
- import os
- import cv2
-
- def resize_sofa(img):
- img = Image.fromarray(img)
- width, height = img.size
- idx = np.argmin([width,height])
- newsize = (640, 640) # parameters from test script
-
- if idx==0:
- img1 = Image.new(img.mode, (height, height), (255, 255, 255))
- img1.paste(img, ((height-width)//2, 0))
- box = ( newsize[0]*(1-width/height)//2,
- 0,
- newsize[0]-newsize[0]*(1-width/height)//2,
- newsize[1])
- else:
- img1 = Image.new(img.mode, (width, width), (255, 255, 255))
- img1.paste(img, (0, (width-height)//2))
- box = (0,
- newsize[1]*(1-height/width)//2,
- newsize[0],
- newsize[1]-newsize[1]*(1-height/width)//2)
- im1 = img1.resize(newsize)
- return im1,box
-
- def resize_style(img):
- #img = Image.open(path)#"../style5.jpg")
- img = Image.fromarray(img)
- width, height = img.size
- idx = np.argmin([width,height])
- #print(width,height)
-
- if idx==0:
- top= (height-width)//2
- bottom= height-(height-width)//2
- left = 0
- right= width
- else:
- left = (width-height)//2
- right = width - (width-height)//2
- top = 0
- bottom = height
-
- newsize = (640, 640) # parameters from test script
- im1 = img.crop((left, top, right, bottom))
-
- copies = 8
- resize = (newsize[0]//copies,newsize[1]//copies)
- dst = Image.new('RGB', (resize[0]*copies,resize[1]*copies))
- im2 = im1.resize((resize))
- for row in range(copies):
- im2 = im2.transpose(Image.FLIP_LEFT_RIGHT)
- for column in range(copies):
- im2 = im2.transpose(Image.FLIP_TOP_BOTTOM)
- dst.paste(im2, (resize[0]*row, resize[1]*column))
- dst = dst.resize((newsize))
- return dst
-
- def create_styledSofa(sofa,style):
- path_sofa,path_style = 'sofa.jpg','style.jpg'
- sofa.save(path_sofa)
- style.save(path_style)
- os.system("python3 test.py --content "+path_sofa+" \
- --style "+path_style+" \
- --output . \
- --vgg vgg_normalised.pth \
- --decoder_path decoder_iter_160000.pth \
- --Trans_path transformer_iter_160000.pth \
- --embedding_path embedding_iter_160000.pth")
- styled_sofa = cv2.imread('sofa_stylized_style.jpg')
+ import torch
+ import torch.nn as nn
+ from PIL import Image
+ from torchvision import transforms
+ import transformer as transformer
+ import StyTR as StyTR
+ import numpy as np
+ from collections import OrderedDict
+
+ def test_transform(size, crop):
+ transform_list = []
+
+ if size != 0:
+ transform_list.append(transforms.Resize(size))
+ if crop:
+ transform_list.append(transforms.CenterCrop(size))
+ transform_list.append(transforms.ToTensor())
+ transform = transforms.Compose(transform_list)
+ return transform
+ def style_transform(h,w):
+ k = (h,w)
+ size = int(np.max(k))
+ transform_list = []
+ transform_list.append(transforms.CenterCrop((h,w)))
+ transform_list.append(transforms.ToTensor())
+ transform = transforms.Compose(transform_list)
+ return transform
+
+ def content_transform():
+ transform_list = []
+ transform_list.append(transforms.ToTensor())
+ transform = transforms.Compose(transform_list)
+ return transform
+
+ def Transformer(content_img: Image, style_img: Image,
+ vgg_path:str = 'vgg_normalised.pth', decoder_path:str = 'decoder_iter_160000.pth',
+ Trans_path:str = 'transformer_iter_160000.pth', embedding_path:str = 'embedding_iter_160000.pth'):
+ # Advanced options
+ content_size=640
+ style_size=640
+ crop='store_true'
+
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+ vgg = StyTR.vgg
+ vgg.load_state_dict(torch.load(vgg_path))
+ vgg = nn.Sequential(*list(vgg.children())[:44])
+
+ decoder = StyTR.decoder
+ Trans = transformer.Transformer()
+ embedding = StyTR.PatchEmbed()
+
+ decoder.eval()
+ Trans.eval()
+ vgg.eval()
+
+ new_state_dict = OrderedDict()
+ state_dict = torch.load(decoder_path)
+ for k, v in state_dict.items():
+ #namekey = k[7:] # remove `module.`
+ namekey = k
+ new_state_dict[namekey] = v
+ decoder.load_state_dict(new_state_dict)
+
+ new_state_dict = OrderedDict()
+ state_dict = torch.load(Trans_path)
+ for k, v in state_dict.items():
+ #namekey = k[7:] # remove `module.`
+ namekey = k
+ new_state_dict[namekey] = v
+ Trans.load_state_dict(new_state_dict)
+
+ new_state_dict = OrderedDict()
+ state_dict = torch.load(embedding_path)
+ for k, v in state_dict.items():
+ #namekey = k[7:] # remove `module.`
+ namekey = k
+ new_state_dict[namekey] = v
+ embedding.load_state_dict(new_state_dict)
+
+ network = StyTR.StyTrans(vgg,decoder,embedding,Trans)
+ network.eval()
+ network.to(device)
+
+
+
+ content_tf = test_transform(content_size, crop)
+ style_tf = test_transform(style_size, crop)
+
+
+ content_tf1 = content_transform()
+ content = content_tf(content_img.convert("RGB"))
+
+ h,w,c=np.shape(content)
+ style_tf1 = style_transform(h,w)
+ style = style_tf(style_img.convert("RGB"))
+
+
+ style = style.to(device).unsqueeze(0)
+ content = content.to(device).unsqueeze(0)

+ with torch.no_grad():
+ output= network(content,style)
+ output = output[0].cpu()
+ output = transforms.ToPILImage()(output)
+ return output
+
+ def create_styledSofa(sofa:Image, style:Image):
+ styled_sofa = Transformer(sofa,style)
  return styled_sofa

  # image = Image.open('sofa_office.jpg')
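The shell round-trip through test.py is gone; inference now runs in-process. A minimal sketch of the new entry point (checkpoint paths are the defaults baked into Transformer's signature; note the weights are reloaded on every call, so caching the constructed network at module scope would be a natural follow-up):

from PIL import Image
from styleTransfer import create_styledSofa

sofa = Image.open('sofa.jpg')    # placeholder 640x640 content image
style = Image.open('style.jpg')  # placeholder 640x640 style image
styled = create_styledSofa(sofa, style)  # builds StyTrans, runs one forward pass, returns a PIL image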
test.py DELETED
@@ -1,175 +0,0 @@
- import argparse
- from pathlib import Path
- import os
- import torch
- import torch.nn as nn
- from PIL import Image
- from os.path import basename
- from os.path import splitext
- from torchvision import transforms
- from torchvision.utils import save_image
- from function import calc_mean_std, normal, coral
- import transformer as transformer
- import StyTR as StyTR
- import matplotlib.pyplot as plt
- from matplotlib import cm
- from function import normal
- import numpy as np
-
- def test_transform(size, crop):
- transform_list = []
-
- if size != 0:
- transform_list.append(transforms.Resize(size))
- if crop:
- transform_list.append(transforms.CenterCrop(size))
- transform_list.append(transforms.ToTensor())
- transform = transforms.Compose(transform_list)
- return transform
- def style_transform(h,w):
- k = (h,w)
- size = int(np.max(k))
- transform_list = []
- transform_list.append(transforms.CenterCrop((h,w)))
- transform_list.append(transforms.ToTensor())
- transform = transforms.Compose(transform_list)
- return transform
-
- def content_transform():
-
- transform_list = []
- transform_list.append(transforms.ToTensor())
- transform = transforms.Compose(transform_list)
- return transform
-
-
-
- parser = argparse.ArgumentParser()
- # Basic options
- parser.add_argument('--content', type=str,
- help='File path to the content image')
- parser.add_argument('--content_dir', type=str,
- help='Directory path to a batch of content images')
- parser.add_argument('--style', type=str,
- help='File path to the style image, or multiple style \
- images separated by commas if you want to do style \
- interpolation or spatial control')
- parser.add_argument('--style_dir', type=str,
- help='Directory path to a batch of style images')
- parser.add_argument('--output', type=str, default='output',
- help='Directory to save the output image(s)')
- parser.add_argument('--vgg', type=str, default='./experiments/vgg_normalised.pth')
- parser.add_argument('--decoder_path', type=str, default='experiments/decoder_iter_160000.pth')
- parser.add_argument('--Trans_path', type=str, default='experiments/transformer_iter_160000.pth')
- parser.add_argument('--embedding_path', type=str, default='experiments/embedding_iter_160000.pth')
-
-
- parser.add_argument('--style_interpolation_weights', type=str, default="")
- parser.add_argument('--a', type=float, default=1.0)
- parser.add_argument('--position_embedding', default='sine', type=str, choices=('sine', 'learned'),
- help="Type of positional embedding to use on top of the image features")
- parser.add_argument('--hidden_dim', default=512, type=int,
- help="Size of the embeddings (dimension of the transformer)")
- args = parser.parse_args()
-
- # Advanced options
- content_size=640
- style_size=640
- crop='store_true'
- save_ext='.jpg'
- output_path=args.output
- preserve_color='store_true'
- alpha=args.a
-
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-
- # Either --content or --content_dir should be given.
- if args.content:
- content_paths = [Path(args.content)]
- else:
- content_dir = Path(args.content_dir)
- content_paths = [f for f in content_dir.glob('*')]
-
- # Either --style or --style_dir should be given.
- if args.style:
- style_paths = [Path(args.style)]
- else:
- style_dir = Path(args.style_dir)
- style_paths = [f for f in style_dir.glob('*')]
-
- if not os.path.exists(output_path):
- os.mkdir(output_path)
-
-
- vgg = StyTR.vgg
- vgg.load_state_dict(torch.load(args.vgg))
- vgg = nn.Sequential(*list(vgg.children())[:44])
-
- decoder = StyTR.decoder
- Trans = transformer.Transformer()
- embedding = StyTR.PatchEmbed()
-
- decoder.eval()
- Trans.eval()
- vgg.eval()
- from collections import OrderedDict
- new_state_dict = OrderedDict()
- state_dict = torch.load(args.decoder_path)
- for k, v in state_dict.items():
- #namekey = k[7:] # remove `module.`
- namekey = k
- new_state_dict[namekey] = v
- decoder.load_state_dict(new_state_dict)
-
- new_state_dict = OrderedDict()
- state_dict = torch.load(args.Trans_path)
- for k, v in state_dict.items():
- #namekey = k[7:] # remove `module.`
- namekey = k
- new_state_dict[namekey] = v
- Trans.load_state_dict(new_state_dict)
-
- new_state_dict = OrderedDict()
- state_dict = torch.load(args.embedding_path)
- for k, v in state_dict.items():
- #namekey = k[7:] # remove `module.`
- namekey = k
- new_state_dict[namekey] = v
- embedding.load_state_dict(new_state_dict)
-
- network = StyTR.StyTrans(vgg,decoder,embedding,Trans,args)
- network.eval()
- network.to(device)
-
-
-
- content_tf = test_transform(content_size, crop)
- style_tf = test_transform(style_size, crop)
-
- for content_path in content_paths:
- for style_path in style_paths:
-
-
- content_tf1 = content_transform()
- content = content_tf(Image.open(content_path).convert("RGB"))
-
- h,w,c=np.shape(content)
- style_tf1 = style_transform(h,w)
- style = style_tf(Image.open(style_path).convert("RGB"))
-
-
- style = style.to(device).unsqueeze(0)
- content = content.to(device).unsqueeze(0)
-
- with torch.no_grad():
- output= network(content,style)
- output = output[0].cpu()
-
- output_name = '{:s}/{:s}_stylized_{:s}{:s}'.format(
- output_path, splitext(basename(content_path))[0],
- splitext(basename(style_path))[0], save_ext
- )
-
- save_image(output, output_name)
-
-
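For reference, the deleted script's CLI maps directly onto the new function; roughly (checkpoint paths as in the old invocation):

# old: python3 test.py --content sofa.jpg --style style.jpg --vgg vgg_normalised.pth ...
# new, in-process:
from PIL import Image
from styleTransfer import Transformer

output = Transformer(Image.open('sofa.jpg'), Image.open('style.jpg'),
                     vgg_path='vgg_normalised.pth',
                     decoder_path='decoder_iter_160000.pth',
                     Trans_path='transformer_iter_160000.pth',
                     embedding_path='embedding_iter_160000.pth')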