Spaces · Build error
Sophie98 committed · ad1ac8f
1 parent: 6048967
change to streamlit
Browse files
- .flake8 +0 -12
- .streamlit/config.toml +3 -0
- README.md +2 -2
- Segmentation/{model_checkpoint.h5 → model_final.h5} +2 -2
- Segmentation/segmentation.py +24 -39
- StyleTransfer/{StyTR.py → srcTransformer/StyTR.py} +89 -60
- StyleTransfer/{models → srcTransformer/Transformer_models}/decoder_iter_160000.pth +0 -0
- StyleTransfer/{models → srcTransformer/Transformer_models}/embedding_iter_160000.pth +0 -0
- StyleTransfer/{models → srcTransformer/Transformer_models}/transformer_iter_160000.pth +0 -0
- StyleTransfer/{models → srcTransformer/Transformer_models}/vgg_normalised.pth +0 -0
- StyleTransfer/{ViT_helper.py → srcTransformer/ViT_helper.py} +53 -35
- StyleTransfer/srcTransformer/__init__.py +0 -0
- StyleTransfer/{function.py → srcTransformer/function.py} +27 -22
- StyleTransfer/{misc.py → srcTransformer/misc.py} +114 -71
- StyleTransfer/{transformer.py → srcTransformer/transformer.py} +213 -119
- StyleTransfer/styleTransfer.py +136 -70
- app.py +290 -134
- {gradio_cached_examples/output → figures}/0.png +0 -0
- {gradio_cached_examples/output → figures}/1.png +0 -0
- {gradio_cached_examples/output → figures}/2.png +0 -0
- figures/StyleGANsofa.png +0 -0
- figures/Transformersofa.jpg +0 -0
- figures/logo.png +0 -0
- gradio_cached_examples/log.csv +0 -4
- packages.txt +0 -3
- requirements.txt +3 -3
.flake8
DELETED
@@ -1,12 +0,0 @@
-[flake8]
-exclude =
-    .git,
-    *.egg-info,
-    __pycache__,
-    .tox,
-    .pytest_cache,
-    build,
-    dist,
-    tests
-max-line-length = 88
-ignore = D202,W503,E203  # conflicts with black
.streamlit/config.toml
ADDED
@@ -0,0 +1,3 @@
+[theme]
+base="dark"
+primaryColor="#04b188"
README.md
CHANGED
@@ -3,8 +3,8 @@ title: SofaStyler
 emoji: π
 colorFrom: blue
 colorTo: green
-sdk:
-sdk_version:
+sdk: streamlit
+sdk_version: 1.9.0
 app_file: app.py
 pinned: false
 ---
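The sdk and app_file metadata above are what tell Hugging Face Spaces to launch the app with Streamlit 1.9.0 (effectively `streamlit run app.py`), with the [theme] block in .streamlit/config.toml picked up automatically. As a hedged illustration only (this is not the repository's actual app.py, which is rewritten elsewhere in this commit; the widgets and captions are assumptions), an entry point of the kind that metadata points at looks like this:

# minimal_app_sketch.py — illustrative only, not the real app.py
import streamlit as st
from PIL import Image

st.title("SofaStyler")
uploaded = st.file_uploader("Upload a room photo", type=["jpg", "png"])
if uploaded is not None:
    st.image(Image.open(uploaded), caption="Input image")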
Segmentation/{model_checkpoint.h5 → model_final.h5}
RENAMED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
-size
+oid sha256:9a456f38c83897d9d8b5c8dd989ff7ee2fe13bb123a70a00b6e987d4efac1c6e
+size 130858696
Segmentation/segmentation.py
CHANGED
@@ -1,60 +1,45 @@
+# Import libraries
+
 import cv2
 from tensorflow import keras
 import numpy as np
 from PIL import Image
 import segmentation_models as sm
-sm.set_framework('tf.keras')

-model_path = "Segmentation/model_checkpoint.h5"
-CLASSES = ['sofa']
-BACKBONE = 'resnet50'
+sm.set_framework("tf.keras")

-activation = 'sigmoid' if n_classes == 1 else 'softmax'
+# Load segmentation model
+BACKBONE = "resnet50"
 preprocess_input = sm.get_preprocessing(BACKBONE)
-model = sm.Unet(BACKBONE, classes=n_classes, activation=activation)
-# define optimizer
-optim = keras.optimizers.Adam(LR)
-dice_loss = sm.losses.DiceLoss()
-focal_loss = sm.losses.BinaryFocalLoss() if n_classes == 1 else sm.losses.CategoricalFocalLoss()
-total_loss = dice_loss + (1 * focal_loss)
-metrics = [sm.metrics.IOUScore(threshold=0.5), sm.metrics.FScore(threshold=0.5)]
-# compile keras model with defined optimizer, loss and metrics
-model.compile(optim, total_loss, metrics)
-model.load_weights(model_path)
+model = keras.models.load_model("Segmentation/model_final.h5", compile=False)


-def get_mask(image:Image
+def get_mask(image: Image) -> Image:
     """
     This function generates a mask of the image that highlights all the sofas
     in the image. This uses a pre-trained Unet model with a resnet50 backbone.
     Remark: The model was trained on 640by640 images and it is therefore best
     that the image has the same size.

     Parameters:
         image = original image
     Return:
         mask = corresponding mask of the image
     """
-    # load model
-    #model = keras.models.load_model('model_final.h5', compile=False)
-    print('loaded model')
     test_img = np.array(image)
     test_img = cv2.resize(test_img, (640, 640))
     test_img = cv2.cvtColor(test_img, cv2.COLOR_RGB2BGR)
     test_img = np.expand_dims(test_img, axis=0)

     prediction = model.predict(preprocess_input(np.array(test_img))).round()
-    mask = Image.fromarray(prediction[...,0].squeeze()*255).convert("L")
+    mask = Image.fromarray(prediction[..., 0].squeeze() * 255).convert("L")
     return mask


-def replace_sofa(image:Image
+def replace_sofa(image: Image, mask: Image, styled_sofa: Image) -> Image:
     """
     This function replaces the original sofa in the image by the new styled
     sofa according to the mask.
     Remark: All images should have the same size.
     Input:
         image = Original image
@@ -63,11 +48,11 @@ def replace_sofa(image:Image.Image, mask:Image.Image, styled_sofa:Image.Image) -
     Return:
         new_image = Image containing the styled sofa
     """
-    image,mask,styled_sofa = np.array(image),np.array(mask),np.array(styled_sofa)
+    image, mask, styled_sofa = np.array(image), np.array(mask), np.array(styled_sofa)

     _, mask = cv2.threshold(mask, 10, 255, cv2.THRESH_BINARY)
     mask_inv = cv2.bitwise_not(mask)
-    image_bg = cv2.bitwise_and(image,image,mask=mask_inv)
-    sofa_fg = cv2.bitwise_and(styled_sofa,styled_sofa,mask=mask)
-    new_image = cv2.add(image_bg,sofa_fg)
+    image_bg = cv2.bitwise_and(image, image, mask=mask_inv)
+    sofa_fg = cv2.bitwise_and(styled_sofa, styled_sofa, mask=mask)
+    new_image = cv2.add(image_bg, sofa_fg)
     return Image.fromarray(new_image)
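For reference, a hedged usage sketch of the two functions above (not part of this commit; the figure paths are illustrative stand-ins and both images are assumed to be 640x640 RGB):

from PIL import Image
from Segmentation.segmentation import get_mask, replace_sofa

room = Image.open("figures/0.png").convert("RGB").resize((640, 640))    # original photo
styled = Image.open("figures/1.png").convert("RGB").resize((640, 640))  # stand-in for the styled sofa image
mask = get_mask(room)                       # L-mode mask, sofa pixels = 255
result = replace_sofa(room, mask, styled)   # paste the styled sofa back into the room
result.save("styled_room.png")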
StyleTransfer/{StyTR.py → srcTransformer/StyTR.py}
RENAMED
@@ -1,17 +1,24 @@
 import torch
 import torch.nn.functional as F
-    accuracy, get_world_size, interpolate,
-    is_dist_avail_and_initialized)
-from StyleTransfer.function import normal,normal_style
-from StyleTransfer.function import calc_mean_std
-from StyleTransfer.ViT_helper import DropPath, to_2tuple, trunc_normal_
+from StyleTransfer.srcTransformer.function import calc_mean_std, normal
+from StyleTransfer.srcTransformer.misc import (
+    NestedTensor,
+    nested_tensor_from_tensor_list,
+)
+from StyleTransfer.srcTransformer.ViT_helper import to_2tuple
 from torch import nn


 class PatchEmbed(nn.Module):
+    """Image to Patch Embedding"""
+
+    def __init__(
+        self,
+        img_size: int = 256,
+        patch_size: int = 8,
+        in_chans: int = 3,
+        embed_dim: int = 512,
+    ):
         super().__init__()
         img_size = to_2tuple(img_size)
         patch_size = to_2tuple(patch_size)
@@ -19,9 +26,11 @@ class PatchEmbed(nn.Module):
         self.img_size = img_size
         self.patch_size = patch_size
         self.num_patches = num_patches
+
+        self.proj = nn.Conv2d(
+            in_chans, embed_dim, kernel_size=patch_size, stride=patch_size
+        )
+        self.up1 = nn.Upsample(scale_factor=2, mode="nearest")

     def forward(self, x):
         B, C, H, W = x.shape
@@ -34,7 +43,7 @@ decoder = nn.Sequential(
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(512, 256, (3, 3)),
     nn.ReLU(),
+    nn.Upsample(scale_factor=2, mode="nearest"),
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(256, 256, (3, 3)),
     nn.ReLU(),
@@ -47,14 +56,14 @@ decoder = nn.Sequential(
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(256, 128, (3, 3)),
     nn.ReLU(),
+    nn.Upsample(scale_factor=2, mode="nearest"),
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(128, 128, (3, 3)),
     nn.ReLU(),
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(128, 64, (3, 3)),
     nn.ReLU(),
+    nn.Upsample(scale_factor=2, mode="nearest"),
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(64, 64, (3, 3)),
     nn.ReLU(),
@@ -115,26 +124,35 @@ vgg = nn.Sequential(
     nn.ReLU(),  # relu5-3
     nn.ReflectionPad2d((1, 1, 1, 1)),
     nn.Conv2d(512, 512, (3, 3)),
-    nn.ReLU()  # relu5-4
+    nn.ReLU(),  # relu5-4
 )


 class MLP(nn.Module):
+    """Very simple multi-layer perceptron (also called FFN)"""

+    def __init__(
+        self, input_dim: int, hidden_dim: int, output_dim: int, num_layers: int
+    ):
         super().__init__()
         self.num_layers = num_layers
         h = [hidden_dim] * (num_layers - 1)
+        self.layers = nn.ModuleList(
+            nn.Linear(n, k) for n, k in zip([input_dim] + h, h + [output_dim])
+        )

     def forward(self, x):
         for i, layer in enumerate(self.layers):
             x = F.relu(layer(x)) if i < self.num_layers - 1 else layer(x)
         return x


 class StyTrans(nn.Module):
+    """This is the style transform transformer module"""

+    def __init__(
+        self, encoder: nn.Sequential, decoder: nn.Sequential, PatchEmbed, transformer
+    ):

         super().__init__()
         enc_layers = list(encoder.children())
@@ -143,85 +161,96 @@ class StyTrans(nn.Module):
         self.enc_3 = nn.Sequential(*enc_layers[11:18])  # relu2_1 -> relu3_1
         self.enc_4 = nn.Sequential(*enc_layers[18:31])  # relu3_1 -> relu4_1
         self.enc_5 = nn.Sequential(*enc_layers[31:44])  # relu4_1 -> relu5_1

+        for name in ["enc_1", "enc_2", "enc_3", "enc_4", "enc_5"]:
             for param in getattr(self, name).parameters():
                 param.requires_grad = False

         self.mse_loss = nn.MSELoss()
         self.transformer = transformer
-        hidden_dim = transformer.d_model
         self.decode = decoder
         self.embedding = PatchEmbed

     def encode_with_intermediate(self, input):
         results = [input]
         for i in range(5):
+            func = getattr(self, "enc_{:d}".format(i + 1))
             results.append(func(results[-1]))
         return results[1:]

     def calc_content_loss(self, input, target):
+        assert input.size() == target.size()
+        assert target.requires_grad is False
+        return self.mse_loss(input, target)

     def calc_style_loss(self, input, target):
+        assert input.size() == target.size()
+        assert target.requires_grad is False
         input_mean, input_std = calc_mean_std(input)
         target_mean, target_std = calc_mean_std(target)
+        return self.mse_loss(input_mean, target_mean) + self.mse_loss(
+            input_std, target_std
+        )

+    def forward(self, samples_c: NestedTensor, samples_s: NestedTensor):
+        """The forward expects a NestedTensor, which consists of:
+        - samples.tensor: batched images, of shape [batch_size x 3 x H x W]
+        - samples.mask: a binary mask of shape [batch_size x H x W],
+          containing 1 on padded pixels
         """
         content_input = samples_c
         style_input = samples_s
         if isinstance(samples_c, (list, torch.Tensor)):
+            samples_c = nested_tensor_from_tensor_list(
+                samples_c
+            )  # support different-sized images; padding is used for the mask [tensor, mask]
         if isinstance(samples_s, (list, torch.Tensor)):
             samples_s = nested_tensor_from_tensor_list(samples_s)

+        # features used to calculate the losses
         content_feats = self.encode_with_intermediate(samples_c.tensors)
         style_feats = self.encode_with_intermediate(samples_s.tensors)

+        # Linear projection
         style = self.embedding(samples_s.tensors)
         content = self.embedding(samples_c.tensors)

         # positional embedding is calculated in transformer.py
         pos_s = None
         pos_c = None

         mask = None
+        hs = self.transformer(style, mask, content, pos_c, pos_s)
         Ics = self.decode(hs)

         Ics_feats = self.encode_with_intermediate(Ics)
+        loss_c = self.calc_content_loss(
+            normal(Ics_feats[-1]), normal(content_feats[-1])
+        ) + self.calc_content_loss(normal(Ics_feats[-2]), normal(content_feats[-2]))
         # Style loss
         loss_s = self.calc_style_loss(Ics_feats[0], style_feats[0])
         for i in range(1, 5):
             loss_s += self.calc_style_loss(Ics_feats[i], style_feats[i])

+        Icc = self.decode(self.transformer(content, mask, content, pos_c, pos_c))
+        Iss = self.decode(self.transformer(style, mask, style, pos_s, pos_s))

+        # Identity losses lambda 1
+        loss_lambda1 = self.calc_content_loss(
+            Icc, content_input
+        ) + self.calc_content_loss(Iss, style_input)

+        # Identity losses lambda 2
+        Icc_feats = self.encode_with_intermediate(Icc)
+        Iss_feats = self.encode_with_intermediate(Iss)
+        loss_lambda2 = self.calc_content_loss(
+            Icc_feats[0], content_feats[0]
+        ) + self.calc_content_loss(Iss_feats[0], style_feats[0])
         for i in range(1, 5):
+            loss_lambda2 += self.calc_content_loss(
+                Icc_feats[i], content_feats[i]
+            ) + self.calc_content_loss(Iss_feats[i], style_feats[i])
         # Please select and comment out one of the following two sentences
+        return Ics, loss_c, loss_s, loss_lambda1, loss_lambda2  # train
         # return Ics #test
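A hedged wiring sketch for the module above (the real loading lives in StyleTransfer/styleTransfer.py, which this commit also rewrites; loading only the VGG weights is shown and is an assumption based on the checkpoints moved into srcTransformer/Transformer_models):

import torch
from StyleTransfer.srcTransformer import StyTR, transformer

# build the network from the pieces defined in StyTR.py
vgg = StyTR.vgg
vgg.load_state_dict(torch.load("StyleTransfer/srcTransformer/Transformer_models/vgg_normalised.pth"))
network = StyTR.StyTrans(vgg, StyTR.decoder, StyTR.PatchEmbed(), transformer.Transformer())
network.eval()  # forward(content, style) returns the stylised image plus the four losses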
StyleTransfer/{models → srcTransformer/Transformer_models}/decoder_iter_160000.pth
RENAMED
File without changes

StyleTransfer/{models → srcTransformer/Transformer_models}/embedding_iter_160000.pth
RENAMED
File without changes

StyleTransfer/{models → srcTransformer/Transformer_models}/transformer_iter_160000.pth
RENAMED
File without changes

StyleTransfer/{models → srcTransformer/Transformer_models}/vgg_normalised.pth
RENAMED
File without changes
StyleTransfer/{ViT_helper.py → srcTransformer/ViT_helper.py}
RENAMED
@@ -1,18 +1,30 @@
+import math
+import warnings
+from itertools import repeat
+
 import torch
 from torch import nn
+from torch._six import container_abcs
+
+
+def drop_path(x, drop_prob: float = 0.0, training: bool = False):
+    """
+    Drop paths (Stochastic Depth) per sample (when applied in main
+    path of residual blocks). This is the same as the DropConnect impl
+    I created for EfficientNet, etc networks, however, the original name
+    is misleading as 'Drop Connect' is a different form of dropout in a
+    separate paper... See discussion:
+    https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
+    I've opted for changing the layer and argument names to 'drop path'
+    rather than mix DropConnect as a layer name and use 'survival rate'
+    as the argument.
     """
-    if drop_prob == 0. or not training:
+    if drop_prob == 0.0 or not training:
         return x
     keep_prob = 1 - drop_prob
+    shape = (x.shape[0],) + (1,) * (
+        x.ndim - 1
+    )  # work with diff dim tensors, not just 2D ConvNets
     random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device)
     random_tensor.floor_()  # binarize
     output = x.div(keep_prob) * random_tensor
@@ -20,25 +32,26 @@ def drop_path(x, drop_prob: float = 0., training: bool = False):


 class DropPath(nn.Module):
-    """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
-    """
+    """
+    Drop paths (Stochastic Depth) per sample
+    (when applied in main path of residual blocks).
+    """
+
+    def __init__(self, drop_prob: float = None):
         super(DropPath, self).__init__()
         self.drop_prob = drop_prob

     def forward(self, x):
         return drop_path(x, self.drop_prob, self.training)

-from itertools import repeat
-from torch._six import container_abcs

 # From PyTorch internals
-def _ntuple(n):
+def _ntuple(n: int):
     def parse(x):
         if isinstance(x, container_abcs.Iterable):
             return x
         return tuple(repeat(x, n))
+
     return parse

@@ -48,41 +61,41 @@ to_3tuple = _ntuple(3)
 to_4tuple = _ntuple(4)


+def _no_grad_trunc_normal_(
+    tensor: torch.tensor, mean: float, std: float, a: float, b: float
+):
+    # Cut & paste from PyTorch official master
+    # until it's in a few official releases - RW
+    # Method based on:
+    # https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf
     def norm_cdf(x):
         # Computes standard normal cumulative distribution function
-        return (1. + math.erf(x / math.sqrt(2.))) / 2.
+        return (1.0 + math.erf(x / math.sqrt(2.0))) / 2.0

     if (mean < a - 2 * std) or (mean > b + 2 * std):
+        warnings.warn(
+            "mean is more than 2 std from [a, b] in nn.init.trunc_normal_. "
+            "The distribution of values may be incorrect.",
+            stacklevel=2,
+        )

     with torch.no_grad():
         # Values are generated by using a truncated uniform distribution and
         # then using the inverse CDF for the normal distribution.
         # Get upper and lower cdf values
+        lower = norm_cdf((a - mean) / std)
+        upper = norm_cdf((b - mean) / std)

         # Uniformly fill tensor with values from [l, u], then translate to
         # [2l-1, 2u-1].
-        tensor.uniform_(2 *
+        tensor.uniform_(2 * lower - 1, 2 * upper - 1)

         # Use inverse cdf transform for normal distribution to get truncated
         # standard normal
         tensor.erfinv_()

         # Transform to proper mean, std
-        tensor.mul_(std * math.sqrt(2.))
+        tensor.mul_(std * math.sqrt(2.0))
         tensor.add_(mean)

         # Clamp to ensure it's in the proper range
@@ -90,8 +103,13 @@ def _no_grad_trunc_normal_(tensor, mean, std, a, b):
     return tensor


+def trunc_normal_(
+    tensor: torch.tensor,
+    mean: float = 0.0,
+    std: float = 1.0,
+    a: float = -2.0,
+    b: float = 2.0,
+):
     r"""Fills the input Tensor with values drawn from a truncated
     normal distribution. The values are effectively drawn from the
     normal distribution :math:`\mathcal{N}(\text{mean}, \text{std}^2)`
@@ -108,4 +126,4 @@ def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.):
     >>> w = torch.empty(3, 5)
     >>> nn.init.trunc_normal_(w)
     """
     return _no_grad_trunc_normal_(tensor, mean, std, a, b)
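A small usage sketch for the helpers above (illustrative only; the tensor size is an arbitrary assumption):

import torch
from StyleTransfer.srcTransformer.ViT_helper import trunc_normal_, to_2tuple

w = torch.empty(512, 768)
trunc_normal_(w, std=0.02)   # in-place init, values clamped to the [a, b] = [-2.0, 2.0] range
print(to_2tuple(8))          # (8, 8) — used above for img_size / patch_size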
StyleTransfer/srcTransformer/__init__.py
ADDED
File without changes
StyleTransfer/{function.py → srcTransformer/function.py}
RENAMED
@@ -4,35 +4,41 @@ import torch
 def calc_mean_std(feat, eps=1e-5):
     # eps is a small value added to the variance to avoid divide-by-zero.
     size = feat.size()
+    assert len(size) == 4
     N, C = size[:2]
     feat_var = feat.view(N, C, -1).var(dim=2) + eps
     feat_std = feat_var.sqrt().view(N, C, 1, 1)
     feat_mean = feat.view(N, C, -1).mean(dim=2).view(N, C, 1, 1)
     return feat_mean, feat_std


 def calc_mean_std1(feat, eps=1e-5):
     # eps is a small value added to the variance to avoid divide-by-zero.
     size = feat.size()
     # assert (len(size) == 4)
-    WH,N, C = size
+    WH, N, C = size
     feat_var = feat.var(dim=0) + eps
     feat_std = feat_var.sqrt()
     feat_mean = feat.mean(dim=0)
     return feat_mean, feat_std


 def normal(feat, eps=1e-5):
-    feat_mean, feat_std= calc_mean_std(feat, eps)
-    normalized=(feat-feat_mean)/feat_std
+    feat_mean, feat_std = calc_mean_std(feat, eps)
+    normalized = (feat - feat_mean) / feat_std
     return normalized


 def normal_style(feat, eps=1e-5):
-    feat_mean, feat_std= calc_mean_std1(feat, eps)
-    normalized=(feat-feat_mean)/feat_std
+    feat_mean, feat_std = calc_mean_std1(feat, eps)
+    normalized = (feat - feat_mean) / feat_std
     return normalized


 def _calc_feat_flatten_mean_std(feat):
     # takes 3D feat (C, H, W), return mean and std of array within channels
+    assert feat.size()[0] == 3
+    assert isinstance(feat, torch.FloatTensor)
     feat_flatten = feat.view(3, -1)
     mean = feat_flatten.mean(dim=-1, keepdim=True)
     std = feat_flatten.std(dim=-1, keepdim=True)
@@ -49,25 +55,24 @@ def coral(source, target):
     # Note: flatten -> f

     source_f, source_f_mean, source_f_std = _calc_feat_flatten_mean_std(source)
+    source_f_norm = (
+        source_f - source_f_mean.expand_as(source_f)
+    ) / source_f_std.expand_as(source_f)
+    source_f_cov_eye = torch.mm(source_f_norm, source_f_norm.t()) + torch.eye(3)

     target_f, target_f_mean, target_f_std = _calc_feat_flatten_mean_std(target)
+    target_f_norm = (
+        target_f - target_f_mean.expand_as(target_f)
+    ) / target_f_std.expand_as(target_f)
+    target_f_cov_eye = torch.mm(target_f_norm, target_f_norm.t()) + torch.eye(3)

     source_f_norm_transfer = torch.mm(
         _mat_sqrt(target_f_cov_eye),
-        torch.mm(torch.inverse(_mat_sqrt(source_f_cov_eye)),
-                 source_f_norm)
+        torch.mm(torch.inverse(_mat_sqrt(source_f_cov_eye)), source_f_norm),
     )

+    source_f_transfer = source_f_norm_transfer * target_f_std.expand_as(
+        source_f_norm
+    ) + target_f_mean.expand_as(source_f_norm)

     return source_f_transfer.view(source.size())
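A quick sanity-check sketch for the normalisation helpers above (not part of the commit; the feature-map shape is an arbitrary assumption):

import torch
from StyleTransfer.srcTransformer.function import calc_mean_std, normal

feat = torch.randn(2, 512, 32, 32)          # N x C x H x W feature map
mean, std = calc_mean_std(feat)             # both have shape (2, 512, 1, 1)
normed = normal(feat)                       # per-sample, per-channel whitening
print(normed.view(2, 512, -1).mean(dim=2).abs().max())  # close to 0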
StyleTransfer/{misc.py → srcTransformer/misc.py}
RENAMED
@@ -4,20 +4,21 @@ Misc functions, including distributed helpers.

 Mostly copy-paste from torchvision references.
 """
+import datetime
 import os
+import pickle
 import subprocess
 import time
 from collections import defaultdict, deque
-from typing import Optional, List
+from typing import List, Optional

 import torch
 import torch.distributed as dist
-from torch import Tensor

 # needed due to empty tensor bug in pytorch and torchvision 0.5
 import torchvision
+from torch import Tensor
+
 if float(torchvision.__version__[:3]) < 0.7:
     from torchvision.ops import _new_empty_tensor
     from torchvision.ops.misc import _output_size
@@ -47,7 +48,7 @@ class SmoothedValue(object):
     """
     if not is_dist_avail_and_initialized():
         return
+    t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
     dist.barrier()
     dist.all_reduce(t)
     t = t.tolist()
@@ -82,7 +83,8 @@ class SmoothedValue(object):
     avg=self.avg,
     global_avg=self.global_avg,
     max=self.max,
-    value=self.value
+    value=self.value,
+)
@@ -116,7 +118,9 @@ def all_gather(data):
     for _ in size_list:
         tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda"))
     if local_size != max_size:
+        padding = torch.empty(
+            size=(max_size - local_size,), dtype=torch.uint8, device="cuda"
+        )
         tensor = torch.cat((tensor, padding), dim=0)
     dist.all_gather(tensor_list, tensor)
@@ -172,15 +176,14 @@ class MetricLogger(object):
         return self.meters[attr]
     if attr in self.__dict__:
         return self.__dict__[attr]
+    raise AttributeError(
+        "'{}' object has no attribute '{}'".format(type(self).__name__, attr)
+    )

     def __str__(self):
         loss_str = []
         for name, meter in self.meters.items():
-            loss_str.append(
-                "{}: {}".format(name, str(meter))
-            )
+            loss_str.append("{}: {}".format(name, str(meter)))
         return self.delimiter.join(loss_str)

     def synchronize_between_processes(self):
@@ -193,31 +196,35 @@ class MetricLogger(object):
     def log_every(self, iterable, print_freq, header=None):
         i = 0
         if not header:
+            header = ""
         start_time = time.time()
         end = time.time()
+        iter_time = SmoothedValue(fmt="{avg:.4f}")
+        data_time = SmoothedValue(fmt="{avg:.4f}")
+        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
         if torch.cuda.is_available():
+            log_msg = self.delimiter.join(
+                [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}",
+                 "time: {time}", "data: {data}", "max mem: {memory:.0f}"]
+            )
         else:
+            log_msg = self.delimiter.join(
+                [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}",
+                 "time: {time}", "data: {data}"]
+            )
         MB = 1024.0 * 1024.0
         for obj in iterable:
             data_time.update(time.time() - end)
@@ -227,38 +234,54 @@ class MetricLogger(object):
             eta_seconds = iter_time.global_avg * (len(iterable) - i)
             eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
             if torch.cuda.is_available():
+                print(
+                    log_msg.format(
+                        i, len(iterable), eta=eta_string, meters=str(self),
+                        time=str(iter_time), data=str(data_time),
+                        memory=torch.cuda.max_memory_allocated() / MB,
+                    )
+                )
             else:
+                print(
+                    log_msg.format(
+                        i, len(iterable), eta=eta_string, meters=str(self),
+                        time=str(iter_time), data=str(data_time),
+                    )
+                )
             i += 1
             end = time.time()
         total_time = time.time() - start_time
         total_time_str = str(datetime.timedelta(seconds=int(total_time)))
+        print(
+            "{} Total time: {} ({:.4f} s / it)".format(
+                header, total_time_str, total_time / len(iterable)
+            )
+        )


 def get_sha():
     cwd = os.path.dirname(os.path.abspath(__file__))

     def _run(command):
+        return subprocess.check_output(command, cwd=cwd).decode("ascii").strip()

+    sha = "N/A"
     diff = "clean"
+    branch = "N/A"
     try:
+        sha = _run(["git", "rev-parse", "HEAD"])
+        subprocess.check_output(["git", "diff"], cwd=cwd)
+        diff = _run(["git", "diff-index", "HEAD"])
         diff = "has uncommited changes" if diff else "clean"
+        branch = _run(["git", "rev-parse", "--abbrev-ref", "HEAD"])
     except Exception:
         pass
     message = f"sha: {sha}, status: {diff}, branch: {branch}"
@@ -324,9 +347,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
         mask = torch.ones((b, h, w), dtype=torch.bool, device=device)
         for img, pad_img, m in zip(tensor_list, tensor, mask):
             pad_img[: img.shape[0], : img.shape[1], : img.shape[2]].copy_(img)
-            m[: img.shape[1], :img.shape[2]] = False
+            m[: img.shape[1], : img.shape[2]] = False
     else:
+        raise ValueError("not supported")
     return NestedTensor(tensor, mask)
@@ -336,7 +359,9 @@ def nested_tensor_from_tensor_list(tensor_list: List[Tensor]):
 def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
     max_size = []
     for i in range(tensor_list[0].dim()):
+        max_size_i = torch.max(
+            torch.stack([img.shape[i] for img in tensor_list]).to(torch.float32)
+        ).to(torch.int64)
         max_size.append(max_size_i)
     max_size = tuple(max_size)
@@ -348,11 +373,15 @@ def _onnx_nested_tensor_from_tensor_list(tensor_list: List[Tensor]) -> NestedTensor:
     padded_masks = []
     for img in tensor_list:
         padding = [(s1 - s2) for s1, s2 in zip(max_size, tuple(img.shape))]
+        padded_img = torch.nn.functional.pad(
+            img, (0, padding[2], 0, padding[1], 0, padding[0])
+        )
         padded_imgs.append(padded_img)

         m = torch.zeros_like(img[0], dtype=torch.int, device=img.device)
+        padded_mask = torch.nn.functional.pad(
+            m, (0, padding[2], 0, padding[1]), "constant", 1
+        )
         padded_masks.append(padded_mask.to(torch.bool))

     tensor = torch.stack(padded_imgs)
@@ -366,10 +395,11 @@ def setup_for_distributed(is_master):
     This function disables printing when not in master process
     """
     import builtins as __builtin__

     builtin_print = __builtin__.print

     def print(*args, **kwargs):
+        force = kwargs.pop("force", False)
         if is_master or force:
             builtin_print(*args, **kwargs)
@@ -406,26 +436,31 @@ def save_on_master(*args, **kwargs):


 def init_distributed_mode(args):
+    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
         args.rank = int(os.environ["RANK"])
+        args.world_size = int(os.environ["WORLD_SIZE"])
+        args.gpu = int(os.environ["LOCAL_RANK"])
+    elif "SLURM_PROCID" in os.environ:
+        args.rank = int(os.environ["SLURM_PROCID"])
         args.gpu = args.rank % torch.cuda.device_count()
     else:
+        print("Not using distributed mode")
         args.distributed = False
         return

     args.distributed = True

     torch.cuda.set_device(args.gpu)
+    args.dist_backend = "nccl"
+    print(
+        "| distributed init (rank {}): {}".format(args.rank, args.dist_url), flush=True
+    )
+    torch.distributed.init_process_group(
+        backend=args.dist_backend,
+        init_method=args.dist_url,
+        world_size=args.world_size,
+        rank=args.rank,
+    )
     torch.distributed.barrier()
     setup_for_distributed(args.rank == 0)
@@ -449,8 +484,14 @@ def accuracy(output, target, topk=(1,)):
     return res


+def interpolate(
+    input: torch.tensor,
+    size: List[int] = None,
+    scale_factor: float = None,
+    mode: str = "nearest",
+    align_corners: bool = None,
+) -> torch.tensor:
     """
     Equivalent to nn.functional.interpolate, but with support for empty batch sizes.
     This will eventually be supported natively by PyTorch, and this
@@ -466,4 +507,6 @@ def interpolate(input, size=None, scale_factor=None, mode="nearest", align_corners
         output_shape = list(input.shape[:-2]) + list(output_shape)
         return _new_empty_tensor(input, output_shape)
     else:
+        return torchvision.ops.misc.interpolate(
+            input, size, scale_factor, mode, align_corners
+        )
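A hedged sketch of how the padding helper above is used (illustrative tensor sizes, not from the commit):

import torch
from StyleTransfer.srcTransformer.misc import nested_tensor_from_tensor_list

imgs = [torch.randn(3, 480, 512), torch.randn(3, 512, 480)]  # different image sizes
nt = nested_tensor_from_tensor_list(imgs)
print(nt.tensors.shape)  # torch.Size([2, 3, 512, 512]) — batch padded to the max size
print(nt.mask.shape)     # torch.Size([2, 512, 512]) — True on padded pixels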
StyleTransfer/{transformer.py β srcTransformer/transformer.py}
RENAMED
@@ -1,40 +1,59 @@
|
|
1 |
import copy
|
2 |
-
|
|
|
3 |
|
|
|
4 |
import torch
|
5 |
import torch.nn.functional as F
|
6 |
-
from torch import
|
7 |
-
|
8 |
-
import numpy as np
|
9 |
-
import os
|
10 |
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
|
11 |
os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3"
|
12 |
-
class Transformer(nn.Module):
|
13 |
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
super().__init__()
|
19 |
|
20 |
-
encoder_layer = TransformerEncoderLayer(
|
21 |
-
|
|
|
22 |
encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
|
23 |
-
self.encoder_c = TransformerEncoder(
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
|
|
|
|
|
|
|
|
|
|
28 |
decoder_norm = nn.LayerNorm(d_model)
|
29 |
-
self.decoder = TransformerDecoder(
|
30 |
-
|
|
|
|
|
|
|
|
|
31 |
|
32 |
self._reset_parameters()
|
33 |
|
34 |
self.d_model = d_model
|
35 |
self.nhead = nhead
|
36 |
|
37 |
-
self.new_ps = nn.Conv2d(512
|
38 |
self.averagepooling = nn.AdaptiveAvgPool2d(18)
|
39 |
|
40 |
def _reset_parameters(self):
|
@@ -42,54 +61,64 @@ class Transformer(nn.Module):
|
|
42 |
if p.dim() > 1:
|
43 |
nn.init.xavier_uniform_(p)
|
44 |
|
45 |
-
def forward(self, style, mask
|
46 |
|
47 |
# content-aware positional embedding
|
48 |
-
content_pool = self.averagepooling(content)
|
49 |
pos_c = self.new_ps(content_pool)
|
50 |
-
pos_embed_c = F.interpolate(pos_c, mode=
|
51 |
|
52 |
-
|
53 |
style = style.flatten(2).permute(2, 0, 1)
|
54 |
if pos_embed_s is not None:
|
55 |
pos_embed_s = pos_embed_s.flatten(2).permute(2, 0, 1)
|
56 |
-
|
57 |
content = content.flatten(2).permute(2, 0, 1)
|
58 |
if pos_embed_c is not None:
|
59 |
pos_embed_c = pos_embed_c.flatten(2).permute(2, 0, 1)
|
60 |
-
|
61 |
-
|
62 |
style = self.encoder_s(style, src_key_padding_mask=mask, pos=pos_embed_s)
|
63 |
content = self.encoder_c(content, src_key_padding_mask=mask, pos=pos_embed_c)
|
64 |
-
hs = self.decoder(
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
69 |
H = int(np.sqrt(N))
|
70 |
hs = hs.permute(1, 2, 0)
|
71 |
-
hs = hs.view(B, C, -1,H)
|
72 |
|
73 |
return hs
|
74 |
|
75 |
|
76 |
class TransformerEncoder(nn.Module):
|
77 |
-
|
78 |
def __init__(self, encoder_layer, num_layers, norm=None):
|
79 |
super().__init__()
|
80 |
self.layers = _get_clones(encoder_layer, num_layers)
|
81 |
self.num_layers = num_layers
|
82 |
self.norm = norm
|
83 |
|
84 |
-
def forward(
|
85 |
-
|
86 |
-
|
87 |
-
|
|
|
|
|
|
|
88 |
output = src
|
89 |
-
|
90 |
for layer in self.layers:
|
91 |
-
output = layer(
|
92 |
-
|
|
|
|
|
|
|
|
|
93 |
|
94 |
if self.norm is not None:
|
95 |
output = self.norm(output)
|
@@ -98,7 +127,6 @@ class TransformerEncoder(nn.Module):
|
|
98 |
|
99 |
|
100 |
class TransformerDecoder(nn.Module):
|
101 |
-
|
102 |
def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
|
103 |
super().__init__()
|
104 |
self.layers = _get_clones(decoder_layer, num_layers)
|
@@ -106,23 +134,32 @@ class TransformerDecoder(nn.Module):
|
|
106 |
self.norm = norm
|
107 |
self.return_intermediate = return_intermediate
|
108 |
|
109 |
-
def forward(
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
|
|
|
|
|
|
|
|
116 |
output = tgt
|
117 |
|
118 |
intermediate = []
|
119 |
|
120 |
for layer in self.layers:
|
121 |
-
output = layer(
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
|
|
|
|
|
|
|
|
|
|
126 |
if self.return_intermediate:
|
127 |
intermediate.append(self.norm(output))
|
128 |
|
@@ -139,9 +176,15 @@ class TransformerDecoder(nn.Module):
|
|
139 |
|
140 |
|
141 |
class TransformerEncoderLayer(nn.Module):
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
145 |
super().__init__()
|
146 |
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
|
147 |
# Implementation of Feedforward model
|
@@ -160,16 +203,19 @@ class TransformerEncoderLayer(nn.Module):
|
|
160 |
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
|
161 |
return tensor if pos is None else tensor + pos
|
162 |
|
163 |
-
def forward_post(
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
|
|
|
|
168 |
q = k = self.with_pos_embed(src, pos)
|
169 |
# q = k = src
|
170 |
# print(q.size(),k.size(),src.size())
|
171 |
-
src2 = self.self_attn(
|
172 |
-
|
|
|
173 |
src = src + self.dropout1(src2)
|
174 |
src = self.norm1(src)
|
175 |
src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
|
@@ -177,33 +223,46 @@ class TransformerEncoderLayer(nn.Module):
|
|
177 |
src = self.norm2(src)
|
178 |
return src
|
179 |
|
180 |
-
def forward_pre(
|
181 |
-
|
182 |
-
|
183 |
-
|
|
|
|
|
|
|
184 |
src2 = self.norm1(src)
|
185 |
q = k = self.with_pos_embed(src2, pos)
|
186 |
-
src2 = self.self_attn(
|
187 |
-
|
|
|
188 |
src = src + self.dropout1(src2)
|
189 |
src2 = self.norm2(src)
|
190 |
src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
|
191 |
src = src + self.dropout2(src2)
|
192 |
return src
|
193 |
|
194 |
-
def forward(
|
195 |
-
|
196 |
-
|
197 |
-
|
|
|
|
|
|
|
198 |
if self.normalize_before:
|
199 |
return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
|
200 |
return self.forward_post(src, src_mask, src_key_padding_mask, pos)
|
201 |
|
202 |
|
203 |
class TransformerDecoderLayer(nn.Module):
|
204 |
-
|
205 |
-
|
206 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
207 |
super().__init__()
|
208 |
# d_model embedding dim
|
209 |
self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
|
@@ -226,28 +285,35 @@ class TransformerDecoderLayer(nn.Module):
|
|
226 |
def with_pos_embed(self, tensor, pos: Optional[Tensor]):
|
227 |
return tensor if pos is None else tensor + pos
|
228 |
|
229 |
-
def forward_post(
|
230 |
-
|
231 |
-
|
232 |
-
|
233 |
-
|
234 |
-
|
235 |
-
|
|
|
|
|
|
|
|
|
236 |
|
237 |
-
|
238 |
q = self.with_pos_embed(tgt, query_pos)
|
239 |
k = self.with_pos_embed(memory, pos)
|
240 |
-
v = memory
|
241 |
-
|
242 |
-
tgt2 = self.self_attn(
|
243 |
-
|
244 |
-
|
|
|
245 |
tgt = tgt + self.dropout1(tgt2)
|
246 |
tgt = self.norm1(tgt)
|
247 |
-
tgt2 = self.multihead_attn(
|
248 |
-
|
249 |
-
|
250 |
-
|
|
|
|
|
|
|
251 |
tgt = tgt + self.dropout2(tgt2)
|
252 |
tgt = self.norm2(tgt)
|
253 |
tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
|
@@ -255,24 +321,32 @@ class TransformerDecoderLayer(nn.Module):
|
|
255 |
tgt = self.norm3(tgt)
|
256 |
return tgt
|
257 |
|
258 |
-
def forward_pre(
|
259 |
-
|
260 |
-
|
261 |
-
|
262 |
-
|
263 |
-
|
264 |
-
|
|
|
|
|
|
|
|
|
265 |
tgt2 = self.norm1(tgt)
|
266 |
q = k = self.with_pos_embed(tgt2, query_pos)
|
267 |
-
tgt2 = self.self_attn(
|
268 |
-
|
|
|
269 |
|
270 |
tgt = tgt + self.dropout1(tgt2)
|
271 |
tgt2 = self.norm2(tgt)
|
272 |
-
tgt2 = self.multihead_attn(
|
273 |
-
|
274 |
-
|
275 |
-
|
|
|
|
|
|
|
276 |
|
277 |
tgt = tgt + self.dropout2(tgt2)
|
278 |
tgt2 = self.norm3(tgt)
|
@@ -280,18 +354,38 @@ class TransformerDecoderLayer(nn.Module):
|
|
280 |
tgt = tgt + self.dropout3(tgt2)
|
281 |
return tgt
|
282 |
|
283 |
-
def forward(
|
284 |
-
|
285 |
-
|
286 |
-
|
287 |
-
|
288 |
-
|
289 |
-
|
|
|
|
|
|
|
|
|
290 |
if self.normalize_before:
|
291 |
-
return self.forward_pre(
|
292 |
-
|
293 |
-
|
294 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
295 |
|
296 |
|
297 |
def _get_clones(module, N):
|
@@ -319,4 +413,4 @@ def _get_activation_fn(activation):
|
|
319 |
return F.gelu
|
320 |
if activation == "glu":
|
321 |
return F.glu
|
322 |
-
raise RuntimeError(
|
|
|
import copy
+import os
+from typing import Optional

+import numpy as np
import torch
import torch.nn.functional as F
+from torch import Tensor, nn
+
device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
os.environ["CUDA_VISIBLE_DEVICES"] = "2, 3"

+
+class Transformer(nn.Module):
+    def __init__(
+        self,
+        d_model=512,
+        nhead=8,
+        num_encoder_layers=3,
+        num_decoder_layers=3,
+        dim_feedforward=2048,
+        dropout=0.1,
+        activation="relu",
+        normalize_before=False,
+        return_intermediate_dec=False,
+    ):
        super().__init__()

+        encoder_layer = TransformerEncoderLayer(
+            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
+        )
        encoder_norm = nn.LayerNorm(d_model) if normalize_before else None
+        self.encoder_c = TransformerEncoder(
+            encoder_layer, num_encoder_layers, encoder_norm
+        )
+        self.encoder_s = TransformerEncoder(
+            encoder_layer, num_encoder_layers, encoder_norm
+        )
+
+        decoder_layer = TransformerDecoderLayer(
+            d_model, nhead, dim_feedforward, dropout, activation, normalize_before
+        )
        decoder_norm = nn.LayerNorm(d_model)
+        self.decoder = TransformerDecoder(
+            decoder_layer,
+            num_decoder_layers,
+            decoder_norm,
+            return_intermediate=return_intermediate_dec,
+        )

        self._reset_parameters()

        self.d_model = d_model
        self.nhead = nhead

+        self.new_ps = nn.Conv2d(512, 512, (1, 1))
        self.averagepooling = nn.AdaptiveAvgPool2d(18)

    def _reset_parameters(self):
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

+    def forward(self, style, mask, content, pos_embed_c, pos_embed_s):

        # content-aware positional embedding
+        content_pool = self.averagepooling(content)
        pos_c = self.new_ps(content_pool)
+        pos_embed_c = F.interpolate(pos_c, mode="bilinear", size=style.shape[-2:])

+        # flatten NxCxHxW to HWxNxC
        style = style.flatten(2).permute(2, 0, 1)
        if pos_embed_s is not None:
            pos_embed_s = pos_embed_s.flatten(2).permute(2, 0, 1)
+
        content = content.flatten(2).permute(2, 0, 1)
        if pos_embed_c is not None:
            pos_embed_c = pos_embed_c.flatten(2).permute(2, 0, 1)
+
        style = self.encoder_s(style, src_key_padding_mask=mask, pos=pos_embed_s)
        content = self.encoder_c(content, src_key_padding_mask=mask, pos=pos_embed_c)
+        hs = self.decoder(
+            content,
+            style,
+            memory_key_padding_mask=mask,
+            pos=pos_embed_s,
+            query_pos=pos_embed_c,
+        )[0]
+
+        # HWxNxC to NxCxHxW
+        N, B, C = hs.shape
        H = int(np.sqrt(N))
        hs = hs.permute(1, 2, 0)
+        hs = hs.view(B, C, -1, H)

        return hs


class TransformerEncoder(nn.Module):
    def __init__(self, encoder_layer, num_layers, norm=None):
        super().__init__()
        self.layers = _get_clones(encoder_layer, num_layers)
        self.num_layers = num_layers
        self.norm = norm

+    def forward(
+        self,
+        src,
+        mask: Optional[Tensor] = None,
+        src_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+    ):
        output = src
+
        for layer in self.layers:
+            output = layer(
+                output,
+                src_mask=mask,
+                src_key_padding_mask=src_key_padding_mask,
+                pos=pos,
+            )

        if self.norm is not None:
            output = self.norm(output)


class TransformerDecoder(nn.Module):
    def __init__(self, decoder_layer, num_layers, norm=None, return_intermediate=False):
        super().__init__()
        self.layers = _get_clones(decoder_layer, num_layers)
        self.norm = norm
        self.return_intermediate = return_intermediate

+    def forward(
+        self,
+        tgt,
+        memory,
+        tgt_mask: Optional[Tensor] = None,
+        memory_mask: Optional[Tensor] = None,
+        tgt_key_padding_mask: Optional[Tensor] = None,
+        memory_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+        query_pos: Optional[Tensor] = None,
+    ):
        output = tgt

        intermediate = []

        for layer in self.layers:
+            output = layer(
+                output,
+                memory,
+                tgt_mask=tgt_mask,
+                memory_mask=memory_mask,
+                tgt_key_padding_mask=tgt_key_padding_mask,
+                memory_key_padding_mask=memory_key_padding_mask,
+                pos=pos,
+                query_pos=query_pos,
+            )
            if self.return_intermediate:
                intermediate.append(self.norm(output))


class TransformerEncoderLayer(nn.Module):
+    def __init__(
+        self,
+        d_model,
+        nhead,
+        dim_feedforward=2048,
+        dropout=0.1,
+        activation="relu",
+        normalize_before=False,
+    ):
        super().__init__()
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)
        # Implementation of Feedforward model

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

+    def forward_post(
+        self,
+        src,
+        src_mask: Optional[Tensor] = None,
+        src_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+    ):
        q = k = self.with_pos_embed(src, pos)
        # q = k = src
        # print(q.size(),k.size(),src.size())
+        src2 = self.self_attn(
+            q, k, value=src, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
+        )[0]
        src = src + self.dropout1(src2)
        src = self.norm1(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src))))
        src = self.norm2(src)
        return src

+    def forward_pre(
+        self,
+        src,
+        src_mask: Optional[Tensor] = None,
+        src_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+    ):
        src2 = self.norm1(src)
        q = k = self.with_pos_embed(src2, pos)
+        src2 = self.self_attn(
+            q, k, value=src2, attn_mask=src_mask, key_padding_mask=src_key_padding_mask
+        )[0]
        src = src + self.dropout1(src2)
        src2 = self.norm2(src)
        src2 = self.linear2(self.dropout(self.activation(self.linear1(src2))))
        src = src + self.dropout2(src2)
        return src

+    def forward(
+        self,
+        src,
+        src_mask: Optional[Tensor] = None,
+        src_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+    ):
        if self.normalize_before:
            return self.forward_pre(src, src_mask, src_key_padding_mask, pos)
        return self.forward_post(src, src_mask, src_key_padding_mask, pos)


class TransformerDecoderLayer(nn.Module):
+    def __init__(
+        self,
+        d_model,
+        nhead,
+        dim_feedforward=2048,
+        dropout=0.1,
+        activation="relu",
+        normalize_before=False,
+    ):
        super().__init__()
        # d_model embedding dim
        self.self_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout)

    def with_pos_embed(self, tensor, pos: Optional[Tensor]):
        return tensor if pos is None else tensor + pos

+    def forward_post(
+        self,
+        tgt,
+        memory,
+        tgt_mask: Optional[Tensor] = None,
+        memory_mask: Optional[Tensor] = None,
+        tgt_key_padding_mask: Optional[Tensor] = None,
+        memory_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+        query_pos: Optional[Tensor] = None,
+    ):

        q = self.with_pos_embed(tgt, query_pos)
        k = self.with_pos_embed(memory, pos)
+        v = memory
+
+        tgt2 = self.self_attn(
+            q, k, v, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+        )[0]
+
        tgt = tgt + self.dropout1(tgt2)
        tgt = self.norm1(tgt)
+        tgt2 = self.multihead_attn(
+            query=self.with_pos_embed(tgt, query_pos),
+            key=self.with_pos_embed(memory, pos),
+            value=memory,
+            attn_mask=memory_mask,
+            key_padding_mask=memory_key_padding_mask,
+        )[0]
        tgt = tgt + self.dropout2(tgt2)
        tgt = self.norm2(tgt)
        tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt))))
        tgt = self.norm3(tgt)
        return tgt

+    def forward_pre(
+        self,
+        tgt,
+        memory,
+        tgt_mask: Optional[Tensor] = None,
+        memory_mask: Optional[Tensor] = None,
+        tgt_key_padding_mask: Optional[Tensor] = None,
+        memory_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+        query_pos: Optional[Tensor] = None,
+    ):
        tgt2 = self.norm1(tgt)
        q = k = self.with_pos_embed(tgt2, query_pos)
+        tgt2 = self.self_attn(
+            q, k, value=tgt2, attn_mask=tgt_mask, key_padding_mask=tgt_key_padding_mask
+        )[0]

        tgt = tgt + self.dropout1(tgt2)
        tgt2 = self.norm2(tgt)
+        tgt2 = self.multihead_attn(
+            query=self.with_pos_embed(tgt2, query_pos),
+            key=self.with_pos_embed(memory, pos),
+            value=memory,
+            attn_mask=memory_mask,
+            key_padding_mask=memory_key_padding_mask,
+        )[0]

        tgt = tgt + self.dropout2(tgt2)
        tgt2 = self.norm3(tgt)
        tgt = tgt + self.dropout3(tgt2)
        return tgt

+    def forward(
+        self,
+        tgt,
+        memory,
+        tgt_mask: Optional[Tensor] = None,
+        memory_mask: Optional[Tensor] = None,
+        tgt_key_padding_mask: Optional[Tensor] = None,
+        memory_key_padding_mask: Optional[Tensor] = None,
+        pos: Optional[Tensor] = None,
+        query_pos: Optional[Tensor] = None,
+    ):
        if self.normalize_before:
+            return self.forward_pre(
+                tgt,
+                memory,
+                tgt_mask,
+                memory_mask,
+                tgt_key_padding_mask,
+                memory_key_padding_mask,
+                pos,
+                query_pos,
+            )
+        return self.forward_post(
+            tgt,
+            memory,
+            tgt_mask,
+            memory_mask,
+            tgt_key_padding_mask,
+            memory_key_padding_mask,
+            pos,
+            query_pos,
+        )


def _get_clones(module, N):
        return F.gelu
    if activation == "glu":
        return F.glu
+    raise RuntimeError(f"activation should be relu/gelu, not {activation}.")
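For orientation, a minimal usage sketch of the module above (an illustrative assumption, not part of the commit): the forward pass takes 512-channel style and content feature maps plus an optional padding mask and positional embeddings, and returns an NxCxHxW feature map with the spatial size of the style features. The dummy shapes below are assumed, not taken from the repository.

# Usage sketch (assumption, not part of the commit).
import torch

from StyleTransfer.srcTransformer.transformer import Transformer

trans = Transformer(d_model=512, nhead=8)
trans.eval()

content_feat = torch.randn(1, 512, 40, 40)  # stand-in for 512-channel content features
style_feat = torch.randn(1, 512, 40, 40)    # stand-in for 512-channel style features

with torch.no_grad():
    # mask and the positional embeddings may be None; forward builds a
    # content-aware positional embedding internally via new_ps/averagepooling
    hs = trans(style_feat, None, content_feat, None, None)
print(hs.shape)  # NxCxHxW, here (1, 512, 40, 40)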
StyleTransfer/styleTransfer.py
CHANGED
@@ -1,115 +1,181 @@
-from PIL import Image
import numpy as np
import torch
-print(torch.cuda.is_available())
import torch.nn as nn
from torchvision import transforms
-import StyleTransfer.transformer as transformer
-import StyleTransfer.StyTR as StyTR
-from collections import OrderedDict
-import tensorflow_hub as tfhub
-import tensorflow as tf
-import paddlehub as phub
-import os
-
-############################################# TRANSFORMER ############################################

    transform_list.append(transforms.ToTensor())
    transform = transforms.Compose(transform_list)
    return transform

    transform_list.append(transforms.ToTensor())
    transform = transforms.Compose(transform_list)
    return transform

def StyleTransformer(content_img: Image.Image, style_img: Image.Image) -> Image.Image:
-    decoder = StyTR.decoder
-    Trans = transformer.Transformer()
-    embedding = StyTR.PatchEmbed()
    decoder.eval()
    Trans.eval()
    vgg.eval()

-    new_state_dict = OrderedDict()
    state_dict = torch.load(decoder_path)
    decoder.load_state_dict(state_dict)

-    new_state_dict = OrderedDict()
    state_dict = torch.load(Trans_path)
    Trans.load_state_dict(state_dict)

-    new_state_dict = OrderedDict()
    state_dict = torch.load(embedding_path)
    embedding.load_state_dict(state_dict)

-    network = StyTR.StyTrans(vgg,decoder,embedding,Trans)
    network.eval()

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network.to(device)
-    content = content_tf(content_img.convert("RGB"))
    style = style_tf(style_img.convert("RGB"))
    style = style.to(device).unsqueeze(0)
    content = content.to(device).unsqueeze(0)
    with torch.no_grad():
-        output= network(content,style)
    output = output[0].cpu().squeeze()
-    output =
    return Image.fromarray(output)
-
-############################################## STYLE-FAST #############################################
-style_transfer_model = tfhub.load("https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2")

    output = style_transfer_model(content_image, style_image)
    stylized_image = output[0]
    return Image.fromarray(np.uint8(stylized_image[0] * 255))

stylepro_artistic = phub.Module(name="stylepro_artistic")
-def StyleProjection(content_image:Image.Image,style_image:Image.Image) -> Image.Image:
-    print('line92')
-    result = stylepro_artistic.style_transfer(
-        images=[{
-            'content': np.array(content_image.convert('RGB') )[:, :, ::-1],
-            'styles': [np.array(style_image.convert('RGB') )[:, :, ::-1]]}],
-        alpha=0.8)
-    print('line97')
-    return Image.fromarray(np.uint8(result[0]['data'])[:,:,::-1]).convert('RGB')
-
-def create_styledSofa(content_image:Image.Image,style_image:Image.Image,choice:str) -> Image.Image:
-    if choice =="Style Transformer":
-        output = StyleTransformer(content_image,style_image)
-    elif choice =="Style FAST":
-        output = StyleFAST(content_image,style_image)
-    elif choice =="Style Projection":
-        output = StyleProjection(content_image,style_image)
-    else:
-        output = content_image
-    return output
import numpy as np
+import paddlehub as phub
+import StyleTransfer.srcTransformer.StyTR as StyTR
+import StyleTransfer.srcTransformer.transformer as transformer
+import tensorflow as tf
+import tensorflow_hub as tfhub
import torch
import torch.nn as nn
+from PIL import Image
from torchvision import transforms

+# TRANSFORMER
+
+vgg_path = "StyleTransfer/srcTransformer/Transformer_models/vgg_normalised.pth"
+decoder_path = "StyleTransfer/srcTransformer/Transformer_models/decoder_iter_160000.pth"
+Trans_path = (
+    "StyleTransfer/srcTransformer/Transformer_models/transformer_iter_160000.pth"
+)
+embedding_path = (
+    "StyleTransfer/srcTransformer/Transformer_models/embedding_iter_160000.pth"
+)
+
+
+def style_transform(h, w):
+    """
+    This function creates a transformation for the style image,
+    that crops it and formats it into a tensor.
+
+    Parameters:
+        h = height
+        w = width
+    Return:
+        transform = transformation pipeline
+    """
+    transform_list = []
+    transform_list.append(transforms.CenterCrop((h, w)))
    transform_list.append(transforms.ToTensor())
    transform = transforms.Compose(transform_list)
    return transform

+
+def content_transform():
+    """
+    This function simply creates a transformation pipeline,
+    that formats the content image into a tensor.
+
+    Returns:
+        transform = the transformation pipeline
+    """
+    transform_list = []
    transform_list.append(transforms.ToTensor())
    transform = transforms.Compose(transform_list)
    return transform

+
+# This loads the network architecture already at building time
+vgg = StyTR.vgg
+vgg.load_state_dict(torch.load(vgg_path))
+vgg = nn.Sequential(*list(vgg.children())[:44])
+decoder = StyTR.decoder
+Trans = transformer.Transformer()
+embedding = StyTR.PatchEmbed()
+# The (square) shape of the content and style image is fixed
+content_size = 640
+style_size = 640
+
+
def StyleTransformer(content_img: Image.Image, style_img: Image.Image) -> Image.Image:
+    """
+    This function creates the Transformer network and applies it on
+    a content and style image to create a styled image.
+
+    Parameters:
+        content_img = the image with the content
+        style_img = the image with the style/pattern
+    Returns:
+        output = an image that is a combination of both
+    """
+
    decoder.eval()
    Trans.eval()
    vgg.eval()

    state_dict = torch.load(decoder_path)
    decoder.load_state_dict(state_dict)

    state_dict = torch.load(Trans_path)
    Trans.load_state_dict(state_dict)

    state_dict = torch.load(embedding_path)
    embedding.load_state_dict(state_dict)

+    network = StyTR.StyTrans(vgg, decoder, embedding, Trans)
    network.eval()
+
+    content_tf = content_transform()
+    style_tf = style_transform(style_size, style_size)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    network.to(device)
+    content = content_tf(content_img.convert("RGB"))
    style = style_tf(style_img.convert("RGB"))
    style = style.to(device).unsqueeze(0)
    content = content.to(device).unsqueeze(0)
    with torch.no_grad():
+        output = network(content, style)
    output = output[0].cpu().squeeze()
+    output = (
+        output.mul(255)
+        .add_(0.5)
+        .clamp_(0, 255)
+        .permute(1, 2, 0)
+        .to("cpu", torch.uint8)
+        .numpy()
+    )
    return Image.fromarray(output)

+
+# STYLE-FAST
+
+style_transfer_model = tfhub.load(
+    "https://tfhub.dev/google/magenta/arbitrary-image-stylization-v1-256/2"
+)
+
+
+def StyleFAST(content_image: Image.Image, style_image: Image.Image) -> Image.Image:
+    """
+    This function applies a Fast image style transfer technique,
+    which uses a pretrained model from tensorhub.
+
+    Parameters:
+        content_image = the image with the content
+        style_image = the image with the style/pattern
+    Returns:
+        stylized_image = an image that is a combination of both
+    """
+    content_image = (
+        tf.convert_to_tensor(np.array(content_image), np.float32)[tf.newaxis, ...]
+        / 255.0
+    )
+    style_image = (
+        tf.convert_to_tensor(np.array(style_image), np.float32)[tf.newaxis, ...] / 255.0
+    )
    output = style_transfer_model(content_image, style_image)
    stylized_image = output[0]
    return Image.fromarray(np.uint8(stylized_image[0] * 255))

+
+# STYLE PROJECTION
+
stylepro_artistic = phub.Module(name="stylepro_artistic")


+def styleProjection(
+    content_image: Image.Image, style_image: Image.Image, alpha: float = 1.0
+):
+    """
+    This function uses parameter free style transfer,
+    based on a model from paddlehub.
+    There is an optional weight parameter alpha, which
+    allows to control the balance between image and style.
+
+    Parameters:
+        content_image = the image with the content
+        style_image = the image with the style/pattern
+        alpha = weight for the image vs style.
+            This should be a float between 0 and 1.
+    Returns:
+        result = an image that is a combination of both
+    """
+    result = stylepro_artistic.style_transfer(
+        images=[
+            {
+                "content": np.array(content_image.convert("RGB"))[:, :, ::-1],
+                "styles": [np.array(style_image.convert("RGB"))[:, :, ::-1]],
+            }
+        ],
+        alpha=alpha,
+    )
+
+    return Image.fromarray(np.uint8(result[0]["data"])[:, :, ::-1]).convert("RGB")
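A rough usage sketch of these three entry points (an illustrative assumption, not part of the commit; importing the module loads the StyTR weights, the TF-Hub model and the PaddleHub module at import time): each one maps two PIL images, content and style, to a styled PIL image. The example file names are only placeholders taken from the figures folder.

# Usage sketch (assumption, not part of the commit).
from PIL import Image

from StyleTransfer.styleTransfer import StyleFAST, StyleTransformer, styleProjection

sofa = Image.open("figures/0.png").convert("RGB").resize((640, 640))      # content photo
pattern = Image.open("figures/1.png").convert("RGB").resize((640, 640))   # style pattern

styled_a = StyleTransformer(sofa, pattern)             # StyTR transformer network
styled_b = StyleFAST(sofa, pattern)                    # magenta arbitrary stylization
styled_c = styleProjection(sofa, pattern, alpha=0.8)   # paddlehub, alpha blends image vs style
styled_a.save("styled_sofa.png")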
app.py
CHANGED
@@ -1,50 +1,71 @@
-
-from typing import Tuple
-
-import gradio as gr
import numpy as np
from Segmentation.segmentation import get_mask, replace_sofa
-from StyleTransfer.styleTransfer import

def fix_orient(img: Image.Image) -> Image.Image:
    for orientation in ExifTags.TAGS.keys():
-        if ExifTags.TAGS[orientation]==
            break
    info = dict(info.items())
-    orientation
    return img


-def resize_sofa(img: Image.Image) ->
    """
-    This function adds padding to make the original image square
-    such that it can be reverted later.
    Parameters:
        img = original image
    Return:
-        box
    """
    width, height = img.size
    idx = np.argmin([width, height])
    newsize = (640, 640)  # parameters from test script

    if idx == 0:
        box = (
            newsize[0] * (1 - width / height) // 2,
            0,
@@ -52,22 +73,22 @@ def resize_sofa(img: Image.Image) -> Tuple[Image.Image, tuple]:
            newsize[1],
        )
    else:
        box = (
            0,
            newsize[1] * (1 - height / width) // 2,
            newsize[0],
            newsize[1] - newsize[1] * (1 - height / width) // 2,
        )
-    return


def resize_style(img: Image.Image) -> Image.Image:
    """
-    This function generates a zoomed out version of
-    image and resizes it to a 640by640 square.
    Parameters:
        img = image containing the style/pattern
    Return:
@@ -88,114 +109,249 @@ def resize_style(img: Image.Image) -> Image.Image:
    top = 0
    bottom = height
    newsize = (640, 640)  # parameters from test script

    # Constructs a zoomed-out version
    copies = 8
    resize = (newsize[0] // copies, newsize[1] // copies)
    for row in range(copies):
        for column in range(copies):
-    return


-""