rajatsingh0702 committed
Commit 7234ee2
1 Parent(s): 5748770

files added

.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ color2edge.pth filter=lfs diff=lfs merge=lfs -text
+ edge2color.pth filter=lfs diff=lfs merge=lfs -text
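With these patterns in place, Git LFS stores the two new .pth checkpoints as small pointer stubs in the repository (their pointer contents appear under the color2edge.pth and edge2color.pth entries below) while the actual weight files live in LFS storage.
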
app.py ADDED
@@ -0,0 +1,122 @@
+ # For plotting
+ import numpy as np
+
+ # For utilities
+ from timeit import default_timer as timer
+
+ # For conversion
+ import opencv_transforms.transforms as TF
+ import opencv_transforms.functional as FF
+
+ # For everything
+ import torch
+
+ # For our model
+ import mymodels
+
+ # For demo api
+ import gradio as gr
+
+ # To ignore warnings
+ import warnings
+
+ warnings.simplefilter("ignore", UserWarning)
+
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
+ ncluster = 9
+ nc = 3 * (ncluster + 1)  # 30 input channels: a 3-channel edge image plus nine 3-channel palette images
+ netC2S = mymodels.Color2Sketch(pretrained=True).to(device)
+ netG = mymodels.Sketch2Color(nc=nc, pretrained=True).to(device)
+ transform = TF.Resize((512, 512))
+
+
+ def make_tensor(img):
+     img = FF.to_tensor(img)
+     img = FF.normalize(img, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+     return img
+
+
+ def predictC2S(img):
+     # PIL size is (width, height); Resize expects (height, width)
+     final_transform = TF.Resize((img.size[1], img.size[0]))
+     img = np.array(img)
+     img = transform(img)
+     img = make_tensor(img)
+     start_time = timer()
+     with torch.inference_mode():
+         img_edge = netC2S(img.unsqueeze(0).to(device)).squeeze().permute(1, 2, 0).cpu().numpy()
+     img_edge = FF.to_grayscale(img_edge, num_output_channels=3)
+     img = FF.to_tensor(img_edge).permute(1, 2, 0).cpu().numpy()
+     end_time = timer()
+     img = final_transform(img)
+     return img, round(end_time - start_time, 3)
+
+
+ def predictS2C(img, ref):
+     # PIL size is (width, height); Resize expects (height, width)
+     final_transform = TF.Resize((img.size[1], img.size[0]))
+     img = np.array(img)
+     ref = np.array(ref)
+     ref = transform(ref)
+     img = transform(img)
+     img = make_tensor(img)
+     color_palette = mymodels.color_cluster(ref)
+     for i in range(0, len(color_palette)):
+         color = color_palette[i]
+         color_palette[i] = make_tensor(color)
+     start_time = timer()
+     with torch.inference_mode():
+         img_edge = netC2S(img.unsqueeze(0).to(device)).squeeze().permute(1, 2, 0).cpu().numpy()
+     img_edge = FF.to_grayscale(img_edge, num_output_channels=3)
+     img = FF.to_tensor(img_edge)
+     input_tensor = torch.cat([img.cpu()] + color_palette, dim=0).to(device)
+     with torch.inference_mode():
+         fake = netG(input_tensor.unsqueeze(0).to(device)).squeeze().permute(1, 2, 0).cpu().numpy()
+     end_time = timer()
+     fake = final_transform(fake)
+     return fake, round(end_time - start_time, 3)
+
+
+ example_list1 = [["./examples/img1.jpg", "./examples/ref1.jpg"],
+                  ["./examples/img2.jpg", "./examples/ref2.jpg"],
+                  ["./examples/img3.jpg", "./examples/ref3.jpg"],
+                  ["./examples/img4.jpg", "./examples/ref4.jpg"]]
+ example_list2 = [["./examples/sketch1.jpg"],
+                  ["./examples/sketch2.jpg"],
+                  ["./examples/sketch3.jpg"],
+                  ["./examples/sketch4.jpg"]]
+
+ with gr.Blocks() as demo:
+     gr.Markdown("# Color2Sketch & Sketch2Color")
+     with gr.Tab("Sketch To Color"):
+         gr.Markdown("### Enter the **Sketch** & **Reference** on the left side. You can use the example list.")
+         with gr.Row():
+             with gr.Column():
+                 input1 = [gr.Image(type="pil", label="Sketch"), gr.Image(type="pil", label="Reference")]
+                 with gr.Row():
+                     # Clear button
+                     gr.ClearButton(input1)
+                     btn1 = gr.Button("Submit")
+                 gr.Examples(examples=example_list1, inputs=input1)
+             with gr.Column():
+                 output1 = [gr.Image(type="pil", label="Colored Sketch"), gr.Number(label="Prediction time (s)")]
+     with gr.Tab("Color To Sketch"):
+         gr.Markdown("### Enter the **Colored Sketch** on the left side. You can use the example list.")
+         with gr.Row():
+             with gr.Column():
+                 input2 = gr.Image(type="pil", label="Color Sketch")
+                 with gr.Row():
+                     # Clear button
+                     gr.ClearButton(input2)
+                     btn2 = gr.Button("Submit")
+                 gr.Examples(example_list2, inputs=input2)
+             with gr.Column():
+                 output2 = [gr.Image(type="pil", label="Sketch"), gr.Number(label="Prediction time (s)")]
+     btn1.click(predictS2C, inputs=input1, outputs=output1)
+     btn2.click(predictC2S, inputs=input2, outputs=output2)
+     gr.Markdown("""
+ ### The model is taken from [this GitHub Repo](https://github.com/delta6189/Anime-Sketch-Colorizer).
+
+ Email : rajatsingh072002@gmail.com | My [GitHub Repo](https://github.com/Rajatsingh24/Anime-Sketch2Color-Color2Sketch)
+ """)
+
+ if __name__ == "__main__":
+     demo.launch(debug=False)
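
For reference, a minimal sketch of calling the two predictors directly, without the Gradio UI. It assumes you run from the repository root (so importing app works and loads both pretrained networks) and uses the example images added in this commit; both functions return a numpy image plus the prediction time in seconds.

    from PIL import Image

    import app  # importing app.py builds the demo and loads both pretrained networks

    # Sketch + reference -> colored image (same pairing the demo's example list uses)
    sketch = Image.open("./examples/img1.jpg").convert("RGB")
    reference = Image.open("./examples/ref1.jpg").convert("RGB")
    colored, seconds = app.predictS2C(sketch, reference)
    print("Sketch2Color:", colored.shape, seconds, "s")

    # Color image -> edge/sketch image
    color_input = Image.open("./examples/sketch1.jpg").convert("RGB")
    edges, seconds = app.predictC2S(color_input)
    print("Color2Sketch:", edges.shape, seconds, "s")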
color2edge.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:922c4e109a0f7086d48e211156c7f6fbeff6b0393baecb606f22f44c7cda9877
+ size 254000447
dataloader.py ADDED
@@ -0,0 +1,173 @@
+ import cv2
+ import numpy as np
+ import torch
+ import torchvision
+ import opencv_transforms.functional as FF
+ from torchvision import datasets
+ from PIL import Image
+
+
+ def color_cluster(img, nclusters=9):
+     """
+     Apply K-means clustering to the input image.
+
+     Args:
+         img: Numpy array which has shape of (H, W, C)
+         nclusters: # of clusters (default = 9)
+
+     Returns:
+         color_palette: list of 3D numpy arrays which have the same shape as the input image.
+             e.g. If the input image has shape (256, 256, 3) and nclusters is 4, the returned color_palette
+             is [color1, color2, color3, color4] and each component is a (256, 256, 3) numpy array.
+
+     Note:
+         The K-means clustering algorithm is quite computationally intensive.
+         Thus, before extracting dominant colors, the input image is resized to 0.25x its size.
+     """
+     img_size = img.shape
+     small_img = cv2.resize(img, None, fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
+     sample = small_img.reshape((-1, 3))
+     sample = np.float32(sample)
+     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
+     flags = cv2.KMEANS_PP_CENTERS
+
+     _, _, centers = cv2.kmeans(sample, nclusters, None, criteria, 10, flags)
+     centers = np.uint8(centers)
+     color_palette = []
+
+     # Build one solid-color image per cluster center
+     for i in range(0, nclusters):
+         dominant_color = np.zeros(img_size, dtype='uint8')
+         dominant_color[:, :, :] = centers[i]
+         color_palette.append(dominant_color)
+
+     return color_palette
+
+
+ class PairImageFolder(datasets.ImageFolder):
+     """
+     A generic data loader where the images are arranged in this way: ::
+
+         root/dog/xxx.png
+         root/dog/xxy.png
+         root/dog/xxz.png
+
+         root/cat/123.png
+         root/cat/nsdf3.png
+         root/cat/asd932_.png
+
+     This class works properly for paired images in the form [sketch, color_image].
+
+     Args:
+         root (string): Root directory path.
+         transform (callable, optional): A function/transform that takes in a PIL image
+             and returns a transformed version. E.g., ``transforms.RandomCrop``
+         target_transform (callable, optional): A function/transform that takes in the
+             target and transforms it.
+         loader (callable, optional): A function to load an image given its path.
+         is_valid_file (callable, optional): A function that takes the path of an image file
+             and checks if the file is valid (used to check for corrupt files).
+         sketch_net: The network used to convert a color image to a sketch image.
+         ncluster: Number of clusters used when extracting the color palette.
+
+     Attributes:
+         classes (list): List of the class names.
+         class_to_idx (dict): Dict with items (class_name, class_index).
+         imgs (list): List of (image path, class_index) tuples.
+
+     Getitem:
+         img_edge: Edge image
+         img: Color image
+         color_palette: Extracted color palette
+     """
+     def __init__(self, root, transform, sketch_net, ncluster):
+         super(PairImageFolder, self).__init__(root, transform)
+         self.ncluster = ncluster
+         self.sketch_net = sketch_net
+         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+     def __getitem__(self, index):
+         path, label = self.imgs[index]
+         img = self.loader(path)
+         img = np.asarray(img)
+         img = img[:, 0:512, :]  # take the first 512 columns of the paired image
+         img = self.transform(img)
+         color_palette = color_cluster(img, nclusters=self.ncluster)
+         img = self.make_tensor(img)
+
+         with torch.no_grad():
+             img_edge = self.sketch_net(img.unsqueeze(0).to(self.device)).squeeze().permute(1, 2, 0).cpu().numpy()
+         img_edge = FF.to_grayscale(img_edge, num_output_channels=3)
+         img_edge = FF.to_tensor(img_edge)
+
+         for i in range(0, len(color_palette)):
+             color = color_palette[i]
+             color_palette[i] = self.make_tensor(color)
+
+         return img_edge, img, color_palette
+
+     def make_tensor(self, img):
+         img = FF.to_tensor(img)
+         img = FF.normalize(img, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+         return img
+
+
+ class GetImageFolder(datasets.ImageFolder):
+     """
+     A generic data loader where the images are arranged in this way: ::
+
+         root/dog/xxx.png
+         root/dog/xxy.png
+         root/dog/xxz.png
+
+         root/cat/123.png
+         root/cat/nsdf3.png
+         root/cat/asd932_.png
+
+     Args:
+         root (string): Root directory path.
+         transform (callable, optional): A function/transform that takes in a PIL image
+             and returns a transformed version. E.g., ``transforms.RandomCrop``
+         target_transform (callable, optional): A function/transform that takes in the
+             target and transforms it.
+         loader (callable, optional): A function to load an image given its path.
+         is_valid_file (callable, optional): A function that takes the path of an image file
+             and checks if the file is valid (used to check for corrupt files).
+         sketch_net: The network used to convert a color image to a sketch image.
+         ncluster: Number of clusters used when extracting the color palette.
+
+     Attributes:
+         classes (list): List of the class names.
+         class_to_idx (dict): Dict with items (class_name, class_index).
+         imgs (list): List of (image path, class_index) tuples.
+
+     Getitem:
+         img_edge: Edge image
+         img: Color image
+         color_palette: Extracted color palette
+     """
+     def __init__(self, root, transform, sketch_net, ncluster):
+         super(GetImageFolder, self).__init__(root, transform)
+         self.ncluster = ncluster
+         self.sketch_net = sketch_net
+         self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
+     def __getitem__(self, index):
+         path, label = self.imgs[index]
+         img = self.loader(path)
+         img = np.asarray(img)
+         img = self.transform(img)
+         color_palette = color_cluster(img, nclusters=self.ncluster)
+         img = self.make_tensor(img)
+
+         with torch.no_grad():
+             img_edge = self.sketch_net(img.unsqueeze(0).to(self.device)).squeeze().permute(1, 2, 0).cpu().numpy()
+         img_edge = FF.to_grayscale(img_edge, num_output_channels=3)
+         img_edge = FF.to_tensor(img_edge)
+
+         for i in range(0, len(color_palette)):
+             color = color_palette[i]
+             color_palette[i] = self.make_tensor(color)
+
+         return img_edge, img, color_palette
+
+     def make_tensor(self, img):
+         img = FF.to_tensor(img)
+         img = FF.normalize(img, (0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
+         return img
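
A minimal usage sketch for PairImageFolder, assuming a hypothetical ./data root laid out the way ImageFolder expects (root/&lt;class&gt;/*.png) with side-by-side paired images at least 512 pixels wide; the dataset path and batch size are placeholders:

    import torch
    from torch.utils.data import DataLoader
    import opencv_transforms.transforms as TF

    import mymodels
    from dataloader import PairImageFolder

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    sketch_net = mymodels.Color2Sketch(pretrained=True).to(device)  # needs color2edge.pth in the working directory

    # './data' is a placeholder path; ncluster=9 matches the value used in app.py
    dataset = PairImageFolder(root='./data', transform=TF.Resize((512, 512)),
                              sketch_net=sketch_net, ncluster=9)
    loader = DataLoader(dataset, batch_size=2, shuffle=True)

    img_edge, img, color_palette = next(iter(loader))
    print(img_edge.shape, img.shape, len(color_palette))  # e.g. [2, 3, 512, 512], [2, 3, 512, 512], 9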
edge2color.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:221a9a798919af697a590a8c98f6cdb0b29f3ab836ca8876ca5724f8eea4f7bd
+ size 254069569
examples/img1.jpg ADDED
examples/img2.jpg ADDED
examples/img3.jpg ADDED
examples/img4.jpg ADDED
examples/ref1.jpg ADDED
examples/ref2.jpg ADDED
examples/ref3.jpg ADDED
examples/ref4.jpg ADDED
examples/sketch1.jpg ADDED
examples/sketch2.jpg ADDED
examples/sketch3.jpg ADDED
examples/sketch4.jpg ADDED
mymodels.py ADDED
@@ -0,0 +1,460 @@
+ import torch
+ import torch.nn as nn
+ import os
+ import cv2
+ import numpy as np
+
+ __all__ = [
+     'color_cluster', 'Color2Sketch', 'Sketch2Color', 'Discriminator',
+ ]
+
+
+ def color_cluster(img, nclusters=9):
+     """
+     Apply K-means clustering to the input image.
+
+     Args:
+         img: Numpy array which has shape of (H, W, C)
+         nclusters: # of clusters (default = 9)
+
+     Returns:
+         color_palette: list of 3D numpy arrays which have the same shape as the input image.
+             e.g. If the input image has shape (256, 256, 3) and nclusters is 4, the returned color_palette
+             is [color1, color2, color3, color4] and each component is a (256, 256, 3) numpy array.
+
+     Note:
+         The K-means clustering algorithm is quite computationally intensive.
+         Thus, before extracting dominant colors, the input image is resized to 0.25x its size.
+     """
+     img_size = img.shape
+     small_img = cv2.resize(img, None, fx=0.25, fy=0.25, interpolation=cv2.INTER_AREA)
+     sample = small_img.reshape((-1, 3))
+     sample = np.float32(sample)
+     criteria = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 10, 1.0)
+     flags = cv2.KMEANS_PP_CENTERS
+
+     _, _, centers = cv2.kmeans(sample, nclusters, None, criteria, 10, flags)
+     centers = np.uint8(centers)
+     color_palette = []
+
+     # Build one solid-color image per cluster center
+     for i in range(0, nclusters):
+         dominant_color = np.zeros(img_size, dtype='uint8')
+         dominant_color[:, :, :] = centers[i]
+         color_palette.append(dominant_color)
+
+     return color_palette
+
+
+ class ApplyNoise(nn.Module):
+     def __init__(self, channels):
+         super().__init__()
+         self.weight = nn.Parameter(torch.zeros(channels))
+
+     def forward(self, x, noise=None):
+         if noise is None:
+             noise = torch.randn(x.size(0), 1, x.size(2), x.size(3), device=x.device, dtype=x.dtype)
+         return x + self.weight.view(1, -1, 1, 1) * noise.to(x.device)
+
+
+ class Conv2d_WS(nn.Conv2d):
+     # Conv2d with weight standardization
+     def __init__(self, in_chan, out_chan, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True):
+         super().__init__(in_chan, out_chan, kernel_size, stride, padding, dilation, groups, bias)
+
+     def forward(self, x):
+         weight = self.weight
+         weight_mean = weight.mean(dim=1, keepdim=True).mean(dim=2, keepdim=True).mean(dim=3, keepdim=True)
+         weight = weight - weight_mean
+         std = weight.view(weight.size(0), -1).std(dim=1).view(-1, 1, 1, 1) + 1e-5
+         weight = weight / std.expand_as(weight)
+         return torch.nn.functional.conv2d(x, weight, self.bias, self.stride, self.padding, self.dilation, self.groups)
+
+
+ class ResidualBlock(nn.Module):
+     def __init__(self, in_channels, out_channels, stride=1, sample=None):
+         super(ResidualBlock, self).__init__()
+         self.ic = in_channels
+         self.oc = out_channels
+         self.conv1 = Conv2d_WS(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn1 = nn.GroupNorm(32, out_channels)
+         self.conv2 = Conv2d_WS(out_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
+         self.bn2 = nn.GroupNorm(32, out_channels)
+         self.convr = Conv2d_WS(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
+         self.bnr = nn.GroupNorm(32, out_channels)
+         self.relu = nn.ReLU(inplace=True)
+         self.sample = sample
+         if self.sample == 'down':
+             self.sampling = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+         elif self.sample == 'up':
+             self.sampling = nn.Upsample(scale_factor=2, mode='nearest')
+
+     def forward(self, x):
+         if self.ic != self.oc:
+             # Project the residual when the channel count changes
+             residual = self.convr(x)
+             residual = self.bnr(residual)
+         else:
+             residual = x
+         out = self.conv1(x)
+         out = self.bn1(out)
+         out = self.relu(out)
+         out = self.conv2(out)
+         out = self.bn2(out)
+         out += residual
+         out = self.relu(out)
+         if self.sample is not None:
+             out = self.sampling(out)
+         return out
+
+
+ class Attention_block(nn.Module):
+     def __init__(self, F_g, F_l, F_int):
+         super(Attention_block, self).__init__()
+         self.W_g = nn.Sequential(
+             Conv2d_WS(F_g, F_int, kernel_size=1, stride=1, padding=0, bias=True),
+             nn.GroupNorm(32, F_int)
+         )
+
+         self.W_x = nn.Sequential(
+             Conv2d_WS(F_l, F_int, kernel_size=1, stride=1, padding=0, bias=True),
+             nn.GroupNorm(32, F_int)
+         )
+
+         self.psi = nn.Sequential(
+             Conv2d_WS(F_int, 1, kernel_size=1, stride=1, padding=0, bias=True),
+             nn.InstanceNorm2d(1),
+             nn.Sigmoid()
+         )
+
+         self.relu = nn.ReLU(inplace=True)
+
+     def forward(self, g, x):
+         g1 = self.W_g(g)
+         x1 = self.W_x(x)
+         psi = self.relu(g1 + x1)
+         psi = self.psi(psi)
+
+         return x * psi
+
+
+ class Color2Sketch(nn.Module):
+     def __init__(self, nc=3, pretrained=False):
+         super(Color2Sketch, self).__init__()
+
+         class Encoder(nn.Module):
+             def __init__(self):
+                 super(Encoder, self).__init__()
+                 # Stack of residual blocks, each downsampling by a factor of 2
+                 self.layer1 = ResidualBlock(nc, 64, sample='down')
+                 self.layer2 = ResidualBlock(64, 128, sample='down')
+                 self.layer3 = ResidualBlock(128, 256, sample='down')
+                 self.layer4 = ResidualBlock(256, 512, sample='down')
+                 self.layer5 = ResidualBlock(512, 512, sample='down')
+                 self.layer6 = ResidualBlock(512, 512, sample='down')
+                 self.layer7 = ResidualBlock(512, 512, sample='down')
+
+             def forward(self, input_image):
+                 # Extract multi-scale features for the skip connections
+                 x0 = input_image      # nc * 256 * 256
+                 x1 = self.layer1(x0)  # 64 * 128 * 128
+                 x2 = self.layer2(x1)  # 128 * 64 * 64
+                 x3 = self.layer3(x2)  # 256 * 32 * 32
+                 x4 = self.layer4(x3)  # 512 * 16 * 16
+                 x5 = self.layer5(x4)  # 512 * 8 * 8
+                 x6 = self.layer6(x5)  # 512 * 4 * 4
+                 x7 = self.layer7(x6)  # 512 * 2 * 2
+
+                 return x1, x2, x3, x4, x5, x6, x7
+
+         class Decoder(nn.Module):
+             def __init__(self):
+                 super(Decoder, self).__init__()
+                 # Convolutional layers and upsampling
+                 self.noise7 = ApplyNoise(512)
+                 self.layer7_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att6 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer6 = ResidualBlock(1024, 512, sample=None)
+                 self.noise6 = ApplyNoise(512)
+                 self.layer6_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att5 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer5 = ResidualBlock(1024, 512, sample=None)
+                 self.noise5 = ApplyNoise(512)
+                 self.layer5_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att4 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer4 = ResidualBlock(1024, 512, sample=None)
+                 self.noise4 = ApplyNoise(512)
+                 self.layer4_up = ResidualBlock(512, 256, sample='up')
+
+                 self.Att3 = Attention_block(F_g=256, F_l=256, F_int=128)
+                 self.layer3 = ResidualBlock(512, 256, sample=None)
+                 self.noise3 = ApplyNoise(256)
+                 self.layer3_up = ResidualBlock(256, 128, sample='up')
+
+                 self.Att2 = Attention_block(F_g=128, F_l=128, F_int=64)
+                 self.layer2 = ResidualBlock(256, 128, sample=None)
+                 self.noise2 = ApplyNoise(128)
+                 self.layer2_up = ResidualBlock(128, 64, sample='up')
+
+                 self.Att1 = Attention_block(F_g=64, F_l=64, F_int=32)
+                 self.layer1 = ResidualBlock(128, 64, sample=None)
+                 self.noise1 = ApplyNoise(64)
+                 self.layer1_up = ResidualBlock(64, 32, sample='up')
+
+                 self.noise0 = ApplyNoise(32)
+                 self.layer0 = Conv2d_WS(32, 3, kernel_size=3, stride=1, padding=1)
+                 self.activation = nn.ReLU(inplace=True)
+                 self.tanh = nn.Tanh()
+
+             def forward(self, midlevel_input):  # , global_input):
+                 x1, x2, x3, x4, x5, x6, x7 = midlevel_input
+
+                 x = self.noise7(x7)
+                 x = self.layer7_up(x)  # 512 * 4 * 4
+
+                 x6 = self.Att6(g=x, x=x6)
+                 x = torch.cat((x, x6), dim=1)  # 1024 * 4 * 4
+                 x = self.layer6(x)  # 512 * 4 * 4
+                 x = self.noise6(x)
+                 x = self.layer6_up(x)  # 512 * 8 * 8
+
+                 x5 = self.Att5(g=x, x=x5)
+                 x = torch.cat((x, x5), dim=1)  # 1024 * 8 * 8
+                 x = self.layer5(x)  # 512 * 8 * 8
+                 x = self.noise5(x)
+                 x = self.layer5_up(x)  # 512 * 16 * 16
+
+                 x4 = self.Att4(g=x, x=x4)
+                 x = torch.cat((x, x4), dim=1)  # 1024 * 16 * 16
+                 x = self.layer4(x)  # 512 * 16 * 16
+                 x = self.noise4(x)
+                 x = self.layer4_up(x)  # 256 * 32 * 32
+
+                 x3 = self.Att3(g=x, x=x3)
+                 x = torch.cat((x, x3), dim=1)  # 512 * 32 * 32
+                 x = self.layer3(x)  # 256 * 32 * 32
+                 x = self.noise3(x)
+                 x = self.layer3_up(x)  # 128 * 64 * 64
+
+                 x2 = self.Att2(g=x, x=x2)
+                 x = torch.cat((x, x2), dim=1)  # 256 * 64 * 64
+                 x = self.layer2(x)  # 128 * 64 * 64
+                 x = self.noise2(x)
+                 x = self.layer2_up(x)  # 64 * 128 * 128
+
+                 x1 = self.Att1(g=x, x=x1)
+                 x = torch.cat((x, x1), dim=1)  # 128 * 128 * 128
+                 x = self.layer1(x)  # 64 * 128 * 128
+                 x = self.noise1(x)
+                 x = self.layer1_up(x)  # 32 * 256 * 256
+
+                 x = self.noise0(x)
+                 x = self.layer0(x)  # 3 * 256 * 256
+                 x = self.tanh(x)
+
+                 return x
+
+         self.encoder = Encoder()
+         self.decoder = Decoder()
+         if pretrained:
+             print('Loading pretrained {0} model...'.format('Color2Sketch'), end=' ')
+             assert os.path.isfile('color2edge.pth'), 'Error: checkpoint file color2edge.pth not found!'
+             # Load onto CPU first; the caller moves the model to the available device
+             checkpoint = torch.load('color2edge.pth', map_location='cpu')
+             self.load_state_dict(checkpoint['netG'], strict=True)
+             print("Done!")
+         else:
+             self.apply(weights_init)
+             print('Weights of {0} model are initialized'.format('Color2Sketch'))
+
+     def forward(self, inputs):
+         encode = self.encoder(inputs)
+         output = self.decoder(encode)
+
+         return output
+
+
+ class Sketch2Color(nn.Module):
+     def __init__(self, nc=3, pretrained=False):
+         super(Sketch2Color, self).__init__()
+
+         class Encoder(nn.Module):
+             def __init__(self):
+                 super(Encoder, self).__init__()
+                 # Stack of residual blocks, each downsampling by a factor of 2
+                 self.layer1 = ResidualBlock(nc, 64, sample='down')
+                 self.layer2 = ResidualBlock(64, 128, sample='down')
+                 self.layer3 = ResidualBlock(128, 256, sample='down')
+                 self.layer4 = ResidualBlock(256, 512, sample='down')
+                 self.layer5 = ResidualBlock(512, 512, sample='down')
+                 self.layer6 = ResidualBlock(512, 512, sample='down')
+                 self.layer7 = ResidualBlock(512, 512, sample='down')
+
+             def forward(self, input_image):
+                 # Extract multi-scale features for the skip connections
+                 x0 = input_image      # nc * 256 * 256
+                 x1 = self.layer1(x0)  # 64 * 128 * 128
+                 x2 = self.layer2(x1)  # 128 * 64 * 64
+                 x3 = self.layer3(x2)  # 256 * 32 * 32
+                 x4 = self.layer4(x3)  # 512 * 16 * 16
+                 x5 = self.layer5(x4)  # 512 * 8 * 8
+                 x6 = self.layer6(x5)  # 512 * 4 * 4
+                 x7 = self.layer7(x6)  # 512 * 2 * 2
+
+                 return x1, x2, x3, x4, x5, x6, x7
+
+         class Decoder(nn.Module):
+             def __init__(self):
+                 super(Decoder, self).__init__()
+                 # Convolutional layers and upsampling
+                 self.noise7 = ApplyNoise(512)
+                 self.layer7_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att6 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer6 = ResidualBlock(1024, 512, sample=None)
+                 self.noise6 = ApplyNoise(512)
+                 self.layer6_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att5 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer5 = ResidualBlock(1024, 512, sample=None)
+                 self.noise5 = ApplyNoise(512)
+                 self.layer5_up = ResidualBlock(512, 512, sample='up')
+
+                 self.Att4 = Attention_block(F_g=512, F_l=512, F_int=256)
+                 self.layer4 = ResidualBlock(1024, 512, sample=None)
+                 self.noise4 = ApplyNoise(512)
+                 self.layer4_up = ResidualBlock(512, 256, sample='up')
+
+                 self.Att3 = Attention_block(F_g=256, F_l=256, F_int=128)
+                 self.layer3 = ResidualBlock(512, 256, sample=None)
+                 self.noise3 = ApplyNoise(256)
+                 self.layer3_up = ResidualBlock(256, 128, sample='up')
+
+                 self.Att2 = Attention_block(F_g=128, F_l=128, F_int=64)
+                 self.layer2 = ResidualBlock(256, 128, sample=None)
+                 self.noise2 = ApplyNoise(128)
+                 self.layer2_up = ResidualBlock(128, 64, sample='up')
+
+                 self.Att1 = Attention_block(F_g=64, F_l=64, F_int=32)
+                 self.layer1 = ResidualBlock(128, 64, sample=None)
+                 self.noise1 = ApplyNoise(64)
+                 self.layer1_up = ResidualBlock(64, 32, sample='up')
+
+                 self.noise0 = ApplyNoise(32)
+                 self.layer0 = Conv2d_WS(32, 3, kernel_size=3, stride=1, padding=1)
+                 self.activation = nn.ReLU(inplace=True)
+                 self.tanh = nn.Tanh()
+
+             def forward(self, midlevel_input):  # , global_input):
+                 x1, x2, x3, x4, x5, x6, x7 = midlevel_input
+
+                 x = self.noise7(x7)
+                 x = self.layer7_up(x)  # 512 * 4 * 4
+
+                 x6 = self.Att6(g=x, x=x6)
+                 x = torch.cat((x, x6), dim=1)  # 1024 * 4 * 4
+                 x = self.layer6(x)  # 512 * 4 * 4
+                 x = self.noise6(x)
+                 x = self.layer6_up(x)  # 512 * 8 * 8
+
+                 x5 = self.Att5(g=x, x=x5)
+                 x = torch.cat((x, x5), dim=1)  # 1024 * 8 * 8
+                 x = self.layer5(x)  # 512 * 8 * 8
+                 x = self.noise5(x)
+                 x = self.layer5_up(x)  # 512 * 16 * 16
+
+                 x4 = self.Att4(g=x, x=x4)
+                 x = torch.cat((x, x4), dim=1)  # 1024 * 16 * 16
+                 x = self.layer4(x)  # 512 * 16 * 16
+                 x = self.noise4(x)
+                 x = self.layer4_up(x)  # 256 * 32 * 32
+
+                 x3 = self.Att3(g=x, x=x3)
+                 x = torch.cat((x, x3), dim=1)  # 512 * 32 * 32
+                 x = self.layer3(x)  # 256 * 32 * 32
+                 x = self.noise3(x)
+                 x = self.layer3_up(x)  # 128 * 64 * 64
+
+                 x2 = self.Att2(g=x, x=x2)
+                 x = torch.cat((x, x2), dim=1)  # 256 * 64 * 64
+                 x = self.layer2(x)  # 128 * 64 * 64
+                 x = self.noise2(x)
+                 x = self.layer2_up(x)  # 64 * 128 * 128
+
+                 x1 = self.Att1(g=x, x=x1)
+                 x = torch.cat((x, x1), dim=1)  # 128 * 128 * 128
+                 x = self.layer1(x)  # 64 * 128 * 128
+                 x = self.noise1(x)
+                 x = self.layer1_up(x)  # 32 * 256 * 256
+
+                 x = self.noise0(x)
+                 x = self.layer0(x)  # 3 * 256 * 256
+                 x = self.tanh(x)
+
+                 return x
+
+         self.encoder = Encoder()
+         self.decoder = Decoder()
+         if pretrained:
+             print('Loading pretrained {0} model...'.format('Sketch2Color'), end=' ')
+             assert os.path.isfile('edge2color.pth'), 'Error: checkpoint file edge2color.pth not found!'
+             # Load onto CPU first; the caller moves the model to the available device
+             checkpoint = torch.load('edge2color.pth', map_location='cpu')
+             self.load_state_dict(checkpoint['netG'], strict=True)
+             print("Done!")
+         else:
+             self.apply(weights_init)
+             print('Weights of {0} model are initialized'.format('Sketch2Color'))
+
+     def forward(self, inputs):
+         encode = self.encoder(inputs)
+         output = self.decoder(encode)
+
+         return output
+
+
+ class Discriminator(nn.Module):
+     def __init__(self, nc=6, pretrained=False):
+         super(Discriminator, self).__init__()
+         self.conv1 = torch.nn.utils.spectral_norm(nn.Conv2d(nc, 64, kernel_size=4, stride=2, padding=1))
+         self.bn1 = nn.GroupNorm(32, 64)
+         self.conv2 = torch.nn.utils.spectral_norm(nn.Conv2d(64, 128, kernel_size=4, stride=2, padding=1))
+         self.bn2 = nn.GroupNorm(32, 128)
+         self.conv3 = torch.nn.utils.spectral_norm(nn.Conv2d(128, 256, kernel_size=4, stride=2, padding=1))
+         self.bn3 = nn.GroupNorm(32, 256)
+         self.conv4 = torch.nn.utils.spectral_norm(nn.Conv2d(256, 512, kernel_size=4, stride=1, padding=1))
+         self.bn4 = nn.GroupNorm(32, 512)
+         self.conv5 = torch.nn.utils.spectral_norm(nn.Conv2d(512, 1, kernel_size=4, stride=1, padding=1))
+         self.activation = nn.LeakyReLU(0.2, inplace=True)
+         self.sigmoid = nn.Sigmoid()
+
+         if pretrained:
+             pass
+         else:
+             self.apply(weights_init)
+             print('Weights of {0} model are initialized'.format('Discriminator'))
+
+     def forward(self, base, unknown):
+         x = torch.cat((base, unknown), dim=1)
+         x = self.activation(self.conv1(x))
+         x = self.activation(self.bn2(self.conv2(x)))
+         x = self.activation(self.bn3(self.conv3(x)))
+         x = self.activation(self.bn4(self.conv4(x)))
+         x = self.sigmoid(self.conv5(x))
+
+         return x.mean((2, 3))
+
+
+ # To initialize model weights
+ def weights_init(model):
+     classname = model.__class__.__name__
+     if classname.find('Conv') != -1:
+         nn.init.normal_(model.weight.data, 0.0, 0.02)
+     elif classname.find('Conv2d_WS') != -1:
+         nn.init.normal_(model.weight.data, 0.0, 0.02)
+     elif classname.find('BatchNorm') != -1:
+         nn.init.normal_(model.weight.data, 1.0, 0.02)
+         nn.init.constant_(model.bias.data, 0)
+     elif classname.find('GroupNorm') != -1:
+         nn.init.normal_(model.weight.data, 1.0, 0.02)
+         nn.init.constant_(model.bias.data, 0)
+     else:
+         pass
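
The generator's input width follows directly from color_cluster: the 3-channel edge image is concatenated with ncluster solid-color palette images of 3 channels each, so nc = 3 * (ncluster + 1) = 30 when ncluster = 9, the value app.py uses. A shape-check sketch of that wiring with random stand-in data (pretrained=False, so no checkpoint is needed):

    import numpy as np
    import torch
    import opencv_transforms.functional as FF

    import mymodels

    ncluster = 9
    nc = 3 * (ncluster + 1)  # 30 input channels: edge image + 9 solid-color palette images

    # Random stand-ins for a reference image and an edge map
    reference = np.random.randint(0, 256, (512, 512, 3), dtype=np.uint8)
    palette = mymodels.color_cluster(reference, nclusters=ncluster)  # list of 9 (512, 512, 3) arrays

    edge = torch.rand(3, 512, 512)  # in the app this is produced by Color2Sketch
    palette_tensors = [FF.normalize(FF.to_tensor(p), (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) for p in palette]
    x = torch.cat([edge] + palette_tensors, dim=0).unsqueeze(0)  # shape [1, 30, 512, 512]

    netG = mymodels.Sketch2Color(nc=nc, pretrained=False)  # pretrained=True would load edge2color.pth
    with torch.inference_mode():
        out = netG(x)
    print(out.shape)  # expected: torch.Size([1, 3, 512, 512])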
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch==2.0.1
+ gradio==3.38.0
+ opencv-python==4.8.0.74
+ opencv-transforms==0.0.6
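
These pins can presumably be installed with pip install -r requirements.txt before launching app.py; numpy and Pillow are not pinned here because they arrive transitively (opencv-python and gradio depend on them).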