import os

os.environ['MKL_SERVICE_FORCE_INTEL'] = '1'

import random

import cv2
import gradio as gr
import numpy as np
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
from PIL import Image
from torch import optim

# ContentLoss / StyleLoss follow the usual feature-matching / Gram-matrix
# formulation (see the reference sketch at the bottom of this file).
from loss_functions import ContentLoss, StyleLoss

# Fix every RNG seed for reproducibility.
seed = 2023
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False  # benchmark mode is non-deterministic

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Convolutional feature extractor of an ImageNet-pretrained VGG16.
cnn = models.vgg16(pretrained=True).features.to(device).eval()

duration = 5  # target length of the progress video, in seconds
content_layers = ['conv_4']
style_layers = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']


def predict(content_img, style_img, style, content, lr, epoch):
    epoch = int(epoch)  # Gradio sliders return floats; range() needs an int
    i = 0
    content_losses = []
    style_losses = []
    model = nn.Sequential().to(device)

    # numpy shapes are (height, width, channels) but cv2.resize expects
    # (width, height), so swap the axes when matching the style image
    # to the content image.
    h, w = content_img.shape[:2]
    style_img = cv2.resize(style_img, (w, h))

    loader = transforms.Compose([transforms.ToTensor()])
    content_img = loader(content_img).to(device).unsqueeze(0)
    style_img = loader(style_img).to(device).unsqueeze(0)
    print(content_img.shape, style_img.shape)

    # Rebuild the VGG16 feature stack layer by layer, inserting a ContentLoss /
    # StyleLoss module right after each layer whose activations we match.
    for layer in cnn.children():
        if isinstance(layer, nn.Conv2d):
            i += 1
            name = 'conv_{}'.format(i)
        elif isinstance(layer, nn.ReLU):
            name = 'relu_{}'.format(i)
            # The in-place ReLU shipped with torchvision would overwrite the
            # activations the loss modules need, so replace it.
            layer = nn.ReLU(inplace=False)
        elif isinstance(layer, nn.MaxPool2d):
            name = 'pool_{}'.format(i)
        elif isinstance(layer, nn.BatchNorm2d):
            name = 'bn_{}'.format(i)
        else:
            raise RuntimeError('Unrecognized layer: {}'.format(layer.__class__.__name__))

        model.add_module(name, layer)

        if name in content_layers:
            target = model(content_img).detach()
            content_loss = ContentLoss(target)
            model.add_module("content_loss_{}".format(i), content_loss)
            content_losses.append(content_loss)

        if name in style_layers:
            target_feature = model(style_img).detach()
            style_loss = StyleLoss(target_feature)
            model.add_module("style_loss_{}".format(i), style_loss)
            style_losses.append(style_loss)

    # Trim every layer after the last loss module; they are never needed.
    for i in range(len(model) - 1, -1, -1):
        if isinstance(model[i], (ContentLoss, StyleLoss)):
            break
    model = model[:(i + 1)]

    # Start from random noise. We optimize input_img rather than
    # model.parameters() because only the input image is updated.
    input_img = torch.randn(content_img.data.size(), device=device)
    input_img.requires_grad_(True)
    model.requires_grad_(False)
    optimizer = optim.Adam([input_img], lr)

    img_history = []
    for _ in range(epoch):
        with torch.no_grad():
            input_img.clamp_(0, 1)

        optimizer.zero_grad()
        model(input_img)

        total_style_loss = sum(sl.loss for sl in style_losses)
        total_content_loss = sum(cl.loss for cl in content_losses)
        loss = total_style_loss * style + total_content_loss * content
        loss.backward()
        optimizer.step()

        # Snapshot the current image (clamped, so uint8 doesn't wrap around).
        snapshot = input_img[0].detach().clamp(0, 1).cpu()
        img_history.append(np.uint8(torch.permute(snapshot, (1, 2, 0)).numpy() * 255.0))

    with torch.no_grad():
        input_img.clamp_(0, 1)
    print(input_img.shape)

    # Stitch the per-epoch snapshots into a video roughly `duration` seconds long.
    fps = max(1, len(img_history) // duration)
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    pic = img_history[-1]
    out = cv2.VideoWriter('output.mp4', fourcc, fps, (pic.shape[1], pic.shape[0]))
    for img in img_history:
        out.write(cv2.cvtColor(img, cv2.COLOR_RGB2BGR))  # OpenCV expects BGR
    out.release()

    result = Image.fromarray(np.uint8(
        torch.permute(input_img[0].detach().cpu(), (1, 2, 0)).numpy() * 255.0))
    return result, 'output.mp4'


example_list = [
    ['content/content2.jpg', 'style/style2.jpg',
     100000, 0.6, 0.3, 400],
    ['content/content2.jpg', 'style/curvy.jpeg', 100000, 1, 0.3, 400],
    ['content/content2.jpg', 'style/water_color.jpg', 30000, 1, 0.1, 300],
    ['content/content2.jpg', 'style/rgb.png', 50000, 1, 0.1, 400],
    ['style/water_color.jpg', 'style/rgb.png', 70000, 1, 0.1, 400],
]

title = "Neural Style Transfer 🎨"
description = ("You can run the code on "
               "[Kaggle](https://www.kaggle.com/frozenwolf/neural-style-transfer). "
               "See the code on [GitHub](https://github.com/FrozenWolf-Cyber/Neural-Style-Transfer) "
               "for a comparison of Neural Style Transfer between VGG16 and AlexNet.")
article = ""

content_input = gr.Image(shape=(256, 256), label="Upload the image the style should be applied to")
style_input = gr.Image(shape=(256, 256), label="Upload the style image")
style_slider = gr.Slider(1, 100000, value=100000, label="Style Weight")
content_slider = gr.Slider(1 / 100000, 1, value=1, label="Content Weight")
lr_slider = gr.Slider(0.001, 1, value=0.1, label="Learning Rate")
epoch_slider = gr.Slider(50, 500, value=100, label="Epochs")

demo = gr.Interface(
    fn=predict,
    inputs=[content_input, style_input, style_slider, content_slider, lr_slider, epoch_slider],
    outputs=[gr.Image(shape=(256, 256)), gr.Video()],
    examples=example_list,
    title=title,
    description=description,
    article=article,
)

demo.launch(debug=False, share=False)
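# ---------------------------------------------------------------------------
# Reference sketch (assumption): loss_functions.py is not shown here, but the
# way ContentLoss / StyleLoss are used above matches the standard PyTorch
# neural style transfer tutorial, roughly (with torch.nn.functional as F):
#
#   class ContentLoss(nn.Module):
#       def __init__(self, target):
#           super().__init__()
#           self.target = target.detach()
#
#       def forward(self, input):
#           # record the loss, pass the features through unchanged
#           self.loss = F.mse_loss(input, self.target)
#           return input
#
#   def gram_matrix(input):
#       b, c, h, w = input.size()
#       features = input.view(b * c, h * w)
#       G = torch.mm(features, features.t())
#       return G.div(b * c * h * w)
#
#   class StyleLoss(nn.Module):
#       def __init__(self, target_feature):
#           super().__init__()
#           self.target = gram_matrix(target_feature).detach()
#
#       def forward(self, input):
#           self.loss = F.mse_loss(gram_matrix(input), self.target)
#           return input
#
# See loss_functions.py in the repo for the actual implementations.
# ---------------------------------------------------------------------------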