WaterKnight committed on
Commit 058960f
1 Parent(s): 3f5c88f

Neural Style Transfer using a PaddleHub model and CLIP.


CLIP is used to retrieve a content image that matches the text input.
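
In outline, the retrieval step works as in the sketch below: the text prompt is embedded with CLIP and scored against precomputed embeddings of the Unsplash photos, and the top-scoring photo becomes the content image for style transfer. This is a condensed version of the image_from_text function added in app.py further down; it assumes the photo embeddings in unsplash-dataset/features.npy were produced with the same openai/clip-vit-base-patch32 checkpoint.

import numpy as np
import torch
from transformers import CLIPModel, CLIPTokenizer

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
# Precomputed CLIP image embeddings, one row per Unsplash photo.
photo_features = np.load("unsplash-dataset/features.npy")

def best_photo_index(text):
    # Embed the query text, then rank all photos by dot-product similarity.
    with torch.no_grad():
        inputs = tokenizer([text], padding=True, return_tensors="pt")
        text_features = model.get_text_features(**inputs).cpu().numpy()
    return int(np.argmax(text_features @ photo_features.T))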

.gitattributes CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+unsplash-dataset/features.npy filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
+gradio_queue.db
README.md CHANGED
@@ -1,11 +1,11 @@
 ---
 title: Neural Style Transfer
-emoji: 🐨
-colorFrom: red
-colorTo: green
+emoji: 🔥
+colorFrom: pink
+colorTo: yellow
 sdk: gradio
 app_file: app.py
-pinned: false
+pinned: true
 ---
 
 # Configuration
app.py ADDED
@@ -0,0 +1,91 @@
+import os
+from io import BytesIO
+import requests
+
+# Interface utilities
+import gradio as gr
+
+# Data utilities
+import numpy as np
+import pandas as pd
+
+# Image utilities
+from PIL import Image
+import cv2
+
+# CLIP model
+import torch
+from transformers import CLIPTokenizer, CLIPModel
+
+# Style transfer model
+import paddlehub as hub
+
+
+
+os.system("hub install stylepro_artistic==1.0.1")
+stylepro_artistic = hub.Module(name="stylepro_artistic")
+
+
+
+# CLIP model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
+model = model.to(device)
+
+# Load data
+photos = pd.read_csv("unsplash-dataset/photos.tsv000", sep="\t", header=0)
+photo_features = np.load("unsplash-dataset/features.npy")
+photo_ids = pd.read_csv("unsplash-dataset/photo_ids.csv")
+photo_ids = list(photo_ids["photo_id"])
+
+def image_from_text(text_input):
+    ## Embed the text prompt (inputs moved to the model's device)
+    with torch.no_grad():
+        inputs = tokenizer([text_input], padding=True, return_tensors="pt").to(device)
+        text_features = model.get_text_features(**inputs).cpu().numpy()
+
+    ## Score every precomputed photo embedding against the text
+    similarities = (text_features @ photo_features.T).squeeze(0)
+
+    ## Return the best-matching photo
+    idx = int(np.argmax(similarities))
+    photo_id = photo_ids[idx]
+    photo_data = photos[photos["photo_id"] == photo_id].iloc[0]
+
+    # Download image
+    response = requests.get(photo_data["photo_image_url"] + "?w=640")
+    pil_image = Image.open(BytesIO(response.content)).convert("RGB")
+    open_cv_image = np.array(pil_image)
+    # Convert RGB to BGR
+    open_cv_image = open_cv_image[:, :, ::-1].copy()
+
+    return open_cv_image
+
+def inference(content, style):
+    result = stylepro_artistic.style_transfer(
+        images=[{
+            "content": image_from_text(content),
+            "styles": [cv2.imread(style.name)]
+        }])
+    return Image.fromarray(np.uint8(result[0]["data"])[:, :, ::-1]).convert("RGB")  # BGR -> RGB
+
+title = "Neural Style Transfer"
+description = "Gradio demo for Neural Style Transfer. To use it, enter a text description for the content image and upload a style image. Read more at the links below."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2003.07694' target='_blank'>Parameter-Free Style Projection for Arbitrary Style Transfer</a> | <a href='https://github.com/PaddlePaddle/PaddleHub' target='_blank'>Github Repo</a><br><a href='https://arxiv.org/abs/2103.00020' target='_blank'>CLIP paper</a> | <a href='https://huggingface.co/transformers/model_doc/clip.html' target='_blank'>Hugging Face CLIP implementation</a></p>"
+examples = [
+    ["a cute kangaroo", "styles/starry.jpeg"],
+    ["man holding beer", "styles/mona1.jpeg"],
+]
+interface = gr.Interface(inference,
+    inputs=[
+        gr.inputs.Textbox(lines=1, placeholder="Describe the content of the image", default="a cute kangaroo", label="Describe the image to which the style will be applied"),
+        gr.inputs.Image(type="file", label="Style to be applied"),
+    ],
+    outputs=gr.outputs.Image(type="pil"),
+    enable_queue=True,
+    title=title,
+    description=description,
+    article=article,
+    examples=examples)
+interface.launch()
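
For reference, a hypothetical direct call with app.py's definitions in scope, bypassing the Gradio UI. It assumes the dataset files and style images are present locally; SimpleNamespace merely stands in for the file object Gradio would pass.

from types import SimpleNamespace

# inference() reads the style image via `style.name`, so any object
# with a .name attribute pointing at an image file works here.
stylized = inference("a cute kangaroo", SimpleNamespace(name="styles/starry.jpeg"))
stylized.save("stylized.png")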
packages.txt ADDED
@@ -0,0 +1,3 @@
+ffmpeg
+libsm6
+libxext6
requirements.txt ADDED
@@ -0,0 +1,4 @@
+paddlepaddle
+paddlehub
+transformers
+torch
styles/mona1.jpeg ADDED
styles/starry.jpeg ADDED
unsplash-dataset/features.npy ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:14ef8a96e6b6adae89432ab046909ab600b5793ba47f2c352168696e7eb9dfb0
+size 51191936
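
For context, a hypothetical sketch of how unsplash-dataset/features.npy could be regenerated: one CLIP image embedding per Unsplash photo, stored as a single float32 matrix. The helper name encode_images, the batch size, and the photo_paths list are assumptions, not part of this commit.

import numpy as np
import torch
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").eval()
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

def encode_images(paths, batch_size=32):
    # Encode images in batches and stack the embeddings row-wise.
    chunks = []
    for i in range(0, len(paths), batch_size):
        batch = [Image.open(p).convert("RGB") for p in paths[i:i + batch_size]]
        inputs = processor(images=batch, return_tensors="pt")
        with torch.no_grad():
            chunks.append(model.get_image_features(**inputs).cpu().numpy())
    return np.concatenate(chunks)

# np.save("unsplash-dataset/features.npy", encode_images(photo_paths))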
unsplash-dataset/photo_ids.csv ADDED
The diff for this file is too large to render.
unsplash-dataset/photos.tsv000 ADDED
The diff for this file is too large to render.