import cv2
import gradio as gr
import numpy as np
import onnxruntime
from scipy.ndimage import gaussian_filter
from skimage.color import rgb2gray


def xdog(im, gamma=0.98, phi=200, eps=-0.1, k=1.6, sigma=1):
    # Source : https://github.com/CemalUnal/XDoG-Filter
    # Reference : XDoG: An eXtended difference-of-Gaussians compendium including advanced image stylization
    # Link : http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.365.151&rep=rep1&type=pdf
    # Convert RGB input to grayscale; a 2-D input is assumed to already be grayscale.
    if im.ndim == 3 and im.shape[2] == 3:
        im = rgb2gray(im)
    imf1 = gaussian_filter(im, sigma)
    imf2 = gaussian_filter(im, sigma * k)
    imdiff = imf1 - gamma * imf2
    imdiff = (imdiff < eps) * 1.0 + (imdiff >= eps) * (1.0 + np.tanh(phi * imdiff))
    imdiff -= imdiff.min()
    imdiff /= imdiff.max()
    imdiff *= 255.0
    imdiff = imdiff.astype('uint8')
    imdiff = cv2.cvtColor(imdiff, cv2.COLOR_GRAY2BGR)
    return (imdiff / 255.0).astype('float32')

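# Hedged standalone usage sketch (an addition, not part of the Gradio app): it assumes
# an RGB image exists at a hypothetical path such as 'sample.png' and writes the XDoG
# edge map next to it. Reading with cv2 and converting BGR -> RGB mirrors the RGB
# uint8 arrays that Gradio passes to xdog() in the demo below.
def xdog_file_demo(path='sample.png', out_path='sample_xdog.png', sigma=1):
    bgr = cv2.imread(path)                       # uint8 BGR image from disk
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)   # match the RGB layout Gradio provides
    sketch = xdog(rgb, sigma=sigma)              # float32 edge map in [0, 1], HxWx3
    cv2.imwrite(out_path, (sketch * 255).astype('uint8'))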

def swin_model(img):
    h, w = img.shape[0], img.shape[1]
    # Downscale large inputs so the longer side is at most 512 pixels.
    factor = max(h / 512, w / 512)
    if factor > 1:
        img = cv2.resize(img, (int(w / factor), int(h / factor)))  # cv2.resize expects (width, height)
    # Convert RGB (as delivered by Gradio) to BGR, scale to [0, 1] and reorder to NCHW.
    img = np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1)) / 255.0
    img = img[None, ...].astype(np.float32)
    # Pad the (possibly resized) height and width up to multiples of the Swin window size.
    window_size = 8
    in_h, in_w = img.shape[2], img.shape[3]
    mod_pad_h = (window_size - in_h % window_size) % window_size
    mod_pad_w = (window_size - in_w % window_size) % window_size
    img = np.pad(img, ((0, 0), (0, 0), (0, mod_pad_h), (0, mod_pad_w)), 'reflect')
    # Note: a fresh ONNX session is built on every call; caching it at module level would avoid the reload cost.
    ort_session = onnxruntime.InferenceSession('model.onnx', providers=['CPUExecutionProvider'])
    ort_inputs = {ort_session.get_inputs()[0].name: img}
    output = ort_session.run(None, ort_inputs)[0]
    # Crop the padding back off and drop the batch/channel singleton dimensions.
    _, _, a, b = output.shape
    output = output[:, :, 0:a - mod_pad_h, 0:b - mod_pad_w]
    output = np.squeeze(output).clip(0, 1)
    # Restore the original resolution when the input was downscaled.
    if factor > 1:
        output = cv2.resize(output, (w, h))
    return output

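# Hedged convenience sketch (an addition, not part of the original app): run both
# extractors on the same RGB array and stack the results side by side for a quick
# visual comparison. It assumes swin_model() returns a single-channel sketch in
# [0, 1], which matches the squeeze/clip logic above.
def compare_sketches(img):
    x = xdog(img)          # classical XDoG edge map, HxWx3 float32 in [0, 1]
    d = swin_model(img)    # learning-based sketch from the ONNX Swin model
    if d.ndim == 2:        # promote a single-channel result to 3 channels
        d = np.stack([d] * 3, axis=-1)
    if d.shape[:2] != x.shape[:2]:  # align sizes before horizontal stacking
        d = cv2.resize(d, (x.shape[1], x.shape[0]))
    return np.hstack([x, d.astype('float32')])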

demo = gr.Blocks()
with demo:
    gr.Markdown(
        """
        # Hello, World!
        This is a sketch extraction demo for anime art using a Swin Transformer; compare the classical XDoG filter with the learning-based model below.
        """)
    input_image = gr.Image(label="Input")
    output_xdog = gr.Image(label="XDoG sketch")
    output_deep = gr.Image(label="Learning-based sketch")
    b_xdog = gr.Button("XDoG")
    b_learning_based = gr.Button("Learning-based")
    b_xdog.click(xdog, inputs=input_image, outputs=output_xdog)
    b_learning_based.click(swin_model, inputs=input_image, outputs=output_deep)
demo.launch()