nei10u committed on
Commit
464c12b
1 Parent(s): c8255da

add comic style of img2img

Browse files
app.py CHANGED
@@ -1,10 +1,15 @@
 
 
1
  import gradio as gr
2
  import translators as ts
 
3
  from PIL import Image
4
- from gradio import Blocks, Markdown, Button, Textbox, Row, Column, Dropdown, Video
5
  from langchain import Cohere, LLMChain, PromptTemplate
6
  from transformers import BlipProcessor, BlipForConditionalGeneration
7
 
 
 
8
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
9
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
10
 
@@ -16,7 +21,7 @@ def translate_into_cn(source):
16
 
17
 
18
  def predict_step(cohere_key, img, style):
19
- i_image = Image.fromarray(img.astype('int8'), 'RGB')
20
 
21
  pixel_values = processor(images=i_image, return_tensors="pt", max_length=1024, verbose=True).pixel_values
22
 
@@ -43,11 +48,18 @@ def predict_step(cohere_key, img, style):
43
 
44
 
45
  with Blocks() as demo:
46
- Markdown("图生文")
47
  with Row():
48
  with Column():
49
  cohere_key = gr.Text(label="cohere key:")
50
- image = gr.Image()
 
 
 
 
 
 
 
 
51
  dropdown = Dropdown(
52
  ["Shakespeare", "luxun", "xuzhimo", "moyan", "laoshe"],
53
  label="Style",
@@ -57,8 +69,11 @@ with Blocks() as demo:
57
  with Column():
58
  prediction_output = Textbox(label="Prediction")
59
  essay_output = Textbox(label="Essay")
 
60
  # Step 1
61
- essay_btn.click(fn=predict_step, inputs=[cohere_key, image, dropdown], outputs=[prediction_output, essay_output],
 
 
62
  api_name="essay_generate")
63
 
64
  demo.launch(debug=True)
 
1
+ import os
2
+
3
  import gradio as gr
4
  import translators as ts
5
+ import numpy as np
6
  from PIL import Image
7
+ from gradio import Blocks, Markdown, Button, Textbox, Row, Column, Dropdown, Examples
8
  from langchain import Cohere, LLMChain, PromptTemplate
9
  from transformers import BlipProcessor, BlipForConditionalGeneration
10
 
11
+ from comic_style.comic_style import inference
12
+
13
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
14
  model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
15
 
 
21
 
22
 
23
  def predict_step(cohere_key, img, style):
24
+ i_image = Image.fromarray(np.array(img), 'RGB')
25
 
26
  pixel_values = processor(images=i_image, return_tensors="pt", max_length=1024, verbose=True).pixel_values
27
 
 
48
 
49
 
50
  with Blocks() as demo:
 
51
  with Row():
52
  with Column():
53
  cohere_key = gr.Text(label="cohere key:")
54
+ with Row():
55
+ image_upload = gr.Image(type="pil")
56
+ comic_style_output = gr.Image(type="pil", label="Comic Style")
57
+ Examples(
58
+ examples=[os.path.join(os.path.dirname(__file__), "example1.jpeg"),
59
+ os.path.join(os.path.dirname(__file__), "example2.jpg")],
60
+ fn=inference,
61
+ inputs=image_upload,
62
+ )
63
  dropdown = Dropdown(
64
  ["Shakespeare", "luxun", "xuzhimo", "moyan", "laoshe"],
65
  label="Style",
 
69
  with Column():
70
  prediction_output = Textbox(label="Prediction")
71
  essay_output = Textbox(label="Essay")
72
+
73
  # Step 1
74
+ image_upload.change(fn=inference, inputs=image_upload, outputs=comic_style_output)
75
+ # Step 2
76
+ essay_btn.click(fn=predict_step, inputs=[cohere_key, image_upload, dropdown], outputs=[prediction_output, essay_output],
77
  api_name="essay_generate")
78
 
79
  demo.launch(debug=True)
comic_style/comic_style.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import cv2 as cv
2
+ import numpy as np
3
+ import torch
4
+ from PIL import Image, ImageOps
5
+
6
+ from comic_style.face_detection import align
7
+
8
import os  # only needed here, to locate the weights shipped next to this module

# Inference only: turn autograd off globally so model calls build no graph.
torch.set_grad_enabled(False)

# Resolve the weights relative to this file instead of the current working
# directory, so importing the module works no matter where the app is started.
_MODEL_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'u2net_bce_itr_16000_train_3.835149_tar_0.542587-400x_360x.jit.pt',
)

# map_location='cpu': face2hero feeds a CPU tensor and converts the output with
# .numpy(), both of which require the module to live on the CPU.
model = torch.jit.load(_MODEL_FILE, map_location='cpu')
model.eval()
11
+
12
+
13
# https://en.wikipedia.org/wiki/Unsharp_masking
# https://stackoverflow.com/a/55590133/1495606
def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=2.0, threshold=0):
    """Return a sharpened uint8 version of the image, using an unsharp mask.

    Args:
        image: numpy image array accepted by ``cv.GaussianBlur``.
        kernel_size: Gaussian kernel size forwarded to ``cv.GaussianBlur``.
        sigma: Gaussian sigma forwarded to ``cv.GaussianBlur``.
        amount: sharpening strength; the result is
            ``(amount + 1) * image - amount * blurred``.
        threshold: when greater than 0, pixels whose absolute difference from
            the blurred image is below this value keep their original value.

    Returns:
        ``np.uint8`` array with values clipped to ``[0, 255]``.
    """
    blurred = cv.GaussianBlur(image, kernel_size, sigma)
    sharpened = float(amount + 1) * image - float(amount) * blurred
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)
    if threshold > 0:
        # Compare in float: with uint8 input ``image - blurred`` would wrap
        # around instead of going negative.
        difference = np.asarray(image, dtype=np.float64) - np.asarray(blurred, dtype=np.float64)
        low_contrast_mask = np.absolute(difference) < threshold
        # Bring the original into uint8 range explicitly so a float ``image``
        # can be copied into the uint8 result (a no-op for uint8 input).
        original_u8 = np.clip(np.rint(image), 0, 255).astype(np.uint8)
        np.copyto(sharpened, original_u8, where=low_contrast_mask)
    return sharpened
26
+
27
+
28
def normPRED(d):
    """Min-max normalise an array so its values span ``[0, 1]``.

    Args:
        d: numpy array to normalise.

    Returns:
        ``(d - min) / (max - min)``. When every element of ``d`` is equal the
        range is zero; an all-zero array of the same shape is returned instead
        of dividing by zero (which would produce NaN).
    """
    ma = np.max(d)
    mi = np.min(d)
    span = ma - mi

    if span == 0:
        # Constant input: there is no contrast to stretch.
        return np.zeros_like(d, dtype=np.result_type(np.asarray(d).dtype, np.float32))

    return (d - mi) / span
35
+
36
+
37
def array_to_np(array_in):
    """Rescale an array to ``[0, 255]`` and move its leading axis last.

    The input is normalised with ``normPRED``, multiplied by 255, stripped of
    singleton dimensions, and transposed with axes ``(1, 2, 0)``.
    """
    scaled = 255.0 * normPRED(array_in)
    squeezed = np.squeeze(scaled)
    return np.transpose(squeezed, (1, 2, 0))
42
+
43
+
44
def array_to_image(array_in):
    """Convert an array to a PIL image.

    Applies the same normalise / scale / squeeze / transpose steps as
    ``array_to_np`` and casts the result to ``uint8`` before wrapping it in a
    PIL image.
    """
    pixels = array_to_np(array_in)
    return Image.fromarray(pixels.astype(np.uint8))
50
+
51
+
52
def image_as_array(image_in):
    """Convert an image to a normalised ``(1, 3, H, W)`` array.

    Pixel values are divided by the image maximum, then each channel has a
    fixed mean subtracted and is divided by a fixed standard deviation. A
    single-channel image is replicated into all three output channels using
    the first channel's constants.

    Args:
        image_in: anything ``np.array`` accepts as an image: ``(H, W)``,
            ``(H, W, 1)`` or ``(H, W, C)`` with ``C >= 3`` (channels beyond
            the third are ignored).

    Returns:
        float array of shape ``(1, 3, H, W)``.
    """
    means = (0.485, 0.456, 0.406)
    stds = (0.229, 0.224, 0.225)

    pixels = np.array(image_in, np.float32)
    if pixels.ndim == 2:
        # Greyscale images arrive without a channel axis; add one so the
        # single-channel branch below handles them.
        pixels = pixels[:, :, np.newaxis]

    peak = np.max(pixels)
    if peak > 0:
        # An all-black image has peak 0; skip the division to avoid NaN.
        pixels = pixels / peak

    normalised = np.zeros((pixels.shape[0], pixels.shape[1], 3))
    if pixels.shape[2] == 1:
        grey = (pixels[:, :, 0] - means[0]) / stds[0]
        for channel in range(3):
            normalised[:, :, channel] = grey
    else:
        for channel, (mean, std) in enumerate(zip(means, stds)):
            normalised[:, :, channel] = (pixels[:, :, channel] - mean) / std

    # HWC -> CHW, then add the leading batch axis.
    return np.expand_dims(normalised.transpose((2, 0, 1)), 0)
68
+
69
+
70
def find_aligned_face(image_in, size=400):
    """Run ``align`` on the first face (index 0) at the requested output size.

    Returns the ``(aligned_image, n_faces, quad)`` tuple produced by ``align``.
    """
    return align(image_in, face_index=0, output_size=size)
73
+
74
+
75
def align_first_face(image_in, size=400):
    """Return the model input array for the first face found in an image.

    When at least one face is detected, the aligned crop from
    ``find_aligned_face`` is used. Otherwise the whole image is EXIF-rotated
    (best effort) and resized to ``size`` x ``size``.

    Args:
        image_in: PIL image.
        size: edge length used for alignment and for the fallback resize.

    Returns:
        The array produced by ``image_as_array``.
    """
    aligned_image, n_faces, _ = find_aligned_face(image_in, size=size)
    if n_faces == 0:
        try:
            image_in = ImageOps.exif_transpose(image_in)
        except Exception:
            # Best effort only: keep the unrotated image, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare ``except`` would.
            print("exif problem, not rotating")
        return image_as_array(image_in.resize((size, size)))

    return image_as_array(aligned_image)
88
+
89
+
90
def img_concat_h(im1, im2):
    """Paste two images side by side onto a new RGB canvas.

    The canvas is as wide as both images together and as tall as ``im1``.
    """
    canvas = Image.new('RGB', (im1.width + im2.width, im1.height))
    for tile, x_offset in ((im1, 0), (im2, im1.width)):
        canvas.paste(tile, (x_offset, 0))
    return canvas
95
+
96
+
97
def face2hero(
        img: Image.Image,
        size: int
) -> "Image.Image | None":
    """Render the comic-style version of the first face in an image.

    Args:
        img: input PIL image.
        size: edge length forwarded to ``align_first_face`` for the aligned
            crop fed to the model.

    Returns:
        The stylised PIL image, or ``None`` when no input array could be
        prepared.
    """
    aligned_img = align_first_face(img, size=size)
    if aligned_img is None:
        # Previously this fell through to an unbound ``hero_image``.
        return None

    batch = torch.Tensor(aligned_img)
    results = model(batch)
    # The second model output is the one converted to an image here.
    hero_np_image = array_to_np(results[1].detach().numpy())
    del results  # release the model outputs before post-processing

    return Image.fromarray(unsharp_mask(hero_np_image))
114
+
115
+
116
def inference(img):
    """Gradio entry point: stylise ``img`` via ``face2hero`` at size 400."""
    return face2hero(img, 400)
comic_style/face_detection.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2021 Justin Pinkney
2
+
3
+ import cv2
4
+ import dlib
5
+ import numpy as np
6
+ from PIL import Image
7
+ from PIL import ImageOps
8
+ from scipy.ndimage import gaussian_filter
9
+
10
+ MODEL_PATH = "comic_style/shape_predictor_5_face_landmarks.dat"
11
+ detector = dlib.get_frontal_face_detector()
12
+
13
+
14
def align(image_in, face_index=0, output_size=256):
    """Detect faces in an image and return an aligned crop of one of them.

    Args:
        image_in: PIL image.
        face_index: which detected face to align; clamped to the last
            detected face when it is too large.
        output_size: edge length of the aligned crop.

    Returns:
        ``(aligned_image, n_faces, quad)``. When no face is found,
        ``aligned_image`` is the (EXIF-rotated, if possible) input and
        ``quad`` is ``None``.
    """
    try:
        image_in = ImageOps.exif_transpose(image_in)
    except Exception:
        # Best effort: continue with the unrotated image, without swallowing
        # KeyboardInterrupt/SystemExit as a bare ``except`` would.
        print("exif problem, not rotating")

    landmarks = list(get_landmarks(image_in))
    n_faces = len(landmarks)
    if n_faces == 0:
        return image_in, n_faces, None

    # Clamp only once we know there is at least one face.
    face_index = min(n_faces - 1, face_index)
    aligned_image, quad = image_align(image_in, landmarks[face_index], output_size=output_size)
    return aligned_image, n_faces, quad
30
+
31
+
32
def composite_images(quad, img, output):
    """Composite an image onto an output canvas according to transformed co-ords.

    ``img`` is warped so that its corners land on ``quad`` and is then
    alpha-composited over ``output``; the result is returned as RGB.
    """
    canvas = output.convert("RGBA")
    overlay = img.convert("RGBA")
    width, height = overlay.size

    # Corners of the overlay, in the same order as the points of ``quad``.
    corners = np.array(((0, 0), (0, height), (width, height), (width, 0)), dtype=np.float32)
    target = np.float32(quad)
    coefficients = cv2.getPerspectiveTransform(target, corners).flatten()

    warped = overlay.transform(canvas.size, Image.PERSPECTIVE, coefficients, Image.BILINEAR)
    canvas.alpha_composite(warped)
    return canvas.convert("RGB")
44
+
45
+
46
_SHAPE_PREDICTOR = None


def _get_shape_predictor():
    """Load the dlib landmark model on first use and reuse it afterwards."""
    global _SHAPE_PREDICTOR
    if _SHAPE_PREDICTOR is None:
        _SHAPE_PREDICTOR = dlib.shape_predictor(MODEL_PATH)
    return _SHAPE_PREDICTOR


def get_landmarks(image):
    """Yield one list of ``(x, y)`` landmarks per face detected in a PIL image.

    Detection runs on a downscaled copy (longest side reduced by an integer
    factor of ``max(size) // 512``, at least 1); the yielded coordinates are
    scaled back up by the same factor. Faces whose landmarks cannot be
    computed are reported with ``print`` and skipped.
    """
    # Loading the model from disk on every call is slow; reuse one instance.
    shape_predictor = _get_shape_predictor()

    # dlib only accepts 8-bit grey or RGB arrays, so normalise other modes
    # (e.g. RGBA, P) before detection.
    if image.mode != "RGB":
        image = image.convert("RGB")

    reduction_scale = max(1, max(image.size) // 512)
    downscaled = image.reduce(reduction_scale)
    img = np.array(downscaled)
    detections = detector(img, 0)

    for detection in detections:
        try:
            parts = shape_predictor(img, detection).parts()
            face_landmarks = [(reduction_scale * item.x, reduction_scale * item.y) for item in parts]
        except Exception as e:
            print(e)
            continue
        yield face_landmarks
65
+
66
+
67
def image_align(src_img, face_landmarks, output_size=512, transform_size=2048, enable_padding=True, x_scale=1, y_scale=1,
                em_scale=0.1, alpha=False):
    """Crop and rotate a face to a square image based on its landmarks.

    Args:
        src_img: source PIL image.
        face_landmarks: sequence of ``(x, y)`` points; indices 2, 0 and 4 are
            used as left eye, right eye and mouth reference respectively
            (presumably the dlib 5-point layout - confirm against the model).
        output_size: edge length of the returned image.
        transform_size: edge length of the intermediate quad transform.
        enable_padding: reflect-pad and blur the border when the crop
            rectangle extends past the image.
        x_scale: horizontal scale applied to the crop rectangle.
        y_scale: vertical scale applied to the crop rectangle.
        em_scale: how far the crop centre is shifted from the eye midpoint
            towards the mouth reference.
        alpha: when padding is applied, attach the padding mask as an alpha
            channel (the result is then RGBA).

    Returns:
        ``(img, quad_orig)``: the aligned PIL image and the crop quadrilateral
        in source-image coordinates.
    """
    # Align function modified from ffhq-dataset
    # See https://github.com/NVlabs/ffhq-dataset for license

    lm = np.array(face_landmarks)
    lm_eye_left = lm[2:3]  # left-clockwise
    lm_eye_right = lm[0:1]  # left-clockwise

    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = 0.71 * (eye_right - eye_left)
    mouth_avg = lm[4]
    eye_to_mouth = 1.35 * (mouth_avg - eye_avg)

    # Choose oriented crop rectangle.
    x = eye_to_eye.copy()
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    x *= x_scale
    y = np.flipud(x) * [-y_scale, y_scale]
    c = eye_avg + eye_to_mouth * em_scale
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    quad_orig = quad.copy()  # returned untouched; ``quad`` is modified in place below
    qsize = np.hypot(*x) * 2

    img = src_img.convert('RGBA').convert('RGB')

    # Shrink.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize(rsize, Image.LANCZOS)
        quad /= shrink
        qsize /= shrink

    # Crop.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
            int(np.ceil(max(quad[:, 1]))))
    crop = (
        max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]

    # Pad.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
           int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
           max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        y, x, _ = np.ogrid[:h, :w, :1]
        # mask is 1 at the outer edge of the padded image and falls to 0 at the pad width
        mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
                          1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
        blur = qsize * 0.02
        # Blend towards a blurred copy, then towards the median colour, near the border.
        img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.uint8(np.clip(np.rint(img), 0, 255))
        if alpha:
            mask = 1 - np.clip(3.0 * mask, 0.0, 1.0)
            mask = np.uint8(np.clip(np.rint(mask * 255), 0, 255))
            img = np.concatenate((img, mask), axis=2)
            img = Image.fromarray(img, 'RGBA')
        else:
            img = Image.fromarray(img, 'RGB')
        quad += pad[:2]

    # Transform.
    img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
    if output_size < transform_size:
        img = img.resize((output_size, output_size), Image.LANCZOS)

    return img, quad_orig
comic_style/u2net_bce_itr_16000_train_3.835149_tar_0.542587-400x_360x.jit.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c3cf228cb02287a658a4a2b06ba89e6e02a702890e8ed7554dfc1586a5a3ee00
3
+ size 177234648
example1.jpeg ADDED
example2.jpg ADDED
gradio_cached_examples/7/Comic Style/tmp0b1q0lm4.png ADDED
gradio_cached_examples/7/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Comic Style,flag,username,timestamp
2
+ /Users/liangou/Workspace/python/ai-mixer-blip/gradio_cached_examples/7/Comic Style/tmp0b1q0lm4.png,,,2023-07-26 01:12:44.435105
gradio_cached_examples/8/Comic Style/tmpcujjjff9.png ADDED
gradio_cached_examples/8/log.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Comic Style,flag,username,timestamp
2
+ /Users/liangou/Workspace/python/ai-mixer-blip/gradio_cached_examples/8/Comic Style/tmpcujjjff9.png,,,2023-07-26 01:17:30.589130
packages.txt CHANGED
@@ -1 +1,2 @@
1
- nodejs
 
 
1
+ nodejs
2
+ ffmpeg
requirements.txt CHANGED
@@ -6,4 +6,10 @@ torch==2.0.1
6
  torchvision==0.15.2
7
  cohere==4.8.0
8
  pyexecjs==1.5.1
9
- nodejs==0.1.1
 
 
 
 
 
 
 
6
  torchvision==0.15.2
7
  cohere==4.8.0
8
  pyexecjs==1.5.1
9
+ nodejs==0.1.1
10
+ numpy==1.22.0
11
+ opencv-python-headless
12
+ scikit-image
13
+ scipy
14
+ cmake
15
+ dlib