Spaces:

deepskyreal
/

ai-mixer-hotchpotch

Sleeping

App Files Files

nei10u committed on Jul 25, 2023

Commit

464c12b

1 Parent(s): c8255da

add comic style of img2img

Browse files

Files changed (12) hide show

app.py +20 -5
comic_style/comic_style.py +118 -0
comic_style/face_detection.py +145 -0
comic_style/u2net_bce_itr_16000_train_3.835149_tar_0.542587-400x_360x.jit.pt +3 -0
example1.jpeg +0 -0
example2.jpg +0 -0
gradio_cached_examples/7/Comic Style/tmp0b1q0lm4.png +0 -0
gradio_cached_examples/7/log.csv +2 -0
gradio_cached_examples/8/Comic Style/tmpcujjjff9.png +0 -0
gradio_cached_examples/8/log.csv +2 -0
packages.txt +2 -1
requirements.txt +7 -1

app.py CHANGED Viewed

@@ -1,10 +1,15 @@
 import gradio as gr
 import translators as ts
 from PIL import Image
-from gradio import Blocks, Markdown, Button, Textbox, Row, Column, Dropdown, Video
 from langchain import Cohere, LLMChain, PromptTemplate
 from transformers import BlipProcessor, BlipForConditionalGeneration
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
@@ -16,7 +21,7 @@ def translate_into_cn(source):
 def predict_step(cohere_key, img, style):
-    i_image = Image.fromarray(img.astype('int8'), 'RGB')
     pixel_values = processor(images=i_image, return_tensors="pt", max_length=1024, verbose=True).pixel_values
@@ -43,11 +48,18 @@ def predict_step(cohere_key, img, style):
 with Blocks() as demo:
-    Markdown("图生文")
     with Row():
         with Column():
             cohere_key = gr.Text(label="cohere key:")
-            image = gr.Image()
             dropdown = Dropdown(
                 ["Shakespeare", "luxun", "xuzhimo", "moyan", "laoshe"],
                 label="Style",
@@ -57,8 +69,11 @@ with Blocks() as demo:
         with Column():
             prediction_output = Textbox(label="Prediction")
             essay_output = Textbox(label="Essay")
     # Step 1
-    essay_btn.click(fn=predict_step, inputs=[cohere_key, image, dropdown], outputs=[prediction_output, essay_output],
                     api_name="essay_generate")
 demo.launch(debug=True)

+import os
 import gradio as gr
 import translators as ts
+import numpy as np
 from PIL import Image
+from gradio import Blocks, Markdown, Button, Textbox, Row, Column, Dropdown, Examples
 from langchain import Cohere, LLMChain, PromptTemplate
 from transformers import BlipProcessor, BlipForConditionalGeneration
+from comic_style.comic_style import inference
 processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
 model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
 def predict_step(cohere_key, img, style):
+    i_image = Image.fromarray(np.array(img), 'RGB')
     pixel_values = processor(images=i_image, return_tensors="pt", max_length=1024, verbose=True).pixel_values
 with Blocks() as demo:
     with Row():
         with Column():
             cohere_key = gr.Text(label="cohere key:")
+            with Row():
+                image_upload = gr.Image(type="pil")
+                comic_style_output = gr.Image(type="pil", label="Comic Style")
+                Examples(
+                    examples=[os.path.join(os.path.dirname(__file__), "example1.jpeg"),
+                              os.path.join(os.path.dirname(__file__), "example2.jpg")],
+                    fn=inference,
+                    inputs=image_upload,
+                )
             dropdown = Dropdown(
                 ["Shakespeare", "luxun", "xuzhimo", "moyan", "laoshe"],
                 label="Style",
         with Column():
             prediction_output = Textbox(label="Prediction")
             essay_output = Textbox(label="Essay")
     # Step 1
+    image_upload.change(fn=inference, inputs=image_upload, outputs=comic_style_output)
+    # Step 2
+    essay_btn.click(fn=predict_step, inputs=[cohere_key, image_upload, dropdown], outputs=[prediction_output, essay_output],
                     api_name="essay_generate")
 demo.launch(debug=True)

comic_style/comic_style.py ADDED Viewed

	@@ -0,0 +1,118 @@

+import cv2 as cv
+import numpy as np
+import torch
+from PIL import Image, ImageOps
+from comic_style.face_detection import align
import os

# Inference only: switch autograd off globally so no gradient state is kept.
torch.set_grad_enabled(False)
# Resolve the weights next to this module so that loading does not depend on
# the working directory the app happens to be started from.
_MODEL_FILE = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'u2net_bce_itr_16000_train_3.835149_tar_0.542587-400x_360x.jit.pt',
)
# map_location='cpu': face2hero() converts the outputs with .numpy(), which
# only works on CPU tensors, and it feeds the model a CPU tensor.
model = torch.jit.load(_MODEL_FILE, map_location='cpu')
model.eval()
# https://en.wikipedia.org/wiki/Unsharp_masking
# https://stackoverflow.com/a/55590133/1495606
def unsharp_mask(image, kernel_size=(5, 5), sigma=1.0, amount=2.0, threshold=0):
    """Return a sharpened uint8 version of the image, using an unsharp mask.

    Args:
        image: Array accepted by ``cv.GaussianBlur``; values are treated as
            lying in the 0..255 range.
        kernel_size: Gaussian kernel size passed to ``cv.GaussianBlur``.
        sigma: Gaussian sigma passed to ``cv.GaussianBlur``.
        amount: Sharpening strength; the result is
            ``(amount + 1) * image - amount * blurred``.
        threshold: When greater than 0, pixels whose absolute difference from
            the blurred image is below this value keep their original value.

    Returns:
        ``np.uint8`` array with the same shape as ``image``.
    """
    blurred = cv.GaussianBlur(image, kernel_size, sigma)
    # Do all arithmetic in float: subtracting uint8 arrays would wrap around.
    image_f = np.asarray(image, dtype=np.float64)
    blurred_f = np.asarray(blurred, dtype=np.float64)
    sharpened = float(amount + 1) * image_f - float(amount) * blurred_f
    sharpened = np.clip(sharpened, 0, 255).round().astype(np.uint8)
    if threshold > 0:
        low_contrast_mask = np.absolute(image_f - blurred_f) < threshold
        # Convert the original to uint8 first; copying a float array into a
        # uint8 array is rejected by numpy's default casting rule.
        original = np.clip(image_f, 0, 255).round().astype(np.uint8)
        np.copyto(sharpened, original, where=low_contrast_mask)
    return sharpened
def normPRED(d):
    """Min-max normalize ``d`` so that its values span 0..1.

    Args:
        d: Non-empty numeric array (or array-like).

    Returns:
        Array of the same shape holding ``(d - min) / (max - min)``. When all
        values are equal the result is all zeros instead of NaN.
    """
    d = np.asarray(d)
    mi = np.min(d)
    span = np.max(d) - mi
    if span == 0:
        # Constant input: 0 / 0 would give NaN. ``d - mi`` is already all
        # zeros here; dividing by 1 only promotes it to the float dtype that
        # the regular path would have produced.
        return np.true_divide(d - mi, 1)
    return (d - mi) / span
def array_to_np(array_in):
    """Rescale a channel-first array to 0..255 and return it channel-last.

    The input is min-max normalized with ``normPRED``, multiplied by 255,
    squeezed, and its axes reordered with ``(1, 2, 0)``; the squeezed array
    therefore has to be 3-dimensional.
    """
    scaled = 255.0 * normPRED(array_in)
    return np.transpose(np.squeeze(scaled), (1, 2, 0))
def array_to_image(array_in):
    """Convert a channel-first array into a PIL image.

    The scaling and axis reordering are delegated to ``array_to_np`` so both
    helpers stay in sync; the result is cast to ``uint8`` for PIL.

    Args:
        array_in: Array accepted by ``array_to_np``.

    Returns:
        ``PIL.Image.Image`` built from the rescaled array.
    """
    return Image.fromarray(array_to_np(array_in).astype(np.uint8))
def image_as_array(image_in):
    """Turn an image into a normalized ``(1, 3, H, W)`` float array.

    Pixel values are divided by the image maximum and then standardized per
    channel with the mean/std constants below. Images with fewer than three
    channels (including plain 2-D grayscale arrays) have their first channel
    replicated into all three output channels using the first channel's
    constants. Extra channels beyond the third are ignored.

    Args:
        image_in: PIL image or array-like of shape ``(H, W)`` or ``(H, W, C)``.

    Returns:
        ``numpy.ndarray`` of shape ``(1, 3, H, W)``.
    """
    means = (0.485, 0.456, 0.406)
    stds = (0.229, 0.224, 0.225)

    pixels = np.array(image_in, np.float32)
    if pixels.ndim == 2:
        # Grayscale without a channel axis: add one so indexing is uniform.
        pixels = pixels[:, :, np.newaxis]

    peak = np.max(pixels)
    if peak != 0:
        # Skip the division for an all-zero image; 0 / 0 would give NaN.
        pixels = pixels / peak

    normalized = np.zeros((pixels.shape[0], pixels.shape[1], 3))
    if pixels.shape[2] < 3:
        for channel in range(3):
            normalized[:, :, channel] = (pixels[:, :, 0] - means[0]) / stds[0]
    else:
        for channel in range(3):
            normalized[:, :, channel] = (pixels[:, :, channel] - means[channel]) / stds[channel]

    # HWC -> CHW, then add the leading batch axis.
    return np.expand_dims(normalized.transpose((2, 0, 1)), 0)
def find_aligned_face(image_in, size=400):
    """Align the first detected face in ``image_in``.

    Thin wrapper around ``align`` with ``face_index=0``; returns its
    ``(aligned_image, n_faces, quad)`` result unchanged.
    """
    return align(image_in, face_index=0, output_size=size)
def align_first_face(image_in, size=400):
    """Return the model input array for the first face in ``image_in``.

    When a face is found, the aligned crop from ``find_aligned_face`` is
    converted with ``image_as_array``. When no face is found, the whole image
    is EXIF-rotated (best effort), resized to ``size`` x ``size`` and
    converted instead.

    Args:
        image_in: PIL image.
        size: Side length in pixels of the aligned or resized image.

    Returns:
        Array produced by ``image_as_array``.
    """
    aligned_image, n_faces, _quad = find_aligned_face(image_in, size=size)
    if n_faces != 0:
        return image_as_array(aligned_image)

    try:
        image_in = ImageOps.exif_transpose(image_in)
    except Exception:
        # Best effort only: carry on with the unrotated image. ``Exception``
        # rather than a bare ``except`` so Ctrl-C / SystemExit still propagate.
        print("exif problem, not rotating")
    image_in = image_in.resize((size, size))
    return image_as_array(image_in)
def img_concat_h(im1, im2):
    """Paste ``im1`` and ``im2`` side by side onto a new RGB canvas.

    The canvas is as tall as ``im1`` and as wide as both images together;
    ``im2`` starts at the right edge of ``im1``.
    """
    left_width = im1.width
    canvas = Image.new('RGB', (left_width + im2.width, im1.height))
    for picture, x_offset in ((im1, 0), (im2, left_width)):
        canvas.paste(picture, (x_offset, 0))
    return canvas
def face2hero(
        img: Image.Image,
        size: int
) -> Image.Image:
    """Run the comic-style model on the first face found in ``img``.

    Args:
        img: Input PIL image. ``None`` (for example when the upload widget is
            cleared) is accepted and yields ``None``.
        size: Side length in pixels used when aligning or resizing the input.
            NOTE(review): the weights file name suggests the model was
            exported for 400px inputs - confirm before passing other sizes.

    Returns:
        The stylized PIL image, or ``None`` when there is nothing to process.
    """
    if img is None:
        return None

    # Forward ``size`` instead of silently falling back to the helper default.
    aligned_img = align_first_face(img, size=size)
    if aligned_img is None:
        return None

    model_input = torch.Tensor(aligned_img)
    results = model(model_input)
    # The second model output is the one that gets rendered.
    hero_np_image = array_to_np(results[1].detach().numpy())
    del results
    return Image.fromarray(unsharp_mask(hero_np_image))
def inference(img):
    """Gradio entry point: stylize ``img`` via ``face2hero`` with size 400."""
    return face2hero(img, 400)

comic_style/face_detection.py ADDED Viewed

	@@ -0,0 +1,145 @@

+# Copyright (c) 2021 Justin Pinkney
+import cv2
+import dlib
+import numpy as np
+from PIL import Image
+from PIL import ImageOps
+from scipy.ndimage import gaussian_filter
import os

# Path to the 5-point landmark model, resolved next to this module so that
# loading does not depend on the working directory of the process.
MODEL_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), "shape_predictor_5_face_landmarks.dat")
# dlib's default frontal face detector, created once and shared by all calls.
detector = dlib.get_frontal_face_detector()
def align(image_in, face_index=0, output_size=256):
    """Detect faces in ``image_in`` and align the selected one.

    Args:
        image_in: PIL image.
        face_index: Index of the face to align; clamped to the last detected
            face when it is too large.
        output_size: Side length in pixels of the aligned output.

    Returns:
        Tuple ``(aligned_image, n_faces, quad)``. When no face is detected the
        (EXIF-rotated) input image is returned with ``n_faces == 0`` and
        ``quad`` set to ``None``.
    """
    try:
        image_in = ImageOps.exif_transpose(image_in)
    except Exception:
        # Best effort only: carry on with the unrotated image. ``Exception``
        # rather than a bare ``except`` so Ctrl-C / SystemExit still propagate.
        print("exif problem, not rotating")

    landmarks = list(get_landmarks(image_in))
    n_faces = len(landmarks)
    if n_faces == 0:
        return image_in, n_faces, None

    # Clamp only once we know there is at least one face to pick from.
    face_index = min(n_faces - 1, face_index)
    aligned_image, quad = image_align(image_in, landmarks[face_index], output_size=output_size)
    return aligned_image, n_faces, quad
def composite_images(quad, img, output):
    """Composite an image onto an output canvas according to transformed co-ords.

    ``img`` is perspective-warped so that its corners map to ``quad`` and is
    then alpha-composited over ``output``; the result is returned as RGB.
    """
    canvas = output.convert("RGBA")
    overlay = img.convert("RGBA")
    overlay_size = overlay.size
    # Corner order: top-left, bottom-left, bottom-right, top-right.
    corners = np.array(
        ((0, 0), (0, overlay_size[1]), overlay_size, (overlay_size[0], 0)),
        dtype=np.float32,
    )
    target = np.float32(quad)
    # PIL expects the output -> input mapping, hence quad is the source here.
    coefficients = cv2.getPerspectiveTransform(target, corners).flatten()
    warped = overlay.transform(canvas.size, Image.PERSPECTIVE, coefficients, Image.BILINEAR)
    canvas.alpha_composite(warped)
    return canvas.convert("RGB")
_SHAPE_PREDICTOR = None


def _get_shape_predictor():
    """Load the dlib landmark model on first use and reuse it afterwards."""
    global _SHAPE_PREDICTOR
    if _SHAPE_PREDICTOR is None:
        _SHAPE_PREDICTOR = dlib.shape_predictor(MODEL_PATH)
    return _SHAPE_PREDICTOR


def get_landmarks(image):
    """Yield the landmark points of every face detected in a PIL image.

    Detection runs on a downscaled copy (integer factor chosen so the longer
    side is roughly 512 px or less); the landmark coordinates are multiplied
    back by that factor before being yielded.

    Args:
        image: PIL image.

    Yields:
        One list of ``(x, y)`` tuples per detected face. Faces whose landmark
        prediction raises are reported with ``print`` and skipped.
    """
    # Loading the model is expensive, so it is cached instead of being
    # re-read from disk on every call.
    shape_predictor = _get_shape_predictor()
    reduction_scale = max(int(max(image.size) / 512), 1)
    # Convert to RGB first so palette / alpha images become a plain 3-channel
    # array before being handed to dlib.
    downscaled = image.convert("RGB").reduce(reduction_scale)
    img = np.array(downscaled)
    detections = detector(img, 0)
    for detection in detections:
        try:
            face_landmarks = [(reduction_scale * item.x, reduction_scale * item.y) for item in
                              shape_predictor(img, detection).parts()]
            yield face_landmarks
        except Exception as e:
            print(e)
def image_align(src_img, face_landmarks, output_size=512, transform_size=2048, enable_padding=True, x_scale=1, y_scale=1,
                em_scale=0.1, alpha=False):
    """Crop, pad and warp ``src_img`` into a square image around one face.

    Args:
        src_img: PIL image containing the face.
        face_landmarks: Sequence of ``(x, y)`` points; entries 0, 2 and 4 are
            used as right eye, left eye and mouth respectively.
        output_size: Side length in pixels of the returned image.
        transform_size: Side length used for the intermediate quad transform.
        enable_padding: Reflect-pad and blur the borders when the crop
            rectangle reaches outside the image.
        x_scale: Horizontal scale factor applied to the crop rectangle.
        y_scale: Vertical scale factor applied to the crop rectangle.
        em_scale: How far the crop centre is shifted from the eyes towards
            the mouth, as a fraction of the eye-to-mouth vector.
        alpha: When padding is applied, append the padding mask as an alpha
            channel (the padded intermediate becomes RGBA).

    Returns:
        Tuple ``(img, quad_orig)``: the aligned PIL image and the crop
        quadrilateral in original image coordinates.
    """
    # Align function modified from ffhq-dataset
    # See https://github.com/NVlabs/ffhq-dataset for license
    lm = np.array(face_landmarks)
    lm_eye_left = lm[2:3]  # left-clockwise
    lm_eye_right = lm[0:1]  # left-clockwise
    # Calculate auxiliary vectors.
    eye_left = np.mean(lm_eye_left, axis=0)
    eye_right = np.mean(lm_eye_right, axis=0)
    eye_avg = (eye_left + eye_right) * 0.5
    eye_to_eye = 0.71 * (eye_right - eye_left)
    mouth_avg = lm[4]
    eye_to_mouth = 1.35 * (mouth_avg - eye_avg)
    # Choose oriented crop rectangle.
    x = eye_to_eye.copy()
    x /= np.hypot(*x)
    x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
    x *= x_scale
    y = np.flipud(x) * [-y_scale, y_scale]
    c = eye_avg + eye_to_mouth * em_scale
    quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
    quad_orig = quad.copy()
    qsize = np.hypot(*x) * 2
    img = src_img.convert('RGBA').convert('RGB')
    # Shrink.
    shrink = int(np.floor(qsize / output_size * 0.5))
    if shrink > 1:
        rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize(rsize, Image.LANCZOS)
        quad /= shrink
        qsize /= shrink
    # Crop.
    border = max(int(np.rint(qsize * 0.1)), 3)
    crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
            int(np.ceil(max(quad[:, 1]))))
    crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]),
            min(crop[3] + border, img.size[1]))
    if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
        img = img.crop(crop)
        quad -= crop[0:2]
    # Pad.
    pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int(np.ceil(max(quad[:, 0]))),
           int(np.ceil(max(quad[:, 1]))))
    pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0),
           max(pad[3] - img.size[1] + border, 0))
    if enable_padding and max(pad) > border - 4:
        pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
        img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
        h, w, _ = img.shape
        # NOTE: x and y are reused here as pixel coordinate grids.
        y, x, _ = np.ogrid[:h, :w, :1]
        mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w - 1 - x) / pad[2]),
                          1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h - 1 - y) / pad[3]))
        blur = qsize * 0.02
        img += (gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
        img += (np.median(img, axis=(0, 1)) - img) * np.clip(mask, 0.0, 1.0)
        img = np.uint8(np.clip(np.rint(img), 0, 255))
        if alpha:
            mask = 1 - np.clip(3.0 * mask, 0.0, 1.0)
            mask = np.uint8(np.clip(np.rint(mask * 255), 0, 255))
            img = np.concatenate((img, mask), axis=2)
            img = Image.fromarray(img, 'RGBA')
        else:
            img = Image.fromarray(img, 'RGB')
        quad += pad[:2]
    # Transform.
    img = img.transform((transform_size, transform_size), Image.QUAD, (quad + 0.5).flatten(), Image.BILINEAR)
    if output_size < transform_size:
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        img = img.resize((output_size, output_size), Image.LANCZOS)
    return img, quad_orig

comic_style/u2net_bce_itr_16000_train_3.835149_tar_0.542587-400x_360x.jit.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c3cf228cb02287a658a4a2b06ba89e6e02a702890e8ed7554dfc1586a5a3ee00
+size 177234648

example1.jpeg ADDED Viewed

example2.jpg ADDED Viewed

gradio_cached_examples/7/Comic Style/tmp0b1q0lm4.png ADDED Viewed

gradio_cached_examples/7/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Comic Style,flag,username,timestamp
2	+ /Users/liangou/Workspace/python/ai-mixer-blip/gradio_cached_examples/7/Comic Style/tmp0b1q0lm4.png,,,2023-07-26 01:12:44.435105

gradio_cached_examples/8/Comic Style/tmpcujjjff9.png ADDED Viewed

gradio_cached_examples/8/log.csv ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ Comic Style,flag,username,timestamp
2	+ /Users/liangou/Workspace/python/ai-mixer-blip/gradio_cached_examples/8/Comic Style/tmpcujjjff9.png,,,2023-07-26 01:17:30.589130

packages.txt CHANGED Viewed

	@@ -1 +1,2 @@
1	- nodejs


1	+ nodejs
2	+ ffmpeg

requirements.txt CHANGED Viewed

@@ -6,4 +6,10 @@ torch==2.0.1
 torchvision==0.15.2
 cohere==4.8.0
 pyexecjs==1.5.1
-nodejs==0.1.1

 torchvision==0.15.2
 cohere==4.8.0
 pyexecjs==1.5.1
+nodejs==0.1.1
+numpy==1.22.0
+opencv-python-headless
+scikit-image
+scipy
+cmake
+dlib