zhiweili committed on
Commit
52c565a
·
1 Parent(s): 286713d

add segment_image

Browse files
app_upscale.py CHANGED
@@ -6,6 +6,11 @@ import torch
6
  import gradio as gr
7
  import spaces
8
 
 
 
 
 
 
9
  device = "cuda" if torch.cuda.is_available() else "cpu"
10
 
11
  print(f'{device} is available')
@@ -24,13 +29,13 @@ def create_demo() -> gr.Blocks:
24
  input_image: Image,
25
  prompt: str,
26
  ):
 
 
 
27
  upscaled_image = upscale_pipe(prompt=prompt, image=input_image).images[0]
28
- extension = 'png'
29
-
30
- path = f"output/{uuid.uuid4()}.{extension}"
31
- upscaled_image.save(path, quality=100)
32
 
33
- return upscaled_image, path, time_cost_str
34
 
35
  def get_time_cost(run_task_time, time_cost_str):
36
  now_time = int(time.time()*1000)
@@ -55,14 +60,27 @@ def create_demo() -> gr.Blocks:
55
  with gr.Column():
56
  input_image = gr.Image(label="Input Image", type="pil")
57
  with gr.Column():
 
58
  upscaled_image = gr.Image(label="Upscaled Image", format="png", type="pil", interactive=False)
59
  download_path = gr.File(label="Download the output image", interactive=False)
60
  generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)
61
-
 
 
 
 
62
  g_btn.click(
 
 
 
 
63
  fn=upscale_image,
64
- inputs=[input_image, input_image_prompt],
65
- outputs=[upscaled_image, download_path, generated_cost],
 
 
 
 
66
  )
67
 
68
  return demo
 
6
  import gradio as gr
7
  import spaces
8
 
9
+ from segment_utils import(
10
+ segment_image,
11
+ restore_result,
12
+ )
13
+
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
  print(f'{device} is available')
 
29
  input_image: Image,
30
  prompt: str,
31
  ):
32
+ time_cost_str = ''
33
+ run_task_time = 0
34
+ run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
35
  upscaled_image = upscale_pipe(prompt=prompt, image=input_image).images[0]
36
+ run_task_time, time_cost_str = get_time_cost(run_task_time, time_cost_str)
 
 
 
37
 
38
+ return upscaled_image, time_cost_str
39
 
40
  def get_time_cost(run_task_time, time_cost_str):
41
  now_time = int(time.time()*1000)
 
60
  with gr.Column():
61
  input_image = gr.Image(label="Input Image", type="pil")
62
  with gr.Column():
63
+ origin_area_image = gr.Image(label="Origin Area Image", format="png", type="pil", interactive=False, visible=False)
64
  upscaled_image = gr.Image(label="Upscaled Image", format="png", type="pil", interactive=False)
65
  download_path = gr.File(label="Download the output image", interactive=False)
66
  generated_cost = gr.Textbox(label="Time cost by step (ms):", visible=True, interactive=False)
67
+ category = gr.Textbox(label="Category", value=DEFAULT_CATEGORY, visible=False)
68
+ generate_size = gr.Number(label="Generate Size", value=1024, visible=False)
69
+ mask_expansion = gr.Number(label="Mask Expansion", value=20, visible=False)
70
+ mask_dilation = gr.Slider(minimum=0, maximum=10, value=2, step=1, label="Mask Dilation", visible=False)
71
+
72
  g_btn.click(
73
+ fn=segment_image,
74
+ inputs=[input_image, category, generate_size, mask_expansion, mask_dilation],
75
+ outputs=[origin_area_image, croper],
76
+ ).success(
77
  fn=upscale_image,
78
+ inputs=[origin_area_image, input_image_prompt],
79
+ outputs=[upscaled_image, generated_cost],
80
+ ).success(
81
+ fn=restore_result,
82
+ inputs=[croper, category, enhanced_image],
83
+ outputs=[upscaled_image, download_path],
84
  )
85
 
86
  return demo
checkpoints/selfie_multiclass_256x256.tflite ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6748b1253a99067ef71f7e26ca71096cd449baefa8f101900ea23016507e0e0
3
+ size 16371837
enhance_utils.py CHANGED
@@ -38,7 +38,7 @@ face_enhancer = GFPGANer(model_path='GFPGANv1.4.pth', upscale=1, arch='clean', c
38
 
39
  def enhance_image(
40
  pil_image: Image,
41
- enhance_face: bool = True,
42
  ):
43
  img = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
44
 
 
38
 
39
  def enhance_image(
40
  pil_image: Image,
41
+ enhance_face: bool = False,
42
  ):
43
  img = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
44
 
segment_utils.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import mediapipe as mp
3
+ import uuid
4
+
5
+ from PIL import Image
6
+ from mediapipe.tasks import python
7
+ from mediapipe.tasks.python import vision
8
+ from scipy.ndimage import binary_dilation
9
+ from croper import Croper
10
+
11
# Location of the TFLite segmentation model used by every function in this module.
segment_model = "checkpoints/selfie_multiclass_256x256.tflite"

base_options = python.BaseOptions(model_asset_path=segment_model)

# output_category_mask=True makes segmentation results expose `category_mask`,
# which is the only part of the result this module reads.
options = vision.ImageSegmenterOptions(
    base_options=base_options,
    output_category_mask=True,
)

# Built once at import time and shared by all calls.
segmenter = vision.ImageSegmenter.create_from_options(options)
15
+
16
def restore_result(croper, category, generated_image):
    """Paste a generated crop back into the original image and save the result.

    Args:
        croper: Crop-state object returned by ``segment_image``. This function
            reads its ``square_length``, ``square_start_x``/``square_start_y``,
            ``square_end_x``/``square_end_y``, ``origin_start_x``/
            ``origin_start_y`` and ``input_image`` attributes.
        category: Segmentation category name; forwarded unchanged to
            ``get_restore_mask_image``.
        generated_image: Image generated for the cropped square. It must
            support ``resize`` and ``crop`` (presumably a PIL image -- confirm
            against the caller).

    Returns:
        tuple: ``(restored_image, path)`` where ``restored_image`` is a copy of
        ``croper.input_image`` with the generated area pasted in, and ``path``
        is the PNG file the result was written to.
    """
    # Local import keeps this block self-contained; the module does not import os.
    import os

    # Scale the generated image back to the size of the square it was cut from.
    side = croper.square_length
    square_image = generated_image.resize((side, side))

    crop_box = (
        croper.square_start_x,
        croper.square_start_y,
        croper.square_end_x,
        croper.square_end_y,
    )
    cropped_generated_image = square_image.crop(crop_box)
    paste_mask = get_restore_mask_image(croper, category, cropped_generated_image)

    # Work on a copy so the image held by the croper is left untouched.
    restored_image = croper.input_image.copy()
    restored_image.paste(
        cropped_generated_image,
        (croper.origin_start_x, croper.origin_start_y),
        paste_mask,
    )

    # Make sure the target directory exists; saving into a missing directory
    # would otherwise fail with FileNotFoundError.
    output_dir = "output"
    os.makedirs(output_dir, exist_ok=True)

    extension = 'png'
    path = f"{output_dir}/{uuid.uuid4()}.{extension}"
    restored_image.save(path, quality=100)

    return restored_image, path
36
+
37
def segment_image(input_image, category, input_size, mask_expansion, mask_dilation):
    """Segment the input image and crop the area belonging to ``category``.

    Args:
        input_image: Image to segment; converted with ``np.asarray`` before
            being handed to MediaPipe (the UI passes a PIL image).
        category: ``"hair"``, ``"clothes"`` or ``"face"``. Any other value is
            treated as ``"face"``.
        input_size: Size forwarded to ``Croper`` as the mask size; coerced to
            ``int``.
        mask_expansion: Expansion forwarded to ``Croper``; coerced to ``int``.
        mask_dilation: Number of dilation iterations applied to the category
            mask; coerced to ``int``.

    Returns:
        tuple: ``(origin_area_image, croper)`` -- the croper's
        ``resized_square_image`` and the ``Croper`` instance itself, which
        ``restore_result`` needs later.

    Raises:
        ValueError: If ``input_image`` is ``None``.
    """
    if input_image is None:
        # Fail with a clear message instead of an obscure error from MediaPipe.
        raise ValueError("input_image is required")

    # UI number widgets may deliver floats; everything downstream expects ints.
    # mask_dilation in particular ends up as binary_dilation's `iterations`.
    mask_size = int(input_size)
    mask_expansion = int(mask_expansion)
    mask_dilation = int(mask_dilation)

    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(input_image))
    category_mask_np = segmenter.segment(image).category_mask.numpy_view()

    # Dispatch on category; unknown categories fall back to the face mask.
    mask_builders = {
        "hair": get_hair_mask,
        "clothes": get_clothes_mask,
        "face": get_face_mask,
    }
    build_mask = mask_builders.get(category, get_face_mask)
    target_mask = build_mask(category_mask_np, mask_dilation)

    croper = Croper(input_image, target_mask, mask_size, mask_expansion)
    croper.corp_mask_image()
    origin_area_image = croper.resized_square_image

    return origin_area_image, croper
60
+
61
def get_face_mask(category_mask_np, dilation=1):
    """Return a boolean mask of the face-skin pixels, optionally dilated.

    Args:
        category_mask_np: Array of per-pixel category labels. Pixels equal to
            3 are selected (face skin, going by the naming in this module).
        dilation: Number of binary-dilation iterations to apply. Coerced to
            ``int``; values below 1 leave the mask undilated.

    Returns:
        Boolean array with the same shape as ``category_mask_np``.
    """
    face_skin_mask = category_mask_np == 3

    # binary_dilation needs an integer iteration count, and a count below 1
    # would mean "repeat until stable", so only dilate for a positive count.
    iterations = int(dilation)
    if iterations > 0:
        face_skin_mask = binary_dilation(face_skin_mask, iterations=iterations)

    return face_skin_mask
67
+
68
def get_clothes_mask(category_mask_np, dilation=1, base_dilation=4):
    """Return a boolean mask of body-skin and clothes pixels, dilated.

    Args:
        category_mask_np: Array of per-pixel category labels. Pixels equal to
            2 or 4 are selected (body skin and clothes, going by the naming in
            this module).
        dilation: Extra binary-dilation iterations applied on top of
            ``base_dilation``. Coerced to ``int``; values below 1 add nothing.
        base_dilation: Dilation iterations that are always applied. Defaults
            to 4, the value that was previously hard-coded.

    Returns:
        Boolean array with the same shape as ``category_mask_np``.
    """
    body_skin_mask = category_mask_np == 2
    clothes_mask = category_mask_np == 4
    combined_mask = np.logical_or(body_skin_mask, clothes_mask)

    # Dilating n times and then m times equals dilating n + m times, so the
    # base and the extra dilation are done in a single pass.
    base_iterations = max(int(base_dilation), 0)
    extra_iterations = max(int(dilation), 0)
    total_iterations = base_iterations + extra_iterations

    # A count below 1 would make binary_dilation repeat until stable; skip it.
    if total_iterations > 0:
        combined_mask = binary_dilation(combined_mask, iterations=total_iterations)

    return combined_mask
76
+
77
def get_hair_mask(category_mask_np, dilation=1):
    """Return a boolean mask of the hair pixels, optionally dilated.

    Args:
        category_mask_np: Array of per-pixel category labels. Pixels equal to
            1 are selected (hair, going by the naming in this module).
        dilation: Number of binary-dilation iterations to apply. Coerced to
            ``int``; values below 1 leave the mask undilated.

    Returns:
        Boolean array with the same shape as ``category_mask_np``.
    """
    hair_mask = category_mask_np == 1

    # binary_dilation needs an integer iteration count, and a count below 1
    # would mean "repeat until stable", so only dilate for a positive count.
    iterations = int(dilation)
    if iterations > 0:
        hair_mask = binary_dilation(hair_mask, iterations=iterations)

    return hair_mask
82
+
83
def get_restore_mask_image(croper, category, generated_image):
    """Build the paste mask used when restoring a generated crop.

    The generated image is segmented again, the mask for ``category`` is
    taken from it without extra dilation, and it is OR-ed with
    ``croper.corp_mask``.

    Args:
        croper: Crop-state object; only its ``corp_mask`` attribute is read.
            NOTE(review): ``corp_mask`` must be broadcast-compatible with the
            segmentation mask of ``generated_image`` -- confirm in ``Croper``.
        category: ``"hair"``, ``"clothes"`` or ``"face"``. Any other value is
            treated as ``"face"``, matching ``segment_image``.
        generated_image: Image to segment; converted with ``np.asarray``.

    Returns:
        An 8-bit PIL image built from the combined mask, with 255 where the
        mask is set and 0 elsewhere.
    """
    image = mp.Image(image_format=mp.ImageFormat.SRGB, data=np.asarray(generated_image))
    category_mask_np = segmenter.segment(image).category_mask.numpy_view()

    # Unknown categories fall back to the face mask. Previously they left
    # target_mask unbound and the function raised UnboundLocalError.
    mask_builders = {
        "hair": get_hair_mask,
        "clothes": get_clothes_mask,
        "face": get_face_mask,
    }
    build_mask = mask_builders.get(category, get_face_mask)
    target_mask = build_mask(category_mask_np, 0)

    combined_mask = np.logical_or(target_mask, croper.corp_mask)
    mask_image = Image.fromarray((combined_mask * 255).astype(np.uint8))
    return mask_image