orhir committed
Commit 248b92d · 1 Parent(s): ea9182f

Update app.py

Files changed (1)
  1. app.py +83 -71
app.py CHANGED
@@ -66,23 +66,14 @@ COLORS = [
     [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 0, 0]
 ]
 
-kp_src = []
-skeleton = []
-count = 0
-color_idx = 0
-prev_pt = None
-prev_pt_idx = None
-prev_clicked = None
-original_support_image = None
-checkpoint_path = ''
-
-def process(query_img,
+def process(query_img, state,
             cfg_path='configs/demo_b.py'):
-    global skeleton
     cfg = Config.fromfile(cfg_path)
-    kp_src_np = np.array(kp_src).copy().astype(np.float32)
-    kp_src_np[:, 0] = kp_src_np[:, 0] / 128. * cfg.model.encoder_config.img_size
-    kp_src_np[:, 1] = kp_src_np[:, 1] / 128. * cfg.model.encoder_config.img_size
+    kp_src_np = np.array(state['kp_src']).copy().astype(np.float32)
+    kp_src_np[:, 0] = kp_src_np[:,
+                      0] / 128. * cfg.model.encoder_config.img_size
+    kp_src_np[:, 1] = kp_src_np[:,
+                      1] / 128. * cfg.model.encoder_config.img_size
     kp_src_np = np.flip(kp_src_np, 1).copy()
     kp_src_tensor = torch.tensor(kp_src_np).float()
     preprocess = transforms.Compose([
@@ -91,10 +82,10 @@ def process(query_img,
         Resize_Pad(cfg.model.encoder_config.img_size,
                    cfg.model.encoder_config.img_size)])
 
-    if len(skeleton) == 0:
+    if len(state['skeleton']) == 0:
         skeleton = [(0, 0)]
 
-    support_img = preprocess(original_support_image).flip(0)[None]
+    support_img = preprocess(state['original_support_image']).flip(0)[None]
     np_query = np.array(query_img)[:, :, ::-1].copy()
     q_img = preprocess(np_query).flip(0)[None]
     # Create heatmap from keypoints
@@ -104,9 +95,9 @@ def process(query_img,
                                       cfg.model.encoder_config.img_size])
     data_cfg['joint_weights'] = None
     data_cfg['use_different_joint_weights'] = False
-    kp_src_3d = torch.cat(
+    kp_src_3d = torch.concatenate(
         (kp_src_tensor, torch.zeros(kp_src_tensor.shape[0], 1)), dim=-1)
-    kp_src_3d_weight = torch.cat(
+    kp_src_3d_weight = torch.concatenate(
         (torch.ones_like(kp_src_tensor),
          torch.zeros(kp_src_tensor.shape[0], 1)), dim=-1)
     target_s, target_weight_s = genHeatMap._msra_generate_target(data_cfg,
@@ -125,8 +116,8 @@ def process(query_img,
            'target_q': None,
            'target_weight_q': None,
            'return_loss': False,
-           'img_metas': [{'sample_skeleton': [skeleton],
-                          'query_skeleton': skeleton,
+           'img_metas': [{'sample_skeleton': [state['skeleton']],
+                          'query_skeleton': state['skeleton'],
                           'sample_joints_3d': [kp_src_3d],
                           'query_joints_3d': kp_src_3d,
                           'sample_center': [kp_src_tensor.mean(dim=0)],
@@ -165,54 +156,77 @@ def process(query_img,
                          vis_s_weight,
                          None,
                          vis_q_weight,
-                         skeleton,
+                         state['skeleton'],
                          None,
                          torch.tensor(outputs['points']).squeeze(0),
                          )
-    return out
+    return out, state
 
 
 with gr.Blocks() as demo:
+    state = gr.State({
+        'kp_src': [],
+        'skeleton': [],
+        'count': 0,
+        'color_idx': 0,
+        'prev_pt': None,
+        'prev_pt_idx': None,
+        'prev_clicked': None,
+        'original_support_image': None,
+    })
+
     gr.Markdown('''
     # Pose Anything Demo
-    We present a novel approach to category agnostic pose estimation that leverages the inherent geometrical relations between keypoints through a newly designed Graph Transformer Decoder. By capturing and incorporating this crucial structural information, our method enhances the accuracy of keypoint localization, marking a significant departure from conventional CAPE techniques that treat keypoints as isolated entities.
-    ### [Paper](https://arxiv.org/abs/2311.17891) | [Official Repo](https://github.com/orhir/PoseAnything)
-    ![](/file=gradio_teaser.png)
+    We present a novel approach to category agnostic pose estimation that
+    leverages the inherent geometrical relations between keypoints through a
+    newly designed Graph Transformer Decoder. By capturing and incorporating
+    this crucial structural information, our method enhances the accuracy of
+    keypoint localization, marking a significant departure from conventional
+    CAPE techniques that treat keypoints as isolated entities.
+    ### [Paper](https://arxiv.org/abs/2311.17891) | [Official Repo](
+    https://github.com/orhir/PoseAnything)
    ## Instructions
     1. Upload an image of the object you want to pose on the **left** image.
     2. Click on the **left** image to mark keypoints.
     3. Click on the keypoints on the **right** image to mark limbs.
-    4. Upload an image of the object you want to pose to the query image (**bottom**).
+    4. Upload an image of the object you want to pose to the query image (
+    **bottom**).
     5. Click **Evaluate** to pose the query image.
     ''')
     with gr.Row():
         support_img = gr.Image(label="Support Image",
                                type="pil",
                                info='Click to mark keypoints').style(
-            height=256, width=256)
+            height=400, width=400)
         posed_support = gr.Image(label="Posed Support Image",
                                  type="pil",
-                                 interactive=False).style(height=256, width=256)
+                                 interactive=False).style(height=400,
+                                                          width=400)
     with gr.Row():
         query_img = gr.Image(label="Query Image",
-                             type="pil").style(height=256, width=256)
+                             type="pil").style(height=400, width=400)
     with gr.Row():
         eval_btn = gr.Button(value="Evaluate")
     with gr.Row():
-        output_img = gr.Plot(label="Output Image", height=256, width=256)
+        output_img = gr.Plot(label="Output Image", height=400, width=400)
 
 
     def get_select_coords(kp_support,
                           limb_support,
+                          state,
                           evt: gr.SelectData,
                           r=0.015):
+        # global original_support_image
+        # if len(kp_src) == 0:
+        #     original_support_image = np.array(kp_support)[:, :,
+        #                                       ::-1].copy()
         pixels_in_queue = set()
         pixels_in_queue.add((evt.index[1], evt.index[0]))
         while len(pixels_in_queue) > 0:
             pixel = pixels_in_queue.pop()
             if pixel[0] is not None and pixel[
-                1] is not None and pixel not in kp_src:
-                kp_src.append(pixel)
+                1] is not None and pixel not in state['kp_src']:
+                state['kp_src'].append(pixel)
             else:
                 print("Invalid pixel")
             if limb_support is None:
@@ -230,13 +244,13 @@ with gr.Blocks() as demo:
             draw_pose.ellipse(twoPointList, fill=(255, 0, 0, 255))
             draw_limb.ellipse(twoPointList, fill=(255, 0, 0, 255))
 
-        return canvas_kp, canvas_limb
+        return canvas_kp, canvas_limb, state
 
 
     def get_limbs(kp_support,
+                  state,
                   evt: gr.SelectData,
                   r=0.02, width=0.02):
-        global count, color_idx, prev_pt, skeleton, prev_pt_idx, prev_clicked
         curr_pixel = (evt.index[1], evt.index[0])
         pixels_in_queue = set()
         pixels_in_queue.add((evt.index[1], evt.index[0]))
@@ -244,64 +258,62 @@ with gr.Blocks() as demo:
         w, h = canvas_kp.size
         r = int(r * w)
         width = int(width * w)
-        while (len(pixels_in_queue) > 0 and
-               curr_pixel != prev_clicked and
-               len(kp_src) > 0):
+        while len(pixels_in_queue) > 0 and curr_pixel != state['prev_clicked']:
             pixel = pixels_in_queue.pop()
-            prev_clicked = pixel
-            closest_point = min(kp_src,
+            state['prev_clicked'] = pixel
+            closest_point = min(state['kp_src'],
                                 key=lambda p: (p[0] - pixel[0]) ** 2 +
                                               (p[1] - pixel[1]) ** 2)
-            closest_point_index = kp_src.index(closest_point)
+            closest_point_index = state['kp_src'].index(closest_point)
             draw_limb = ImageDraw.Draw(canvas_kp)
-            if color_idx < len(COLORS):
-                c = COLORS[color_idx]
+            if state['color_idx'] < len(COLORS):
+                c = COLORS[state['color_idx']]
             else:
                 c = random.choices(range(256), k=3)
             leftUpPoint = (closest_point[1] - r, closest_point[0] - r)
             rightDownPoint = (closest_point[1] + r, closest_point[0] + r)
             twoPointList = [leftUpPoint, rightDownPoint]
             draw_limb.ellipse(twoPointList, fill=tuple(c))
-            if count == 0:
-                prev_pt = closest_point[1], closest_point[0]
-                prev_pt_idx = closest_point_index
-                count = count + 1
+            if state['count'] == 0:
+                state['prev_pt'] = closest_point[1], closest_point[0]
+                state['prev_pt_idx'] = closest_point_index
+                state['count'] = state['count'] + 1
             else:
-                if prev_pt_idx != closest_point_index:
+                if state['prev_pt_idx'] != closest_point_index:
                     # Create Line and add Limb
-                    draw_limb.line([prev_pt, (closest_point[1], closest_point[0])],
-                                   fill=tuple(c),
-                                   width=width)
-                    skeleton.append((prev_pt_idx, closest_point_index))
-                    color_idx = color_idx + 1
+                    draw_limb.line(
+                        [state['prev_pt'], (closest_point[1], closest_point[0])],
+                        fill=tuple(c),
+                        width=width)
+                    state['skeleton'].append((state['prev_pt_idx'], closest_point_index))
+                    state['color_idx'] = state['color_idx'] + 1
                 else:
                     draw_limb.ellipse(twoPointList, fill=(255, 0, 0, 255))
-                count = 0
-        return canvas_kp
+                state['count'] = 0
+        return canvas_kp, state
 
 
-    def set_query(support_img):
-        global original_support_image
-        skeleton.clear()
-        kp_src.clear()
-        original_support_image = np.array(support_img)[:, :, ::-1].copy()
+    def set_qery(support_img, state):
+        state['skeleton'].clear()
+        state['kp_src'].clear()
+        state['original_support_image'] = np.array(support_img)[:, :, ::-1].copy()
         support_img = support_img.resize((128, 128), Image.Resampling.LANCZOS)
-        return support_img, support_img
+        return support_img, support_img, state
 
 
     support_img.select(get_select_coords,
-                       [support_img, posed_support],
-                       [support_img, posed_support],
-                       )
-    support_img.upload(set_query,
-                       inputs=support_img,
-                       outputs=[support_img,posed_support])
+                       [support_img, posed_support, state],
+                       [support_img, posed_support, state])
+    support_img.upload(set_qery,
+                       inputs=[support_img, state],
+                       outputs=[support_img, posed_support, state])
     posed_support.select(get_limbs,
-                         posed_support,
-                         posed_support)
+                         [posed_support, state],
+                         [posed_support, state])
     eval_btn.click(fn=process,
-                   inputs=[query_img],
-                   outputs=output_img)
+                   inputs=[query_img, state],
+                   outputs=[output_img, state])
+
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description='Pose Anything Demo')
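The substance of this commit is a migration from module-level globals (`kp_src`, `skeleton`, `count`, `color_idx`, and friends) to a per-session `gr.State` dictionary. Globals in a Gradio app are shared by every connected browser session, so two users annotating keypoints at the same time would overwrite each other's clicks; `gr.State` keeps one copy per session, passed into each event handler as an input and returned as an output so updates persist to the next event. Below is a minimal sketch of that pattern, independent of this app; the `add_point` handler and component names are illustrative, not part of app.py:

```python
import gradio as gr

with gr.Blocks() as demo:
    # One dict per browser session, unlike a module-level global,
    # which every session would share.
    state = gr.State({'points': []})

    img = gr.Image(type="pil", label="Click to add points")
    log = gr.Textbox(label="Clicked points")

    def add_point(state, evt: gr.SelectData):
        # Gradio injects the SelectData event from the typed parameter;
        # it is not listed in `inputs`. Mutate the session's dict and
        # return it so the updated value is stored for the next event.
        state['points'].append((evt.index[1], evt.index[0]))
        return str(state['points']), state

    # `state` appears in both inputs and outputs, just as app.py now
    # wires get_select_coords, get_limbs, set_qery, and process.
    img.select(add_point, inputs=[state], outputs=[log, state])

if __name__ == "__main__":
    demo.launch()
```

Returning `state` from every handler is what makes an update stick; the same wiring shows up in the diff's `.select`, `.upload`, and `.click` calls, each of which now lists `state` in both `inputs` and `outputs`.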