cheng committed
Commit f558a24
1 Parent(s): 1ab570a

add more time

Files changed (2)
  1. Equirec2Perspec.py +76 -0
  2. app.py +52 -41
Equirec2Perspec.py ADDED
@@ -0,0 +1,76 @@
+import os
+import sys
+import cv2
+import numpy as np
+
+
+def xyz2lonlat(xyz):
+    atan2 = np.arctan2
+    asin = np.arcsin
+
+    norm = np.linalg.norm(xyz, axis=-1, keepdims=True)
+    xyz_norm = xyz / norm
+    x = xyz_norm[..., 0:1]
+    y = xyz_norm[..., 1:2]
+    z = xyz_norm[..., 2:]
+
+    lon = atan2(x, z)
+    lat = asin(y)
+    lst = [lon, lat]
+
+    out = np.concatenate(lst, axis=-1)
+    return out
+
+
+def lonlat2XY(lonlat, shape):
+    X = (lonlat[..., 0:1] / (2 * np.pi) + 0.5) * (shape[1] - 1)
+    Y = (lonlat[..., 1:] / (np.pi) + 0.5) * (shape[0] - 1)
+    lst = [X, Y]
+    out = np.concatenate(lst, axis=-1)
+
+    return out
+
+
+class Equirectangular:
+    def __init__(self, img):
+        # self._img = cv2.imread(img_name, cv2.IMREAD_COLOR)
+        self._img = img
+        [self._height, self._width, _] = self._img.shape
+        # cp = self._img.copy()
+        # w = self._width
+        # self._img[:, :w/8, :] = cp[:, 7*w/8:, :]
+        # self._img[:, w/8:, :] = cp[:, :7*w/8, :]
+
+    def GetPerspective(self, FOV, THETA, PHI, height, width):
+        #
+        # THETA is left/right angle, PHI is up/down angle, both in degree
+        #
+
+        f = 0.5 * width * 1 / np.tan(0.5 * FOV / 180.0 * np.pi)
+        cx = (width - 1) / 2.0
+        cy = (height - 1) / 2.0
+        K = np.array([
+            [f, 0, cx],
+            [0, f, cy],
+            [0, 0, 1],
+        ], np.float32)
+        K_inv = np.linalg.inv(K)
+
+        x = np.arange(width)
+        y = np.arange(height)
+        x, y = np.meshgrid(x, y)
+        z = np.ones_like(x)
+        xyz = np.concatenate([x[..., None], y[..., None], z[..., None]], axis=-1)
+        xyz = xyz @ K_inv.T
+
+        y_axis = np.array([0.0, 1.0, 0.0], np.float32)
+        x_axis = np.array([1.0, 0.0, 0.0], np.float32)
+        R1, _ = cv2.Rodrigues(y_axis * np.radians(THETA))
+        R2, _ = cv2.Rodrigues(np.dot(R1, x_axis) * np.radians(PHI))
+        R = R2 @ R1
+        xyz = xyz @ R.T
+        lonlat = xyz2lonlat(xyz)
+        XY = lonlat2XY(lonlat, shape=self._img.shape).astype(np.float32)
+        persp = cv2.remap(self._img, XY[..., 0], XY[..., 1], cv2.INTER_CUBIC, borderMode=cv2.BORDER_WRAP)
+
+        return persp
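For reference, `Equirectangular.GetPerspective(FOV, THETA, PHI, height, width)` renders a pinhole-camera view out of the equirectangular panorama: `FOV` is the field of view in degrees, `THETA` pans left/right, and `PHI` tilts up/down. A minimal usage sketch, not part of the commit; the file paths are hypothetical:

```python
import cv2
import Equirec2Perspec as E2P

# Load an equirectangular panorama (H x W x 3; width is typically 2x height).
pano = cv2.imread("pano.jpg", cv2.IMREAD_COLOR)  # hypothetical input path

equ = E2P.Equirectangular(pano)
# 45-degree FOV, panned 30 degrees right, no vertical tilt, 360x540 output.
view = equ.GetPerspective(FOV=45, THETA=30, PHI=0, height=360, width=540)
cv2.imwrite("view_theta30.jpg", view)  # hypothetical output path
```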
app.py CHANGED
@@ -8,16 +8,16 @@ from PIL import Image
 import numpy as np
 from pathlib import Path
 import gradio as gr
-
 import warnings
-
 import torch
+import Equirec2Perspec as E2P
+import cv2
+import numpy as np
 
 os.system("python setup.py build develop --user")
 os.system("pip install packaging==21.3")
 warnings.filterwarnings("ignore")
 
-
 from groundingdino.models import build_model
 from groundingdino.util.slconfig import SLConfig
 from groundingdino.util.utils import clean_state_dict
@@ -26,7 +26,9 @@ import groundingdino.datasets.transforms as T
 
 from huggingface_hub import hf_hub_download
 
-
+picture_height = 360
+picture_width = 540
+picture_fov = 45
 
 # Use this command for evaluate the GLIP-T model
 config_file = "groundingdino/config/GroundingDINO_SwinT_OGC.py"
@@ -34,8 +36,32 @@ ckpt_repo_id = "ShilongLiu/GroundingDINO"
 ckpt_filenmae = "groundingdino_swint_ogc.pth"
 
 
+def detection(image):
+    sub_images = processPanorama(image)
+    processed_images = [np.array(sub_image) for sub_image in sub_images]
+
+    return processed_images
+
+
+def processPanorama(image):
+    equ = E2P.Equirectangular(image)
+    FOV = picture_fov
+    y_axis = 0
+
+    sub_images = []
+    while y_axis <= 0:
+        z_axis = -150
+        while z_axis <= 90:
+            img = equ.GetPerspective(FOV, z_axis, y_axis, picture_height, picture_width)
+            # cv2.imwrite(f'{directory_name}_{z_axis}z.jpg', img)
+            sub_images.append(img)
+            z_axis += FOV
+        y_axis += FOV
+    return sub_images
+
+
 def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
-    args = SLConfig.fromfile(model_config_path)
+    args = SLConfig.fromfile(model_config_path)
     model = build_model(args)
     args.device = device
 
@@ -44,7 +70,8 @@ def load_model_hf(model_config_path, repo_id, filename, device='cpu'):
     log = model.load_state_dict(clean_state_dict(checkpoint['model']), strict=False)
     print("Model loaded from {} \n => {}".format(cache_file, log))
     _ = model.eval()
-    return model
+    return model
+
 
 def image_transform_grounding(init_image):
     transform = T.Compose([
@@ -52,18 +79,21 @@
         T.ToTensor(),
         T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
     ])
-    image, _ = transform(init_image, None) # 3, h, w
+    image, _ = transform(init_image, None) # 3, h, w
    return init_image, image
 
+
 def image_transform_grounding_for_vis(init_image):
     transform = T.Compose([
         T.RandomResize([800], max_size=1333),
     ])
-    image, _ = transform(init_image, None) # 3, h, w
+    image, _ = transform(init_image, None) # 3, h, w
     return image
 
+
 model = load_model_hf(config_file, ckpt_repo_id, ckpt_filenmae)
 
+
 def run_grounding(input_image, grounding_caption, box_threshold, text_threshold):
     init_image = input_image.convert("RGB")
     original_size = init_image.size
@@ -72,49 +102,30 @@ def run_grounding(input_image, grounding_caption, box_threshold, text_threshold)
     image_pil: Image = image_transform_grounding_for_vis(init_image)
 
     # run grounidng
-    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold, device='cpu')
+    boxes, logits, phrases = predict(model, image_tensor, grounding_caption, box_threshold, text_threshold,
+                                     device='cpu')
     annotated_frame = annotate(image_source=np.asarray(image_pil), boxes=boxes, logits=logits, phrases=phrases)
     image_with_box = Image.fromarray(cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB))
 
-
     return image_with_box
 
-if __name__ == "__main__":
-
-    parser = argparse.ArgumentParser("Grounding DINO demo", add_help=True)
-    parser.add_argument("--debug", action="store_true", help="using debug mode")
-    parser.add_argument("--share", action="store_true", help="share the app")
-    args = parser.parse_args()
 
-    block = gr.Blocks().queue()
-    with block:
-        gr.Markdown("# [Grounding DINO](https://github.com/IDEA-Research/GroundingDINO)")
-        gr.Markdown("### Open-World Detection with Grounding DINO")
-        gr.Markdown("Note the model runs on CPU, so it may take a while to run the model.")
+if __name__ == "__main__":
+    detect_app = gr.Blocks()
+    with detect_app:
+        gr.Markdown("# Panorama Traffic Sign Detection Demo")
+        gr.Markdown("Note the model runs on CPU for demo, so it may take a while to run the model.")
 
         with gr.Row():
             with gr.Column():
-                input_image = gr.Image(source='upload', type="pil")
-                grounding_caption = gr.Textbox(label="Detection Prompt")
-                run_button = gr.Button(label="Run")
-                with gr.Accordion("Advanced options", open=False):
-                    box_threshold = gr.Slider(
-                        label="Box Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
-                    text_threshold = gr.Slider(
-                        label="Text Threshold", minimum=0.0, maximum=1.0, value=0.25, step=0.001
-                    )
+                input_image = gr.Image(source='upload', type="numpy", label="Please upload a panorama picture.")
+                run_button = gr.Button(label="Process & Detect")
 
             with gr.Column():
-                gallery = gr.outputs.Image(
-                    type="pil",
-                    # label="grounding results"
-                ).style(full_width=True, full_height=True)
-                # gallery = gr.Gallery(label="Generated images", show_label=False).style(
-                #     grid=[1], height="auto", container=True, full_width=True, full_height=True)
-
-                run_button.click(fn=run_grounding, inputs=[
-                    input_image, grounding_caption, box_threshold, text_threshold], outputs=[gallery])
+                gallery = gr.Gallery(label="Detection Results").style(
+                    columns=[3], preview=False, object_fit="none")
 
-    block.launch(share=False, show_api=False, show_error=True)
+        run_button.click(fn=detection, inputs=[
+            input_image], outputs=[gallery])
 
+    detect_app.launch(share=False, show_api=False, show_error=True)
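As a sanity check on the new `processPanorama` loop: with `picture_fov = 45`, the outer loop runs once with `y_axis` (PHI) fixed at 0, and the inner loop sweeps `z_axis` (THETA) from -150 in FOV-sized steps while it stays at or below 90, so each uploaded panorama yields six perspective crops. A small sketch, not part of the commit, that reproduces just that angle schedule:

```python
# Reproduce the THETA/PHI schedule used by processPanorama in this commit.
fov = 45
phi = 0                # y_axis never advances past 0, so a single horizontal sweep
thetas = []
theta = -150           # z_axis start value
while theta <= 90:
    thetas.append(theta)
    theta += fov

print(thetas)          # [-150, -105, -60, -15, 30, 75] -> 6 sub-images at PHI = 0
```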