ariG23498 (HF Staff) committed

Commit d5766fb · Parent: ccc1913

refactor code

Files changed (2):
  1. app.py +77 -79
  2. app_demo1.py +138 -0
app.py CHANGED
@@ -1,101 +1,96 @@
- from models.builder import build_model
- from visualization import mask2rgb
- from segmentation.datasets import PascalVOCDataset
-
  import os
- from hydra import compose, initialize
+ import warnings
+ import torch
+ import numpy as np
  from PIL import Image
- import matplotlib.pyplot as plt
  from torchvision import transforms as T
  import torch.nn.functional as F
- import numpy as np
- from operator import itemgetter
- import torch
- import random
- import warnings
-
- warnings.filterwarnings("ignore")
- initialize(config_path="configs", version_base=None)
-
+ import gradio as gr
+ from hydra import compose, initialize
  from huggingface_hub import Repository
+ from models.builder import build_model
+ from segmentation.datasets import PascalVOCDataset
+ from visualization import mask2rgb

- repo = Repository(
-     local_dir="clip-dinoiser",
-     clone_from="ariG23498/clip-dinoiser",
-     use_auth_token=os.environ.get("token")
- )
-
- check_path = 'clip-dinoiser/checkpoints/last.pt'
- device = "cuda" if torch.cuda.is_available() else "cpu"
-
- check = torch.load(check_path, map_location=device)
- dinoclip_cfg = "clip_dinoiser.yaml"
- cfg = compose(config_name=dinoclip_cfg)
-
- model = build_model(cfg.model, class_names=PascalVOCDataset.CLASSES).to(device)
- model.clip_backbone.decode_head.use_templates=False # switching off the imagenet templates for fast inference
- model.load_state_dict(check['model_state_dict'], strict=False)
- model = model.eval()
-
- import gradio as gr
+ # Suppress warnings
+ warnings.filterwarnings("ignore")

- colors = [
+ # Constants
+ CHECKPOINT_PATH = "clip-dinoiser/checkpoints/last.pt"
+ CONFIG_PATH = "configs"
+ DINOCLIP_CONFIG = "clip_dinoiser.yaml"
+ COLORS = [
      (0, 255, 0),
-     (0, 0, 255),
-     (255, 255, 0),
-     (255, 0, 255),
+     (255, 0, 0),
      (0, 255, 255),
-     (114, 128, 250),
-     (0, 165, 255),
+     (255, 0, 255),
+     (255, 255, 0),
+     (250, 128, 114),
+     (255, 165, 0),
      (0, 128, 0),
      (144, 238, 144),
-     (238, 238, 175),
-     (255, 191, 0),
+     (175, 238, 238),
+     (0, 191, 255),
      (0, 128, 0),
-     (226, 43, 138),
+     (138, 43, 226),
      (255, 0, 255),
-     (0, 215, 255),
-     (255, 0, 0),
+     (255, 215, 0),
+     (0, 0, 255),
  ]

- color_map = {
-     f"{color_id}": f"#{hex(color[0])[2:].zfill(2)}{hex(color[1])[2:].zfill(2)}{hex(color[2])[2:].zfill(2)}" for color_id, color in enumerate(colors)
- }
+ # Initialize Hydra
+ initialize(config_path=CONFIG_PATH, version_base=None)
+
+ # Configuration and Model Initialization
+ def load_model():
+     Repository(
+         local_dir="clip-dinoiser",
+         clone_from="ariG23498/clip-dinoiser",
+         use_auth_token=os.environ.get("token")
+     )
+
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     checkpoint = torch.load(CHECKPOINT_PATH, map_location=device)
+     cfg = compose(config_name=DINOCLIP_CONFIG)

- def run_clip_dinoiser(input_image, text_prompts):
+     model = build_model(cfg.model, class_names=PascalVOCDataset.CLASSES).to(device)
+     model.clip_backbone.decode_head.use_templates = False
+     model.load_state_dict(checkpoint['model_state_dict'], strict=False)
+     return model.eval()
+
+ def run_clip_dinoiser(input_image, text_prompts, model, device, colors):
      image = input_image.convert("RGB")
      text_prompts = text_prompts.split(",")
      palette = colors[:len(text_prompts)]

      model.clip_backbone.decode_head.update_vocab(text_prompts)
      model.to(device)
-     model.apply_found = True

      img_tens = T.PILToTensor()(image).unsqueeze(0).to(device) / 255.
-
      h, w = img_tens.shape[-2:]
      output = model(img_tens).cpu()
-     output = F.interpolate(output, scale_factor=model.clip_backbone.backbone.patch_size, mode="bilinear",
-                            align_corners=False)[..., :h, :w]
+     output = F.interpolate(output, scale_factor=model.clip_backbone.backbone.patch_size, mode="bilinear", align_corners=False)[..., :h, :w]
      output = output[0].argmax(dim=0)
-     mask = mask2rgb(output, palette)

+     mask = mask2rgb(output, palette)
      classes = np.unique(output).tolist()
-     palette_array = np.array(itemgetter(*classes)(palette)).reshape(1, -1, 3)
-     alpha=0.5
-     blend = (alpha)*np.array(image)/255. + (1-alpha) * mask/255.
+     alpha = 0.5
+     blend = (alpha * np.array(image) / 255.) + ((1 - alpha) * mask / 255.)

-     h_text = list()
-     for idx, text in enumerate(text_prompts):
-         h_text.append((text, f"{idx}"))
+     h_text = [(text, f"{idx}") for idx, text in enumerate(text_prompts)]
      return blend, mask, h_text


+ def create_color_map(colors):
+     return {
+         f"{color_id}": f"#{hex(color[0])[2:].zfill(2)}{hex(color[1])[2:].zfill(2)}{hex(color[2])[2:].zfill(2)}"
+         for color_id, color in enumerate(colors)
+     }


- if __name__ == "__main__":
+ def setup_gradio_interface(model, device, colors, color_map):
+     block = gr.Blocks()

-     block = gr.Blocks().queue()
      with block:
          gr.Markdown("<h1><center>CLIP-DINOiser<h1><center>")

@@ -106,15 +101,8 @@ if __name__ == "__main__":
                  run_button = gr.Button(value="Run")

              with gr.Column():
-                 with gr.Row():
-                     overlay_mask = gr.Image(
-                         type="numpy",
-                         label="Overlay Mask",
-                     )
-                     only_mask = gr.Image(
-                         type="numpy",
-                         label="Segmentation Mask"
-                     )
+                 overlay_mask = gr.Image(type="numpy", label="Overlay Mask")
+                 only_mask = gr.Image(type="numpy", label="Segmentation Mask")
                  h_text = gr.HighlightedText(
                      label="Labels",
                      combine_adjacent=False,
@@ -123,16 +111,26 @@ if __name__ == "__main__":
                  )

          run_button.click(
-             fn=run_clip_dinoiser,
-             inputs=[input_image, text_prompts,],
+             fn=lambda img, prompts: run_clip_dinoiser(img, prompts, model, device, colors),
+             inputs=[input_image, text_prompts],
              outputs=[overlay_mask, only_mask, h_text]
          )
+
          gr.Examples(
-             [["vintage_bike.jpeg", "background, vintage bike, leather bag"]],
-             inputs = [input_image, text_prompts,],
-             outputs = [overlay_mask, only_mask, h_text],
-             fn=run_clip_dinoiser,
+             examples=[["vintage_bike.jpeg", "background, vintage bike, leather bag"]],
+             inputs=[input_image, text_prompts],
+             outputs=[overlay_mask, only_mask, h_text],
+             fn=lambda img, prompts: run_clip_dinoiser(img, prompts, model, device, colors),
              cache_examples=True,
              label='Try this example input!'
-         )
-     block.launch(share=False, show_api=False, show_error=True)
+         )
+
+     return block
+
+
+ if __name__ == "__main__":
+     model = load_model()
+     device = "cuda" if torch.cuda.is_available() else "cpu"
+     color_map = create_color_map(COLORS)
+     gradio_interface = setup_gradio_interface(model, device, COLORS, color_map)
+     gradio_interface.launch(share=False, show_api=False, show_error=True)
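For reference, the color_map passed to gr.HighlightedText is now produced by the new create_color_map helper, which packs each RGB tuple into an HTML hex string. A minimal standalone sketch of its behaviour (the helper is copied from the diff above; the sample input reuses the first two COLORS entries):

def create_color_map(colors):
    # Map each color index ("0", "1", ...) to a "#rrggbb" hex string.
    return {
        f"{color_id}": f"#{hex(color[0])[2:].zfill(2)}{hex(color[1])[2:].zfill(2)}{hex(color[2])[2:].zfill(2)}"
        for color_id, color in enumerate(colors)
    }

print(create_color_map([(0, 255, 0), (255, 0, 0)]))
# {'0': '#00ff00', '1': '#ff0000'}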
app_demo1.py ADDED
@@ -0,0 +1,138 @@
+ # from models.builder import build_model
+ # from visualization import mask2rgb
+ # from segmentation.datasets import PascalVOCDataset
+
+ # import os
+ # from hydra import compose, initialize
+ # from PIL import Image
+ # import matplotlib.pyplot as plt
+ # from torchvision import transforms as T
+ # import torch.nn.functional as F
+ # import numpy as np
+ from operator import itemgetter
+ # import torch
+ # import random
+ # import warnings
+
+ warnings.filterwarnings("ignore")
+ initialize(config_path="configs", version_base=None)
+
+ # from huggingface_hub import Repository
+
+ repo = Repository(
+     local_dir="clip-dinoiser",
+     clone_from="ariG23498/clip-dinoiser",
+     use_auth_token=os.environ.get("token")
+ )
+
+ check_path = 'clip-dinoiser/checkpoints/last.pt'
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ check = torch.load(check_path, map_location=device)
+ dinoclip_cfg = "clip_dinoiser.yaml"
+ cfg = compose(config_name=dinoclip_cfg)
+
+ model = build_model(cfg.model, class_names=PascalVOCDataset.CLASSES).to(device)
+ model.clip_backbone.decode_head.use_templates=False # switching off the imagenet templates for fast inference
+ model.load_state_dict(check['model_state_dict'], strict=False)
+ model = model.eval()
+
+ # import gradio as gr
+
+ colors = [
+     (0, 255, 0),
+     (0, 0, 255),
+     (255, 255, 0),
+     (255, 0, 255),
+     (0, 255, 255),
+     (114, 128, 250),
+     (0, 165, 255),
+     (0, 128, 0),
+     (144, 238, 144),
+     (238, 238, 175),
+     (255, 191, 0),
+     (0, 128, 0),
+     (226, 43, 138),
+     (255, 0, 255),
+     (0, 215, 255),
+     (255, 0, 0),
+ ]
+
+ color_map = {
+     f"{color_id}": f"#{hex(color[0])[2:].zfill(2)}{hex(color[1])[2:].zfill(2)}{hex(color[2])[2:].zfill(2)}" for color_id, color in enumerate(colors)
+ }
+
+ def run_clip_dinoiser(input_image, text_prompts):
+     image = input_image.convert("RGB")
+     text_prompts = text_prompts.split(",")
+     palette = colors[:len(text_prompts)]
+
+     model.clip_backbone.decode_head.update_vocab(text_prompts)
+     model.to(device)
+     model.apply_found = True
+
+     img_tens = T.PILToTensor()(image).unsqueeze(0).to(device) / 255.
+
+     h, w = img_tens.shape[-2:]
+     output = model(img_tens).cpu()
+     output = F.interpolate(output, scale_factor=model.clip_backbone.backbone.patch_size, mode="bilinear",
+                            align_corners=False)[..., :h, :w]
+     output = output[0].argmax(dim=0)
+     mask = mask2rgb(output, palette)
+
+     classes = np.unique(output).tolist()
+     palette_array = np.array(itemgetter(*classes)(palette)).reshape(1, -1, 3)
+     alpha=0.5
+     blend = (alpha)*np.array(image)/255. + (1-alpha) * mask/255.
+
+     h_text = list()
+     for idx, text in enumerate(text_prompts):
+         h_text.append((text, f"{idx}"))
+     return blend, mask, h_text
+
+
+
+
+ if __name__ == "__main__":
+
+     block = gr.Blocks().queue()
+     with block:
+         gr.Markdown("<h1><center>CLIP-DINOiser<h1><center>")
+
+         with gr.Row():
+             with gr.Column():
+                 input_image = gr.Image(type="pil", label="Input Image")
+                 text_prompts = gr.Textbox(label="Enter comma-separated prompts")
+                 run_button = gr.Button(value="Run")
+
+             with gr.Column():
+                 with gr.Row():
+                     overlay_mask = gr.Image(
+                         type="numpy",
+                         label="Overlay Mask",
+                     )
+                     only_mask = gr.Image(
+                         type="numpy",
+                         label="Segmentation Mask"
+                     )
+                 h_text = gr.HighlightedText(
+                     label="Labels",
+                     combine_adjacent=False,
+                     show_legend=False,
+                     color_map=color_map
+                 )
+
+         run_button.click(
+             fn=run_clip_dinoiser,
+             inputs=[input_image, text_prompts,],
+             outputs=[overlay_mask, only_mask, h_text]
+         )
+         gr.Examples(
+             [["vintage_bike.jpeg", "background, vintage bike, leather bag"]],
+             inputs = [input_image, text_prompts,],
+             outputs = [overlay_mask, only_mask, h_text],
+             fn=run_clip_dinoiser,
+             cache_examples=True,
+             label='Try this example input!'
+         )
+     block.launch(share=False, show_api=False, show_error=True)
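Because the refactored app.py exposes load_model, run_clip_dinoiser, and COLORS at module level, the pipeline can also be driven without the Gradio UI. A minimal sketch, assuming the Space's dependencies are installed, the checkpoint repository is reachable, and a Hugging Face token is exported in the "token" environment variable as app.py expects; the image and prompts are the ones used in gr.Examples:

import torch
from PIL import Image
from app import COLORS, load_model, run_clip_dinoiser

model = load_model()  # clones the checkpoint repo and loads last.pt
device = "cuda" if torch.cuda.is_available() else "cpu"

blend, mask, labels = run_clip_dinoiser(
    Image.open("vintage_bike.jpeg"),           # example image shipped with the Space
    "background, vintage bike, leather bag",   # comma-separated prompts
    model, device, COLORS,
)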