oshita-n committed on
Commit
59c3bc4
1 Parent(s): ee86500
Files changed (1)
  1. app.py +19 -54
app.py CHANGED
@@ -1,69 +1,34 @@
-import torch
-import numpy as np
 import gradio as gr
-from lavis.models import load_model_and_preprocess
+from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation
+import torch
 from PIL import Image
-import matplotlib.pyplot as plt
-import matplotlib.cm as cm
-import torchvision
-
-def create_heatmap(activation_map):
-    # Convert the activation map to a numpy array
-    activation_map_np = activation_map.squeeze().detach().cpu().numpy()
-
-    # Get the minimum and maximum values of the activation map
-    min_value = np.min(activation_map_np)
-    max_value = np.max(activation_map_np)
-
-    # Normalize the activation map to the range 0-1
-    normalized_map = (activation_map_np - min_value) / (max_value - min_value)
-
-    # Convert the normalized activation map to a heatmap
-    heatmap = cm.jet(normalized_map)
-
-    # Scale the heatmap to the range [0, 255] and convert it to uint8
-    heatmap = np.uint8(255 * heatmap)
-
-    return heatmap
+import numpy as np
 
 def process(input_image, prompt):
-    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
-
-    model, vis_processors, txt_processors = load_model_and_preprocess(name="pnp_vqa", model_type="base", is_eval=True, device=device)
-    input_image = input_image.resize((256, 256))
-    image = vis_processors["eval"](input_image).unsqueeze(0).to(device)
-    text_input = txt_processors["eval"](prompt)
-    sample = {"image": image, "text_input": [text_input]}
-
-    output = model.forward_itm(samples=sample)
-    activation_map = output['gradcams'].reshape(24, 24)
-
-    relu = torch.nn.ReLU()
-    # Compute the heatmap
-    heatmap = create_heatmap(activation_map)
-
-    heatmap = Image.fromarray(heatmap)
-    heatmap = torchvision.transforms.functional.to_tensor(heatmap)
-    heatmap = relu(heatmap)
-    heatmap = torchvision.transforms.functional.to_pil_image(heatmap)
-    heatmap = heatmap.resize((256, 256))
-    heatmap = np.array(heatmap)
-    heatmap = torch.sigmoid(torch.from_numpy(heatmap)).numpy()
-    preds = heatmap.reshape(256, 256, -1)
-    preds = Image.fromarray(preds.astype(np.uint8)).convert('L')
-    preds = np.array(preds)
-    preds = np.where(preds > 0.5, 255, 0)
-
+    inputs = processor(text=prompt, images=input_image, padding="max_length", return_tensors="pt")
+    # predict
+    with torch.no_grad():
+        outputs = model(**inputs)
+    preds = torch.sigmoid(outputs.logits).squeeze().detach().cpu().numpy()
+    preds = np.where(preds > 0.3, 255, 0).astype(np.uint8)
+    preds = Image.fromarray(preds.astype(np.uint8))
+    preds = np.array(preds.resize((input_image.width, input_image.height)))
     return preds
-
+
 if __name__ == '__main__':
+    processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
+    model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")
     input_image = gr.inputs.Image(label='image', type='pil')
     prompt = gr.Textbox(label='Prompt')
     ips = [
         input_image, prompt
     ]
     outputs = "image"
+    input_size = (256, 256)
+    output_size = (256, 256)
     iface = gr.Interface(fn=process,
                          inputs=ips,
-                         outputs=outputs)
+                         outputs=outputs,
+                         input_size=input_size,
+                         output_size=output_size)
     iface.launch()
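
For reference, below is a minimal standalone sketch of the CLIPSeg inference path that the new app.py wires into Gradio. It is not part of the commit: the image path "example.jpg" and the prompt string are placeholders, and it assumes the transformers, torch, Pillow, and numpy packages used by the Space are installed. The checkpoint name, the 0.3 threshold, and the resize back to the input size mirror the new process() function.

import numpy as np
import torch
from PIL import Image
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

# Same checkpoint as the committed app.py.
processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

image = Image.open("example.jpg").convert("RGB")  # placeholder input image
prompt = "a dog"                                  # placeholder text prompt

# Preprocess, run a forward pass without gradients, take a sigmoid over the
# low-resolution logits, and threshold into a binary mask (same steps as process()).
inputs = processor(text=prompt, images=image, padding="max_length", return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)
probs = torch.sigmoid(outputs.logits).squeeze().cpu().numpy()
mask = np.where(probs > 0.3, 255, 0).astype(np.uint8)

# Resize the mask back to the original image size before saving.
Image.fromarray(mask).resize((image.width, image.height)).save("mask.png")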