Vishakaraj committed
Commit a312060 • 1 Parent(s): c709b60

Save output results as json

Files changed (1): app.py +34 -12
app.py CHANGED
@@ -1,10 +1,5 @@
 import os
-# os.system("sudo apt-get update && sudo apt-get install -y git")
-# os.system("sudo apt-get -y install pybind11-dev")
-# os.system("git clone https://github.com/facebookresearch/detectron2.git")
-# os.system("pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 torchaudio==0.9.0 -f https://download.pytorch.org/whl/torch_stable.html")
 os.system("cd detectron2 && pip install detectron2-0.6-cp310-cp310-linux_x86_64.whl")
-# os.system("pip3 install torch torchvision torchaudio")
 os.system("pip install deepspeed==0.7.0")

 import site
@@ -12,10 +7,11 @@ from importlib import reload
 reload(site)

 from PIL import Image
+from io import BytesIO
 import argparse
 import sys
 import numpy as np
-import cv2
+import torch
 import gradio as gr

 from detectron2.config import get_cfg
@@ -80,11 +76,37 @@ def setup_cfg(args):

 def predict(image_file):
     image_array = np.array(image_file)[:, :, ::-1]  # BGR
-    _, visualized_output = dense_captioning_demo.run_on_image(image_array)
-    visualized_output.save(os.path.join(os.getcwd(), "output.jpg"))
-    output_image = cv2.imread(os.path.join(os.getcwd(), "output.jpg"))
-    output_image = cv2.cvtColor(output_image, cv2.COLOR_BGR2RGB)
-    return Image.fromarray(output_image)
+    predictions, visualized_output = dense_captioning_demo.run_on_image(image_array)
+    buffer = BytesIO()
+    visualized_output.fig.savefig(buffer, format='png')
+    buffer.seek(0)
+    detections = {}
+    predictions = predictions["instances"].to(torch.device("cpu"))
+
+    for box, description, score in zip(
+        predictions.pred_boxes,
+        predictions.pred_object_descriptions.data,
+        predictions.scores,
+    ):
+        if description not in detections:
+            detections[description] = []
+        detections[description].append(
+            {
+                "xmin": float(box[0]),
+                "ymin": float(box[1]),
+                "xmax": float(box[2]),
+                "ymax": float(box[3]),
+                "score": float(score),
+            }
+        )
+
+    output = {
+        "dense_captioning_results": {
+            "detections": detections,
+        }
+    }
+
+    return Image.open(buffer), output



@@ -102,7 +124,7 @@ demo = gr.Interface(
     title="Dense Captioning - GRiT",
     fn=predict,
     inputs=gr.Image(type='pil', label="Original Image"),
-    outputs=gr.Image(type="pil",label="Output Image"),
+    outputs=[gr.Image(type="pil",label="Output Image"), "json"],
     examples=["example_1.jpg", "example_2.jpg"],
 )

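For reference, below is a minimal sketch of the JSON payload the updated predict() now returns alongside the rendered image. The caption string and the box/score numbers are placeholders for illustration, not real model output; the structure follows the dict built in the diff above.

# Illustrative shape only -- values are made up; real output comes from
# dense_captioning_demo.run_on_image() as shown in the diff above.
example_output = {
    "dense_captioning_results": {
        "detections": {
            "a red car parked on the street": [   # hypothetical caption
                {
                    "xmin": 34.2,
                    "ymin": 51.7,
                    "xmax": 212.9,
                    "ymax": 180.4,
                    "score": 0.87,
                },
            ],
        },
    },
}

# The dict is JSON-serializable as-is, e.g.:
import json
print(json.dumps(example_output, indent=2))

Because Gradio's "json" output component renders a plain Python dict directly, predict() can return the dict itself rather than a serialized string.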