Spaces:
Running
on
T4
Running
on
T4
Added @spaces.GPU decorator and switched to GPU officially
Browse files
app.py
CHANGED
@@ -28,34 +28,18 @@ cwd = os.getcwd()
|
|
28 |
print("Current working directory:", cwd)
|
29 |
|
30 |
# Installing dependencies not in requirements.txt
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
script = file.read()
|
37 |
-
return call(script, shell=True)
|
38 |
-
|
39 |
-
def build_custom_prompter():
|
40 |
-
with open('./build_custom_prompter.sh', 'rb') as file:
|
41 |
-
script = file.read()
|
42 |
-
return call(script, shell=True)
|
43 |
-
|
44 |
-
def build_multiscale_deform():
|
45 |
-
with open('./build_multiscale_deform.sh', 'rb') as file:
|
46 |
-
script = file.read()
|
47 |
-
return call(script, shell=True)
|
48 |
-
|
49 |
-
build_custom_prompter()
|
50 |
from gradio_image_prompter import ImagePrompter
|
|
|
51 |
subprocess.run(
|
52 |
shlex.split(
|
53 |
"pip install MultiScaleDeformableAttention-1.0-cp310-cp310-linux_x86_64.whl"
|
54 |
)
|
55 |
)
|
56 |
-
#print("torch version")
|
57 |
-
#print(torch.version.cuda)
|
58 |
-
#install_add_dependencies()
|
59 |
|
60 |
class AppSteps(Enum):
|
61 |
JUST_TEXT = 1
|
@@ -124,6 +108,12 @@ def get_args_parser():
|
|
124 |
parser.add_argument("--amp", action="store_true", help="Train with mixed precision")
|
125 |
return parser
|
126 |
|
|
|
|
|
|
|
|
|
|
|
|
|
127 |
|
128 |
# Get counting model.
|
129 |
@spaces.GPU
|
@@ -162,8 +152,6 @@ def build_model_and_transforms(args):
|
|
162 |
build_func = MODULE_BUILD_FUNCS.get(args.modelname)
|
163 |
model, _, _ = build_func(args)
|
164 |
|
165 |
-
#model.to(device)
|
166 |
-
|
167 |
checkpoint = torch.load(args.pretrain_model_path, map_location="cpu")["model"]
|
168 |
model.load_state_dict(checkpoint, strict=False)
|
169 |
|
@@ -174,11 +162,8 @@ def build_model_and_transforms(args):
|
|
174 |
|
175 |
parser = argparse.ArgumentParser("Counting Application", parents=[get_args_parser()])
|
176 |
args = parser.parse_args()
|
177 |
-
|
178 |
-
|
179 |
-
#else:
|
180 |
-
# args.device = torch.device('cpu')
|
181 |
-
args.device = torch.device('cpu')
|
182 |
model, transform = build_model_and_transforms(args)
|
183 |
|
184 |
examples = [
|
@@ -233,11 +218,12 @@ def get_ind_to_filter(text, word_ids, keywords):
|
|
233 |
|
234 |
return inds_to_filter
|
235 |
|
236 |
-
|
237 |
def count(image, text, prompts, state, device):
|
238 |
model.to(device)
|
239 |
-
|
240 |
keywords = "" # do not handle this for now
|
|
|
241 |
# Handle no prompt case.
|
242 |
if prompts is None:
|
243 |
prompts = {"image": image, "points": []}
|
@@ -259,11 +245,7 @@ def count(image, text, prompts, state, device):
|
|
259 |
)
|
260 |
|
261 |
ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
|
262 |
-
print(model_output["token"][0].tokens)
|
263 |
-
print(ind_to_filter)
|
264 |
-
print(model_output["pred_logits"].sigmoid()[0].shape)
|
265 |
logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
|
266 |
-
print(logits.shape)
|
267 |
boxes = model_output["pred_boxes"][0]
|
268 |
if len(keywords.strip()) > 0:
|
269 |
box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
|
@@ -339,7 +321,6 @@ def count_main(image, text, prompts, device):
|
|
339 |
input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
|
340 |
input_image_exemplars = input_image_exemplars.unsqueeze(0).to(device)
|
341 |
exemplars = [exemplars["exemplars"].to(device)]
|
342 |
-
print("image device: " + str(input_image.device))
|
343 |
|
344 |
with torch.no_grad():
|
345 |
model_output = model(
|
@@ -351,11 +332,7 @@ def count_main(image, text, prompts, device):
|
|
351 |
)
|
352 |
|
353 |
ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
|
354 |
-
print(model_output["token"][0].tokens)
|
355 |
-
print(ind_to_filter)
|
356 |
-
print(model_output["pred_logits"].sigmoid()[0].shape)
|
357 |
logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
|
358 |
-
print(logits.shape)
|
359 |
boxes = model_output["pred_boxes"][0]
|
360 |
if len(keywords.strip()) > 0:
|
361 |
box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
|
|
|
28 |
print("Current working directory:", cwd)
|
29 |
|
30 |
# Installing dependencies not in requirements.txt
|
31 |
+
subprocess.run(
|
32 |
+
shlex.split(
|
33 |
+
"pip install gradio_image_prompter-0.1.0-py3-none-any.whl"
|
34 |
+
)
|
35 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
from gradio_image_prompter import ImagePrompter
|
37 |
+
|
38 |
subprocess.run(
|
39 |
shlex.split(
|
40 |
"pip install MultiScaleDeformableAttention-1.0-cp310-cp310-linux_x86_64.whl"
|
41 |
)
|
42 |
)
|
|
|
|
|
|
|
43 |
|
44 |
class AppSteps(Enum):
|
45 |
JUST_TEXT = 1
|
|
|
108 |
parser.add_argument("--amp", action="store_true", help="Train with mixed precision")
|
109 |
return parser
|
110 |
|
111 |
+
@spaces.GPU
|
112 |
+
def get_device():
|
113 |
+
if torch.cuda.is_available():
|
114 |
+
return torch.device('cuda')
|
115 |
+
else:
|
116 |
+
return torch.device('cpu')
|
117 |
|
118 |
# Get counting model.
|
119 |
@spaces.GPU
|
|
|
152 |
build_func = MODULE_BUILD_FUNCS.get(args.modelname)
|
153 |
model, _, _ = build_func(args)
|
154 |
|
|
|
|
|
155 |
checkpoint = torch.load(args.pretrain_model_path, map_location="cpu")["model"]
|
156 |
model.load_state_dict(checkpoint, strict=False)
|
157 |
|
|
|
162 |
|
163 |
parser = argparse.ArgumentParser("Counting Application", parents=[get_args_parser()])
|
164 |
args = parser.parse_args()
|
165 |
+
|
166 |
+
args.device = get_device()
|
|
|
|
|
|
|
167 |
model, transform = build_model_and_transforms(args)
|
168 |
|
169 |
examples = [
|
|
|
218 |
|
219 |
return inds_to_filter
|
220 |
|
221 |
+
@spaces.GPU
|
222 |
def count(image, text, prompts, state, device):
|
223 |
model.to(device)
|
224 |
+
|
225 |
keywords = "" # do not handle this for now
|
226 |
+
|
227 |
# Handle no prompt case.
|
228 |
if prompts is None:
|
229 |
prompts = {"image": image, "points": []}
|
|
|
245 |
)
|
246 |
|
247 |
ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
|
|
|
|
|
|
|
248 |
logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
|
|
|
249 |
boxes = model_output["pred_boxes"][0]
|
250 |
if len(keywords.strip()) > 0:
|
251 |
box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
|
|
|
321 |
input_image_exemplars, exemplars = transform(prompts["image"], {"exemplars": torch.tensor(exemplars)})
|
322 |
input_image_exemplars = input_image_exemplars.unsqueeze(0).to(device)
|
323 |
exemplars = [exemplars["exemplars"].to(device)]
|
|
|
324 |
|
325 |
with torch.no_grad():
|
326 |
model_output = model(
|
|
|
332 |
)
|
333 |
|
334 |
ind_to_filter = get_ind_to_filter(text, model_output["token"][0].word_ids, keywords)
|
|
|
|
|
|
|
335 |
logits = model_output["pred_logits"].sigmoid()[0][:, ind_to_filter]
|
|
|
336 |
boxes = model_output["pred_boxes"][0]
|
337 |
if len(keywords.strip()) > 0:
|
338 |
box_mask = (logits > CONF_THRESH).sum(dim=-1) == len(ind_to_filter)
|