Nice work :)


I love your work

Could you please tell me how to speed up the prediction time?

# predict
with torch.no_grad():
  outputs = model(**inputs)

I mean, how can I run the prediction on the GPU to speed it up?

CAMPUS INSTITUTE DATA SCIENCE (CIDAS) org

You can move both the model and the inputs to the GPU, like so:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"

model.to(device)

inputs = {k: v.to(device) for k, v in inputs.items()}

# predict
with torch.no_grad():
  outputs = model(**inputs)
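
A quick sanity check that everything actually landed on the GPU (a minimal sketch, assuming a CUDA device is available):

# both the model parameters and the input tensors should report a cuda device
print(next(model.parameters()).device)
print({k: v.device for k, v in inputs.items()})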

Thanks a lot.

I preprocessed the prompts and the image, but I got the following error.

ValueError                                Traceback (most recent call last)
<ipython-input-59-6c4240d4c8a3> in <module>
     11 # prompts = np.array(prompts)
     12 # prompts = torch.from_numpy(prompts)
---> 13 inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

2 frames
/usr/local/lib/python3.8/dist-packages/transformers/models/clipseg/processing_clipseg.py in __call__(self, text, images, return_tensors, **kwargs)
     81 
     82         if text is not None:
---> 83             encoding = self.tokenizer(text, return_tensors=return_tensors, **kwargs)
     84 
     85         if images is not None:

/usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py in __call__(self, text, text_pair, text_target, text_pair_target, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
   2518             if not self._in_target_context_manager:
   2519                 self._switch_to_input_mode()
-> 2520             encodings = self._call_one(text=text, text_pair=text_pair, **all_kwargs)
   2521         if text_target is not None:
   2522             self._switch_to_target_mode()

/usr/local/lib/python3.8/dist-packages/transformers/tokenization_utils_base.py in _call_one(self, text, text_pair, add_special_tokens, padding, truncation, max_length, stride, is_split_into_words, pad_to_multiple_of, return_tensors, return_token_type_ids, return_attention_mask, return_overflowing_tokens, return_special_tokens_mask, return_offsets_mapping, return_length, verbose, **kwargs)
   2576 
   2577         if not _is_valid_text_input(text):
-> 2578             raise ValueError(
   2579                 "text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) "
   2580                 "or `List[List[str]]` (batch of pretokenized examples)."

ValueError: text input must of type `str` (single example), `List[str]` (batch or single pretokenized example) or `List[List[str]]` (batch of pretokenized examples).

Here is my code.

from transformers import CLIPTokenizer
tokenizer = CLIPTokenizer.from_pretrained("CIDAS/clipseg-rd64-refined")
prompts = ["orange", "violet", "green", "black"]
prompts = tokenizer(prompts, padding=True, return_tensors="pt")

from torchvision import transforms
to_tensor = transforms.ToTensor()
image = to_tensor(image)

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
prompts.to(device)
image.to(device)

inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

Then I got the error.
Could you please help me?

Hi, I'm new to NLP, so I don't know how to feed the prompts into the processor.
I think the processor requires a list of strings, but since I want to use the GPU, I thought the prompts had to become tensors first. I'm confused about this.
Could you please help me?
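
For what it's worth, the processor already wraps both the tokenizer and the image preprocessing, so the usual pattern is to pass plain Python strings and a PIL image, and to move only the tensors it returns to the GPU. A minimal sketch, assuming the CIDAS/clipseg-rd64-refined checkpoint and a hypothetical image file example.jpg:

import torch
from PIL import Image
from transformers import CLIPSegProcessor, CLIPSegForImageSegmentation

processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
model = CLIPSegForImageSegmentation.from_pretrained("CIDAS/clipseg-rd64-refined")

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

prompts = ["orange", "violet", "green", "black"]
image = Image.open("example.jpg")  # hypothetical path; any PIL image works

# the processor tokenizes the strings and resizes/normalizes the image itself,
# so it expects raw strings and PIL images rather than pre-built tensors
inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

# only the returned tensors are moved to the GPU
inputs = {k: v.to(device) for k, v in inputs.items()}

with torch.no_grad():
    outputs = model(**inputs)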

I see.
Should it be as follows?

inputs = processor(text=prompts, images=[image] * len(prompts), padding="max_length", return_tensors="pt")

inputs.to(device)

with torch.no_grad():
    outputs = model(**inputs)

Because that speeds it up.
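
As a side note, once the forward pass has run on the GPU, the predicted masks can be pulled back to the CPU for visualization; a minimal sketch, assuming outputs comes from CLIPSegForImageSegmentation as above:

# one low-resolution mask per prompt; sigmoid turns the logits into probabilities
masks = torch.sigmoid(outputs.logits).cpu()
print(masks.shape)  # roughly (number of prompts, height, width)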
