dandelin
/

vilt-b32-mlm

Inference Endpoints

Model card · Files and versions · Community

Wa2erGo committed on Aug 30, 2022

Commit

360475e

·

1 Parent(s): 9507e9c

Update README.md

Fix undefined names "device" and "pixel_values" in the usage example

Files changed (1) hide show

README.md +3 -1

README.md CHANGED Viewed

@@ -22,6 +22,8 @@ import requests
 from PIL import Image
 import re
 url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
 text = "a bunch of [MASK] laying on a [MASK]."
@@ -44,7 +46,7 @@ with torch.no_grad():
         encoded = processor.tokenizer(inferred_token)
         input_ids = torch.tensor(encoded.input_ids).to(device)
         encoded = encoded["input_ids"][0][1:-1]
-        outputs = model(input_ids=input_ids, pixel_values=pixel_values)
         mlm_logits = outputs.logits[0]  # shape (seq_len, vocab_size)
         # only take into account text features (minus CLS and SEP token)
         mlm_logits = mlm_logits[1 : input_ids.shape[1] - 1, :]

 from PIL import Image
 import re
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 url = "http://images.cocodataset.org/val2017/000000039769.jpg"
 image = Image.open(requests.get(url, stream=True).raw)
 text = "a bunch of [MASK] laying on a [MASK]."
         encoded = processor.tokenizer(inferred_token)
         input_ids = torch.tensor(encoded.input_ids).to(device)
         encoded = encoded["input_ids"][0][1:-1]
+        outputs = model(input_ids=input_ids, pixel_values=encoding.pixel_values)
         mlm_logits = outputs.logits[0]  # shape (seq_len, vocab_size)
         # only take into account text features (minus CLS and SEP token)
         mlm_logits = mlm_logits[1 : input_ids.shape[1] - 1, :]