bbexx committed
Commit e7cb1dc
1 Parent(s): e3885ae

UPDATE README.md

Files changed (1)
  1. README.md +11 -5
README.md CHANGED
@@ -11,21 +11,27 @@ Load from HuggingFace:
 import torch
 from PIL import Image
 from transformers import AutoModel, CLIPImageProcessor
+device = "cuda" if torch.cuda.is_available() else "cpu"
 
 model = AutoModel.from_pretrained(
     'jienengchen/ViTamin-XL-384px',
-    torch_dtype=torch.bfloat16,
-    low_cpu_mem_usage=True,
-    trust_remote_code=True).cuda().eval()
+    trust_remote_code=True).to(device).eval()
 
 image = Image.open('./image.png').convert('RGB')
-
 image_processor = CLIPImageProcessor.from_pretrained('jienengchen/ViTamin-XL-384px')
 
 pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
 pixel_values = pixel_values.to(torch.bfloat16).cuda()
 
-outputs = model(pixel_values)
+tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K')
+text = tokenizer(["a photo of vitamin", "a dog", "a cat"]).to(device)
+
+with torch.no_grad(), torch.cuda.amp.autocast():
+    image_features, text_features, logit_scale = model(pixel_values, text)
+    text_probs = (100.0 * image_features @ text_features.to(torch.float).T).softmax(dim=-1)
+
+print("Label probs:", text_probs)
+
 ```
 
 ## Main Results with CLIP Pre-training on DataComp-1B
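
Even after this commit, the README snippet will not run verbatim: it calls `open_clip.get_tokenizer` without ever importing `open_clip`, and it still moves `pixel_values` with a hardcoded `.cuda()` although the commit introduces a CPU fallback via `device`. Below is a minimal runnable consolidation of the committed example, a sketch rather than the model card's official code: it assumes the `open_clip_torch` package is installed, keeps the `model(pixel_values, text)` forward signature and return values shown in the README, and swaps `torch.cuda.amp.autocast()` for the device-agnostic `torch.autocast` so the CPU path also works.

```python
import torch
import open_clip  # missing from the committed snippet; provided by open_clip_torch
from PIL import Image
from transformers import AutoModel, CLIPImageProcessor

device = "cuda" if torch.cuda.is_available() else "cpu"

model = AutoModel.from_pretrained(
    'jienengchen/ViTamin-XL-384px',
    trust_remote_code=True).to(device).eval()

image = Image.open('./image.png').convert('RGB')
image_processor = CLIPImageProcessor.from_pretrained('jienengchen/ViTamin-XL-384px')

pixel_values = image_processor(images=image, return_tensors='pt').pixel_values
pixel_values = pixel_values.to(torch.bfloat16).to(device)  # .to(device), not hardcoded .cuda()

tokenizer = open_clip.get_tokenizer('hf-hub:laion/CLIP-ViT-L-14-DataComp.XL-s13B-b90K')
text = tokenizer(["a photo of vitamin", "a dog", "a cat"]).to(device)

# torch.autocast (instead of torch.cuda.amp.autocast) also covers the CPU path.
with torch.no_grad(), torch.autocast(device_type=device, dtype=torch.bfloat16):
    # Per the committed README, the remote-code model returns
    # (image_features, text_features, logit_scale).
    image_features, text_features, logit_scale = model(pixel_values, text)
    # Scaled image-text similarities softmaxed into per-label probabilities.
    text_probs = (100.0 * image_features @ text_features.to(torch.float).T).softmax(dim=-1)

print("Label probs:", text_probs)
```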