Weifeng-Chen committed
Commit 59cea48
Parent: a6dcf85

Use the Hugging Face CLIP version
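In substance, the commit swaps the image branch of the README example from OpenAI's standalone `clip` package to the `CLIPModel`/`CLIPProcessor` classes in `transformers`. A minimal before/after sketch of just that swap, using only identifiers that appear in the diff below:

```python
import clip  # OpenAI's pip package (before)
import requests
from PIL import Image
from transformers import CLIPModel, CLIPProcessor  # (after)

url = "http://images.cocodataset.org/val2017/000000039769.jpg"

# before this commit: OpenAI's clip package and its bundled preprocessing
clip_model, preprocess = clip.load("ViT-B/32", device='cpu')
image = preprocess(Image.open(requests.get(url, stream=True).raw)).unsqueeze(0)
image_features = clip_model.encode_image(image)

# after this commit: Hugging Face transformers
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
image = processor(images=Image.open(requests.get(url, stream=True).raw), return_tensors="pt")
image_features = clip_model.get_image_features(**image)
```

Both routes yield a 512-dimensional image embedding for ViT-B/32, so the rest of the example (normalization, logits, softmax) is unchanged.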

Files changed (1): README.md (+9, -5)
README.md CHANGED
@@ -30,20 +30,23 @@ import requests
 import clip
 import torch
 from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
+from transformers import CLIPProcessor, CLIPModel
 import numpy as np
 
+query_texts = ["一只猫", "一只狗",'两只猫', '两只老虎','一只老虎']  # input texts; replace with anything
 # load the Taiyi Chinese text encoder
 text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/Taiyi-CLIP-Roberta-102M-Chinese")
 text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/Taiyi-CLIP-Roberta-102M-Chinese").eval()
-text = text_tokenizer(["一只猫", "一只狗",'两只猫', '两只老虎','一只老虎'], return_tensors='pt', padding=True)['input_ids']
+text = text_tokenizer(query_texts, return_tensors='pt', padding=True)['input_ids']
 
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # can be replaced with any image URL
 # load CLIP's image encoder
-url = "http://images.cocodataset.org/val2017/000000039769.jpg"
-clip_model, preprocess = clip.load("ViT-B/32", device='cpu')
-image = preprocess(Image.open(requests.get(url, stream=True).raw)).unsqueeze(0)
+clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+image = processor(images=Image.open(requests.get(url, stream=True).raw), return_tensors="pt")
 
 with torch.no_grad():
-    image_features = clip_model.encode_image(image)
+    image_features = clip_model.get_image_features(**image)
     text_features = text_encoder(text).logits
     # normalize
     image_features = image_features / image_features.norm(dim=1, keepdim=True)
@@ -54,6 +57,7 @@ with torch.no_grad():
     logits_per_text = logits_per_image.t()
     probs = logits_per_image.softmax(dim=-1).cpu().numpy()
     print(np.around(probs, 3))
+
 ```
 
 # Evaluation
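For convenience, here is the post-commit example assembled into one runnable sketch. The diff elides the README's lines between the two hunks (text-feature normalization and the logit computation), so that stretch is reconstructed below from standard CLIP practice, `logit_scale` times the cosine-similarity matrix, and should be treated as an approximation rather than the author's exact lines; the `PIL`/`requests` imports are likewise assumed from the unchanged top of the README.

```python
import requests
import numpy as np
import torch
from PIL import Image
from transformers import BertForSequenceClassification, BertTokenizer
from transformers import CLIPProcessor, CLIPModel

query_texts = ["一只猫", "一只狗", '两只猫', '两只老虎', '一只老虎']  # input texts; replace with anything

# Taiyi Chinese text encoder: a RoBERTa whose sequence-classification
# logits serve as the text embedding, which is why .logits is used below
text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/Taiyi-CLIP-Roberta-102M-Chinese")
text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/Taiyi-CLIP-Roberta-102M-Chinese").eval()
text = text_tokenizer(query_texts, return_tensors='pt', padding=True)['input_ids']

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # any image URL
# CLIP's image encoder via transformers
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
image = processor(images=Image.open(requests.get(url, stream=True).raw), return_tensors="pt")

with torch.no_grad():
    image_features = clip_model.get_image_features(**image)
    text_features = text_encoder(text).logits
    # normalize both embeddings to unit length
    image_features = image_features / image_features.norm(dim=1, keepdim=True)
    text_features = text_features / text_features.norm(dim=1, keepdim=True)
    # reconstructed (elided in the diff): scaled cosine similarities
    logit_scale = clip_model.logit_scale.exp()
    logits_per_image = logit_scale * image_features @ text_features.t()
    logits_per_text = logits_per_image.t()
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()
    print(np.around(probs, 3))
```

Note that the setup stays asymmetric: the Chinese text side comes from Taiyi's RoBERTa encoder, while the image side remains OpenAI's frozen ViT-B/32; this commit changes only how the image encoder is loaded and called, not its weights.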