weifeng chen committed on
Commit
56b40e2
1 Parent(s): 0e7276c
Files changed (2)
  1. README.md +7 -5
  2. pytorch_model.bin +1 -1
README.md CHANGED
@@ -2,6 +2,7 @@
  license: apache-2.0
  # inference: false
  # pipeline_tag: zero-shot-image-classification
+ pipeline_tag: feature-extraction
 
  # inference:
  #  parameters:
@@ -9,6 +10,7 @@ tags:
  - clip
  - zh
  - image-text
+ - feature-extraction
  ---
 
  # Model Details
@@ -31,8 +33,8 @@ from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
  import numpy as np
 
  # Load the TaiYi Chinese text encoder
- text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-Chinese")
- text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-Chinese").eval()
+ text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese")
+ text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese").eval()
  text = text_tokenizer(["一只猫", "一只狗", "两只猫", "两只老虎", "一只老虎"], return_tensors='pt', padding=True)['input_ids']
 
  # Load the CLIP image encoder
@@ -59,14 +61,14 @@ with torch.no_grad():
  ### Zero-Shot Classification
  | model | dataset | Top1 | Top5 |
  | ---- | ---- | ---- | ---- |
- | TaiYi-CLIP-ViT-B-32-Roberta-Chinese | ImageNet-CN | 40.64% | 69.16% |
+ | TaiYi-CLIP-ViT-B-32-Roberta-Chinese | ImageNet1k-CN | 41.00% | 69.19% |
 
- ### Text-to-Image Retrieval
+ ### Zero-Shot Text-to-Image Retrieval
 
  | model | dataset | Top1 | Top5 | Top10 |
  | ---- | ---- | ---- | ---- | ---- |
  | TaiYi-CLIP-ViT-B-32-Roberta-Chinese | COCO-CN | 25.47% | 51.70% | 63.07% |
- | TaiYi-CLIP-ViT-B-32-Roberta-Chinese | wukong50k | 47.64% | 80.97% | 89.51% |
+ | TaiYi-CLIP-ViT-B-32-Roberta-Chinese | wukong50k | 48.67% | 81.77% | 90.09% |
 
 
  # Citation
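For context, the README hunk above only shows the text-encoder side of the pipeline. A minimal end-to-end sketch of the zero-shot classification flow it describes might look like the following; the `openai/clip-vit-base-patch32` image encoder, the `CLIPModel`/`CLIPProcessor` calls, the sample COCO image URL, and the use of the classification-head `.logits` as text features are assumptions for illustration, not part of this diff.

```python
import torch
import numpy as np
import requests
from PIL import Image
from transformers import BertForSequenceClassification, BertTokenizer, CLIPModel, CLIPProcessor

# Chinese text encoder from this repo (classification-head logits assumed to act as text features)
text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese")
text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese").eval()

# Image encoder: an off-the-shelf CLIP ViT-B/32 checkpoint (assumed)
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").eval()
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

labels = ["一只猫", "一只狗", "两只猫", "两只老虎", "一只老虎"]  # "a cat", "a dog", "two cats", "two tigers", "a tiger"
text = text_tokenizer(labels, return_tensors="pt", padding=True)["input_ids"]

url = "http://images.cocodataset.org/val2017/000000039769.jpg"  # example image of two cats (assumed sample)
image = processor(images=Image.open(requests.get(url, stream=True).raw), return_tensors="pt")

with torch.no_grad():
    image_features = clip_model.get_image_features(**image)
    text_features = text_encoder(text).logits
    # Normalize and score by cosine similarity, as in the original CLIP
    image_features = image_features / image_features.norm(dim=1, keepdim=True)
    text_features = text_features / text_features.norm(dim=1, keepdim=True)
    logits_per_image = 100.0 * image_features @ text_features.t()
    probs = logits_per_image.softmax(dim=-1).cpu().numpy()

print(np.around(probs, 3))  # ideally the highest probability lands on "两只猫" (two cats)
```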
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2c409d7373abd00263b4a4af3adbf38468636c51d10675b2a4efb7d05a0d5115
+ oid sha256:53ec5505ee1ce25f970c5ce488bbd49b5727c36faa2132de0f2cf82dddbf3e37
  size 410713709
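The Top1/Top5/Top10 figures updated in the README tables above are standard top-k hit rates over a query-candidate similarity matrix. As a generic reference only, not the authors' evaluation script, a sketch of how such metrics are commonly computed:

```python
import torch

def topk_accuracy(similarity: torch.Tensor, targets: torch.Tensor, ks=(1, 5, 10)):
    """Fraction of queries whose correct match appears among the top-k most similar candidates.

    similarity: (num_queries, num_candidates) score matrix, e.g. text-to-image cosine similarities.
    targets:    (num_queries,) index of the correct candidate for each query.
    """
    results = {}
    max_k = max(ks)
    # Indices of the max_k highest-scoring candidates per query
    topk = similarity.topk(max_k, dim=1).indices          # (num_queries, max_k)
    hits = topk.eq(targets.unsqueeze(1))                  # (num_queries, max_k) boolean
    for k in ks:
        results[f"Top{k}"] = hits[:, :k].any(dim=1).float().mean().item()
    return results

# Toy usage: 4 text queries scored against 6 candidate images
sim = torch.randn(4, 6)
gt = torch.tensor([0, 3, 2, 5])
print(topk_accuracy(sim, gt, ks=(1, 5)))
```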