weifeng chen
committed on
Commit
•
56b40e2
1
Parent(s):
0e7276c
update
Browse files
- README.md +7 -5
- pytorch_model.bin +1 -1
README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2 |
license: apache-2.0
|
3 |
# inference: false
|
4 |
# pipeline_tag: zero-shot-image-classification
|
|
|
5 |
|
6 |
# inference:
|
7 |
# parameters:
|
@@ -9,6 +10,7 @@ tags:
|
|
9 |
- clip
|
10 |
- zh
|
11 |
- image-text
|
|
|
12 |
---
|
13 |
|
14 |
# Model Details
|
@@ -31,8 +33,8 @@ from transformers import BertForSequenceClassification, BertConfig, BertTokenize
|
|
31 |
import numpy as np
|
32 |
|
33 |
# 加载TaiYi 中文 text encoder
|
34 |
-
text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-Chinese")
|
35 |
-
text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-Chinese").eval()
|
36 |
text = text_tokenizer(["一只猫", "一只狗",'两只猫', '两只老虎','一只老虎'], return_tensors='pt', padding=True)['input_ids']
|
37 |
|
38 |
# 加载CLIP的image encoder
|
@@ -59,14 +61,14 @@ with torch.no_grad():
|
|
59 |
### Zero-Shot Classification
|
60 |
| model | dataset | Top1 | Top5 |
|
61 |
| ---- | ---- | ---- | ---- |
|
62 |
-
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese |
|
63 |
|
64 |
-
### Text-to-Image Retrieval
|
65 |
|
66 |
| model | dataset | Top1 | Top5 | Top10 |
|
67 |
| ---- | ---- | ---- | ---- | ---- |
|
68 |
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese | COCO-CN | 25.47 % | 51.70% | 63.07% |
|
69 |
-
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese | wukong50k |
|
70 |
|
71 |
|
72 |
# Citation
|
|
|
2 |
license: apache-2.0
|
3 |
# inference: false
|
4 |
# pipeline_tag: zero-shot-image-classification
|
5 |
+
pipeline_tag: feature-extraction
|
6 |
|
7 |
# inference:
|
8 |
# parameters:
|
|
|
10 |
- clip
|
11 |
- zh
|
12 |
- image-text
|
13 |
+
- feature-extraction
|
14 |
---
|
15 |
|
16 |
# Model Details
|
|
|
33 |
import numpy as np
|
34 |
|
35 |
# 加载TaiYi 中文 text encoder
|
36 |
+
text_tokenizer = BertTokenizer.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese")
|
37 |
+
text_encoder = BertForSequenceClassification.from_pretrained("IDEA-CCNL/TaiYi-CLIP-Roberta-102M-Chinese").eval()
|
38 |
text = text_tokenizer(["一只猫", "一只狗",'两只猫', '两只老虎','一只老虎'], return_tensors='pt', padding=True)['input_ids']
|
39 |
|
40 |
# 加载CLIP的image encoder
|
|
|
61 |
### Zero-Shot Classification
|
62 |
| model | dataset | Top1 | Top5 |
|
63 |
| ---- | ---- | ---- | ---- |
|
64 |
+
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese | ImageNet1k-CN | 41.00% | 69.19% |
|
65 |
|
66 |
+
### Zero-Shot Text-to-Image Retrieval
|
67 |
|
68 |
| model | dataset | Top1 | Top5 | Top10 |
|
69 |
| ---- | ---- | ---- | ---- | ---- |
|
70 |
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese | COCO-CN | 25.47 % | 51.70% | 63.07% |
|
71 |
+
| TaiYi-CLIP-ViT-B-32-Roberta-Chinese | wukong50k | 48.67% | 81.77% | 90.09% |
|
72 |
|
73 |
|
74 |
# Citation
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 410713709
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:53ec5505ee1ce25f970c5ce488bbd49b5727c36faa2132de0f2cf82dddbf3e37
|
3 |
size 410713709
|