Commit 625be1c
Parent(s): 42a50d2
Update README.md
README.md CHANGED
@@ -71,4 +71,24 @@ python -W ignore run_clip.py --model_name_or_path openai/clip-vit-large-patch14
 --logging_dir ./pmc_vit_logs \
 --save_total_limit 2 \
 --report_to tensorboard
+```
+
+### usage
+```python
+from PIL import Image
+import requests
+
+from transformers import CLIPProcessor, CLIPModel
+
+model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
+processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
+
+url = "http://images.cocodataset.org/val2017/000000039769.jpg"
+image = Image.open(requests.get(url, stream=True).raw)
+
+inputs = processor(text=["a photo of a cat", "a photo of a dog"], images=image, return_tensors="pt", padding=True)
+
+outputs = model(**inputs)
+logits_per_image = outputs.logits_per_image # this is the image-text similarity score
+probs = logits_per_image.softmax(dim=1) # we can take the softmax to get the label probabilities
 ```
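
For reference, the snippet this commit adds is the standard zero-shot image–text matching example from the transformers CLIP documentation: `probs` is a `(1, 2)` torch tensor holding the probability that the image matches each candidate caption. A minimal sketch of reading off the prediction follows; it is an illustrative continuation, not part of the commit, and the `labels` list and print format are assumptions that simply mirror the captions passed to the processor:

```python
# Illustrative continuation of the added snippet (not part of the commit).
# probs has shape (1, 2): one row per image, one column per candidate caption.
labels = ["a photo of a cat", "a photo of a dog"]
best = probs.argmax(dim=1).item()  # index of the highest-probability caption
print(f"predicted: {labels[best]} (p={probs[0, best].item():.3f})")
```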