Update README.md
README.md CHANGED
@@ -99,10 +99,10 @@ The following hyperparameters were used during training:
 # Both models generate vectors with 768 dimensions.
 from transformers import CLIPVisionModel, RobertaModel, AutoTokenizer, CLIPFeatureExtractor
 # download pre-trained models
-vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/
-preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/
-text_encoder = RobertaModel.from_pretrained('SeyedAli/
-tokenizer = AutoTokenizer.from_pretrained('SeyedAli/
+vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
+preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
+text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
+tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')
 # define input image and input text
 text = 'something'
 image = PIL.Image.open('my_favorite_image.jpg')

@@ -119,8 +119,18 @@ The followings are just some use cases of Persian-CLIP on 25K Unsplash images
 * use pip install -q git+https://github.com/sajjjadayobi/clipfa.git
 ```python
 from clipfa import CLIPDemo
+import torch
+# Both models generate vectors with 768 dimensions.
+from transformers import CLIPVisionModel, RobertaModel, AutoTokenizer, CLIPFeatureExtractor
+# download pre-trained models
+vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
+preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
+text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
+tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')
+
 demo = CLIPDemo(vision_encoder, text_encoder, tokenizer)
 demo.compute_text_embeddings(['متن 3' ,'متن 2' ,'متن 1'])
 demo.compute_image_embeddings(['my_favorite_image.jpg'])
-demo.zero_shot(image_path='my_favorite_image.jpg')
+demo.zero_shot(image_path='my_favorite_image.jpg')
+
 ```
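The updated snippet loads the four components and defines an input text and image, but it stops short of showing how the 768-dimensional vectors mentioned in the comment are produced. The sketch below is one possible way to do that with the objects loaded above; it assumes the pooled outputs (`pooler_output`) of `CLIPVisionModel` and `RobertaModel` serve as the image and text embeddings, and the cosine-similarity comparison is illustrative rather than part of this commit.

```python
import PIL.Image
import torch
from transformers import (AutoTokenizer, CLIPFeatureExtractor,
                          CLIPVisionModel, RobertaModel)

# Model ids copied from the updated README; the encoding and similarity
# steps below are an assumption, not part of the commit itself.
vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')

text = 'something'
image = PIL.Image.open('my_favorite_image.jpg')

with torch.no_grad():
    # image -> 768-dim vector via the vision transformer's pooled output
    pixel_values = preprocessor(images=image, return_tensors='pt').pixel_values
    image_embedding = vision_encoder(pixel_values=pixel_values).pooler_output

    # text -> 768-dim vector via the RoBERTa encoder's pooled output
    tokens = tokenizer(text, return_tensors='pt')
    text_embedding = text_encoder(**tokens).pooler_output

# cosine similarity between the (1, 768) image and text embeddings
score = torch.nn.functional.cosine_similarity(image_embedding, text_embedding)
print(score.item())
```

If the `clipfa` package from the second hunk is installed, the same `vision_encoder`, `text_encoder`, and `tokenizer` can be passed straight to `CLIPDemo` as shown in the diff.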