Update README.md
README.md CHANGED
@@ -99,10 +99,10 @@ The following hyperparameters were used during training:
 # Both models generate vectors with 768 dimensions.
 from transformers import CLIPVisionModel, RobertaModel, AutoTokenizer, CLIPFeatureExtractor
 # download pre-trained models
-vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/
-preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/
-text_encoder = RobertaModel.from_pretrained('SeyedAli/
-tokenizer = AutoTokenizer.from_pretrained('SeyedAli/
+vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
+preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
+text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
+tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')
 # define input image and input text
 text = 'something'
 image = PIL.Image.open('my_favorite_image.jpg')

@@ -119,8 +119,18 @@ The followings are just some use cases of Persian-CLIP on 25K Unsplash images
 * use pip install -q git+https://github.com/sajjjadayobi/clipfa.git
 ```python
 from clipfa import CLIPDemo
+import torch
+# Both models generate vectors with 768 dimensions.
+from transformers import CLIPVisionModel, RobertaModel, AutoTokenizer, CLIPFeatureExtractor
+# download pre-trained models
+vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
+preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
+text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
+tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')
+
 demo = CLIPDemo(vision_encoder, text_encoder, tokenizer)
 demo.compute_text_embeddings(['متن 3' ,'متن 2' ,'متن 1'])
 demo.compute_image_embeddings(['my_favorite_image.jpg'])
-demo.zero_shot(image_path='my_favorite_image.jpg')
+demo.zero_shot(image_path='my_favorite_image.jpg')
+
 ```
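The updated snippet loads the four components and defines an input text and image, but it stops short of showing how the 768-dimensional vectors mentioned in the comment are produced. The sketch below is one possible way to do that with the objects loaded above; it assumes the pooled outputs (`pooler_output`) of `CLIPVisionModel` and `RobertaModel` serve as the image and text embeddings, and the cosine-similarity comparison is illustrative rather than part of this commit.

```python
import PIL.Image
import torch
from transformers import (AutoTokenizer, CLIPFeatureExtractor,
                          CLIPVisionModel, RobertaModel)

# Model ids copied from the updated README; the encoding and similarity
# steps below are an assumption, not part of the commit itself.
vision_encoder = CLIPVisionModel.from_pretrained('SeyedAli/Persian-CLIP')
preprocessor = CLIPFeatureExtractor.from_pretrained('SeyedAli/Persian-CLIP')
text_encoder = RobertaModel.from_pretrained('SeyedAli/Persian-CLIP')
tokenizer = AutoTokenizer.from_pretrained('SeyedAli/Persian-CLIP')

text = 'something'
image = PIL.Image.open('my_favorite_image.jpg')

with torch.no_grad():
    # image -> 768-dim vector via the vision transformer's pooled output
    pixel_values = preprocessor(images=image, return_tensors='pt').pixel_values
    image_embedding = vision_encoder(pixel_values=pixel_values).pooler_output

    # text -> 768-dim vector via the RoBERTa encoder's pooled output
    tokens = tokenizer(text, return_tensors='pt')
    text_embedding = text_encoder(**tokens).pooler_output

# cosine similarity between the (1, 768) image and text embeddings
score = torch.nn.functional.cosine_similarity(image_embedding, text_embedding)
print(score.item())
```

If the `clipfa` package from the second hunk is installed, the same `vision_encoder`, `text_encoder`, and `tokenizer` can be passed straight to `CLIPDemo` as shown in the diff.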