marcusinthesky committed
Commit 0f983db · 1 Parent(s): b61121e

Upload model

Files changed (2):
  1. config.json (+4 -3)
  2. modelling.py (+8 -2)
config.json CHANGED
@@ -1,12 +1,13 @@
 {
-  "_commit_hash": "092b10bbf4bc3d008a454897fba1141fb67c0b9e",
-  "_name_or_path": "flavour/vtde-dinov2-small-jina-embedding-t-en-v1",
+  "_commit_hash": "b61121e506fb7330d5fe093287b3ab12b3c8e564",
+  "_name_or_path": "flavour/clippy-dinov2-small-jina-embedding-t-en-v1",
   "architectures": [
     "VTDEModel"
   ],
   "auto_map": {
     "AutoConfig": "modelling.VTDEConfig",
-    "AutoModelForZeroShotImageClassification": "modelling.VTDEModel"
+    "AutoModel": "modelling.VTDEModel",
+    "AutoModelForZeroShotImageClassification": "flavour/clippy-dinov2-small-jina-embedding-t-en-v1--modelling.VTDEModel"
   },
   "logit_scale_init_value": 2.6592,
   "model_type": "vtde",
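The updated auto_map now maps AutoModel to the custom VTDEModel shipped in modelling.py, alongside the existing AutoModelForZeroShotImageClassification entry. A minimal loading sketch, assuming you opt in to the repo's remote code (the model id is taken from _name_or_path above):

from transformers import AutoConfig, AutoModel

# The custom VTDEConfig/VTDEModel classes live in the repo's modelling.py,
# so trust_remote_code=True is required to resolve the auto_map entries.
config = AutoConfig.from_pretrained(
    "flavour/clippy-dinov2-small-jina-embedding-t-en-v1",
    trust_remote_code=True,
)
model = AutoModel.from_pretrained(
    "flavour/clippy-dinov2-small-jina-embedding-t-en-v1",
    trust_remote_code=True,
)
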
modelling.py CHANGED
@@ -6,8 +6,9 @@ __all__ = ['VTDEConfig', 'VTDEModel']
 # %% ../notebooks/12_modelling.ipynb 1
 from transformers.models.clip.modeling_clip import CLIPOutput, clip_loss
 from typing import Optional, Tuple, Union
-from transformers import VisionTextDualEncoderConfig, AutoModel, PreTrainedModel, VisionTextDualEncoderModel
+from transformers import PreTrainedModel, VisionTextDualEncoderModel
 import torch
+from transformers import VisionTextDualEncoderConfig
 
 class VTDEConfig(VisionTextDualEncoderConfig):
     model_type = "vtde"
@@ -20,11 +21,16 @@ class VTDEConfig(VisionTextDualEncoderConfig):
         pooling_mode in ['mean', 'max', 'cls']
         https://arxiv.org/pdf/2210.09996.pdf
         https://github.com/kahnchana/clippy/blob/3c102c29c32f7c66c6e52e09b795fe9c061bbb03/src/open_clip/hf_model.py#L56
+        also
+        https://arxiv.org/pdf/2301.07836.pdf
         """
         self.text_pooling_mode = text_pooling_mode
         self.vision_pooling_mode = vision_pooling_mode
         super().__init__(projection_dim, logit_scale_init_value, **kwargs)
 
+VTDEConfig.register_for_auto_class()
+
+
 class VTDEModel(VisionTextDualEncoderModel):
     config_class = VTDEConfig
     base_model_prefix = "vtde"
@@ -170,5 +176,5 @@ class VTDEModel(VisionTextDualEncoderModel):
             vision_model_output=image_embeds,
         )
 
-VTDEConfig.register_for_auto_class()
+
 VTDEModel.register_for_auto_class("AutoModel")
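
VTDEConfig.register_for_auto_class() now runs right after the config class is defined, with VTDEModel still registered for AutoModel at the end of the file. For reference, a sketch of using the classes directly, assuming modelling.py from this repo is on the import path; the pooling values come from the docstring above, and the specific choices here are illustrative:

from modelling import VTDEConfig, VTDEModel  # the module changed in this commit

repo = "flavour/clippy-dinov2-small-jina-embedding-t-en-v1"

# Keyword arguments passed to from_pretrained override the stored config values,
# so the pooling modes documented in VTDEConfig can be chosen at load time.
config = VTDEConfig.from_pretrained(
    repo,
    text_pooling_mode="mean",   # one of ['mean', 'max', 'cls'] per the docstring
    vision_pooling_mode="cls",
)
model = VTDEModel.from_pretrained(repo, config=config)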