Commit c59e72d committed by Tom Aarsen
1 Parent(s): af2246f

Add custom Sentence Transformer module

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "word_embedding_dimension": 768,
+     "pooling_mode_cls_token": true,
+     "pooling_mode_mean_tokens": false,
+     "pooling_mode_max_tokens": false,
+     "pooling_mode_mean_sqrt_len_tokens": false,
+     "pooling_mode_weightedmean_tokens": false,
+     "pooling_mode_lasttoken": false,
+     "include_prompt": true
+ }
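This pooling configuration keeps only the CLS token, so each input is represented by the 768-dimensional embedding of its first token. A minimal sketch of how sentence-transformers materializes this module, assuming the Pooling constructor and Pooling.load behave as in recent releases:

    from sentence_transformers.models import Pooling

    # Load the module from the directory containing the config.json above
    pooling = Pooling.load("1_Pooling")

    # Equivalent manual construction: CLS-token pooling over 768-dim embeddings
    pooling = Pooling(word_embedding_dimension=768, pooling_mode="cls")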
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+ {
+     "__version__": {
+         "sentence_transformers": "3.1.0.dev0",
+         "transformers": "4.41.2",
+         "pytorch": "2.3.1+cu121"
+     },
+     "prompts": {},
+     "default_prompt_name": null,
+     "similarity_fn_name": "cosine"
+ }
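This file records the library versions the model was saved with and selects cosine as the similarity function; no prompts are defined. A short sketch of where that setting shows up after loading (the repo id is a placeholder, and trust_remote_code is assumed to be required because of the custom module added below):

    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("user/vision-model", trust_remote_code=True)  # hypothetical repo id
    print(model.similarity_fn_name)  # "cosine", per config_sentence_transformers.json
    # model.similarity(embeddings_a, embeddings_b) then scores pairs with cosine similarity.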
modules.json ADDED
@@ -0,0 +1,14 @@
+ [
+     {
+         "idx": 0,
+         "name": "0",
+         "path": "",
+         "type": "vision_transformer.VisionTransformer"
+     },
+     {
+         "idx": 1,
+         "name": "1",
+         "path": "1_Pooling",
+         "type": "sentence_transformers.models.Pooling"
+     }
+ ]
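modules.json wires the two-stage pipeline together: module 0 is the custom VisionTransformer imported from vision_transformer.py at the repository root (path ""), and module 1 is the standard Pooling module saved under 1_Pooling. A sketch of the equivalent manual assembly, with a hypothetical ViT checkpoint standing in for the real backbone (the local import assumes vision_transformer.py is on the Python path):

    from sentence_transformers import SentenceTransformer
    from sentence_transformers.models import Pooling
    from vision_transformer import VisionTransformer  # the file added in this commit

    vit = VisionTransformer("google/vit-base-patch16-224")  # hypothetical backbone
    pooling = Pooling(vit.get_word_embedding_dimension(), pooling_mode="cls")
    model = SentenceTransformer(modules=[vit, pooling])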
vision_transformer.py ADDED
@@ -0,0 +1,69 @@
+ from typing import Any, Dict, List, Optional, Tuple, Union
+
+ import torch
+ from torch import nn
+ from transformers import AutoConfig, AutoModel, AutoProcessor
+
+
+ class VisionTransformer(nn.Module):
+     """Hugging Face AutoModel wrapper that generates token (patch) embeddings
+     from images. Loads the correct vision model class, e.g. ViT, for the given
+     checkpoint.
+
+     Args:
+         model_name_or_path: Hugging Face model name or path
+             (https://huggingface.co/models)
+         model_args: Keyword arguments passed to the Hugging Face
+             Transformers model
+         tokenizer_args: Keyword arguments passed to the Hugging Face
+             Transformers processor
+         config_args: Keyword arguments passed to the Hugging Face
+             Transformers config
+         cache_dir: Cache dir for Hugging Face Transformers to store/load
+             models
+     """
+
+     def __init__(
+         self,
+         model_name_or_path: str,
+         model_args: Optional[Dict[str, Any]] = None,
+         tokenizer_args: Optional[Dict[str, Any]] = None,
+         config_args: Optional[Dict[str, Any]] = None,
+         cache_dir: Optional[str] = None,
+     ) -> None:
+         super().__init__()
+         if model_args is None:
+             model_args = {}
+         if tokenizer_args is None:
+             tokenizer_args = {}
+         if config_args is None:
+             config_args = {}
+         # Keys of attributes serialized by get_config_dict(); none are needed here
+         self.config_keys: List[str] = []
+
+         self.config = AutoConfig.from_pretrained(model_name_or_path, **config_args, cache_dir=cache_dir)
+         self.model = AutoModel.from_pretrained(model_name_or_path, config=self.config, **model_args, cache_dir=cache_dir)
+         self.processor = AutoProcessor.from_pretrained(model_name_or_path, **tokenizer_args, cache_dir=cache_dir)
+
+     def forward(self, features: Dict[str, torch.Tensor]) -> Dict[str, torch.Tensor]:
+         """Computes the patch embeddings and stores them under "token_embeddings"."""
+         output_states = self.model(pixel_values=features["pixel_values"], return_dict=False)[0]
+         features.update({"token_embeddings": output_states})
+         return features
+
+     def get_word_embedding_dimension(self) -> int:
+         return self.config.hidden_size
+
+     def tokenize(
+         self, texts: Union[List[str], List[Dict], List[Tuple[str, str]]], padding: Union[str, bool] = True
+     ) -> Dict[str, torch.Tensor]:
+         # For this vision module the inputs are images rather than texts; the
+         # processor converts them to pixel values. The padding argument is unused.
+         return self.processor(texts, return_tensors="pt")
+
+     def get_config_dict(self) -> Dict[str, Any]:
+         return {key: self.__dict__[key] for key in self.config_keys}
+
+     def save(self, output_path: str, safe_serialization: bool = True) -> None:
+         self.model.save_pretrained(output_path, safe_serialization=safe_serialization)
+         self.processor.save_pretrained(output_path)
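With all four files in place, the model can be used like any other Sentence Transformer, except that it encodes images. A usage sketch, assuming the underlying processor accepts PIL images; the repo id and image path are placeholders:

    from PIL import Image
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("user/vision-model", trust_remote_code=True)  # hypothetical repo id

    embedding = model.encode(Image.open("example.jpg"))  # placeholder image path
    print(embedding.shape)  # (768,) given the CLS pooling config above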