arjunsama committed on
Commit
a5fc726
1 Parent(s): a4e2a8c

Add new SentenceTransformer model.

Browse files
1_Pooling/config.json CHANGED
@@ -5,6 +5,5 @@
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
8
- "pooling_mode_lasttoken": false,
9
- "include_prompt": true
10
  }
 
5
  "pooling_mode_max_tokens": false,
6
  "pooling_mode_mean_sqrt_len_tokens": false,
7
  "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false
 
9
  }
README.md CHANGED
@@ -7,7 +7,7 @@ tags:
7
  - sentence-similarity
8
  - transformers
9
  datasets:
10
- - embedding-data/QQP_triplets
11
  ---
12
 
13
  # arjunsama/mine
@@ -82,12 +82,47 @@ print(sentence_embeddings)
82
  For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=arjunsama/mine)
83
 
84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  ## Full Model Architecture
87
  ```
88
  SentenceTransformer(
89
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
90
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
91
  )
92
  ```
93
 
 
7
  - sentence-similarity
8
  - transformers
9
  datasets:
10
+ - embedding-data/sentence-compression
11
  ---
12
 
13
  # arjunsama/mine
 
82
  For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=arjunsama/mine)
83
 
84
 
85
+ ## Training
86
+ The model was trained with the parameters:
87
+
88
+ **DataLoader**:
89
+
90
+ `torch.utils.data.dataloader.DataLoader` of length 5469 with parameters:
91
+ ```
92
+ {'batch_size': 64, 'sampler': 'torch.utils.data.sampler.RandomSampler', 'batch_sampler': 'torch.utils.data.sampler.BatchSampler'}
93
+ ```
94
+
95
+ **Loss**:
96
+
97
+ `sentence_transformers.losses.MultipleNegativesRankingLoss.MultipleNegativesRankingLoss` with parameters:
98
+ ```
99
+ {'scale': 20.0, 'similarity_fct': 'cos_sim'}
100
+ ```
101
+
102
+ Parameters of the fit()-Method:
103
+ ```
104
+ {
105
+ "epochs": 10,
106
+ "evaluation_steps": 0,
107
+ "evaluator": "NoneType",
108
+ "max_grad_norm": 1,
109
+ "optimizer_class": "<class 'torch.optim.adamw.AdamW'>",
110
+ "optimizer_params": {
111
+ "lr": 2e-05
112
+ },
113
+ "scheduler": "WarmupLinear",
114
+ "steps_per_epoch": null,
115
+ "warmup_steps": 5469,
116
+ "weight_decay": 0.01
117
+ }
118
+ ```
119
+
120
 
121
  ## Full Model Architecture
122
  ```
123
  SentenceTransformer(
124
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
125
+ (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False})
126
  )
127
  ```
128
 
config.json CHANGED
@@ -1,30 +1,25 @@
1
  {
2
- "_name_or_path": "avsolatorio/GIST-small-Embedding-v0",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
 
8
  "hidden_act": "gelu",
9
  "hidden_dropout_prob": 0.1,
10
  "hidden_size": 384,
11
- "id2label": {
12
- "0": "LABEL_0"
13
- },
14
  "initializer_range": 0.02,
15
  "intermediate_size": 1536,
16
- "label2id": {
17
- "LABEL_0": 0
18
- },
19
  "layer_norm_eps": 1e-12,
20
  "max_position_embeddings": 512,
21
  "model_type": "bert",
22
  "num_attention_heads": 12,
23
- "num_hidden_layers": 12,
24
  "pad_token_id": 0,
25
  "position_embedding_type": "absolute",
26
  "torch_dtype": "float32",
27
- "transformers_version": "4.38.1",
28
  "type_vocab_size": 2,
29
  "use_cache": true,
30
  "vocab_size": 30522
 
1
  {
2
+ "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
3
  "architectures": [
4
  "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
  "classifier_dropout": null,
8
+ "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
  "hidden_size": 384,
 
 
 
12
  "initializer_range": 0.02,
13
  "intermediate_size": 1536,
 
 
 
14
  "layer_norm_eps": 1e-12,
15
  "max_position_embeddings": 512,
16
  "model_type": "bert",
17
  "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
  "pad_token_id": 0,
20
  "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
+ "transformers_version": "4.37.2",
23
  "type_vocab_size": 2,
24
  "use_cache": true,
25
  "vocab_size": 30522
config_sentence_transformers.json CHANGED
@@ -1,9 +1,7 @@
1
  {
2
  "__version__": {
3
- "sentence_transformers": "2.5.1",
4
- "transformers": "4.38.1",
5
- "pytorch": "2.1.0+cu121"
6
- },
7
- "prompts": {},
8
- "default_prompt_name": null
9
  }
 
1
  {
2
  "__version__": {
3
+ "sentence_transformers": "2.3.1",
4
+ "transformers": "4.37.2",
5
+ "pytorch": "2.2.0+cu121"
6
+ }
 
 
7
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:efa751a822b7eb0a7db5d0167b6e38ffd6ad1978e71de205d7c0efedcc18bf2a
3
- size 133462128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:53cfb0a5e78786bcb47155c5cfba736d0eaff820dbee765bcab62dfdde852f16
3
+ size 90864192
tokenizer.json CHANGED
@@ -1,7 +1,19 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
4
- "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
+ "padding": {
10
+ "strategy": "BatchLongest",
11
+ "direction": "Right",
12
+ "pad_to_multiple_of": null,
13
+ "pad_id": 0,
14
+ "pad_type_id": 0,
15
+ "pad_token": "[PAD]"
16
+ },
17
  "added_tokens": [
18
  {
19
  "id": 0,
tokenizer_config.json CHANGED
@@ -46,12 +46,19 @@
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
 
49
  "model_max_length": 512,
50
  "never_split": null,
 
51
  "pad_token": "[PAD]",
 
 
52
  "sep_token": "[SEP]",
 
53
  "strip_accents": null,
54
  "tokenize_chinese_chars": true,
55
  "tokenizer_class": "BertTokenizer",
 
 
56
  "unk_token": "[UNK]"
57
  }
 
46
  "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
  "mask_token": "[MASK]",
49
+ "max_length": 128,
50
  "model_max_length": 512,
51
  "never_split": null,
52
+ "pad_to_multiple_of": null,
53
  "pad_token": "[PAD]",
54
+ "pad_token_type_id": 0,
55
+ "padding_side": "right",
56
  "sep_token": "[SEP]",
57
+ "stride": 0,
58
  "strip_accents": null,
59
  "tokenize_chinese_chars": true,
60
  "tokenizer_class": "BertTokenizer",
61
+ "truncation_side": "right",
62
+ "truncation_strategy": "longest_first",
63
  "unk_token": "[UNK]"
64
  }