zpn committed on
Commit 59cfcc3
1 Parent(s): c9c6080

Add new SentenceTransformer model (#1)


- Add new SentenceTransformer model (1756f3d958b5bde45ec16836ec8795dd466785c2)

1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+{
+    "word_embedding_dimension": 768,
+    "pooling_mode_cls_token": false,
+    "pooling_mode_mean_tokens": true,
+    "pooling_mode_max_tokens": false,
+    "pooling_mode_mean_sqrt_len_tokens": false,
+    "pooling_mode_weightedmean_tokens": false,
+    "pooling_mode_lasttoken": false,
+    "include_prompt": true
+}
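This pooling config enables only mean pooling over the 768-dimensional token embeddings. As a rough sketch of what mean pooling does (illustrative only, not the library's internal code; the tensor names are placeholders):

```python
import torch

def mean_pool(token_embeddings: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Masked mean over the token axis, i.e. what pooling_mode_mean_tokens enables.

    token_embeddings: (batch, seq_len, 768); attention_mask: (batch, seq_len).
    """
    mask = attention_mask.unsqueeze(-1).float()      # (batch, seq_len, 1)
    summed = (token_embeddings * mask).sum(dim=1)    # padded positions contribute nothing
    counts = mask.sum(dim=1).clamp(min=1e-9)         # avoid division by zero for empty rows
    return summed / counts                           # (batch, 768) sentence embeddings
```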
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "nomic-ai/modern-nomic-embed-unsup",
+  "_name_or_path": "nomic-ai/modernbert-embed",
   "architectures": [
     "ModernBertModel"
   ],
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+{
+    "__version__": {
+        "sentence_transformers": "3.3.1",
+        "transformers": "4.48.0.dev0",
+        "pytorch": "2.4.1+cu121"
+    },
+    "prompts": {},
+    "default_prompt_name": null,
+    "similarity_fn_name": "cosine"
+}
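With `similarity_fn_name` set to `cosine`, a minimal usage sketch looks like the following (the repo id is assumed from the `_name_or_path` in config.json above and may differ from the actual hub id):

```python
from sentence_transformers import SentenceTransformer

# Repo id assumed from config.json; adjust if the checkpoint is published elsewhere.
model = SentenceTransformer("nomic-ai/modernbert-embed")

embeddings = model.encode(["search query", "a relevant passage", "an unrelated passage"])
# similarity() uses the similarity_fn_name declared above, i.e. cosine similarity.
scores = model.similarity(embeddings[:1], embeddings[1:])
print(scores)  # 1x2 matrix of cosine scores
```

Since `prompts` is empty and `default_prompt_name` is null, no prefix is prepended to inputs at encode time.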
modules.json ADDED
@@ -0,0 +1,20 @@
+[
+    {
+        "idx": 0,
+        "name": "0",
+        "path": "",
+        "type": "sentence_transformers.models.Transformer"
+    },
+    {
+        "idx": 1,
+        "name": "1",
+        "path": "1_Pooling",
+        "type": "sentence_transformers.models.Pooling"
+    },
+    {
+        "idx": 2,
+        "name": "2",
+        "path": "2_Normalize",
+        "type": "sentence_transformers.models.Normalize"
+    }
+]
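modules.json declares the three-stage pipeline (Transformer → Pooling → Normalize) that `SentenceTransformer(...)` rebuilds automatically when the repo is loaded by name. Composing the same stack by hand would look roughly like this sketch (repo id again assumed from config.json):

```python
from sentence_transformers import SentenceTransformer, models

# Stage 0: the ModernBERT encoder producing per-token embeddings.
transformer = models.Transformer("nomic-ai/modernbert-embed", max_seq_length=8192)

# Stage 1: mean pooling, mirroring 1_Pooling/config.json (dimension 768).
pooling = models.Pooling(
    transformer.get_word_embedding_dimension(),
    pooling_mode="mean",
)

# Stage 2: L2 normalization, so dot product and cosine similarity coincide.
normalize = models.Normalize()

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```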
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+{
+    "max_seq_length": 8192,
+    "do_lower_case": false
+}
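sentence_bert_config.json sets the SentenceTransformer-level sequence limit to 8192 tokens, matching the tokenizer's model_max_length below. A quick way to inspect or lower it, assuming the same repo id as above:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("nomic-ai/modernbert-embed")  # repo id assumed
print(model.max_seq_length)  # 8192, read from sentence_bert_config.json

# Longer inputs are truncated to this many tokens at encode time;
# the limit can be lowered to trade context for speed and memory.
model.max_seq_length = 2048
```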
tokenizer_config.json CHANGED
@@ -937,7 +937,7 @@
     "input_ids",
     "attention_mask"
   ],
-  "model_max_length": 1000000000000000019884624838656,
+  "model_max_length": 8192,
   "pad_token": "[PAD]",
   "sep_token": "[SEP]",
   "tokenizer_class": "PreTrainedTokenizerFast",