Set 1024 as default dim, update usage snippets, store prompts in config

#1
by tomaarsen HF staff - opened
Files changed (3) hide show
  1. README.md +69 -22
  2. config_sentence_transformers.json +13 -0
  3. modules.json +1 -1
README.md CHANGED
@@ -5472,34 +5472,64 @@ Please refer to the following chapters for specific instructions on how to use t
5472
 
5473
  # Usage
5474
 
5475
- You can use `SentenceTransformer` or `transformer` library to encode text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5476
 
5477
  ```python
5478
  import os
5479
  import torch
5480
  from transformers import AutoModel, AutoTokenizer
5481
- from sentence_transformers import SentenceTransformer
5482
  from sklearn.preprocessing import normalize
5483
 
5484
- prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: {query}"
5485
- queries = ["query1", "query2"]
5486
- queries = [prompt.replace("{query}", query) for query in queries]
5487
-
5488
- # doc do not need any prompts
5489
- docs = ["doc1", "doc2"]
 
 
 
 
 
5490
 
 
5491
  model_dir = "{Your MODEL_PATH}"
5492
 
5493
- #### method1: SentenceTransformer
5494
- # !!!!The default dimension is 8192,if you need other dimensions, please copy the files from the `2_Dense_{dims}` folder to overwrite them. For example, `copy -r ./2_Dense_1024/* ./2_Dense/` !!!!
5495
- model = SentenceTransformer(model_dir, trust_remote_code=True).cuda()
5496
- vectors = model.encode(queries, convert_to_numpy=True, normalize_embeddings=True)
5497
- print(vectors.shape)
5498
- print(vectors[:, :4])
5499
-
5500
- #### method2:transformers
5501
- vector_linear_directory = "2_Dense"
5502
- vector_dim = 8192
5503
  model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
5504
  tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
5505
  vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
@@ -5509,17 +5539,34 @@ vector_linear_dict = {
5509
  }
5510
  vector_linear.load_state_dict(vector_linear_dict)
5511
  vector_linear.cuda()
 
 
5512
  with torch.no_grad():
5513
  input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5514
  input_data = {k: v.cuda() for k, v in input_data.items()}
5515
  attention_mask = input_data["attention_mask"]
5516
  last_hidden_state = model(**input_data)[0]
5517
  last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5518
- vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5519
- vectors = normalize(vector_linear(vectors).cpu().numpy())
5520
- print(vectors.shape)
5521
- print(vectors[:, :4])
 
 
 
 
 
 
 
 
 
 
 
5522
 
 
 
 
 
5523
  ```
5524
 
5525
  # FAQ
 
5472
 
5473
  # Usage
5474
 
5475
+ You can use either the `SentenceTransformers` or the `transformers` library to encode text.
5476
+
5477
+ ## Sentence Transformers
5478
+
5479
+ ```python
5480
+ from sentence_transformers import SentenceTransformer
5481
+
5482
+ # This model supports two prompts: "s2p_query" and "s2s_query" for sentence-to-passage and sentence-to-sentence tasks, respectively.
5483
+ # They are defined in `config_sentence_transformers.json`
5484
+ query_prompt_name = "s2p_query"
5485
+ queries = [
5486
+ "What are some ways to reduce stress?",
5487
+ "What are the benefits of drinking green tea?",
5488
+ ]
5489
+ # docs do not need any prompts
5490
+ docs = [
5491
+ "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
5492
+ "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
5493
+ ]
5494
+
5495
+ # NOTE: The default dimension is 1024. If you need a different dimension, clone the model and edit `modules.json` to replace `2_Dense_1024` with another folder, e.g. `2_Dense_256` or `2_Dense_8192`.
5496
+ model = SentenceTransformer("infgrad/stella_en_400M_v5", trust_remote_code=True).cuda()
5497
+ query_embeddings = model.encode(queries, prompt_name=query_prompt_name)
5498
+ doc_embeddings = model.encode(docs)
5499
+ print(query_embeddings.shape, doc_embeddings.shape)
5500
+ # (2, 1024) (2, 1024)
5501
+
5502
+ similarities = model.similarity(query_embeddings, doc_embeddings)
5503
+ print(similarities)
5504
+ # tensor([[0.8398, 0.2990],
5505
+ # [0.3282, 0.8095]])
5506
+ ```
5507
+
5508
+ ## Transformers
5509
 
5510
  ```python
5511
  import os
5512
  import torch
5513
  from transformers import AutoModel, AutoTokenizer
 
5514
  from sklearn.preprocessing import normalize
5515
 
5516
+ query_prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: "
5517
+ queries = [
5518
+ "What are some ways to reduce stress?",
5519
+ "What are the benefits of drinking green tea?",
5520
+ ]
5521
+ queries = [query_prompt + query for query in queries]
5522
+ # docs do not need any prompts
5523
+ docs = [
5524
+ "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
5525
+ "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
5526
+ ]
5527
 
5528
+ # The path of your model after cloning it
5529
  model_dir = "{Your MODEL_PATH}"
5530
 
5531
+ vector_dim = 1024
5532
+ vector_linear_directory = f"2_Dense_{vector_dim}"
 
 
 
 
 
 
 
 
5533
  model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
5534
  tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
5535
  vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
 
5539
  }
5540
  vector_linear.load_state_dict(vector_linear_dict)
5541
  vector_linear.cuda()
5542
+
5543
+ # Embed the queries
5544
  with torch.no_grad():
5545
  input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5546
  input_data = {k: v.cuda() for k, v in input_data.items()}
5547
  attention_mask = input_data["attention_mask"]
5548
  last_hidden_state = model(**input_data)[0]
5549
  last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5550
+ query_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5551
+ query_vectors = normalize(vector_linear(query_vectors).cpu().numpy())
5552
+
5553
+ # Embed the documents
5554
+ with torch.no_grad():
5555
+ input_data = tokenizer(docs, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5556
+ input_data = {k: v.cuda() for k, v in input_data.items()}
5557
+ attention_mask = input_data["attention_mask"]
5558
+ last_hidden_state = model(**input_data)[0]
5559
+ last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5560
+ docs_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5561
+ docs_vectors = normalize(vector_linear(docs_vectors).cpu().numpy())
5562
+
5563
+ print(query_vectors.shape, docs_vectors.shape)
5564
+ # (2, 1024) (2, 1024)
5565
 
5566
+ similarities = query_vectors @ docs_vectors.T
5567
+ print(similarities)
5568
+ # [[0.8397531 0.29900077]
5569
+ # [0.32818374 0.80954516]]
5570
  ```
5571
 
5572
  # FAQ
config_sentence_transformers.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.3",
5
+ "pytorch": "2.3.1+cu121"
6
+ },
7
+ "prompts": {
8
+ "s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
9
+ "s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
10
+ },
11
+ "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine"
13
+ }
modules.json CHANGED
@@ -14,7 +14,7 @@
14
  {
15
  "idx": 2,
16
  "name": "2",
17
- "path": "2_Dense",
18
  "type": "sentence_transformers.models.Dense"
19
  }
20
  ]
 
14
  {
15
  "idx": 2,
16
  "name": "2",
17
+ "path": "2_Dense_1024",
18
  "type": "sentence_transformers.models.Dense"
19
  }
20
  ]