infgrad committed on
Commit
129dc50
1 Parent(s): efacca1

Set 1024 as default dim, update usage snippets, store prompts in config (#1)

Browse files

- Set 1024 as default dim, update usage snippets, store prompts in config (c21cfb18eedf0e20b9e57c2fc44524c6fbc8fcc3)

Files changed (3) hide show
  1. README.md +69 -22
  2. config_sentence_transformers.json +13 -0
  3. modules.json +1 -1
README.md CHANGED
@@ -5471,34 +5471,64 @@ Please refer to the following chapters for specific instructions on how to use t
5471
 
5472
  # Usage
5473
 
5474
- You can use `SentenceTransformer` or `transformer` library to encode text.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5475
 
5476
  ```python
5477
  import os
5478
  import torch
5479
  from transformers import AutoModel, AutoTokenizer
5480
- from sentence_transformers import SentenceTransformer
5481
  from sklearn.preprocessing import normalize
5482
 
5483
- prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: {query}"
5484
- queries = ["query1", "query2"]
5485
- queries = [prompt.replace("{query}", query) for query in queries]
5486
-
5487
- # doc do not need any prompts
5488
- docs = ["doc1", "doc2"]
 
 
 
 
 
5489
 
 
5490
  model_dir = "{Your MODEL_PATH}"
5491
 
5492
- #### method1: SentenceTransformer
5493
- # !!!!The default dimension is 8192,if you need other dimensions, please copy the files from the `2_Dense_{dims}` folder to overwrite them. For example, `copy -r ./2_Dense_1024/* ./2_Dense/` !!!!
5494
- model = SentenceTransformer(model_dir, trust_remote_code=True).cuda()
5495
- vectors = model.encode(queries, convert_to_numpy=True, normalize_embeddings=True)
5496
- print(vectors.shape)
5497
- print(vectors[:, :4])
5498
-
5499
- #### method2:transformers
5500
- vector_linear_directory = "2_Dense"
5501
- vector_dim = 8192
5502
  model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
5503
  tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
5504
  vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
@@ -5508,17 +5538,34 @@ vector_linear_dict = {
5508
  }
5509
  vector_linear.load_state_dict(vector_linear_dict)
5510
  vector_linear.cuda()
 
 
5511
  with torch.no_grad():
5512
  input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5513
  input_data = {k: v.cuda() for k, v in input_data.items()}
5514
  attention_mask = input_data["attention_mask"]
5515
  last_hidden_state = model(**input_data)[0]
5516
  last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5517
- vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5518
- vectors = normalize(vector_linear(vectors).cpu().numpy())
5519
- print(vectors.shape)
5520
- print(vectors[:, :4])
 
 
 
 
 
 
 
 
 
 
 
5521
 
 
 
 
 
5522
  ```
5523
 
5524
  # FAQ
 
5471
 
5472
  # Usage
5473
 
5474
+ You can use either the `sentence_transformers` or the `transformers` library to encode text.
5475
+
5476
+ ## Sentence Transformers
5477
+
5478
+ ```python
5479
+ from sentence_transformers import SentenceTransformer
5480
+
5481
+ # This model supports two prompts: "s2p_query" and "s2s_query" for sentence-to-passage and sentence-to-sentence tasks, respectively.
5482
+ # They are defined in `config_sentence_transformers.json`
5483
+ query_prompt_name = "s2p_query"
5484
+ queries = [
5485
+ "What are some ways to reduce stress?",
5486
+ "What are the benefits of drinking green tea?",
5487
+ ]
5488
+ # docs do not need any prompts
5489
+ docs = [
5490
+ "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
5491
+ "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
5492
+ ]
5493
+
5494
+ # NOTE: The default dimension is 1024. If you need a different dimension, clone the model and edit `modules.json`, replacing `2_Dense_1024` with the folder for the desired dimension, e.g. `2_Dense_256` or `2_Dense_8192`.
5495
+ model = SentenceTransformer("infgrad/stella_en_1.5B_v5", trust_remote_code=True).cuda()
5496
+ query_embeddings = model.encode(queries, prompt_name=query_prompt_name)
5497
+ doc_embeddings = model.encode(docs)
5498
+ print(query_embeddings.shape, doc_embeddings.shape)
5499
+ # (2, 1024) (2, 1024)
5500
+
5501
+ similarities = model.similarity(query_embeddings, doc_embeddings)
5502
+ print(similarities)
5503
+ # tensor([[0.8179, 0.2958],
5504
+ # [0.3194, 0.7854]])
5505
+ ```
5506
+
5507
+ ## Transformers
5508
 
5509
  ```python
5510
  import os
5511
  import torch
5512
  from transformers import AutoModel, AutoTokenizer
 
5513
  from sklearn.preprocessing import normalize
5514
 
5515
+ query_prompt = "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: "
5516
+ queries = [
5517
+ "What are some ways to reduce stress?",
5518
+ "What are the benefits of drinking green tea?",
5519
+ ]
5520
+ queries = [query_prompt + query for query in queries]
5521
+ # docs do not need any prompts
5522
+ docs = [
5523
+ "There are many effective ways to reduce stress. Some common techniques include deep breathing, meditation, and physical activity. Engaging in hobbies, spending time in nature, and connecting with loved ones can also help alleviate stress. Additionally, setting boundaries, practicing self-care, and learning to say no can prevent stress from building up.",
5524
+ "Green tea has been consumed for centuries and is known for its potential health benefits. It contains antioxidants that may help protect the body against damage caused by free radicals. Regular consumption of green tea has been associated with improved heart health, enhanced cognitive function, and a reduced risk of certain types of cancer. The polyphenols in green tea may also have anti-inflammatory and weight loss properties.",
5525
+ ]
5526
 
5527
+ # The path of your model after cloning it
5528
  model_dir = "{Your MODEL_PATH}"
5529
 
5530
+ vector_dim = 1024
5531
+ vector_linear_directory = f"2_Dense_{vector_dim}"
 
 
 
 
 
 
 
 
5532
  model = AutoModel.from_pretrained(model_dir, trust_remote_code=True).cuda().eval()
5533
  tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
5534
  vector_linear = torch.nn.Linear(in_features=model.config.hidden_size, out_features=vector_dim)
 
5538
  }
5539
  vector_linear.load_state_dict(vector_linear_dict)
5540
  vector_linear.cuda()
5541
+
5542
+ # Embed the queries
5543
  with torch.no_grad():
5544
  input_data = tokenizer(queries, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5545
  input_data = {k: v.cuda() for k, v in input_data.items()}
5546
  attention_mask = input_data["attention_mask"]
5547
  last_hidden_state = model(**input_data)[0]
5548
  last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5549
+ query_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5550
+ query_vectors = normalize(vector_linear(query_vectors).cpu().numpy())
5551
+
5552
+ # Embed the documents
5553
+ with torch.no_grad():
5554
+ input_data = tokenizer(docs, padding="longest", truncation=True, max_length=512, return_tensors="pt")
5555
+ input_data = {k: v.cuda() for k, v in input_data.items()}
5556
+ attention_mask = input_data["attention_mask"]
5557
+ last_hidden_state = model(**input_data)[0]
5558
+ last_hidden = last_hidden_state.masked_fill(~attention_mask[..., None].bool(), 0.0)
5559
+ docs_vectors = last_hidden.sum(dim=1) / attention_mask.sum(dim=1)[..., None]
5560
+ docs_vectors = normalize(vector_linear(docs_vectors).cpu().numpy())
5561
+
5562
+ print(query_vectors.shape, docs_vectors.shape)
5563
+ # (2, 1024) (2, 1024)
5564
 
5565
+ similarities = query_vectors @ docs_vectors.T
5566
+ print(similarities)
5567
+ # [[0.8178789 0.2958377 ]
5568
+ # [0.31938642 0.7853526 ]]
5569
  ```
5570
 
5571
  # FAQ
config_sentence_transformers.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.0.1",
4
+ "transformers": "4.42.3",
5
+ "pytorch": "2.3.1+cu121"
6
+ },
7
+ "prompts": {
8
+ "s2p_query": "Instruct: Given a web search query, retrieve relevant passages that answer the query.\nQuery: ",
9
+ "s2s_query": "Instruct: Retrieve semantically similar text.\nQuery: "
10
+ },
11
+ "default_prompt_name": null,
12
+ "similarity_fn_name": "cosine"
13
+ }
modules.json CHANGED
@@ -14,7 +14,7 @@
14
  {
15
  "idx": 2,
16
  "name": "2",
17
- "path": "2_Dense",
18
  "type": "sentence_transformers.models.Dense"
19
  }
20
  ]
 
14
  {
15
  "idx": 2,
16
  "name": "2",
17
+ "path": "2_Dense_1024",
18
  "type": "sentence_transformers.models.Dense"
19
  }
20
  ]