BMRetriever
/

BMRetriever-410M

Feature Extraction

Inference Endpoints

text-generation-inference

Model card Files Files and versions Community

ritaranx committed 11 days ago

Commit

42e33c1

•

1 Parent(s): 69b9288

Update README.md

Files changed (1) hide show

README.md +9 -4

README.md CHANGED Viewed

@@ -45,14 +45,15 @@ from transformers import AutoTokenizer, AutoModel
 def last_token_pool(last_hidden_states: Tensor,
                  attention_mask: Tensor) -> Tensor:
     left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
     if left_padding:
-        return last_hidden_states[:, -1]
     else:
         sequence_lengths = attention_mask.sum(dim=1) - 1
-        batch_size = last_hidden_states.shape[0]
-        return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
 def get_detailed_instruct_query(task_description: str, query: str) -> str:
     return f'{task_description}\nQuery: {query}'
@@ -79,6 +80,10 @@ max_length = 512
 # Tokenize the input texts
 batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')
 model.eval()
 with torch.no_grad():
     outputs = model(**batch_dict)

 def last_token_pool(last_hidden_states: Tensor,
                  attention_mask: Tensor) -> Tensor:
     """Return, for each sequence in the batch, the hidden state of its last attended token.

     Positions whose attention-mask entry is zero are zeroed out first. If every
     sequence has a non-zero mask in the final position (the batch is treated as
     left-padded), the final position is returned for all rows. Otherwise the
     position ``attention_mask.sum(dim=1) - 1`` is gathered row by row.

     NOTE(review): assumes ``last_hidden_states`` is (batch, seq, hidden) and
     ``attention_mask`` is (batch, seq) with 0/1 entries -- confirm against caller.
     """
     # Broadcast the mask over the trailing hidden dimension and blank out padding.
     keep = attention_mask.unsqueeze(-1).bool()
     hidden = last_hidden_states.masked_fill(~keep, 0.0)

     # Every row attends to its final position: the last column is the answer.
     if attention_mask[:, -1].sum() == attention_mask.shape[0]:
         return hidden[:, -1]

     # Otherwise pick, per row, the index of the last attended token.
     last_index = attention_mask.sum(dim=1) - 1
     rows = torch.arange(hidden.shape[0], device=hidden.device)
     return hidden[rows, last_index]
 def get_detailed_instruct_query(task_description: str, query: str) -> str:
     """Build the query prompt: the task description, a newline, then ``Query: <query>``."""
     prompt = f'{task_description}\nQuery: {query}'
     return prompt
 # Tokenize the input texts WITHOUT padding, tensors or an attention mask:
 # the token ids must stay plain Python lists so EOS can be appended to each one,
 # and one position (max_length - 1) is reserved so EOS does not exceed max_length.
 batch_dict = tokenizer(input_texts, max_length=max_length - 1, return_attention_mask=False, padding=False, truncation=True)
 # Important! Adding EOS token at the end
 batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
 # Pad only after EOS is in place, so EOS is the last real token of every sequence
 # and the attention mask is built for the final lengths. The batch is moved to
 # whatever device the model is on instead of a hard-coded "cuda".
 batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt').to(model.device)
 model.eval()
 with torch.no_grad():
     outputs = model(**batch_dict)