Feature Extraction
Transformers
Safetensors
English
gpt_neox
medical
biology
retrieval
LLM
Inference Endpoints
text-generation-inference
ritaranx committed on
Commit
42e33c1
1 Parent(s): 69b9288

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +9 -4
README.md CHANGED
@@ -45,14 +45,15 @@ from transformers import AutoTokenizer, AutoModel
45
 
46
  def last_token_pool(last_hidden_states: Tensor,
47
  attention_mask: Tensor) -> Tensor:
 
48
  left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
49
  if left_padding:
50
- return last_hidden_states[:, -1]
51
  else:
52
  sequence_lengths = attention_mask.sum(dim=1) - 1
53
- batch_size = last_hidden_states.shape[0]
54
- return last_hidden_states[torch.arange(batch_size, device=last_hidden_states.device), sequence_lengths]
55
-
56
 
57
  def get_detailed_instruct_query(task_description: str, query: str) -> str:
58
  return f'{task_description}\nQuery: {query}'
@@ -79,6 +80,10 @@ max_length = 512
79
  # Tokenize the input texts
80
  batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')
81
 
 
 
 
 
82
  model.eval()
83
  with torch.no_grad():
84
  outputs = model(**batch_dict)
 
45
 
46
  def last_token_pool(last_hidden_states: Tensor,
47
  attention_mask: Tensor) -> Tensor:
48
+ last_hidden = last_hidden_states.masked_fill(~attention_mask[..., None].bool(), 0.0)
49
  left_padding = (attention_mask[:, -1].sum() == attention_mask.shape[0])
50
  if left_padding:
51
+ embedding = last_hidden[:, -1]
52
  else:
53
  sequence_lengths = attention_mask.sum(dim=1) - 1
54
+ batch_size = last_hidden.shape[0]
55
+ embedding = last_hidden[torch.arange(batch_size, device=last_hidden.device), sequence_lengths]
56
+ return embedding
57
 
58
  def get_detailed_instruct_query(task_description: str, query: str) -> str:
59
  return f'{task_description}\nQuery: {query}'
 
80
  # Tokenize the input texts
81
  batch_dict = tokenizer(input_texts, max_length=max_length, padding=True, truncation=True, return_tensors='pt')
82
 
83
+ # Important! Adding EOS token at the end
84
+ batch_dict['input_ids'] = [input_ids + [tokenizer.eos_token_id] for input_ids in batch_dict['input_ids']]
85
+ batch_dict = tokenizer.pad(batch_dict, padding=True, return_attention_mask=True, return_tensors='pt').to("cuda")
86
+
87
  model.eval()
88
  with torch.no_grad():
89
  outputs = model(**batch_dict)