ravfogs committed on
Commit
5032567
1 Parent(s): cd9a0ff

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +37 -0
README.md CHANGED
@@ -1,3 +1,40 @@
1
  ---
2
  license: cc-by-3.0
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: cc-by-3.0
3
+ language:
4
+ - en
5
  ---
6
+
7
+ The following Python code demonstrates how to load the model and encode a batch of sentences:
8
+
9
+ ```python
10
+
11
+ from transformers import AutoTokenizer, AutoModel
12
+ import torch
13
+
14
def load_finetuned_model(params_sent_encoder=None, params_query_encoder=None):
    """Build the query and sentence encoders and load fine-tuned weights.

    Both encoders and the tokenizer are instantiated from
    ``sentence-transformers/all-mpnet-base-v2``; each encoder then receives
    its own state dict and is switched to eval mode.

    The earlier version of this snippet referenced ``params_sent_encoder``
    and ``params_query_encoder`` without defining them, so it raised
    ``NameError`` unless the caller had created module-level variables with
    those names. They are now optional arguments; when an argument is
    omitted, the module-level name is still consulted so that older usage
    keeps working.

    Args:
        params_sent_encoder: State dict for the sentence encoder, or a path
            to a checkpoint file readable by ``torch.load``.
        params_query_encoder: State dict for the query encoder, or a path
            to a checkpoint file readable by ``torch.load``.

    Returns:
        A ``(tokenizer, query_encoder, sentence_encoder)`` tuple.

    Raises:
        ValueError: If the weights for either encoder cannot be found.
    """
    import os

    base_checkpoint = "sentence-transformers/all-mpnet-base-v2"

    def fix_module_prefix_in_state_dict(state_dict):
        # Drops 'module.' from parameter names (presumably left behind by a
        # DataParallel-style wrapper at training time -- TODO confirm).
        return {k.replace('module.', ''): v for k, v in state_dict.items()}

    def resolve_state_dict(weights, legacy_name):
        # Fall back to the module-level variable the old snippet relied on.
        if weights is None:
            weights = globals().get(legacy_name)
        if weights is None:
            raise ValueError(
                f"No weights supplied for '{legacy_name}': pass a state dict "
                "or a checkpoint path to load_finetuned_model()."
            )
        if isinstance(weights, (str, os.PathLike)):
            # NOTE(security): torch.load unpickles the file; only load
            # checkpoints that come from a trusted source.
            weights = torch.load(weights, map_location="cpu")
        return fix_module_prefix_in_state_dict(weights)

    # Resolve the weights first so a missing argument fails before any
    # pretrained model is fetched.
    sentence_state = resolve_state_dict(params_sent_encoder, "params_sent_encoder")
    query_state = resolve_state_dict(params_query_encoder, "params_query_encoder")

    sentence_encoder = AutoModel.from_pretrained(base_checkpoint)
    query_encoder = AutoModel.from_pretrained(base_checkpoint)
    tokenizer = AutoTokenizer.from_pretrained(base_checkpoint)

    sentence_encoder.load_state_dict(sentence_state)
    query_encoder.load_state_dict(query_state)

    query_encoder.eval()
    sentence_encoder.eval()

    return tokenizer, query_encoder, sentence_encoder
31
+
32
+
33
def encode_batch(model, tokenizer, sentences, device):
    """Encode sentences into one vector each by masked mean pooling.

    The sentences are tokenized (padded, truncated to 512 tokens, special
    tokens added) and moved to ``device``. The first element of the model
    output is then averaged over the sequence axis, weighted by the
    attention mask, with position 0 left out of both the sum and the count.

    Args:
        model: Callable invoked as ``model(**inputs)``; element ``[0]`` of
            its result is indexed as a rank-3 tensor.
        tokenizer: Callable producing the model inputs, including an
            ``"attention_mask"`` entry, and supporting ``.to(device)``.
        sentences: Input passed straight to the tokenizer.
        device: Target device for the tokenized inputs.

    Returns:
        Tensor of pooled features, one row per input sequence.
    """
    encoded = tokenizer(
        sentences,
        padding=True,
        max_length=512,
        truncation=True,
        return_tensors="pt",
        add_special_tokens=True,
    ).to(device)

    token_states = model(**encoded)[0]

    # Position 0 (presumably the leading special token -- TODO confirm) is
    # excluded from pooling, so the mask is sliced the same way.
    pooling_mask = encoded["attention_mask"][:, 1:]
    weighted_sum = (token_states[:, 1:, :] * pooling_mask.unsqueeze(-1)).sum(dim=1)

    # The clamp keeps the divisor non-zero when a row has no unmasked tokens.
    token_counts = torch.clamp(pooling_mask.sum(dim=1, keepdim=True), min=1e-9)
    return weighted_sum / token_counts
39
+
40
+ ```