---
license: cc-by-3.0
language:
- en
---

A model for mapping abstract sentence descriptions to sentences that fit those descriptions. Use `load_finetuned_model()` to load the query and sentence encoders, and `encode_batch()` to encode a batch of sentences with either encoder.

```python

from transformers import AutoTokenizer, AutoModel
import torch


def load_finetuned_model():
    """Load the tokenizer and the fine-tuned query and sentence encoders."""

    def fix_module_prefix_in_state_dict(state_dict):
        # Strip the 'module.' prefix added when the model was trained with DataParallel.
        return {k.replace('module.', ''): v for k, v in state_dict.items()}

    sentence_encoder = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
    query_encoder = AutoModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-mpnet-base-v2")

    # Load the fine-tuned weights; the file names below are placeholders --
    # point them at the checkpoint files distributed with this model.
    params_sent_encoder = fix_module_prefix_in_state_dict(
        torch.load("sentence_encoder.pt", map_location="cpu"))
    params_query_encoder = fix_module_prefix_in_state_dict(
        torch.load("query_encoder.pt", map_location="cpu"))

    sentence_encoder.load_state_dict(params_sent_encoder)
    query_encoder.load_state_dict(params_query_encoder)

    query_encoder.eval()
    sentence_encoder.eval()

    return tokenizer, query_encoder, sentence_encoder


def encode_batch(model, tokenizer, sentences, device):
    """Encode a list of sentences with the given encoder and return pooled embeddings."""
    inputs = tokenizer(sentences, padding=True, max_length=512, truncation=True,
                       return_tensors="pt", add_special_tokens=True).to(device)
    features = model(**inputs)[0]
    # Mean-pool the token embeddings (skipping the leading special token),
    # masking out padding and guarding against division by zero.
    features = torch.sum(features[:, 1:, :] * inputs["attention_mask"][:, 1:].unsqueeze(-1), dim=1) / \
        torch.clamp(torch.sum(inputs["attention_mask"][:, 1:], dim=1, keepdim=True), min=1e-9)
    return features

```
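
A minimal usage sketch: encode an abstract description with the query encoder, encode candidate sentences with the sentence encoder, and rank the candidates by similarity to the description. The example sentences and the use of cosine similarity for ranking are illustrative assumptions, not part of the released model.

```python
import torch

# Illustrative example: rank candidate sentences against an abstract description.
tokenizer, query_encoder, sentence_encoder = load_finetuned_model()
device = "cpu"  # or "cuda"; move the encoders with .to(device) if using a GPU

description = ["A sentence expressing surprise about the weather."]
candidates = [
    "I can't believe it snowed in June!",
    "The meeting was moved to Thursday.",
    "Wow, it is raining again already?",
]

with torch.no_grad():
    query_emb = encode_batch(query_encoder, tokenizer, description, device)    # (1, hidden)
    sent_embs = encode_batch(sentence_encoder, tokenizer, candidates, device)  # (3, hidden)

# Cosine similarity between the description and each candidate (assumed scoring).
scores = torch.nn.functional.cosine_similarity(query_emb, sent_embs, dim=1)
best = scores.argmax().item()
print(candidates[best], scores[best].item())
```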