from sklearn.preprocessing import StandardScaler

# NOTE: this scaler must be fitted on the training features (the |emb1 - emb2|
# vectors of the training pairs) before encode_sentences is used for inference;
# an unfitted scaler makes transform() raise NotFittedError.
scaler = StandardScaler()

def encode_sentences(tokenizer, sentence1, sentence2):
    # Encode each sentence; `tokenizer` is expected to be a SentenceTransformer-style
    # model whose encode() returns an embedding tensor
    embedding1 = tokenizer.encode(sentence1, convert_to_tensor=True).cpu()
    embedding2 = tokenizer.encode(sentence2, convert_to_tensor=True).cpu()
    # Use the element-wise absolute difference of the embeddings as the feature vector
    feature = abs(embedding1 - embedding2).numpy().reshape(1, -1)
    # Scale with the scaler fitted during training; calling fit_transform() here
    # would re-fit on this single sample and zero out every feature
    feature_scaled = scaler.transform(feature)
    return feature_scaled
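

# Usage sketch (illustrative; not part of the original file). The model name,
# sentence pairs, and the way the scaler is fitted below are assumptions made
# for demonstration, not values taken from the original project.
if __name__ == "__main__":
    import numpy as np
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model name

    # Fit the module-level scaler on |emb1 - emb2| features from a few
    # stand-in "training" pairs so scaler.transform() is valid at inference.
    train_pairs = [
        ("A man is eating food.", "A man is eating a piece of bread."),
        ("A plane is taking off.", "An air plane is taking off."),
    ]
    train_features = np.vstack([
        abs(model.encode(a, convert_to_tensor=True).cpu()
            - model.encode(b, convert_to_tensor=True).cpu()).numpy()
        for a, b in train_pairs
    ])
    scaler.fit(train_features)

    features = encode_sentences(model, "A cat sits on the mat.",
                                "A dog plays in the yard.")
    print(features.shape)  # (1, embedding_dim) scaled feature vector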