from sklearn.preprocessing import StandardScaler

# NOTE: this scaler must be fitted on the training features (the |emb1 - emb2|
# vectors of the training pairs) before encode_sentences is used for inference;
# an unfitted scaler makes transform() raise NotFittedError.
scaler = StandardScaler()

def encode_sentences(tokenizer, sentence1, sentence2):
    # Encode each sentence; `tokenizer` is expected to be a SentenceTransformer-style
    # model whose encode() returns an embedding tensor
    embedding1 = tokenizer.encode(sentence1, convert_to_tensor=True).cpu()
    embedding2 = tokenizer.encode(sentence2, convert_to_tensor=True).cpu()
    # Use the element-wise absolute difference of the embeddings as the feature vector
    feature = abs(embedding1 - embedding2).numpy().reshape(1, -1)
    # Scale with the scaler fitted during training; calling fit_transform() here
    # would re-fit on this single sample and zero out every feature
    feature_scaled = scaler.transform(feature)
    return feature_scaled
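

# Usage sketch (illustrative; not part of the original file). The model name,
# sentence pairs, and the way the scaler is fitted below are assumptions made
# for demonstration, not values taken from the original project.
if __name__ == "__main__":
    import numpy as np
    from sentence_transformers import SentenceTransformer

    model = SentenceTransformer("all-MiniLM-L6-v2")  # placeholder model name

    # Fit the module-level scaler on |emb1 - emb2| features from a few
    # stand-in "training" pairs so scaler.transform() is valid at inference.
    train_pairs = [
        ("A man is eating food.", "A man is eating a piece of bread."),
        ("A plane is taking off.", "An air plane is taking off."),
    ]
    train_features = np.vstack([
        abs(model.encode(a, convert_to_tensor=True).cpu()
            - model.encode(b, convert_to_tensor=True).cpu()).numpy()
        for a, b in train_pairs
    ])
    scaler.fit(train_features)

    features = encode_sentences(model, "A cat sits on the mat.",
                                "A dog plays in the yard.")
    print(features.shape)  # (1, embedding_dim) scaled feature vector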