"""Score a sentence-embedding model on the `jinaai/negation-dataset` test split.

Each dataset item supplies three texts under the keys 'anchor', 'entailment'
and 'negative'. All three are embedded with the same model and two ratios
are printed:

* entailment_score: fraction of items where sim(anchor, entailment) is
  strictly greater than sim(negative, entailment).
* anchor_score: fraction of items where sim(anchor, entailment) is strictly
  greater than sim(anchor, negative).
"""
from datasets import load_dataset
from sentence_transformers import SentenceTransformer
from torch.nn.functional import cosine_similarity as cos_sim

model_name = "jinaai/jina-embedding-l-en-v1"
model = SentenceTransformer(model_name)
dataset = load_dataset('jinaai/negation-dataset', split='test')

# One embedding per dataset item for each of the three text fields; the three
# tensors are built in the same item order so they can be compared pairwise.
anchor_embeddings = model.encode(
    [item['anchor'] for item in dataset], convert_to_tensor=True
)
entailment_embeddings = model.encode(
    [item['entailment'] for item in dataset], convert_to_tensor=True
)
negative_embeddings = model.encode(
    [item['negative'] for item in dataset], convert_to_tensor=True
)

# cosine_similarity is called with its default `dim`, so each call compares
# the i-th embedding of one tensor with the i-th embedding of the other
# (assumes encode() returned 2-D (items, dim) tensors -- confirm against the
# installed sentence-transformers version).
positive_similarities = cos_sim(anchor_embeddings, entailment_embeddings)
entailment_negatives = cos_sim(negative_embeddings, entailment_embeddings)
anchor_negatives = cos_sim(anchor_embeddings, negative_embeddings)

# Count the "wins" with the tensor's own .sum() instead of the builtin sum(),
# which would iterate the boolean tensor one element at a time in Python.
num_items = len(anchor_embeddings)
entailment_score = (
    (positive_similarities > entailment_negatives).sum().item() / num_items
)
anchor_score = (
    (positive_similarities > anchor_negatives).sum().item() / num_items
)

print('entailment_score: ', entailment_score)
print('anchor_score: ', anchor_score)