metadata
base_model:
- answerdotai/ModernBERT-large
Evaluation Script
"""Evaluation script for the custom dataset."""
from pylate import evaluation, indexes, models, retrieve
# --- Model, index and retriever -------------------------------------------
# ColBERT checkpoint under evaluation; document_length is forwarded to the
# model constructor unchanged.
model = models.ColBERT(
    model_name_or_path="sigridjineth/ModernBERT-Korean-ColBERT-preview-v1",
    document_length=300,
)

# Voyager index created with override=True (presumably replaces any existing
# index of the same name -- confirm against the PyLate docs), plus the
# retriever that searches it.
index = indexes.Voyager(override=True)
retriever = retrieve.ColBERT(index=index)

# --- Data -----------------------------------------------------------------
# The loader yields three objects: the documents, the queries and the
# relevance judgements (qrels).
documents, queries, qrels = evaluation.load_custom_dataset("taeminlee/Ko-miracl")

# Each document is indexed by its "text" and "id" keys below.
document_texts = [entry["text"] for entry in documents]
document_ids = [entry["id"] for entry in documents]

# --- Index the corpus -----------------------------------------------------
# Encode documents (is_query=False) and register them under their ids.
documents_embeddings = model.encode(
    sentences=document_texts,
    batch_size=32,
    is_query=False,
    show_progress_bar=True,
)
index.add_documents(
    documents_ids=document_ids,
    documents_embeddings=documents_embeddings,
)

# --- Retrieve -------------------------------------------------------------
# Encode queries (is_query=True) and request k=100 results per query, which
# matches the deepest cutoff used in the metrics below.
queries_embeddings = model.encode(
    sentences=queries,
    batch_size=32,
    is_query=True,
    show_progress_bar=True,
)
scores = retriever.retrieve(queries_embeddings=queries_embeddings, k=100)

# --- Score ----------------------------------------------------------------
metrics = ["map", "ndcg@10", "ndcg@100", "recall@10", "recall@100"]
evaluation_scores = evaluation.evaluate(
    scores=scores,
    qrels=qrels,
    queries=queries,
    metrics=metrics,
)
print(evaluation_scores)