Evaluation Script

"""Evaluation script for the custom dataset."""

from pylate import evaluation, indexes, models, retrieve

# Settings for this evaluation run, gathered in one place.
MODEL_NAME = "sigridjineth/ModernBERT-Korean-ColBERT-preview-v1"
DATASET_NAME = "taeminlee/Ko-miracl"
METRICS = ["map", "ndcg@10", "ndcg@100", "recall@10", "recall@100"]
TOP_K = 100

# Encoding options shared by the document pass and the query pass.
ENCODE_OPTIONS = {"batch_size": 32, "show_progress_bar": True}

# Build the ColBERT model, a Voyager index (override=True) and the
# retriever that searches that index.
model = models.ColBERT(model_name_or_path=MODEL_NAME, document_length=300)
index = indexes.Voyager(override=True)
retriever = retrieve.ColBERT(index=index)

# The loader returns the documents, the queries and the relevance judgments.
documents, queries, qrels = evaluation.load_custom_dataset(DATASET_NAME)

# Encode the "text" field of every document (is_query=False).
document_texts = [document["text"] for document in documents]
documents_embeddings = model.encode(
    sentences=document_texts,
    is_query=False,
    **ENCODE_OPTIONS,
)

# Register the embeddings in the index under each document's "id".
document_ids = [document["id"] for document in documents]
index.add_documents(
    documents_ids=document_ids,
    documents_embeddings=documents_embeddings,
)

# Encode the queries with the same model, this time with is_query=True.
queries_embeddings = model.encode(
    sentences=queries,
    is_query=True,
    **ENCODE_OPTIONS,
)

# Search the index with the query embeddings, passing k=TOP_K.
scores = retriever.retrieve(queries_embeddings=queries_embeddings, k=TOP_K)

# Score the retrieval output against the relevance judgments and print it.
evaluation_scores = evaluation.evaluate(
    scores=scores,
    qrels=qrels,
    queries=queries,
    metrics=METRICS,
)
print(evaluation_scores)
Downloads last month
22
Safetensors
Model size
395M params
Tensor type
F32
·
Inference API
Unable to determine this model's library. Check the docs.

Model tree for sigridjineth/ModernBERT-Korean-ColBERT-preview-v1

Finetuned
(19)
this model