Charles Kabui
Initial Commit
41dd156
raw
history blame contribute delete
610 Bytes
import pandas as pd
from datasets import load_dataset
def get_samples():
    """Return one example sentence pair per score level.

    Loads the ``mteb/stsbenchmark-sts`` dataset and, for every score from 5
    down to 0 in steps of 0.5, picks the first row of its ``validation``
    split whose ``score`` equals that value exactly.

    Returns:
        pandas.DataFrame: One row per score level (11 rows), ordered from
        score 5 down to score 0, restricted to the columns ``sentence1``,
        ``sentence2`` and ``score``.

    Raises:
        IndexError: If the validation split contains no row for one of the
            score levels.
    """
    dataset = load_dataset("mteb/stsbenchmark-sts")
    validation = dataset['validation']

    # Score levels in the order they must appear in the result.
    wanted_scores = (5, 4.5, 4, 3.5, 3, 2.5, 2, 1.5, 1, 0.5, 0)

    # Scan the score column once and remember the first row index for each
    # wanted score, instead of running a separate uncached filter over the
    # whole split for every score level.
    pending = set(wanted_scores)
    first_index = {}
    for index, score in enumerate(validation['score']):
        if score in pending:
            first_index[score] = index
            pending.discard(score)
            if not pending:
                # Every level has been located; no need to read further.
                break

    if pending:
        missing = sorted(pending, reverse=True)
        raise IndexError(
            f"no validation row found for score(s): {missing}"
        )

    rows = [validation[first_index[score]] for score in wanted_scores]
    # `columns=` both selects and orders the fields taken from each row dict.
    test_samples = pd.DataFrame(rows, columns=['sentence1', 'sentence2', 'score'])
    return test_samples