import pandas as pd
from datasets import load_dataset

# Scores sampled by default: 5 down to 0 in steps of 0.5, highest first.
_DEFAULT_SCORES = (5, 4.5, 4, 3.5, 3, 2.5, 2, 1.5, 1, 0.5, 0)


def get_samples(scores=_DEFAULT_SCORES) -> pd.DataFrame:
    """Return one validation example per requested score.

    Loads the "mteb/stsbenchmark-sts" dataset and, for each value in
    *scores*, picks the first row of the ``validation`` split whose
    ``score`` column equals that value exactly.

    Args:
        scores: Iterable of score values to sample, in the order the rows
            should appear in the result. Defaults to 5, 4.5, ..., 0.5, 0.

    Returns:
        A DataFrame with columns ``sentence1``, ``sentence2`` and ``score``,
        one row per entry of *scores*, in the same order.

    Raises:
        IndexError: If any requested score has no matching row in the
            validation split.
    """
    validation = load_dataset("mteb/stsbenchmark-sts")["validation"]
    scores = list(scores)

    # One pass over the score column instead of one full filter() pass per
    # score: remember the index of the first row seen for each wanted score
    # and stop as soon as every score has been found.
    pending = set(scores)
    first_index = {}
    for idx, value in enumerate(validation["score"]):
        if not pending:
            break
        if value in pending:
            first_index[value] = idx
            pending.discard(value)

    if pending:
        raise IndexError(
            f"no validation row with score(s): {sorted(pending)}"
        )

    rows = [validation[first_index[score]] for score in scores]
    # `columns=` keeps only the three fields of interest from each row dict.
    return pd.DataFrame(rows, columns=["sentence1", "sentence2", "score"])