Spaces:
Runtime error
Runtime error
# | |
# Pyserini: Reproducible IR research with sparse and dense representations | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
# | |
import os | |
import shutil | |
import unittest | |
from pyserini import search | |
def read_file_lines(path):
    """Return all lines of the text file at ``path`` as a list, line endings kept."""
    with open(path) as handle:
        return list(handle)
class TestGetQrels(unittest.TestCase): | |
def setUp(self):
    # Set PYSERINI_CACHE to 'temp_dir' before each test
    # (presumably read by pyserini to pick its cache directory -- confirm).
    cache_settings = {'PYSERINI_CACHE': 'temp_dir'}
    os.environ.update(cache_settings)
def test_trec1_adhoc(self):
    # 'trec1-adhoc' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec1-adhoc')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2_adhoc(self):
    # 'trec2-adhoc' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2-adhoc')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec3_adhoc(self):
    # 'trec3-adhoc' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec3-adhoc')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_robust04a(self):
    # Spot-check the 'robust04' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('robust04'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 311410)
    self.assertEqual(head, "301 0 FBIS3-10082 1")
    self.assertEqual(middle, "409 0 LA010189-0112 0")
    self.assertEqual(tail, "700 0 LA123090-0137 0")
def test_robust04b(self):
    # 'robust04' qrels must load, hold 249 topics, and have an int first key.
    judgments = search.get_qrels('robust04')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 249)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_robust05a(self):
    # Spot-check the 'robust05' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('robust05'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 37798)
    self.assertEqual(head, "303 0 APW19980609.1531 2")
    self.assertEqual(middle, "397 0 XIE19960920.0297 0")
    self.assertEqual(tail, "689 0 XIE20000925.0055 0")
def test_robust05b(self):
    # 'robust05' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('robust05')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_core17a(self):
    # Spot-check the 'core17' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('core17'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 30030)
    self.assertEqual(head, "307 0 1001536 1")
    self.assertEqual(middle, "393 0 1586039 2")
    self.assertEqual(tail, "690 0 996059 0")
def test_core17b(self):
    # 'core17' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('core17')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_core18a(self):
    # Spot-check the 'core18' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('core18'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 26233)
    self.assertEqual(head, "321 0 004c6120d0aa69da29cc045da0562168 0")
    self.assertEqual(middle, "646 0 260365e8-eb18-11e2-a301-ea5a8116d211 0")
    self.assertEqual(tail, "825 0 ff3a25b0-0ba4-11e4-8341-b8072b1e7348 0")
def test_core18b(self):
    # 'core18' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('core18')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_wt10g(self):
    # 'wt10g' qrels must load, hold 100 topics, and have an int first key.
    judgments = search.get_qrels('wt10g')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 100)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2004_terabyte(self):
    # 'trec2004-terabyte' qrels must load, hold 49 topics, and have an int first key.
    judgments = search.get_qrels('trec2004-terabyte')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 49)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2005_terabyte(self):
    # 'trec2005-terabyte' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2005-terabyte')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2006_terabyte(self):
    # 'trec2006-terabyte' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2006-terabyte')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2011_web(self):
    # 'trec2011-web' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2011-web')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2012_web(self):
    # 'trec2012-web' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2012-web')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_trec2013_web(self):
    # 'trec2013-web' qrels must load, hold 50 topics, and have an int first key.
    # Fix: this qrels test previously called search.get_topics(), so the
    # 'trec2013-web' qrels were never actually loaded or checked.
    qrels = search.get_qrels('trec2013-web')
    self.assertIsNotNone(qrels)
    self.assertEqual(len(qrels), 50)
    self.assertIsInstance(next(iter(qrels.keys())), int)
def test_trec2014_web(self):
    # 'trec2014-web' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2014-web')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_mb11(self):
    # 'mb11' qrels must load, hold 49 topics, and have an int first key.
    judgments = search.get_qrels('mb11')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 49)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_mb12(self):
    # 'mb12' qrels must load, hold 59 topics, and have an int first key.
    judgments = search.get_qrels('mb12')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 59)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_mb13(self):
    # 'mb13' qrels must load, hold 60 topics, and have an int first key.
    judgments = search.get_qrels('mb13')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 60)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_mb14(self):
    # 'mb14' qrels must load, hold 55 topics, and have an int first key.
    judgments = search.get_qrels('mb14')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 55)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_car15a(self):
    # Spot-check the 'car17v1.5-benchmarkY1test' qrels file: line count plus
    # the first, middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('car17v1.5-benchmarkY1test'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 5820)
    self.assertEqual(
        head,
        "Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex "
        "0 38c1bd25ddca2705164677a3f598c46df85afba7 1",
    )
    self.assertEqual(
        middle,
        "Insular%20cortex/Function/Interoceptive%20awareness "
        "0 f037f925acd4c59e802a58aa74430fc6aa163606 1",
    )
    self.assertEqual(
        tail,
        "Yellowstone%20National%20Park/Recreation"
        " 0 e80b5185da1493edde41bea19a389a3f62167369 1",
    )
def test_car15b(self):
    # 'car17v1.5-benchmarkY1test' qrels must load and hold 2125 topics;
    # the first key must not be an int.
    judgments = search.get_qrels('car17v1.5-benchmarkY1test')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 2125)
    leading_key = next(iter(judgments.keys()))
    self.assertNotIsInstance(leading_key, int)
def test_car20a(self):
    # Spot-check the 'car17v2.0-benchmarkY1test' qrels file: line count plus
    # the first, middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('car17v2.0-benchmarkY1test'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 6192)
    self.assertEqual(head, "enwiki:Aftertaste 0 327cca6c4d38953196fa6789f615546f03287b25 1")
    self.assertEqual(
        middle,
        "enwiki:Insular%20cortex/Function/Interoceptive%20awareness"
        " 0 f037f925acd4c59e802a58aa74430fc6aa163606 1",
    )
    self.assertEqual(
        tail,
        "enwiki:Yellowstone%20National%20Park/Recreation"
        " 0 b812fca195f74f8c563db4262260554fe3ff3731 1",
    )
def test_car20b(self):
    # 'car17v2.0-benchmarkY1test' qrels must load and hold 2254 topics;
    # the first key must not be an int.
    judgments = search.get_qrels('car17v2.0-benchmarkY1test')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 2254)
    leading_key = next(iter(judgments.keys()))
    self.assertNotIsInstance(leading_key, int)
def test_msmarco_doc1(self):
    # Spot-check the 'msmarco-doc-dev' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('msmarco-doc-dev'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 5193)
    self.assertEqual(head, "2 0 D1650436 1")
    self.assertEqual(middle, "855050 0 D2851565 1")
    self.assertEqual(tail, "1102400 0 D677570 1")
def test_msmarco_doc2(self):
    # 'msmarco-doc-dev' qrels must load, hold 5193 topics, and have an int first key.
    judgments = search.get_qrels('msmarco-doc-dev')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 5193)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_msmarco_passage1(self):
    # Spot-check the 'msmarco-passage-dev-subset' qrels file: line count plus
    # the first, middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('msmarco-passage-dev-subset'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 7437)
    self.assertEqual(head, "300674 0 7067032 1")
    self.assertEqual(middle, "573452 0 3182069 1")
    self.assertEqual(tail, "195199 0 8009377 1")
def test_msmarco_passage2(self):
    # 'msmarco-passage-dev-subset' qrels must load, hold 6980 topics,
    # and have an int first key.
    judgments = search.get_qrels('msmarco-passage-dev-subset')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 6980)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_dl19_doc(self):
    # Each of the three 'dl19-doc*' topic sets, then the 'dl19-doc' qrels,
    # must load with 43 entries and a first key that is not a str.
    topic_sets = ('dl19-doc', 'dl19-doc-unicoil', 'dl19-doc-unicoil-noexp')
    for topics_name in topic_sets:
        loaded_topics = search.get_topics(topics_name)
        self.assertIsNotNone(loaded_topics)
        self.assertEqual(len(loaded_topics), 43)
        self.assertNotIsInstance(next(iter(loaded_topics.keys())), str)

    judgments = search.get_qrels('dl19-doc')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 43)
    self.assertNotIsInstance(next(iter(judgments.keys())), str)
def test_dl19_passage(self):
    # Each of the three 'dl19-passage*' topic sets, then the 'dl19-passage'
    # qrels, must load with 43 entries and a first key that is not a str.
    topic_sets = ('dl19-passage', 'dl19-passage-unicoil', 'dl19-passage-unicoil-noexp')
    for topics_name in topic_sets:
        loaded_topics = search.get_topics(topics_name)
        self.assertIsNotNone(loaded_topics)
        self.assertEqual(len(loaded_topics), 43)
        self.assertNotIsInstance(next(iter(loaded_topics.keys())), str)

    judgments = search.get_qrels('dl19-passage')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 43)
    self.assertNotIsInstance(next(iter(judgments.keys())), str)
def test_dl20(self):
    # 'dl20-doc' (45 topics) and 'dl20-passage' (54 topics) qrels must load;
    # the first key of each must not be a str.
    expected_sizes = (('dl20-doc', 45), ('dl20-passage', 54))
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertNotIsInstance(next(iter(judgments.keys())), str)
# MS MARCO V2 | |
def test_msmarco_v2_doc(self):
    # 'msmarco-v2-doc-dev' (4552 topics) and 'msmarco-v2-doc-dev2'
    # (5000 topics) qrels must load with an int first key.
    expected_sizes = (('msmarco-v2-doc-dev', 4552), ('msmarco-v2-doc-dev2', 5000))
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_msmarco_v2_passage(self):
    # 'msmarco-v2-passage-dev' (3903 topics) and 'msmarco-v2-passage-dev2'
    # (4281 topics) qrels must load with an int first key.
    expected_sizes = (('msmarco-v2-passage-dev', 3903), ('msmarco-v2-passage-dev2', 4281))
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_dl21(self):
    # For 'dl21-doc' and 'dl21-passage' qrels: check the topic count, the
    # summed length of all per-topic entries, and that the first key is not a str.
    expectations = (('dl21-doc', 57, 13058), ('dl21-passage', 53, 10828))
    for qrels_name, num_topics, num_judgments in expectations:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        per_topic_sizes = (len(judgments[topic_id]) for topic_id in judgments)
        self.assertEqual(sum(per_topic_sizes), num_judgments)
        self.assertNotIsInstance(next(iter(judgments.keys())), str)
# Various multilingual test collections
def test_ntcir8_zh(self):
    # 'ntcir8-zh' qrels must load, hold 100 topics, and have a str first key.
    judgments = search.get_qrels('ntcir8-zh')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 100)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, str)
def test_clef2006_fr(self):
    # 'clef2006-fr' qrels must load, hold 49 topics, and have a str first key.
    judgments = search.get_qrels('clef2006-fr')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 49)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, str)
def test_trec2002_ar(self):
    # 'trec2002-ar' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('trec2002-ar')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_fire2012_bn(self):
    # 'fire2012-bn' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('fire2012-bn')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_fire2012_hi(self):
    # 'fire2012-hi' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('fire2012-hi')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_fire2012_en(self):
    # 'fire2012-en' qrels must load, hold 50 topics, and have an int first key.
    judgments = search.get_qrels('fire2012-en')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_covid_round1(self):
    # Spot-check the 'covid-round1' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 30 topics, int first key.
    records = read_file_lines(search.get_qrels_file('covid-round1'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 8691)
    self.assertEqual(head, "1 0.5 010vptx3 2")
    self.assertEqual(middle, "15 0.5 01rdlf8l 0")
    self.assertEqual(tail, "30 0.5 zn87f1lk 1")

    judgments = search.get_qrels('covid-round1')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 30)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_covid_round2(self):
    # Spot-check the 'covid-round2' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 35 topics, int first key.
    records = read_file_lines(search.get_qrels_file('covid-round2'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 12037)
    self.assertEqual(head, "1 1.5 08efpohc 0")
    self.assertEqual(middle, "16 2 uj0i2anr 0")
    self.assertEqual(tail, "35 2 zzmfhr2s 0")

    judgments = search.get_qrels('covid-round2')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 35)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_covid_round3(self):
    # Spot-check the 'covid-round3' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 40 topics, int first key.
    records = read_file_lines(search.get_qrels_file('covid-round3'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 12713)
    self.assertEqual(head, "1 2.5 0194oljo 1")
    self.assertEqual(middle, "19 2.5 s0o0egw8 2")
    self.assertEqual(tail, "40 3 zsx7wfyj 1")

    judgments = search.get_qrels('covid-round3')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 40)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_covid_round4(self):
    # Spot-check the 'covid-round4' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 45 topics, int first key.
    records = read_file_lines(search.get_qrels_file('covid-round4'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 13262)
    self.assertEqual(head, "1 4 00fmeepz 1")
    self.assertEqual(middle, "27 4 hmh4s3w4 0")
    self.assertEqual(tail, "45 4 zzrsk1ls 2")

    judgments = search.get_qrels('covid-round4')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 45)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_covid_round5(self):
    # Spot-check the 'covid-round5' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 50 topics, int first key.
    records = read_file_lines(search.get_qrels_file('covid-round5'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 23151)
    self.assertEqual(head, "1 4.5 005b2j4b 2")
    self.assertEqual(middle, "36 4.5 ylgmn69k 0")
    self.assertEqual(tail, "50 5 zz8wvos9 1")

    judgments = search.get_qrels('covid-round5')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_covid_round3_cumulative(self):
    # Spot-check the 'covid-round3-cumulative' qrels file: line count plus
    # the first, middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('covid-round3-cumulative'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 33068)
    self.assertEqual(head, "1 0.5 010vptx3 2")
    self.assertEqual(middle, "17 1.5 4txctk7k 0")
    self.assertEqual(tail, "40 3 zsx7wfyj 1")
def test_covid_round4_cumulative(self):
    # Spot-check the 'covid-round4-cumulative' qrels file: line count plus
    # the first, middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('covid-round4-cumulative'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 46203)
    self.assertEqual(head, "1 4 00fmeepz 1")
    self.assertEqual(middle, "19 1 bt5857p3 0")
    self.assertEqual(tail, "45 4 zzrsk1ls 2")
def test_covid_complete(self):
    # Spot-check the 'covid-complete' qrels file: line count plus the first,
    # middle, and last lines (trailing whitespace stripped).
    records = read_file_lines(search.get_qrels_file('covid-complete'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 69318)
    self.assertEqual(head, "1 4.5 005b2j4b 2")
    self.assertEqual(middle, "23 5 71jjbyds 0")
    self.assertEqual(tail, "50 5 zz8wvos9 1")
def test_trec2018_bl(self):
    # Spot-check the 'trec2018-bl' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 50 topics, int first key.
    records = read_file_lines(search.get_qrels_file('trec2018-bl'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 8508)
    self.assertEqual(head, "321 0 00f57310e5c8ec7833d6756ba637332e 16")
    self.assertEqual(middle, "809 0 921073ca-c0a3-11e1-9ce8-ff26651238d0 0")
    self.assertEqual(tail, "825 0 f66b624ba8689d704872fa776fb52860 0")

    judgments = search.get_qrels('trec2018-bl')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 50)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_trec2019_bl(self):
    # Spot-check the 'trec2019-bl' qrels file (line count; first, middle,
    # last lines), then check the loaded qrels: 57 topics, int first key.
    records = read_file_lines(search.get_qrels_file('trec2019-bl'))
    total = len(records)
    head, middle, tail = (records[i].rstrip() for i in (0, total // 2, -1))
    self.assertEqual(total, 15655)
    self.assertEqual(head, "826 0 0154349511cd8c49ab862d6cb0d8f6a8 2")
    self.assertEqual(middle, "853 0 2444d88d62539b0b88dc919909cb9701 2")
    self.assertEqual(tail, "885 0 fde80cb0-b4f0-11e2-bbf2-a6f9e9d79e19 0")

    judgments = search.get_qrels('trec2019-bl')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 57)
    self.assertIsInstance(next(iter(judgments.keys())), int)
def test_trec2020_bl(self):
    # 'trec2020-bl' qrels must load, hold 49 topics, and have an int first key.
    judgments = search.get_qrels('trec2020-bl')
    self.assertIsNotNone(judgments)
    self.assertEqual(len(judgments), 49)
    leading_key = next(iter(judgments.keys()))
    self.assertIsInstance(leading_key, int)
def test_mrtydi_11_ar(self):
    # Each 'mrtydi-v1.1-arabic-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-arabic-train', 12377),
        ('mrtydi-v1.1-arabic-dev', 3115),
        ('mrtydi-v1.1-arabic-test', 1081),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_bn(self):
    # Each 'mrtydi-v1.1-bengali-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-bengali-train', 1713),
        ('mrtydi-v1.1-bengali-dev', 440),
        ('mrtydi-v1.1-bengali-test', 111),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_en(self):
    # Each 'mrtydi-v1.1-english-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-english-train', 3547),
        ('mrtydi-v1.1-english-dev', 878),
        ('mrtydi-v1.1-english-test', 744),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_fi(self):
    # Each 'mrtydi-v1.1-finnish-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-finnish-train', 6561),
        ('mrtydi-v1.1-finnish-dev', 1738),
        ('mrtydi-v1.1-finnish-test', 1254),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_id(self):
    # Each 'mrtydi-v1.1-indonesian-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-indonesian-train', 4902),
        ('mrtydi-v1.1-indonesian-dev', 1224),
        ('mrtydi-v1.1-indonesian-test', 829),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_ja(self):
    # Each 'mrtydi-v1.1-japanese-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-japanese-train', 3697),
        ('mrtydi-v1.1-japanese-dev', 928),
        ('mrtydi-v1.1-japanese-test', 720),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_ko(self):
    # Each 'mrtydi-v1.1-korean-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-korean-train', 1295),
        ('mrtydi-v1.1-korean-dev', 303),
        ('mrtydi-v1.1-korean-test', 421),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_ru(self):
    # Each 'mrtydi-v1.1-russian-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-russian-train', 5366),
        ('mrtydi-v1.1-russian-dev', 1375),
        ('mrtydi-v1.1-russian-test', 995),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_sw(self):
    # Each 'mrtydi-v1.1-swahili-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-swahili-train', 2072),
        ('mrtydi-v1.1-swahili-dev', 526),
        ('mrtydi-v1.1-swahili-test', 670),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_te(self):
    # Each 'mrtydi-v1.1-telugu-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-telugu-train', 3880),
        ('mrtydi-v1.1-telugu-dev', 983),
        ('mrtydi-v1.1-telugu-test', 646),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mrtydi_11_th(self):
    # Each 'mrtydi-v1.1-thai-*' qrels set must load with the expected
    # topic count and an int first key.
    expected_sizes = (
        ('mrtydi-v1.1-thai-train', 3319),
        ('mrtydi-v1.1-thai-dev', 807),
        ('mrtydi-v1.1-thai-test', 1190),
    )
    for qrels_name, num_topics in expected_sizes:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), int)
def test_mircal_10(self):
    # Each 'miracl-v1.0-*-dev' qrels set must load with the expected topic
    # count; the type of the first key differs by language (int or str).
    expectations = (
        ('miracl-v1.0-ar-dev', 2896, int),
        ('miracl-v1.0-bn-dev', 411, int),
        ('miracl-v1.0-en-dev', 799, int),
        ('miracl-v1.0-es-dev', 648, str),  # note, not int
        ('miracl-v1.0-fa-dev', 632, str),  # note, not int
        ('miracl-v1.0-fi-dev', 1271, int),
        ('miracl-v1.0-fr-dev', 343, str),  # note, not int
        ('miracl-v1.0-hi-dev', 350, str),  # note, not int
        ('miracl-v1.0-id-dev', 960, int),
        ('miracl-v1.0-ja-dev', 860, int),
        ('miracl-v1.0-ko-dev', 213, int),
        ('miracl-v1.0-ru-dev', 1252, int),
        ('miracl-v1.0-sw-dev', 482, int),
        ('miracl-v1.0-te-dev', 828, int),
        ('miracl-v1.0-th-dev', 733, int),
        ('miracl-v1.0-zh-dev', 393, str),  # note, not int
        ('miracl-v1.0-de-dev', 305, str),  # note, not int
        ('miracl-v1.0-yo-dev', 119, str),  # note, not int
    )
    for qrels_name, num_topics, key_type in expectations:
        judgments = search.get_qrels(qrels_name)
        self.assertIsNotNone(judgments)
        self.assertEqual(len(judgments), num_topics)
        self.assertIsInstance(next(iter(judgments.keys())), key_type)
def test_beir(self):
    """Check entry and judgment counts for each BEIR v1.0.0 test qrels set.

    For every name in the table below, ``search.get_qrels`` must return a
    non-None result whose length equals the expected number of topics and
    whose per-topic entries sum to the expected number of judgments.
    """
    # (qrels name, expected number of topics, expected total judgments)
    expected_counts = (
        ('beir-v1.0.0-trec-covid-test', 50, 66334),
        ('beir-v1.0.0-bioasq-test', 500, 2359),
        ('beir-v1.0.0-nfcorpus-test', 323, 12334),
        ('beir-v1.0.0-nq-test', 3452, 4201),
        ('beir-v1.0.0-hotpotqa-test', 7405, 14810),
        ('beir-v1.0.0-fiqa-test', 648, 1706),
        ('beir-v1.0.0-signal1m-test', 97, 1899),
        ('beir-v1.0.0-trec-news-test', 57, 15655),
        ('beir-v1.0.0-robust04-test', 249, 311410),
        ('beir-v1.0.0-arguana-test', 1406, 1406),
        ('beir-v1.0.0-webis-touche2020-test', 49, 932),
        ('beir-v1.0.0-cqadupstack-android-test', 699, 1696),
        ('beir-v1.0.0-cqadupstack-english-test', 1570, 3765),
        ('beir-v1.0.0-cqadupstack-gaming-test', 1595, 2263),
        ('beir-v1.0.0-cqadupstack-gis-test', 885, 1114),
        ('beir-v1.0.0-cqadupstack-mathematica-test', 804, 1358),
        ('beir-v1.0.0-cqadupstack-physics-test', 1039, 1933),
        ('beir-v1.0.0-cqadupstack-programmers-test', 876, 1675),
        ('beir-v1.0.0-cqadupstack-stats-test', 652, 913),
        ('beir-v1.0.0-cqadupstack-tex-test', 2906, 5154),
        ('beir-v1.0.0-cqadupstack-unix-test', 1072, 1693),
        ('beir-v1.0.0-cqadupstack-webmasters-test', 506, 1395),
        ('beir-v1.0.0-cqadupstack-wordpress-test', 541, 744),
        ('beir-v1.0.0-quora-test', 10000, 15675),
        ('beir-v1.0.0-dbpedia-entity-test', 400, 43515),
        ('beir-v1.0.0-scidocs-test', 1000, 29928),
        ('beir-v1.0.0-fever-test', 6666, 7937),
        ('beir-v1.0.0-climate-fever-test', 1535, 4681),
        ('beir-v1.0.0-scifact-test', 300, 339),
    )

    for name, num_topics, num_judgments in expected_counts:
        # subTest tags any failure with the qrels name and lets the
        # remaining sets still be checked after one of them fails.
        with self.subTest(qrels=name):
            qrels = search.get_qrels(name)
            self.assertIsNotNone(qrels)
            self.assertEqual(len(qrels), num_topics)
            total_judgments = sum(len(qrels[topic_id]) for topic_id in qrels)
            self.assertEqual(total_judgments, num_judgments)
def test_hc4_10_fa(self):
    """Check the HC4 v1.0 'fa' dev and test qrels: size and int first key."""
    # (qrels name, expected number of entries)
    expected_sizes = (
        ('hc4-v1.0-fa-dev', 10),
        ('hc4-v1.0-fa-test', 50),
    )
    for name, size in expected_sizes:
        qrels = search.get_qrels(name)
        self.assertIsNotNone(qrels)
        self.assertEqual(len(qrels), size)
        first_key = next(iter(qrels.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_hc4_10_ru(self):
    """Check the HC4 v1.0 'ru' dev and test qrels: size and int first key."""
    # (qrels name, expected number of entries)
    expected_sizes = (
        ('hc4-v1.0-ru-dev', 4),
        ('hc4-v1.0-ru-test', 50),
    )
    for name, size in expected_sizes:
        qrels = search.get_qrels(name)
        self.assertIsNotNone(qrels)
        self.assertEqual(len(qrels), size)
        first_key = next(iter(qrels.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_hc4_10_zh(self):
    """Check the HC4 v1.0 'zh' dev and test qrels: size and int first key."""
    # (qrels name, expected number of entries)
    expected_sizes = (
        ('hc4-v1.0-zh-dev', 10),
        ('hc4-v1.0-zh-test', 50),
    )
    for name, size in expected_sizes:
        qrels = search.get_qrels(name)
        self.assertIsNotNone(qrels)
        self.assertEqual(len(qrels), size)
        first_key = next(iter(qrels.keys()))
        self.assertTrue(isinstance(first_key, int))
def test_hc4_neuclir22(self):
    """Check the HC4 NeuCLIR22 'fa', 'ru' and 'zh' test qrels.

    Each set must load, have the expected number of entries, and have an
    int as its first key.
    """
    # (qrels name, expected number of entries)
    expected_sizes = (
        ('hc4-neuclir22-fa-test', 50),
        ('hc4-neuclir22-ru-test', 50),
        # For whatever reason, these qrels also have dev topics.
        ('hc4-neuclir22-zh-test', 60),
    )
    for name, size in expected_sizes:
        qrels = search.get_qrels(name)
        self.assertIsNotNone(qrels)
        self.assertEqual(len(qrels), size)
        first_key = next(iter(qrels.keys()))
        self.assertTrue(isinstance(first_key, int))
def tearDown(self):
    """Remove the 'temp_dir' directory if present and blank PYSERINI_CACHE."""
    cache_dir = 'temp_dir'
    if os.path.exists(cache_dir):
        shutil.rmtree(cache_dir)
    # The variable is set to an empty string, not removed from the environment.
    os.environ['PYSERINI_CACHE'] = ''
# Run the tests in this file when it is executed directly as a script.
if __name__ == '__main__':
    unittest.main()