#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import os
import shutil
import unittest

from pyserini import search


def read_file_lines(path):
    """Return all lines of the text file at ``path`` (line endings kept)."""
    with open(path) as f:
        return f.readlines()


class TestGetQrels(unittest.TestCase):
    """Checks the qrels (and a few topic sets) exposed by ``pyserini.search``.

    Every test runs with ``PYSERINI_CACHE`` pointing at the scratch directory
    ``temp_dir``, which is deleted again in ``tearDown``.
    """

    def setUp(self):
        # Remember the caller's setting so tearDown can put it back instead of
        # leaving the variable clobbered for whatever runs after this suite.
        self._saved_cache = os.environ.get('PYSERINI_CACHE')
        os.environ['PYSERINI_CACHE'] = 'temp_dir'

    # ------------------------------------------------------------------
    # Shared assertion helpers
    # ------------------------------------------------------------------

    def _check_mapping(self, mapping, label, size, key_is=None, key_is_not=None, judgments=None):
        """Assert the basic shape of a topic-keyed mapping.

        Args:
            mapping: object returned by ``search.get_qrels`` / ``search.get_topics``.
            label: dataset name, used only as the assertion failure message.
            size: expected number of keys in ``mapping``.
            key_is: if given, the first key must be an instance of this type.
            key_is_not: if given, the first key must NOT be an instance of this type.
            judgments: if given, expected total of ``len(mapping[k])`` over all keys.
        """
        self.assertIsNotNone(mapping, label)
        self.assertEqual(len(mapping), size, label)
        if judgments is not None:
            self.assertEqual(sum(len(mapping[topic_id]) for topic_id in mapping), judgments, label)
        if key_is is not None:
            self.assertIsInstance(next(iter(mapping.keys())), key_is, label)
        if key_is_not is not None:
            self.assertNotIsInstance(next(iter(mapping.keys())), key_is_not, label)

    def _check_qrels(self, name, size, **expectations):
        """Load qrels ``name`` via ``search.get_qrels`` and check them (see ``_check_mapping``)."""
        self._check_mapping(search.get_qrels(name), name, size, **expectations)

    def _check_topics(self, name, size, **expectations):
        """Load topics ``name`` via ``search.get_topics`` and check them (see ``_check_mapping``)."""
        self._check_mapping(search.get_topics(name), name, size, **expectations)

    def _check_qrels_file(self, name, length, first, mid, last):
        """Check line count plus first / middle / last line of the qrels file for ``name``.

        The middle line is the one at index ``len(lines) // 2``; lines are compared
        after ``rstrip()``.
        """
        lines = read_file_lines(search.get_qrels_file(name))
        # Check the count before indexing so an empty file is reported as a
        # test failure rather than an IndexError.
        self.assertEqual(len(lines), length, name)
        self.assertEqual(lines[0].rstrip(), first, name)
        self.assertEqual(lines[len(lines) // 2].rstrip(), mid, name)
        self.assertEqual(lines[-1].rstrip(), last, name)

    # ------------------------------------------------------------------
    # Tests
    # ------------------------------------------------------------------

    def test_trec1_adhoc(self):
        self._check_qrels('trec1-adhoc', 50, key_is=int)

    def test_trec2_adhoc(self):
        self._check_qrels('trec2-adhoc', 50, key_is=int)

    def test_trec3_adhoc(self):
        self._check_qrels('trec3-adhoc', 50, key_is=int)

    def test_robust04a(self):
        self._check_qrels_file('robust04', 311410,
                               "301 0 FBIS3-10082 1",
                               "409 0 LA010189-0112 0",
                               "700 0 LA123090-0137 0")

    def test_robust04b(self):
        self._check_qrels('robust04', 249, key_is=int)

    def test_robust05a(self):
        self._check_qrels_file('robust05', 37798,
                               "303 0 APW19980609.1531 2",
                               "397 0 XIE19960920.0297 0",
                               "689 0 XIE20000925.0055 0")

    def test_robust05b(self):
        self._check_qrels('robust05', 50, key_is=int)

    def test_core17a(self):
        self._check_qrels_file('core17', 30030,
                               "307 0 1001536 1",
                               "393 0 1586039 2",
                               "690 0 996059 0")

    def test_core17b(self):
        self._check_qrels('core17', 50, key_is=int)

    def test_core18a(self):
        self._check_qrels_file('core18', 26233,
                               "321 0 004c6120d0aa69da29cc045da0562168 0",
                               "646 0 260365e8-eb18-11e2-a301-ea5a8116d211 0",
                               "825 0 ff3a25b0-0ba4-11e4-8341-b8072b1e7348 0")

    def test_core18b(self):
        self._check_qrels('core18', 50, key_is=int)

    def test_wt10g(self):
        self._check_qrels('wt10g', 100, key_is=int)

    def test_trec2004_terabyte(self):
        self._check_qrels('trec2004-terabyte', 49, key_is=int)

    def test_trec2005_terabyte(self):
        self._check_qrels('trec2005-terabyte', 50, key_is=int)

    def test_trec2006_terabyte(self):
        self._check_qrels('trec2006-terabyte', 50, key_is=int)

    def test_trec2011_web(self):
        self._check_qrels('trec2011-web', 50, key_is=int)

    def test_trec2012_web(self):
        self._check_qrels('trec2012-web', 50, key_is=int)

    def test_trec2013_web(self):
        self._check_qrels('trec2013-web', 50, key_is=int)

    def test_trec2014_web(self):
        self._check_qrels('trec2014-web', 50, key_is=int)

    def test_mb11(self):
        self._check_qrels('mb11', 49, key_is=int)

    def test_mb12(self):
        self._check_qrels('mb12', 59, key_is=int)

    def test_mb13(self):
        self._check_qrels('mb13', 60, key_is=int)

    def test_mb14(self):
        self._check_qrels('mb14', 55, key_is=int)

    def test_car15a(self):
        self._check_qrels_file('car17v1.5-benchmarkY1test', 5820,
                               "Aftertaste/Aftertaste%20processing%20in%20the%20cerebral%20cortex "
                               "0 38c1bd25ddca2705164677a3f598c46df85afba7 1",
                               "Insular%20cortex/Function/Interoceptive%20awareness "
                               "0 f037f925acd4c59e802a58aa74430fc6aa163606 1",
                               "Yellowstone%20National%20Park/Recreation"
                               " 0 e80b5185da1493edde41bea19a389a3f62167369 1")

    def test_car15b(self):
        self._check_qrels('car17v1.5-benchmarkY1test', 2125, key_is_not=int)

    def test_car20a(self):
        self._check_qrels_file('car17v2.0-benchmarkY1test', 6192,
                               "enwiki:Aftertaste 0 327cca6c4d38953196fa6789f615546f03287b25 1",
                               "enwiki:Insular%20cortex/Function/Interoceptive%20awareness"
                               " 0 f037f925acd4c59e802a58aa74430fc6aa163606 1",
                               "enwiki:Yellowstone%20National%20Park/Recreation"
                               " 0 b812fca195f74f8c563db4262260554fe3ff3731 1")

    def test_car20b(self):
        self._check_qrels('car17v2.0-benchmarkY1test', 2254, key_is_not=int)

    def test_msmarco_doc1(self):
        self._check_qrels_file('msmarco-doc-dev', 5193,
                               "2 0 D1650436 1",
                               "855050 0 D2851565 1",
                               "1102400 0 D677570 1")

    def test_msmarco_doc2(self):
        self._check_qrels('msmarco-doc-dev', 5193, key_is=int)

    def test_msmarco_passage1(self):
        self._check_qrels_file('msmarco-passage-dev-subset', 7437,
                               "300674 0 7067032 1",
                               "573452 0 3182069 1",
                               "195199 0 8009377 1")

    def test_msmarco_passage2(self):
        self._check_qrels('msmarco-passage-dev-subset', 6980, key_is=int)

    def test_dl19_doc(self):
        self._check_topics('dl19-doc', 43, key_is_not=str)
        self._check_topics('dl19-doc-unicoil', 43, key_is_not=str)
        self._check_topics('dl19-doc-unicoil-noexp', 43, key_is_not=str)
        self._check_qrels('dl19-doc', 43, key_is_not=str)

    def test_dl19_passage(self):
        self._check_topics('dl19-passage', 43, key_is_not=str)
        self._check_topics('dl19-passage-unicoil', 43, key_is_not=str)
        self._check_topics('dl19-passage-unicoil-noexp', 43, key_is_not=str)
        self._check_qrels('dl19-passage', 43, key_is_not=str)

    def test_dl20(self):
        self._check_qrels('dl20-doc', 45, key_is_not=str)
        self._check_qrels('dl20-passage', 54, key_is_not=str)

    # MS MARCO V2
    def test_msmarco_v2_doc(self):
        self._check_qrels('msmarco-v2-doc-dev', 4552, key_is=int)
        self._check_qrels('msmarco-v2-doc-dev2', 5000, key_is=int)

    def test_msmarco_v2_passage(self):
        self._check_qrels('msmarco-v2-passage-dev', 3903, key_is=int)
        self._check_qrels('msmarco-v2-passage-dev2', 4281, key_is=int)

    def test_dl21(self):
        self._check_qrels('dl21-doc', 57, judgments=13058, key_is_not=str)
        self._check_qrels('dl21-passage', 53, judgments=10828, key_is_not=str)

    # Various multi-lingual test collections
    def test_ntcir8_zh(self):
        self._check_qrels('ntcir8-zh', 100, key_is=str)

    def test_clef2006_fr(self):
        self._check_qrels('clef2006-fr', 49, key_is=str)

    def test_trec2002_ar(self):
        self._check_qrels('trec2002-ar', 50, key_is=int)

    def test_fire2012_bn(self):
        self._check_qrels('fire2012-bn', 50, key_is=int)

    def test_fire2012_hi(self):
        self._check_qrels('fire2012-hi', 50, key_is=int)

    def test_fire2012_en(self):
        self._check_qrels('fire2012-en', 50, key_is=int)

    def test_covid_round1(self):
        self._check_qrels_file('covid-round1', 8691,
                               "1 0.5 010vptx3 2",
                               "15 0.5 01rdlf8l 0",
                               "30 0.5 zn87f1lk 1")
        self._check_qrels('covid-round1', 30, key_is=int)

    def test_covid_round2(self):
        self._check_qrels_file('covid-round2', 12037,
                               "1 1.5 08efpohc 0",
                               "16 2 uj0i2anr 0",
                               "35 2 zzmfhr2s 0")
        self._check_qrels('covid-round2', 35, key_is=int)

    def test_covid_round3(self):
        self._check_qrels_file('covid-round3', 12713,
                               "1 2.5 0194oljo 1",
                               "19 2.5 s0o0egw8 2",
                               "40 3 zsx7wfyj 1")
        self._check_qrels('covid-round3', 40, key_is=int)

    def test_covid_round4(self):
        self._check_qrels_file('covid-round4', 13262,
                               "1 4 00fmeepz 1",
                               "27 4 hmh4s3w4 0",
                               "45 4 zzrsk1ls 2")
        self._check_qrels('covid-round4', 45, key_is=int)

    def test_covid_round5(self):
        self._check_qrels_file('covid-round5', 23151,
                               "1 4.5 005b2j4b 2",
                               "36 4.5 ylgmn69k 0",
                               "50 5 zz8wvos9 1")
        self._check_qrels('covid-round5', 50, key_is=int)

    def test_covid_round3_cumulative(self):
        self._check_qrels_file('covid-round3-cumulative', 33068,
                               "1 0.5 010vptx3 2",
                               "17 1.5 4txctk7k 0",
                               "40 3 zsx7wfyj 1")

    def test_covid_round4_cumulative(self):
        self._check_qrels_file('covid-round4-cumulative', 46203,
                               "1 4 00fmeepz 1",
                               "19 1 bt5857p3 0",
                               "45 4 zzrsk1ls 2")

    def test_covid_complete(self):
        self._check_qrels_file('covid-complete', 69318,
                               "1 4.5 005b2j4b 2",
                               "23 5 71jjbyds 0",
                               "50 5 zz8wvos9 1")

    def test_trec2018_bl(self):
        self._check_qrels_file('trec2018-bl', 8508,
                               "321 0 00f57310e5c8ec7833d6756ba637332e 16",
                               "809 0 921073ca-c0a3-11e1-9ce8-ff26651238d0 0",
                               "825 0 f66b624ba8689d704872fa776fb52860 0")
        self._check_qrels('trec2018-bl', 50, key_is=int)

    def test_trec2019_bl(self):
        self._check_qrels_file('trec2019-bl', 15655,
                               "826 0 0154349511cd8c49ab862d6cb0d8f6a8 2",
                               "853 0 2444d88d62539b0b88dc919909cb9701 2",
                               "885 0 fde80cb0-b4f0-11e2-bbf2-a6f9e9d79e19 0")
        self._check_qrels('trec2019-bl', 57, key_is=int)

    def test_trec2020_bl(self):
        self._check_qrels('trec2020-bl', 49, key_is=int)

    def test_mrtydi_11_ar(self):
        self._check_qrels('mrtydi-v1.1-arabic-train', 12377, key_is=int)
        self._check_qrels('mrtydi-v1.1-arabic-dev', 3115, key_is=int)
        self._check_qrels('mrtydi-v1.1-arabic-test', 1081, key_is=int)

    def test_mrtydi_11_bn(self):
        self._check_qrels('mrtydi-v1.1-bengali-train', 1713, key_is=int)
        self._check_qrels('mrtydi-v1.1-bengali-dev', 440, key_is=int)
        self._check_qrels('mrtydi-v1.1-bengali-test', 111, key_is=int)

    def test_mrtydi_11_en(self):
        self._check_qrels('mrtydi-v1.1-english-train', 3547, key_is=int)
        self._check_qrels('mrtydi-v1.1-english-dev', 878, key_is=int)
        self._check_qrels('mrtydi-v1.1-english-test', 744, key_is=int)

    def test_mrtydi_11_fi(self):
        self._check_qrels('mrtydi-v1.1-finnish-train', 6561, key_is=int)
        self._check_qrels('mrtydi-v1.1-finnish-dev', 1738, key_is=int)
        self._check_qrels('mrtydi-v1.1-finnish-test', 1254, key_is=int)

    def test_mrtydi_11_id(self):
        self._check_qrels('mrtydi-v1.1-indonesian-train', 4902, key_is=int)
        self._check_qrels('mrtydi-v1.1-indonesian-dev', 1224, key_is=int)
        self._check_qrels('mrtydi-v1.1-indonesian-test', 829, key_is=int)

    def test_mrtydi_11_ja(self):
        self._check_qrels('mrtydi-v1.1-japanese-train', 3697, key_is=int)
        self._check_qrels('mrtydi-v1.1-japanese-dev', 928, key_is=int)
        self._check_qrels('mrtydi-v1.1-japanese-test', 720, key_is=int)

    def test_mrtydi_11_ko(self):
        self._check_qrels('mrtydi-v1.1-korean-train', 1295, key_is=int)
        self._check_qrels('mrtydi-v1.1-korean-dev', 303, key_is=int)
        self._check_qrels('mrtydi-v1.1-korean-test', 421, key_is=int)

    def test_mrtydi_11_ru(self):
        self._check_qrels('mrtydi-v1.1-russian-train', 5366, key_is=int)
        self._check_qrels('mrtydi-v1.1-russian-dev', 1375, key_is=int)
        self._check_qrels('mrtydi-v1.1-russian-test', 995, key_is=int)

    def test_mrtydi_11_sw(self):
        self._check_qrels('mrtydi-v1.1-swahili-train', 2072, key_is=int)
        self._check_qrels('mrtydi-v1.1-swahili-dev', 526, key_is=int)
        self._check_qrels('mrtydi-v1.1-swahili-test', 670, key_is=int)

    def test_mrtydi_11_te(self):
        self._check_qrels('mrtydi-v1.1-telugu-train', 3880, key_is=int)
        self._check_qrels('mrtydi-v1.1-telugu-dev', 983, key_is=int)
        self._check_qrels('mrtydi-v1.1-telugu-test', 646, key_is=int)

    def test_mrtydi_11_th(self):
        self._check_qrels('mrtydi-v1.1-thai-train', 3319, key_is=int)
        self._check_qrels('mrtydi-v1.1-thai-dev', 807, key_is=int)
        self._check_qrels('mrtydi-v1.1-thai-test', 1190, key_is=int)

    def test_mircal_10(self):
        # (qrels name, number of topics, type of the first topic id)
        expected = (
            ('miracl-v1.0-ar-dev', 2896, int),
            ('miracl-v1.0-bn-dev', 411, int),
            ('miracl-v1.0-en-dev', 799, int),
            ('miracl-v1.0-es-dev', 648, str),  # note, not int
            ('miracl-v1.0-fa-dev', 632, str),  # note, not int
            ('miracl-v1.0-fi-dev', 1271, int),
            ('miracl-v1.0-fr-dev', 343, str),  # note, not int
            ('miracl-v1.0-hi-dev', 350, str),  # note, not int
            ('miracl-v1.0-id-dev', 960, int),
            ('miracl-v1.0-ja-dev', 860, int),
            ('miracl-v1.0-ko-dev', 213, int),
            ('miracl-v1.0-ru-dev', 1252, int),
            ('miracl-v1.0-sw-dev', 482, int),
            ('miracl-v1.0-te-dev', 828, int),
            ('miracl-v1.0-th-dev', 733, int),
            ('miracl-v1.0-zh-dev', 393, str),  # note, not int
            ('miracl-v1.0-de-dev', 305, str),  # note, not int
            ('miracl-v1.0-yo-dev', 119, str),  # note, not int
        )
        for name, size, key_type in expected:
            self._check_qrels(name, size, key_is=key_type)

    def test_beir(self):
        # (qrels name, number of topics, total number of judgments)
        expected = (
            ('beir-v1.0.0-trec-covid-test', 50, 66334),
            ('beir-v1.0.0-bioasq-test', 500, 2359),
            ('beir-v1.0.0-nfcorpus-test', 323, 12334),
            ('beir-v1.0.0-nq-test', 3452, 4201),
            ('beir-v1.0.0-hotpotqa-test', 7405, 14810),
            ('beir-v1.0.0-fiqa-test', 648, 1706),
            ('beir-v1.0.0-signal1m-test', 97, 1899),
            ('beir-v1.0.0-trec-news-test', 57, 15655),
            ('beir-v1.0.0-robust04-test', 249, 311410),
            ('beir-v1.0.0-arguana-test', 1406, 1406),
            ('beir-v1.0.0-webis-touche2020-test', 49, 932),
            ('beir-v1.0.0-cqadupstack-android-test', 699, 1696),
            ('beir-v1.0.0-cqadupstack-english-test', 1570, 3765),
            ('beir-v1.0.0-cqadupstack-gaming-test', 1595, 2263),
            ('beir-v1.0.0-cqadupstack-gis-test', 885, 1114),
            ('beir-v1.0.0-cqadupstack-mathematica-test', 804, 1358),
            ('beir-v1.0.0-cqadupstack-physics-test', 1039, 1933),
            ('beir-v1.0.0-cqadupstack-programmers-test', 876, 1675),
            ('beir-v1.0.0-cqadupstack-stats-test', 652, 913),
            ('beir-v1.0.0-cqadupstack-tex-test', 2906, 5154),
            ('beir-v1.0.0-cqadupstack-unix-test', 1072, 1693),
            ('beir-v1.0.0-cqadupstack-webmasters-test', 506, 1395),
            ('beir-v1.0.0-cqadupstack-wordpress-test', 541, 744),
            ('beir-v1.0.0-quora-test', 10000, 15675),
            ('beir-v1.0.0-dbpedia-entity-test', 400, 43515),
            ('beir-v1.0.0-scidocs-test', 1000, 29928),
            ('beir-v1.0.0-fever-test', 6666, 7937),
            ('beir-v1.0.0-climate-fever-test', 1535, 4681),
            ('beir-v1.0.0-scifact-test', 300, 339),
        )
        for name, size, judgments in expected:
            self._check_qrels(name, size, judgments=judgments)

    def test_hc4_10_fa(self):
        self._check_qrels('hc4-v1.0-fa-dev', 10, key_is=int)
        self._check_qrels('hc4-v1.0-fa-test', 50, key_is=int)

    def test_hc4_10_ru(self):
        self._check_qrels('hc4-v1.0-ru-dev', 4, key_is=int)
        self._check_qrels('hc4-v1.0-ru-test', 50, key_is=int)

    def test_hc4_10_zh(self):
        self._check_qrels('hc4-v1.0-zh-dev', 10, key_is=int)
        self._check_qrels('hc4-v1.0-zh-test', 50, key_is=int)

    def test_hc4_neuclir22(self):
        self._check_qrels('hc4-neuclir22-fa-test', 50, key_is=int)
        self._check_qrels('hc4-neuclir22-ru-test', 50, key_is=int)
        # For whatever reason, these qrels also have dev topics.
        self._check_qrels('hc4-neuclir22-zh-test', 60, key_is=int)

    def tearDown(self):
        if os.path.exists('temp_dir'):
            shutil.rmtree('temp_dir')
        # Restore whatever PYSERINI_CACHE held before setUp ran (or unset it).
        if self._saved_cache is None:
            os.environ.pop('PYSERINI_CACHE', None)
        else:
            os.environ['PYSERINI_CACHE'] = self._saved_cache


if __name__ == '__main__':
    unittest.main()