geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import shutil
import unittest
from random import randint
from integrations.lucenesearcher_score_checker import LuceneSearcherScoreChecker
from integrations.utils import run_command, parse_score
class TestSearchIntegration(unittest.TestCase):
    """End-to-end regression tests for the ``--prcl`` options of Pyserini's Lucene search.

    Every test shells out to ``pyserini.search.lucene`` (plus ``pyserini.fusion``
    for the RRF tests), scores the resulting run file with
    ``pyserini.eval.trec_eval`` and compares MAP against a recorded value.
    Indexes and the qrels used for scoring are read from an Anserini checkout
    located relative to the current working directory.
    """

    def setUp(self):
        """Resolve repository paths, create an empty scratch directory and build the BM25 checkers."""
        # The current directory depends on if you're running inside an IDE or from command line.
        if os.getcwd().endswith('clprf'):
            self.pyserini_root = '../..'
            self.anserini_root = '../../../anserini'
        else:
            self.pyserini_root = '.'
            self.anserini_root = '../anserini'

        self.tmp = f'{self.pyserini_root}/integrations/tmp{randint(0, 10000)}'
        # Always start from an empty scratch directory: a leftover directory with
        # the same random suffix is wiped, and the directory is (re)created in
        # either case so that the tests and tearDown can rely on it existing.
        if os.path.exists(self.tmp):
            shutil.rmtree(self.tmp)
        os.mkdir(self.tmp)

        self.pyserini_search_cmd = 'python -m pyserini.search.lucene'
        self.pyserini_fusion_cmd = 'python -m pyserini.fusion'
        self.pyserini_eval_cmd = 'python -m pyserini.eval.trec_eval'

        self.core17_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.nyt')
        self.core17_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core17.txt')

        self.core18_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.wapo.v2')
        self.core18_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.core18.txt')

        self.robust04_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.disk45')
        self.robust04_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust04.txt')

        self.robust05_index_path = os.path.join(self.anserini_root, 'indexes/lucene-index.robust05')
        self.robust05_qrels_path = os.path.join(self.pyserini_root, 'tools/topics-and-qrels/qrels.robust05.txt')

        self.core17_checker = self._make_checker('core17', self.core17_index_path, self.core17_qrels_path)
        self.core18_checker = self._make_checker('core18', self.core18_index_path, self.core18_qrels_path)
        self.robust04_checker = self._make_checker('robust04', self.robust04_index_path, self.robust04_qrels_path)
        self.robust05_checker = self._make_checker('robust05', self.robust05_index_path, self.robust05_qrels_path)

    # ------------------------------------------------------------------
    # Private helpers shared by the test methods below.
    # ------------------------------------------------------------------

    def _make_checker(self, collection, index_path, qrels_path):
        """Build the LuceneSearcherScoreChecker for ``collection`` (MAP and P@30 evaluation)."""
        return LuceneSearcherScoreChecker(
            index=index_path,
            topics=os.path.join(self.pyserini_root, f'tools/topics-and-qrels/topics.{collection}.txt'),
            pyserini_topics=collection,
            qrels=qrels_path,
            eval=f'{self.pyserini_eval_cmd} -m map -m P.30')

    def _index_path(self, collection):
        """Return the index path that setUp stored as ``self.<collection>_index_path``."""
        return getattr(self, f'{collection}_index_path')

    def _run_prcl_search(self, collection, output, classifiers, alpha, rm3=False):
        """Run a ``--prcl`` search over ``collection`` into ``output`` and assert a zero exit status.

        ``classifiers`` is passed verbatim after ``--prcl`` (e.g. ``'lr'``,
        ``'svm'`` or ``'lr svm'``); ``rm3`` appends the ``--rm3`` flag.
        """
        cmd = (f'{self.pyserini_search_cmd} --index {self._index_path(collection)} '
               f'--topics {collection} --output {output} '
               f'--prcl {classifiers} --prcl.vectorizer TfidfVectorizer --prcl.alpha {alpha}')
        if rm3:
            cmd += ' --rm3'
        self.assertEqual(os.system(cmd), 0)

    def _fuse(self, run_files, output):
        """Fuse ``run_files`` into ``output`` with ``pyserini.fusion`` and assert a zero exit status."""
        cmd = (f'{self.pyserini_fusion_cmd} '
               f'--runs {" ".join(run_files)} '
               f'--output {output} --resort')
        self.assertEqual(os.system(cmd), 0)

    def _assert_map(self, collection, run_file, expected_map):
        """Score ``run_file`` against the Anserini qrels of ``collection`` and check MAP.

        The eval command is executed twice: ``os.system`` provides the exit
        status and ``run_command`` provides stdout/stderr for score parsing.
        """
        cmd = (f'{self.pyserini_eval_cmd} -m map -m P.30 '
               f'{self.anserini_root}/src/main/resources/topics-and-qrels/qrels.{collection}.txt '
               f'{run_file}')
        # Check the exit status first so a failed eval is reported as such
        # rather than as a score-parsing error.
        self.assertEqual(os.system(cmd), 0)
        stdout, stderr = run_command(cmd)
        score = parse_score(stdout, 'map')
        self.assertEqual(stderr, '')
        self.assertAlmostEqual(score, expected_map, delta=0.0001)

    def _run_file(self, collection, label, rm3):
        """Return the scratch path ``<tmp>/<collection>_<label>[_rm3].txt``."""
        suffix = '_rm3' if rm3 else ''
        return f'{self.tmp}/{collection}_{label}{suffix}.txt'

    def _check_prcl(self, collection, label, classifiers, alpha, expected_map, rm3=False):
        """Run one ``--prcl`` search and verify its MAP."""
        output = self._run_file(collection, label, rm3)
        self._run_prcl_search(collection, output, classifiers, alpha, rm3)
        self._assert_map(collection, output, expected_map)

    def _check_rrf(self, collection, alpha, expected_map, rm3=False):
        """Run lr and svm searches with the same ``alpha``, fuse them and verify the fused MAP."""
        lr_run = self._run_file(collection, 'lr', rm3)
        svm_run = self._run_file(collection, 'svm', rm3)
        fused_run = self._run_file(collection, 'rrf', rm3)
        self._run_prcl_search(collection, lr_run, 'lr', alpha, rm3)
        self._run_prcl_search(collection, svm_run, 'svm', alpha, rm3)
        self._fuse([lr_run, svm_run], fused_run)
        self._assert_map(collection, fused_run, expected_map)

    # ------------------------------------------------------------------
    # Cross validation
    # ------------------------------------------------------------------

    def test_cross_validation(self):
        collection = 'core17'
        runs_dir = f'{self.tmp}/{collection}'
        os.mkdir(runs_dir)
        # One lr run per alpha in 0.0, 0.1, ..., 1.0.
        for alpha in [x / 10.0 for x in range(0, 11)]:
            self._run_prcl_search(collection, f'{runs_dir}/core17_lr_A{alpha}_bm25.txt', 'lr', alpha)

        output = f'{self.tmp}/core17_lr.txt'
        status = os.system(f'python {self.pyserini_root}/scripts/classifier_prf/cross_validate.py '
                           f'--anserini {self.anserini_root} --run_file {self.tmp} '
                           f'--pyserini {self.pyserini_root} '
                           f'--collection core17 --output {output} --classifier lr ')
        # Fail here if the script itself failed, instead of later during evaluation.
        self.assertEqual(status, 0)

        self._assert_map(collection, output, 0.2462)

    # ------------------------------------------------------------------
    # core17
    # ------------------------------------------------------------------

    def test_core17(self):
        self.assertTrue(self.core17_checker.run('core17_bm25', '--bm25', 0.2087))

    def test_core17_rm3(self):
        self.assertTrue(self.core17_checker.run('core17_bm25_rm3', '--bm25 --rm3', 0.2798))

    def test_core17_lr(self):
        self._check_prcl('core17', 'lr', 'lr', 0.7, 0.2473)

    def test_core17_lr_rm3(self):
        self._check_prcl('core17', 'lr', 'lr', 0.4, 0.2926, rm3=True)

    def test_core17_svm(self):
        self._check_prcl('core17', 'svm', 'svm', 0.7, 0.2385)

    def test_core17_svm_rm3(self):
        self._check_prcl('core17', 'svm', 'svm', 0.4, 0.2956, rm3=True)

    def test_core17_avg(self):
        self._check_prcl('core17', 'avg', 'lr svm', 0.6, 0.2442)

    def test_core17_avg_rm3(self):
        self._check_prcl('core17', 'avg', 'lr svm', 0.5, 0.2950, rm3=True)

    def test_core17_rrf(self):
        self._check_rrf('core17', 0.7, 0.2446)

    def test_core17_rrf_rm3(self):
        self._check_rrf('core17', 0.4, 0.2957, rm3=True)

    # ------------------------------------------------------------------
    # core18
    # ------------------------------------------------------------------

    def test_core18(self):
        self.assertTrue(self.core18_checker.run('core18_bm25', '--bm25', 0.2496))

    def test_core18_rm3(self):
        self.assertTrue(self.core18_checker.run('core18_bm25_rm3', '--bm25 --rm3', 0.3129))

    def test_core18_lr(self):
        self._check_prcl('core18', 'lr', 'lr', 0.6, 0.2839)

    def test_core18_lr_rm3(self):
        self._check_prcl('core18', 'lr', 'lr', 0.5, 0.3216, rm3=True)

    def test_core18_svm(self):
        self._check_prcl('core18', 'svm', 'svm', 0.6, 0.2841)

    def test_core18_svm_rm3(self):
        self._check_prcl('core18', 'svm', 'svm', 0.5, 0.3200, rm3=True)

    def test_core18_avg(self):
        self._check_prcl('core18', 'avg', 'lr svm', 0.4, 0.2860)

    def test_core18_avg_rm3(self):
        self._check_prcl('core18', 'avg', 'lr svm', 0.4, 0.3215, rm3=True)

    def test_core18_rrf(self):
        self._check_rrf('core18', 0.6, 0.2881)

    def test_core18_rrf_rm3(self):
        self._check_rrf('core18', 0.5, 0.3205, rm3=True)

    # ------------------------------------------------------------------
    # robust04
    # ------------------------------------------------------------------

    def test_robust04(self):
        self.assertTrue(self.robust04_checker.run('robust04_bm25', '--bm25', 0.2531))

    def test_robust04_rm3(self):
        self.assertTrue(self.robust04_checker.run('robust04_bm25_rm3', '--bm25 --rm3', 0.2908))

    def test_robust04_lr(self):
        self._check_prcl('robust04', 'lr', 'lr', 0.5, 0.2747)

    def test_robust04_lr_rm3(self):
        self._check_prcl('robust04', 'lr', 'lr', 0.3, 0.2969, rm3=True)

    def test_robust04_svm(self):
        self._check_prcl('robust04', 'svm', 'svm', 0.5, 0.2726)

    def test_robust04_svm_rm3(self):
        self._check_prcl('robust04', 'svm', 'svm', 0.3, 0.2972, rm3=True)

    def test_robust04_avg(self):
        self._check_prcl('robust04', 'avg', 'lr svm', 0.5, 0.276)

    def test_robust04_avg_rm3(self):
        self._check_prcl('robust04', 'avg', 'lr svm', 0.3, 0.2980, rm3=True)

    def test_robust04_rrf(self):
        self._check_rrf('robust04', 0.5, 0.275)

    def test_robust04_rrf_rm3(self):
        self._check_rrf('robust04', 0.3, 0.2977, rm3=True)

    # ------------------------------------------------------------------
    # robust05
    # ------------------------------------------------------------------

    def test_robust05(self):
        self.assertTrue(self.robust05_checker.run('robust05_bm25', '--bm25', 0.2032))

    def test_robust05_rm3(self):
        self.assertTrue(self.robust05_checker.run('robust05_bm25_rm3', '--bm25 --rm3', 0.2624))

    def test_robust05_lr(self):
        self._check_prcl('robust05', 'lr', 'lr', 0.8, 0.2476)

    def test_robust05_lr_rm3(self):
        self._check_prcl('robust05', 'lr', 'lr', 0.6, 0.2872, rm3=True)

    def test_robust05_svm(self):
        self._check_prcl('robust05', 'svm', 'svm', 0.8, 0.2486)

    def test_robust05_svm_rm3(self):
        self._check_prcl('robust05', 'svm', 'svm', 0.6, 0.2871, rm3=True)

    def test_robust05_avg(self):
        self._check_prcl('robust05', 'avg', 'lr svm', 0.8, 0.2485)

    def test_robust05_avg_rm3(self):
        self._check_prcl('robust05', 'avg', 'lr svm', 0.6, 0.2880, rm3=True)

    def test_robust05_rrf(self):
        # NOTE(review): the fusion inputs use alpha 0.5 here, not the 0.8 of the
        # standalone lr/svm tests; the expected score was recorded for 0.5.
        self._check_rrf('robust05', 0.5, 0.2401)

    def test_robust05_rrf_rm3(self):
        # NOTE(review): alpha 0.3 here versus 0.6 in the standalone rm3 tests.
        self._check_rrf('robust05', 0.3, 0.2808, rm3=True)

    def tearDown(self):
        """Delete the scratch directory created by setUp."""
        shutil.rmtree(f'{self.tmp}')
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    unittest.main()