#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Integration tests for commands in Pradeep et al. resource paper at ECIR 2023."""

import os
import unittest

from integrations.utils import clean_files, run_command, parse_score_qa


class TestECIR2023(unittest.TestCase):
    def setUp(self):
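        # files created by a test are registered here and removed in tearDown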
        self.temp_files = []

    def test_section5_sub2_first(self):
        """Sample code of the first command in Section 5.2."""
        metrics = ['Top5', 'Top20', 'Top100']
        ground_truth = [73.8, 84.27, 89.34]
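        # expected Top-{5,20,100} retrieval accuracies (as percentages) that the
        # run should reproduce, within the tolerance checked below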

        output_file = 'runs/run.nq-test.dkrr.trec'
        json_file = 'runs/run.nq-test.dkrr.json'
        self.temp_files.append(output_file)
        self.temp_files.append(json_file)

        # step 1: dense retrieval over the DKRR Wikipedia index with the DKRR-DPR query encoder
        run_cmd = f'python -m pyserini.search.faiss \
                      --index wikipedia-dpr-dkrr-nq \
                      --topics nq-test \
                      --encoder castorini/dkrr-dpr-nq-retriever \
                      --output {output_file} --query-prefix question: \
                      --threads 72 --batch-size 72 \
                      --hits 100'
        status = os.system(run_cmd)
        self.assertEqual(status, 0)

        # step 2: convert the TREC run into the DPR retrieval JSON format
        convert_cmd = f'python -m pyserini.eval.convert_trec_run_to_dpr_retrieval_run \
                        --topics nq-test \
                        --index wikipedia-dpr \
                        --input {output_file} \
                        --output {json_file}'
        status = os.system(convert_cmd)
        self.assertEqual(status, 0)

        # step 3: compute Top-k retrieval accuracy on the converted run
        eval_cmd = f'python -m pyserini.eval.evaluate_dpr_retrieval \
                       --retrieval {json_file} \
                       --topk 5 20 100'
        stdout, stderr = run_command(eval_cmd)

        # parse_score_qa pulls each Top-k accuracy out of stdout; convert the
        # fractions to percentages to match the ground-truth values above
        scores = []
        for metric in metrics:
            scores.append(parse_score_qa(stdout, metric, 4) * 100)

        for score, expected in zip(scores, ground_truth):
            self.assertAlmostEqual(score, expected, delta=0.02)

    def test_section5_sub2_second(self):
        """Sample code of the second command in Section 5.2."""

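        # run_all_odqa.py reproduces the ODQA results matrix for the given topic
        # set, printing '[OK]' for each condition whose score matches the
        # expected value; each topic set covers 21 conditions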
        cmd_nq = 'python scripts/repro_matrix/run_all_odqa.py --topics nq'
        # assumption: 'dpr-trivia' is the TriviaQA topic key accepted by
        # run_all_odqa.py (the nq topics clearly do not belong here)
        cmd_tqa = 'python scripts/repro_matrix/run_all_odqa.py --topics dpr-trivia'

        # run both commands and check that every condition passed (i.e., printed '[OK]')
        stdout_nq, stderr_nq = run_command(cmd_nq)
        self.assertEqual(stdout_nq.count('[OK]'), 21)

        stdout_tqa, stderr_tqa = run_command(cmd_tqa)
        self.assertEqual(stdout_tqa.count('[OK]'), 21)

    def tearDown(self):
        clean_files(self.temp_files)


if __name__ == '__main__':
    unittest.main()