Spaces:

geonmin-kim
/

NetsPresso_QA

Runtime error

File size: 24,164 Bytes

d6585f5

#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import argparse
import math
import os
import re
import sys
import time
from collections import defaultdict
from string import Template

import pkg_resources
import yaml

from ._base import run_eval_and_return_metric, ok_str, okish_str, fail_str

# Condition names per collection, listed in the order the rows of the results table are emitted.
# An empty string is not a condition: it marks a visual break between groups of rows
# (rendered as a blank row/line by the reporting code, and skipped when listing conditions).
models = {
    'msmarco-v1-passage': [
        # BM25 on the original text, default parameters
        'bm25-default', 'bm25-rm3-default', 'bm25-rocchio-default',
        '',
        # BM25 on the original text, tuned parameters
        'bm25-tuned', 'bm25-rm3-tuned', 'bm25-rocchio-tuned',
        '',
        # BM25 with d2q-t5, default parameters
        'bm25-d2q-t5-default', 'bm25-rm3-d2q-t5-default', 'bm25-rocchio-d2q-t5-default',
        '',
        # BM25 with d2q-t5, tuned parameters
        'bm25-d2q-t5-tuned', 'bm25-rm3-d2q-t5-tuned', 'bm25-rocchio-d2q-t5-tuned',
        '',
        'unicoil-noexp', 'unicoil',
        '',
        'unicoil-noexp-otf', 'unicoil-otf',
        '',
        'ance', 'distilbert-kd', 'distilbert-kd-tasb', 'tct_colbert-v2-hnp',
        '',
        'ance-otf', 'distilbert-kd-otf', 'distilbert-kd-tasb-otf', 'tct_colbert-v2-hnp-otf',
    ],
    'msmarco-v1-doc': [
        # Default parameters
        'bm25-doc-default', 'bm25-doc-segmented-default',
        'bm25-rm3-doc-default', 'bm25-rm3-doc-segmented-default',
        'bm25-rocchio-doc-default', 'bm25-rocchio-doc-segmented-default',
        '',
        # Tuned parameters
        'bm25-doc-tuned', 'bm25-doc-segmented-tuned',
        'bm25-rm3-doc-tuned', 'bm25-rm3-doc-segmented-tuned',
        'bm25-rocchio-doc-tuned', 'bm25-rocchio-doc-segmented-tuned',
        '',
        # d2q-t5, default parameters
        'bm25-d2q-t5-doc-default', 'bm25-d2q-t5-doc-segmented-default',
        'bm25-rm3-d2q-t5-doc-default', 'bm25-rm3-d2q-t5-doc-segmented-default',
        '',
        # d2q-t5, tuned parameters
        'bm25-d2q-t5-doc-tuned', 'bm25-d2q-t5-doc-segmented-tuned',
        'bm25-rm3-d2q-t5-doc-tuned', 'bm25-rm3-d2q-t5-doc-segmented-tuned',
        '',
        'unicoil-noexp', 'unicoil',
        '',
        'unicoil-noexp-otf', 'unicoil-otf',
    ],
    'msmarco-v2-passage': [
        'bm25-default', 'bm25-augmented-default',
        'bm25-rm3-default', 'bm25-rm3-augmented-default',
        '',
        'bm25-d2q-t5-default', 'bm25-d2q-t5-augmented-default',
        'bm25-rm3-d2q-t5-default', 'bm25-rm3-d2q-t5-augmented-default',
        '',
        'unicoil-noexp', 'unicoil',
        '',
        'unicoil-noexp-otf', 'unicoil-otf',
    ],
    'msmarco-v2-doc': [
        'bm25-doc-default', 'bm25-doc-segmented-default',
        'bm25-rm3-doc-default', 'bm25-rm3-doc-segmented-default',
        '',
        'bm25-d2q-t5-doc-default', 'bm25-d2q-t5-doc-segmented-default',
        'bm25-rm3-d2q-t5-doc-default', 'bm25-rm3-d2q-t5-doc-segmented-default',
        '',
        'unicoil-noexp', 'unicoil',
        '',
        'unicoil-noexp-otf', 'unicoil-otf',
    ],
}

# Option strings passed to `pyserini.eval.trec_eval`, looked up as
# trec_eval_metric_definitions[collection][eval_key][metric_name].
trec_eval_metric_definitions = {
    'msmarco-v1-passage': {
        'msmarco-passage-dev-subset': {'MRR@10': '-c -M 10 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'dl19-passage': {
            'MAP': '-c -l 2 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'R@1K': '-c -l 2 -m recall.1000',
        },
        'dl20-passage': {
            'MAP': '-c -l 2 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'R@1K': '-c -l 2 -m recall.1000',
        },
    },
    'msmarco-v1-doc': {
        # NOTE(review): the label says MRR@10 but the options pass "-M 100" - confirm this is intended.
        'msmarco-doc-dev': {'MRR@10': '-c -M 100 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'dl19-doc': {
            'MAP': '-c -M 100 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'R@1K': '-c -m recall.1000',
        },
        'dl20-doc': {
            'MAP': '-c -M 100 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'R@1K': '-c -m recall.1000',
        },
    },
    'msmarco-v2-passage': {
        'msmarco-v2-passage-dev': {'MRR@100': '-c -M 100 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'msmarco-v2-passage-dev2': {'MRR@100': '-c -M 100 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'dl21-passage': {
            'MAP@100': '-c -l 2 -M 100 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'MRR@100': '-c -l 2 -M 100 -m recip_rank',
            'R@100': '-c -l 2 -m recall.100',
            'R@1K': '-c -l 2 -m recall.1000',
        },
    },
    'msmarco-v2-doc': {
        'msmarco-v2-doc-dev': {'MRR@100': '-c -M 100 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'msmarco-v2-doc-dev2': {'MRR@100': '-c -M 100 -m recip_rank', 'R@1K': '-c -m recall.1000'},
        'dl21-doc': {
            'MAP@100': '-c -M 100 -m map',
            'nDCG@10': '-c -m ndcg_cut.10',
            'MRR@100': '-c -M 100 -m recip_rank',
            'R@100': '-c -m recall.100',
            'R@1K': '-c -m recall.1000',
        },
    },
}


def find_msmarco_table_topic_set_key_v1(topic_key):
    """Collapse an MS MARCO V1 topic key into its short results-table key.

    Variants of the same topic set share one column group, e.g. both
    'dl19-passage-unicoil' and 'dl19-passage' map to 'dl19'. Any key starting
    with 'msmarco' maps to 'dev'. Returns '' when no prefix matches.
    """
    # (prefix, short key) pairs, tested in order; the first match wins.
    prefix_table = (
        ('dl19', 'dl19'),
        ('dl20', 'dl20'),
        ('msmarco', 'dev'),
    )
    for prefix, short_key in prefix_table:
        if topic_key.startswith(prefix):
            return short_key
    return ''


def find_msmarco_table_topic_set_key_v2(topic_key):
    """Collapse an MS MARCO V2 topic key into its short results-table key.

    Returns 'dev' or 'dev2' based on the key's suffix (including the
    '-unicoil' and '-unicoil-noexp' variants), 'dl21' for keys starting with
    'dl21', and '' otherwise. The suffix checks take precedence over the
    'dl21' prefix check.
    """
    dev_suffixes = ('dev', 'dev-unicoil', 'dev-unicoil-noexp')
    dev2_suffixes = ('dev2', 'dev2-unicoil', 'dev2-unicoil-noexp')

    if topic_key.endswith(dev_suffixes):
        return 'dev'
    if topic_key.endswith(dev2_suffixes):
        return 'dev2'
    if topic_key.startswith('dl21'):
        return 'dl21'
    return ''


def format_command(raw):
    """Break a one-line command into shell-continued lines for display.

    A backslash-newline plus two spaces of indent is inserted before each of
    the '--topics', '--threads', '--index' and '--output' flags. Whatever
    follows the '.txt ' output filename (e.g. '--hits 1000 --impact') is moved
    to its own line as well; when the command ends at '.txt' there is no
    trailing space to match, so no extra line break is added.
    """
    continuation = '\\\n  '

    formatted = raw
    for flag in ('--topics', '--threads', '--index', '--output'):
        formatted = formatted.replace(flag, continuation + flag)

    return formatted.replace('.txt ', '.txt ' + continuation)


def read_file(f):
    """Return the full text content of the file at path ``f``.

    The file is opened in text mode with the platform default encoding. A
    context manager is used so the handle is closed even if reading raises.
    """
    with open(f, 'r') as fin:
        return fin.read()


def list_conditions(args):
    """Print the condition names defined for ``args.collection``, one per line.

    Empty-string entries in ``models`` are row separators, not conditions,
    and are left out of the listing.
    """
    condition_names = [entry for entry in models[args.collection] if entry != '']
    for condition_name in condition_names:
        print(condition_name)


def generate_report(args):
    """Render the HTML results table for ``args.collection`` into ``args.output``.

    Conditions, commands and expected scores are read from ``<collection>.yaml``
    and the HTML page/row templates from ``*.template`` resources, all located
    next to this module. Nothing is executed: the scores shown are the expected
    values recorded in the YAML file.

    Args:
        args: Namespace with ``collection`` (full name, e.g.
            ``'msmarco-v1-passage'``) and ``output`` (path of the HTML file to write).

    Raises:
        ValueError: If ``args.collection`` is not one of the four known collections.
    """
    collection = args.collection
    yaml_file = pkg_resources.resource_filename(__name__, f'{collection}.yaml')

    # collection -> (page template, row template, report title)
    report_layouts = {
        'msmarco-v1-passage':
            ('msmarco_html_v1_passage.template', 'msmarco_html_row_v1.template', 'MS MARCO V1 Passage'),
        'msmarco-v1-doc':
            ('msmarco_html_v1_doc.template', 'msmarco_html_row_v1.template', 'MS MARCO V1 Document'),
        'msmarco-v2-passage':
            ('msmarco_html_v2_passage.template', 'msmarco_html_row_v2.template', 'MS MARCO V2 Passage'),
        'msmarco-v2-doc':
            ('msmarco_html_v2_doc.template', 'msmarco_html_row_v2.template', 'MS MARCO V2 Document'),
    }
    if collection not in report_layouts:
        raise ValueError(f'Unknown corpus: {collection}')

    page_file, row_file, full_name = report_layouts[collection]
    html_template = read_file(pkg_resources.resource_filename(__name__, page_file))
    row_template = read_file(pkg_resources.resource_filename(__name__, row_file))

    is_v1 = collection in ('msmarco-v1-passage', 'msmarco-v1-doc')
    shorten_topic_key = find_msmarco_table_topic_set_key_v1 if is_v1 else find_msmarco_table_topic_set_key_v2
    metric_options = trec_eval_metric_definitions[collection]

    # table[condition][short topic key][metric] -> expected score (0.0 when absent)
    table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
    # commands / eval_commands[condition][short topic key] -> command text ('' when absent)
    commands = defaultdict(lambda: defaultdict(lambda: ''))
    eval_commands = defaultdict(lambda: defaultdict(lambda: ''))

    table_keys = {}
    row_ids = {}

    with open(yaml_file) as f:
        yaml_data = yaml.safe_load(f)

    for condition in yaml_data['conditions']:
        name = condition['name']
        display = condition['display-html']
        row_id = condition.get('display-row', '')
        cmd_template = condition['command']

        row_ids[name] = row_id
        table_keys[name] = display

        for topic_set in condition['topics']:
            topic_key = topic_set['topic_key']
            eval_key = topic_set['eval_key']
            short_topic_key = shorten_topic_key(topic_key)

            runfile = f'run.{collection}.{name}.{short_topic_key}.txt'
            commands[name][short_topic_key] = \
                Template(cmd_template).substitute(topics=topic_key, output=runfile)

            for expected in topic_set['scores']:
                for metric, expected_score in expected.items():
                    eval_cmd = f'python -m pyserini.eval.trec_eval {metric_options[eval_key][metric]} {eval_key} {runfile}'
                    eval_commands[name][short_topic_key] += eval_cmd + '\n'
                    table[name][short_topic_key][metric] = expected_score

    if is_v1:
        # Template slots $s1..$s8 and $cmd1..$cmd3 / $eval_cmd1..$eval_cmd3, in order.
        score_slots = [('dl19', 'MAP'), ('dl19', 'nDCG@10'), ('dl19', 'R@1K'),
                       ('dl20', 'MAP'), ('dl20', 'nDCG@10'), ('dl20', 'R@1K'),
                       ('dev', 'MRR@10'), ('dev', 'R@1K')]
        command_slots = ['dl19', 'dl20', 'dev']
        # (topic, metric, pattern): when the metric has no score, the command text matching
        # the pattern is replaced in the rendered row.
        missing_score_rules = [
            ('dl19', 'MAP', 'Command to generate run on TREC 2019 queries:.*?</div>'),
            ('dl20', 'MAP', 'Command to generate run on TREC 2020 queries:.*?</div>'),
            ('dev', 'MRR@10', 'Command to generate run on dev queries:.*?</div>'),
        ]
    else:
        # Template slots $s1..$s9 and $cmd1..$cmd3 / $eval_cmd1..$eval_cmd3, in order.
        score_slots = [('dl21', 'MAP@100'), ('dl21', 'nDCG@10'), ('dl21', 'MRR@100'),
                       ('dl21', 'R@100'), ('dl21', 'R@1K'),
                       ('dev', 'MRR@100'), ('dev', 'R@1K'),
                       ('dev2', 'MRR@100'), ('dev2', 'R@1K')]
        command_slots = ['dl21', 'dev', 'dev2']
        missing_score_rules = []

    html_rows = []
    row_cnt = 1
    for name in models[collection]:
        if not name:
            # Add blank row for spacing
            html_rows.append('<tr><td style="border-bottom: 0"></td></tr>')
            continue

        fields = {
            'row_cnt': row_cnt,
            'condition_name': table_keys[name],
            'row': row_ids[name],
        }
        for slot, (topic, metric) in enumerate(score_slots, start=1):
            value = table[name][topic][metric]
            # Only the V1 tables show a dash for a missing (zero) score.
            if is_v1 and value == 0:
                fields[f's{slot}'] = '-'
            else:
                fields[f's{slot}'] = f'{value:.4f}'
        for slot, topic in enumerate(command_slots, start=1):
            fields[f'cmd{slot}'] = format_command(commands[name][topic])
            fields[f'eval_cmd{slot}'] = eval_commands[name][topic]

        row_html = Template(row_template).substitute(**fields)

        # If we don't have scores, we want to remove the commands also. Use simple regexp substitution.
        for topic, metric, pattern in missing_score_rules:
            if table[name][topic][metric] == 0:
                row_html = re.sub(pattern, 'Not available.</div>', row_html,
                                  flags=re.MULTILINE | re.DOTALL)

        html_rows.append(row_html)
        row_cnt += 1

    all_rows = '\n'.join(html_rows)

    with open(args.output, 'w') as out:
        out.write(Template(html_template).substitute(title=full_name, rows=all_rows))


def run_conditions(args):
    """Run regression conditions for ``args.collection`` and print a table of scores.

    Conditions are read from ``<collection>.yaml`` located next to this module. For every
    selected condition and each of its topic sets, the retrieval command is built from the
    condition's command template and executed through the shell, unless the run file already
    exists or ``args.dry_run`` is set. Unless ``args.skip_eval`` is set, every metric listed
    for the topic set is then computed on the run file and compared against the expected
    value from the YAML file; with ``args.skip_eval`` the expected values are tabulated
    instead.

    Args:
        args: Parsed command-line namespace. Attributes read here: ``collection`` (full
            name, e.g. ``'msmarco-v1-passage'``), ``all``, ``condition``, ``directory``,
            ``dry_run``, ``skip_eval`` and ``display_commands``.

    Raises:
        ValueError: If ``args.condition`` is given but no condition with that name is
            defined in the YAML file.
    """
    start = time.time()

    # table[condition][short topic key][metric] -> score (0.0 when never filled in)
    table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
    table_keys = {}

    is_v1 = args.collection in ('msmarco-v1-passage', 'msmarco-v1-doc')
    yaml_file = pkg_resources.resource_filename(__name__, f'{args.collection}.yaml')

    with open(yaml_file) as f:
        yaml_data = yaml.safe_load(f)

    # Fail early with a clear message; otherwise an unknown name only surfaces as a
    # KeyError on table_keys when the summary row is printed.
    if args.condition and all(c['name'] != args.condition for c in yaml_data['conditions']):
        raise ValueError(f'Unknown condition: {args.condition}')

    for condition in yaml_data['conditions']:
        name = condition['name']

        # Either we're running all conditions, or running only the condition specified in --condition
        if not args.all and name != args.condition:
            continue

        display = condition['display']
        cmd_template = condition['command']
        table_keys[name] = display

        print(f'# Running condition "{name}": {display}\n')
        for topic_set in condition['topics']:
            topic_key = topic_set['topic_key']
            eval_key = topic_set['eval_key']

            if is_v1:
                short_topic_key = find_msmarco_table_topic_set_key_v1(topic_key)
            else:
                short_topic_key = find_msmarco_table_topic_set_key_v2(topic_key)

            print(f'  - topic_key: {topic_key}')

            runfile = os.path.join(args.directory, f'run.{args.collection}.{name}.{short_topic_key}.txt')
            cmd = Template(cmd_template).substitute(topics=topic_key, output=runfile)

            if args.display_commands:
                print(f'\n```bash\n{format_command(cmd)}\n```\n')

            # An existing run file is reused rather than regenerated.
            if not os.path.exists(runfile) and not args.dry_run:
                os.system(cmd)

            for expected in topic_set['scores']:
                for metric in expected:
                    if args.skip_eval:
                        table[name][short_topic_key][metric] = expected[metric]
                        continue

                    # If the runfile doesn't exist, we can't evaluate.
                    # This would be the case if --dry-run were set.
                    if not os.path.exists(runfile):
                        continue

                    score = float(
                        run_eval_and_return_metric(
                            metric,
                            eval_key,
                            trec_eval_metric_definitions[args.collection][eval_key][metric],
                            runfile))
                    if math.isclose(score, float(expected[metric])):
                        result_str = ok_str
                    # Flaky test: small difference on my iMac Studio.
                    # args.collection holds the full name here (the entry point expands
                    # 'v1-passage' to 'msmarco-v1-passage'), so compare against that.
                    elif args.collection == 'msmarco-v1-passage' and \
                            topic_key == 'msmarco-passage-dev-subset' and \
                            name == 'ance-otf' and \
                            math.isclose(score, float(expected[metric]), abs_tol=2e-4):
                        result_str = okish_str
                    else:
                        result_str = fail_str + f' expected {expected[metric]:.4f}'
                    print(f'    {metric:7}: {score:.4f} {result_str}')
                    table[name][short_topic_key][metric] = score

            if not args.skip_eval:
                print('')

    if is_v1:
        print(' ' * 69 + 'TREC 2019' + ' ' * 16 + 'TREC 2020' + ' ' * 12 + 'MS MARCO dev')
        print(' ' * 62 + 'MAP    nDCG@10    R@1K       MAP nDCG@10    R@1K    MRR@10    R@1K')
        print(' ' * 62 + '-' * 22 + '    ' + '-' * 22 + '    ' + '-' * 14)
    else:
        print(' ' * 77 + 'TREC 2021' + ' ' * 18 + 'MS MARCO dev' + ' ' * 6 + 'MS MARCO dev2')
        print(' ' * 62 + 'MAP@100 nDCG@10 MRR@100 R@100   R@1K     MRR@100   R@1K    MRR@100   R@1K')
        print(' ' * 62 + '-' * 38 + '    ' + '-' * 14 + '    ' + '-' * 14)

    # If we've used --condition to specify a specific condition, print out only that row;
    # otherwise print all rows, where '' entries in `models` produce a blank line.
    row_names = [args.condition] if args.condition else models[args.collection]
    for name in row_names:
        if not name:
            print('')
            continue

        if is_v1:
            scores = (
                f'{table[name]["dl19"]["MAP"]:8.4f}{table[name]["dl19"]["nDCG@10"]:8.4f}{table[name]["dl19"]["R@1K"]:8.4f}  ' +
                f'{table[name]["dl20"]["MAP"]:8.4f}{table[name]["dl20"]["nDCG@10"]:8.4f}{table[name]["dl20"]["R@1K"]:8.4f}  ' +
                f'{table[name]["dev"]["MRR@10"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f}')
        else:
            scores = (
                f'{table[name]["dl21"]["MAP@100"]:8.4f}{table[name]["dl21"]["nDCG@10"]:8.4f}' +
                f'{table[name]["dl21"]["MRR@100"]:8.4f}{table[name]["dl21"]["R@100"]:8.4f}{table[name]["dl21"]["R@1K"]:8.4f}  ' +
                f'{table[name]["dev"]["MRR@100"]:8.4f}{table[name]["dev"]["R@1K"]:8.4f}  ' +
                f'{table[name]["dev2"]["MRR@100"]:8.4f}{table[name]["dev2"]["R@1K"]:8.4f}')
        print(f'{table_keys[name]:60}' + scores)

    end = time.time()

    print('\n')
    print(f'Total elapsed time: {end - start:.0f}s')


if __name__ == '__main__':
    # Command-line entry point. Exactly one of three actions is performed, checked in this
    # order: list the conditions, generate the HTML report, or run the conditions.
    parser = argparse.ArgumentParser(description='Generate regression matrix for MS MARCO corpora.')
    parser.add_argument('--collection', type=str,
                        help='Collection = {v1-passage, v1-doc, v2-passage, v2-doc}.', required=True)
    # To list all conditions
    parser.add_argument('--list-conditions', action='store_true', default=False, help='List available conditions.')
    # For generating reports
    parser.add_argument('--generate-report', action='store_true', default=False, help='Generate report.')
    parser.add_argument('--output', type=str, help='File to store report.', required=False)
    # For actually running the experimental conditions
    parser.add_argument('--all', action='store_true', default=False, help='Run all conditions.')
    parser.add_argument('--condition', type=str, help='Condition to run.', required=False)
    parser.add_argument('--directory', type=str, help='Base directory.', default='', required=False)
    parser.add_argument('--dry-run', action='store_true', default=False, help='Print out commands but do not execute.')
    parser.add_argument('--skip-eval', action='store_true', default=False, help='Skip running trec_eval.')
    parser.add_argument('--display-commands', action='store_true', default=False, help='Display command.')
    args = parser.parse_args()

    # Expand the short name given on the command line into the full collection name that
    # the rest of the module uses as a lookup key.
    collection_names = {
        'v1-passage': 'msmarco-v1-passage',
        'v1-doc': 'msmarco-v1-doc',
        'v2-passage': 'msmarco-v2-passage',
        'v2-doc': 'msmarco-v2-doc',
    }
    if args.collection not in collection_names:
        raise ValueError(f'Unknown corpus: {args.collection}')
    args.collection = collection_names[args.collection]

    if args.list_conditions:
        list_conditions(args)
        sys.exit()

    if args.generate_report:
        if not args.output:
            # parser.error() prints the usage plus this message to stderr and exits with a
            # non-zero status, so a misuse is not reported to the shell as success.
            parser.error('Must specify report filename with --output.')

        generate_report(args)
        sys.exit()

    if not args.all and not args.condition:
        parser.error('Must specify a specific condition using --condition or use --all to run all conditions.')

    run_conditions(args)