NetsPresso_QA / scripts /repro_matrix /generate_html_beir.py
geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
raw
history blame
No virus
4.88 kB
#
# Pyserini: Reproducible IR research with sparse and dense representations
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from collections import defaultdict
from string import Template
import yaml
from scripts.repro_matrix.defs_beir import beir_keys, trec_eval_metric_definitions
def format_run_command(raw):
return raw.replace('--topics', '\\\n --topics')\
.replace('--index', '\\\n --index')\
.replace('--encoder-class', '\\\n --encoder-class')\
.replace('--output ', '\\\n --output ')\
.replace('--output-format trec', '\\\n --output-format trec \\\n ') \
.replace('--hits ', '\\\n --hits ')
def format_eval_command(raw):
return raw.replace('-c ', '\\\n -c ')\
.replace('run.', '\\\n run.')
def read_file(f):
fin = open(f, 'r')
text = fin.read()
fin.close()
return text
if __name__ == '__main__':
table = defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.0)))
commands = defaultdict(lambda: defaultdict(lambda: ''))
eval_commands = defaultdict(lambda: defaultdict(lambda: ''))
html_template = read_file('scripts/repro_matrix/beir_html.template')
row_template = read_file('scripts/repro_matrix/beir_html_row.template')
with open('pyserini/resources/beir.yaml') as f:
yaml_data = yaml.safe_load(f)
for condition in yaml_data['conditions']:
name = condition['name']
cmd_template = condition['command']
for datasets in condition['datasets']:
dataset = datasets['dataset']
runfile = f'run.beir-{name}.{dataset}.txt'
cmd = Template(cmd_template).substitute(dataset=dataset, output=runfile)
commands[dataset][name] = format_run_command(cmd)
for expected in datasets['scores']:
for metric in expected:
eval_cmd = f'python -m pyserini.eval.trec_eval ' + \
f'{trec_eval_metric_definitions[metric]} beir-v1.0.0-{dataset}-test {runfile}'
eval_commands[dataset][name] += format_eval_command(eval_cmd) + '\n\n'
table[dataset][name][metric] = expected[metric]
row_cnt = 1
html_rows = []
for dataset in beir_keys:
s = Template(row_template)
s = s.substitute(row_cnt=row_cnt,
dataset=dataset,
s1=f'{table[dataset]["bm25-flat"]["nDCG@10"]:8.4f}',
s2=f'{table[dataset]["bm25-flat"]["R@100"]:8.4f}',
s3=f'{table[dataset]["bm25-multifield"]["nDCG@10"]:8.4f}',
s4=f'{table[dataset]["bm25-multifield"]["R@100"]:8.4f}',
s5=f'{table[dataset]["splade-distil-cocodenser-medium"]["nDCG@10"]:8.4f}',
s6=f'{table[dataset]["splade-distil-cocodenser-medium"]["R@100"]:8.4f}',
s7=f'{table[dataset]["contriever"]["nDCG@10"]:8.4f}',
s8=f'{table[dataset]["contriever"]["R@100"]:8.4f}',
s9=f'{table[dataset]["contriever-msmarco"]["nDCG@10"]:8.4f}',
s10=f'{table[dataset]["contriever-msmarco"]["R@100"]:8.4f}',
cmd1=commands[dataset]["bm25-flat"],
cmd2=commands[dataset]["bm25-multifield"],
cmd3=commands[dataset]["splade-distil-cocodenser-medium"],
cmd4=commands[dataset]["contriever"],
cmd5=commands[dataset]["contriever-msmarco"],
eval_cmd1=eval_commands[dataset]["bm25-flat"].rstrip(),
eval_cmd2=eval_commands[dataset]["bm25-multifield"].rstrip(),
eval_cmd3=eval_commands[dataset]["splade-distil-cocodenser-medium"].rstrip(),
eval_cmd4=eval_commands[dataset]["contriever"].rstrip(),
eval_cmd5=eval_commands[dataset]["contriever-msmarco"].rstrip(),
)
html_rows.append(s)
row_cnt += 1
all_rows = '\n'.join(html_rows)
print(Template(html_template).substitute(title='BEIR', rows=all_rows))