NetsPresso_QA / scripts /ltr_msmarco /generate_document_score_withmaxP.py
geonmin-kim's picture
Upload folder using huggingface_hub
d6585f5
raw
history blame
No virus
1.16 kB
from tqdm import tqdm
import collections
import argparse
def max_passage_scores(lines):
    """Aggregate passage-level scores into document-level scores (MaxP).

    Each non-blank line must hold exactly six tab-separated fields:
    ``qid``, an ignored field, ``pid``, an ignored rank, ``score`` and a
    final ignored field.  The document id is the part of ``pid`` before
    the first ``#``; a document's score is the highest score among its
    passages for the same query.

    Args:
        lines: Iterable of text lines (e.g. an open file).

    Returns:
        Mapping ``qid -> {docid: best_score}``; queries and documents keep
        the order in which they first appear in ``lines``.

    Raises:
        ValueError: If a non-blank line does not have six tab-separated
            fields, or its score field is not a valid float.
    """
    best = collections.defaultdict(dict)
    for lineno, line in enumerate(lines, start=1):
        # A trailing empty line is common in run files and would otherwise
        # fail the six-way unpack below.
        if not line.strip():
            continue
        try:
            qid, _, pid, _, score, _ = line.split('\t')
        except ValueError as err:
            raise ValueError(
                f'line {lineno}: expected 6 tab-separated fields, '
                f'got {line!r}') from err
        score = float(score)
        docid = pid.split('#')[0]
        doc_scores = best[qid]
        if docid not in doc_scores or score > doc_scores[docid]:
            doc_scores[docid] = score
    return best


def rank_documents(doc_scores):
    """Return the document ids of one query ordered by descending score.

    The sort is stable, so documents with equal scores keep the order in
    which they appear in ``doc_scores``.

    Args:
        doc_scores: Mapping ``docid -> score`` for a single query.

    Returns:
        List of document ids, best first.
    """
    ordered = sorted(doc_scores.items(), key=lambda kv: kv[1], reverse=True)
    return [docid for docid, _ in ordered]


def main():
    """Read a passage-level run file and write a document-level ranking.

    Output lines have the form ``qid<TAB>docid<TAB>rank`` with ranks
    starting at 1 within each query.
    """
    parser = argparse.ArgumentParser(description='Generate document level score')
    parser.add_argument('--input', metavar='input file', help='input file',
                        type=str, required=True)
    parser.add_argument('--output', metavar='output file', help='output file',
                        type=str, required=True)
    args = parser.parse_args()

    # Explicit encoding so results do not depend on the platform locale.
    with open(args.input, encoding='utf-8') as fin:
        scores = max_passage_scores(tqdm(fin))

    with open(args.output, 'w', encoding='utf-8') as fout:
        for qid, doc_scores in tqdm(scores.items()):
            for rank, docid in enumerate(rank_documents(doc_scores), start=1):
                fout.write(f'{qid}\t{docid}\t{rank}\n')


if __name__ == '__main__':
    main()