File size: 4,059 Bytes
d6585f5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
#
# Pyserini: Python interface to the Anserini IR toolkit built on Lucene
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""Compute various statistics for qrels."""

import argparse
import collections


def is_relevant(grade: int, threshold: int):
    """A judgment counts as relevant when its grade is strictly above the threshold."""
    return threshold < grade


def main():
    """Compute qrels statistics: judgment counts per topic and overall.

    Expects whitespace-delimited qrels lines of ``qid iteration docid grade``.
    Prints summary statistics to stdout; if --output is given, also writes a
    per-topic CSV breakdown followed by 'total' and 'avg' rows.

    Raises:
        SystemExit: if the qrels file contains no judgments (avoids a
            ZeroDivisionError when computing per-topic averages).
    """
    parser = argparse.ArgumentParser(description=__doc__,
                                     formatter_class=lambda prog: argparse.HelpFormatter(prog, width=100))
    parser.add_argument('--qrels', metavar='FILE', type=str, required=True, help='Qrels file.')
    parser.add_argument('--output', metavar='FILE', type=str, help='File to store per-topic statistics.')
    parser.add_argument('--relevance-threshold', metavar='INT', type=int, default=0,
                        help='Threshold for considering a judgment "relevant". ' +
                             'Default is zero, which means any relevance grade above zero is considered relevant.')

    args = parser.parse_args()

    # qid -> {docid: grade}; a duplicate (qid, docid) pair keeps the last grade read.
    qrels = collections.defaultdict(dict)
    with open(args.qrels) as f:
        for line in f:
            # str.split() with no argument also discards the trailing newline,
            # so no rstrip() is needed.
            qid, _, docid, relevance = line.split()
            qrels[qid][docid] = int(relevance)

    if not qrels:
        # Fail with a clear message instead of a ZeroDivisionError in the averages below.
        raise SystemExit(f'No judgments found in {args.qrels}')

    per_topic_output = ['qid,rel_count,not_rel_count,total']

    # Parallel per-topic tallies, in sorted(qrels) order; min()/max()/sum() over
    # these replace the previous hand-rolled extrema tracking and its
    # magic-number sentinel.
    judged_counts = []
    rel_counts = []

    for qid in sorted(qrels):
        judged = len(qrels[qid])
        rel_count = sum(1 for grade in qrels[qid].values()
                        if is_relevant(grade, args.relevance_threshold))
        not_rel_count = judged - rel_count

        judged_counts.append(judged)
        rel_counts.append(rel_count)
        per_topic_output.append(f'{qid},{rel_count},{not_rel_count},{judged}')

    total_topics_count = len(judged_counts)
    total_judgments_count = sum(judged_counts)
    total_rel_count = sum(rel_counts)

    per_topic_output.append(f'total,{total_rel_count},{total_judgments_count-total_rel_count},{total_judgments_count}')
    per_topic_output.append(f'avg,{total_rel_count/total_topics_count:.2f},' +
                            f'{(total_judgments_count-total_rel_count)/total_topics_count:.2f},' +
                            f'{total_judgments_count/total_topics_count:.2f}')

    if args.output:
        with open(args.output, 'w') as f:
            for line in per_topic_output:
                f.write(line + '\n')

    print('# Summary Statistics')
    print(f'Total number of topics: {total_topics_count}')
    print(f'Total number of judgments: {total_judgments_count}')
    print(f'Total number of relevant labels: {total_rel_count}')
    print(f'Avg. judgments per topic: {total_judgments_count/total_topics_count:.2f}' +
          f' (max = {max(judged_counts)}, min = {min(judged_counts)})')
    print(f'Avg. relevant labels per topic: {total_rel_count/total_topics_count:.2f}' +
          f' (max = {max(rel_counts)}, min = {min(rel_counts)})')


if __name__ == "__main__":
    main()