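"""Analyze a coreference prediction log for completely missed and completely spurious clusters.

Reads a JSON-lines log file (one document per line, each with "doc_key",
"clusters", and "predicted_clusters" fields) and reports:
  * the largest gold cluster in which no mention was predicted, and
  * the largest predicted cluster in which no mention appears in the gold annotation.

Usage:
    python <this_script> LOG_FILE
"""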
import argparse
import os
import logging
import json
import numpy as np
from coref_utils.metrics import CorefEvaluator
from coref_utils.utils import get_mention_to_cluster
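# Disable HuggingFace tokenizers parallelism to silence fork-related warnings.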
os.environ["TOKENIZERS_PARALLELISM"] = "false"
logging.basicConfig(format="%(message)s", level=logging.INFO)
logger = logging.getLogger()


def process_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "log_file", help="Path to the JSON-lines prediction log file", type=str
    )
    args = parser.parse_args()
    return args


def singleton_analysis(data):
    """Find the largest gold cluster that the predictions missed entirely.

    Prints the doc_key of the document containing that cluster, followed by
    its size and its mentions.
    """
    max_length = 0
    max_doc_id = ""
    max_cluster = []
    for instance in data:
        gold_clusters, gold_mentions_to_cluster = get_mention_to_cluster(
            instance["clusters"]
        )
        pred_clusters, pred_mentions_to_cluster = get_mention_to_cluster(
            instance["predicted_clusters"]
        )
        for cluster in gold_clusters:
            # A gold cluster is "unseen" if none of its mentions appear
            # in any predicted cluster.
            all_mentions_unseen = True
            for mention in cluster:
                if mention in pred_mentions_to_cluster:
                    all_mentions_unseen = False
                    break
            if all_mentions_unseen and len(cluster) > max_length:
                max_length = len(cluster)
                max_doc_id = instance["doc_key"]
                max_cluster = cluster

    print(max_doc_id)
    print(max_length, max_cluster)


def reverse_analysis(data):
    """Find the largest entirely spurious predicted cluster.

    Mirrors singleton_analysis: looks for the largest predicted cluster in
    which no mention appears in any gold cluster, and prints the doc_key of
    the document containing it, followed by its size and its mentions.
    """
    max_length = 0
    max_doc_id = ""
    max_cluster = []
    for instance in data:
        gold_clusters, gold_mentions_to_cluster = get_mention_to_cluster(
            instance["clusters"]
        )
        pred_clusters, pred_mentions_to_cluster = get_mention_to_cluster(
            instance["predicted_clusters"]
        )
        for cluster in pred_clusters:
            # A predicted cluster is "unseen" if none of its mentions appear
            # in any gold cluster.
            all_mentions_unseen = True
            for mention in cluster:
                if mention in gold_mentions_to_cluster:
                    all_mentions_unseen = False
                    break
            if all_mentions_unseen and len(cluster) > max_length:
                max_length = len(cluster)
                max_doc_id = instance["doc_key"]
                max_cluster = cluster

    print(max_doc_id)
    print(max_length, max_cluster)


def main():
    args = process_args()

    # The log file is in JSON-lines format: one document per line.
    data = []
    with open(args.log_file) as f:
        for line in f:
            data.append(json.loads(line))

    singleton_analysis(data)
    reverse_analysis(data)


if __name__ == "__main__":
    main()