|
""" |
|
This file contains the implementation of the candidate manager in charge of loading the candidate sets, |
|
and modifying the phrase annotations using the loaded candidates. |
|
""" |
|
import json |
|
from span_annotation import PhraseAnnotation |
|
from configuration import get_resources_dir |
|
|
|
|
|
class CandidateManager: |
|
def __init__(self, mentions_vocab, is_kb_yago = False, is_ppr_for_ned = False, is_context_agnostic = False, |
|
is_indexed_for_spans= False): |
|
self.mentions_vocab = mentions_vocab |
|
self.candidates = None |
|
if is_kb_yago: |
|
print(" * Loading the candidates stored for KB+YAGO ...") |
|
is_context_agnostic = True |
|
is_indexed_for_spans = False |
|
self.load_kb_plus_yago() |
|
elif is_ppr_for_ned: |
|
print(" * Loading the {} PPRforNED candidate set ...".format( |
|
'context agnostic' if is_context_agnostic else 'context aware')) |
|
self.load_ppr_for_ned_candidates(is_context_agnostic, is_indexed_for_spans) |
|
else: |
|
raise ValueError("Either \'is_kb_yago\' or \'is_ppr_for_ned\' flags must be True!") |
|
self.is_context_agnostic = is_context_agnostic |
|
self.is_indexed_for_spans = is_indexed_for_spans |
|
self.is_kb_yago = is_kb_yago |
|
self.is_ppr_for_ned = is_ppr_for_ned |
|
|
|
def load_ppr_for_ned_candidates(self, is_context_agnostic, is_indexed_for_spans): |
|
if is_context_agnostic: |
|
file_address = "context_agnostic_mentions.json" |
|
elif is_indexed_for_spans: |
|
file_address = "context_aware_spans.json" |
|
else: |
|
file_address = "context_aware_mentions.json" |
|
candidates_a = json.load(open( |
|
get_resources_dir() / "data" / "candidates" / "aida_testa_pprforned" / file_address, "r")) |
|
candidates_b = json.load(open( |
|
get_resources_dir() / "data" / "candidates" / "aida_testb_pprforned" / file_address, "r")) |
|
if is_context_agnostic: |
|
for key in candidates_b: |
|
if key in candidates_a: |
|
for elem in candidates_b[key]: |
|
if elem not in candidates_a[key]: |
|
candidates_a[key].append(elem) |
|
else: |
|
candidates_a[key] = candidates_b[key] |
|
else: |
|
candidates_a.update(candidates_b) |
|
self.candidates = candidates_a |
|
|
|
def load_kb_plus_yago(self): |
|
self.candidates = json.load(open( |
|
get_resources_dir() / "data" / "candidates" / "kb_plus_yago_candidates.json", "r")) |
|
|
|
def _fetch_candidates(self, phrase_annotation, sentence = None): |
|
candidates = [] |
|
if self.is_kb_yago: |
|
phrase_to_check = phrase_annotation.word_string.lower() |
|
if phrase_to_check in self.candidates: |
|
candidates = self.candidates[phrase_to_check] |
|
elif self.is_ppr_for_ned: |
|
|
|
span_key = f"({phrase_annotation.begin_character}, {phrase_annotation.end_character})" |
|
if self.is_context_agnostic and phrase_annotation.word_string in self.candidates: |
|
candidates = self.candidates[phrase_annotation.word_string] |
|
elif not self.is_context_agnostic and sentence in self.candidates: |
|
if self.is_indexed_for_spans and span_key in self.candidates[sentence]: |
|
candidates = self.candidates[sentence][span_key] |
|
elif not self.is_indexed_for_spans and phrase_annotation.word_string in self.candidates[sentence]: |
|
candidates = self.candidates[sentence][phrase_annotation.word_string] |
|
return candidates |
|
|
|
def modify_phrase_annotation_using_candidates(self, phrase_annotation: PhraseAnnotation, sentence: str = None): |
|
""" |
|
The method post processes the :param phrase_annotation: found in a :param sentence: to make sure it is bound to |
|
the predefined {self.candidates} set. |
|
It is not possible to perform candidate look up for spans in context agnostic scenario |
|
so {self.is_indexed_for_spans} will only be considered where {self.is_context_agnostic} is False. |
|
""" |
|
if self.candidates is None or phrase_annotation.resolved_annotation == 0: |
|
return |
|
candidates = self._fetch_candidates(phrase_annotation, sentence) |
|
if not candidates: |
|
phrase_annotation.set_alternative_as_resolved_annotation(0) |
|
return |
|
if self.is_kb_yago: |
|
candidates_ = [self.mentions_vocab[x[0]] for x in candidates if x[0] in self.mentions_vocab] |
|
prior_probabilities_ = [x[1] for x in candidates if x[0] in self.mentions_vocab] |
|
else: |
|
candidates_ = [self.mentions_vocab[x] for x in candidates if x in self.mentions_vocab] |
|
prior_probabilities_ = [1.0 for x in candidates if x in self.mentions_vocab] |
|
|
|
if candidates_: |
|
all_p_anns = phrase_annotation.all_possible_annotations() |
|
filtered_p_predictions = sorted( |
|
[x for x in all_p_anns if x[0] in candidates_], key=lambda y: y[1], reverse=True) |
|
if filtered_p_predictions: |
|
phrase_annotation.set_alternative_as_resolved_annotation(filtered_p_predictions[0][0]) |
|
else: |
|
phrase_annotation.set_alternative_as_resolved_annotation(0) |