# SpEL / candidate_manager.py
# SpEL files required to run the streamlit app, copied from https://github.com/shavarani/SpEL
# (revision c337225)
"""
This file contains the implementation of the candidate manager in charge of loading the candidate sets,
and modifying the phrase annotations using the loaded candidates.
"""
import json
from span_annotation import PhraseAnnotation
from configuration import get_resources_dir
class CandidateManager:
    """
    Loads a pre-computed candidate set and uses it to restrict the entity a phrase annotation resolves to.

    Exactly one candidate source is selected through the constructor flags:

    * KB+YAGO: one dictionary keyed by the lower-cased mention surface form. Each candidate is indexed as a
      sequence whose first element is the entity name looked up in ``mentions_vocab``.
    * PPRforNED: the ``aida_testa_pprforned`` and ``aida_testb_pprforned`` files merged into one dictionary. It is
      keyed by the mention surface form (context agnostic), or by the sentence and then by either the span string
      ``"(begin, end)"`` or the mention surface form (context aware).
    """

    def __init__(self, mentions_vocab, is_kb_yago=False, is_ppr_for_ned=False, is_context_agnostic=False,
                 is_indexed_for_spans=False):
        """
        :param mentions_vocab: mapping-like object, queried with ``in`` and ``[]``, that translates a candidate
            entity name into the identifier compared against the phrase annotation alternatives.
        :param is_kb_yago: load the KB+YAGO candidates (takes precedence over ``is_ppr_for_ned``).
        :param is_ppr_for_ned: load the PPRforNED candidates.
        :param is_context_agnostic: PPRforNED only; look candidates up by mention alone instead of per sentence.
        :param is_indexed_for_spans: PPRforNED context-aware only; look candidates up by character span.
        :raises ValueError: when neither ``is_kb_yago`` nor ``is_ppr_for_ned`` is True.
        """
        self.mentions_vocab = mentions_vocab
        self.candidates = None
        if is_kb_yago:
            print(" * Loading the candidates stored for KB+YAGO ...")
            # KB+YAGO is only ever looked up by surface form, so the two context flags are forced here.
            is_context_agnostic = True
            is_indexed_for_spans = False
            self.load_kb_plus_yago()
        elif is_ppr_for_ned:
            awareness = 'context agnostic' if is_context_agnostic else 'context aware'
            print(f" * Loading the {awareness} PPRforNED candidate set ...")
            self.load_ppr_for_ned_candidates(is_context_agnostic, is_indexed_for_spans)
        else:
            raise ValueError("Either 'is_kb_yago' or 'is_ppr_for_ned' flags must be True!")
        self.is_context_agnostic = is_context_agnostic
        self.is_indexed_for_spans = is_indexed_for_spans
        self.is_kb_yago = is_kb_yago
        self.is_ppr_for_ned = is_ppr_for_ned

    @staticmethod
    def _load_json(path):
        """Parse the JSON file at ``path`` and make sure the file handle is closed afterwards."""
        with open(path, "r", encoding="utf-8") as json_file:
            return json.load(json_file)

    def load_ppr_for_ned_candidates(self, is_context_agnostic, is_indexed_for_spans):
        """
        Load the PPRforNED candidates of both AIDA test splits into ``self.candidates``.

        :param is_context_agnostic: read ``context_agnostic_mentions.json`` and union the per-mention candidate
            lists of the two splits.
        :param is_indexed_for_spans: when not context agnostic, read ``context_aware_spans.json`` instead of
            ``context_aware_mentions.json``.
        """
        if is_context_agnostic:
            file_name = "context_agnostic_mentions.json"
        elif is_indexed_for_spans:
            file_name = "context_aware_spans.json"
        else:
            file_name = "context_aware_mentions.json"
        candidates_dir = get_resources_dir() / "data" / "candidates"
        merged = self._load_json(candidates_dir / "aida_testa_pprforned" / file_name)
        test_b = self._load_json(candidates_dir / "aida_testb_pprforned" / file_name)
        if is_context_agnostic:
            # The same mention may appear in both splits: keep the testa order and append unseen testb candidates.
            for mention, extra_candidates in test_b.items():
                if mention not in merged:
                    merged[mention] = extra_candidates
                    continue
                known_candidates = merged[mention]
                for candidate in extra_candidates:
                    if candidate not in known_candidates:
                        known_candidates.append(candidate)
        else:
            # Context-aware entries are keyed by sentence; on a key clash the testb entry replaces the testa one.
            merged.update(test_b)
        self.candidates = merged

    def load_kb_plus_yago(self):
        """Load the KB+YAGO candidate dictionary into ``self.candidates``."""
        self.candidates = self._load_json(
            get_resources_dir() / "data" / "candidates" / "kb_plus_yago_candidates.json")

    def _fetch_candidates(self, phrase_annotation, sentence=None):
        """
        Return the stored candidates for ``phrase_annotation``, or an empty list when there are none.

        :param phrase_annotation: object exposing ``word_string`` and, for span-indexed look-ups,
            ``begin_character`` and ``end_character``.
        :param sentence: key of the sentence-level dictionary; only used for context-aware PPRforNED look-ups.
        """
        if self.is_kb_yago:
            return self.candidates.get(phrase_annotation.word_string.lower(), [])
        if not self.is_ppr_for_ned:
            return []
        # TODO lower-cased check mention surface forms
        mention = phrase_annotation.word_string
        if self.is_context_agnostic:
            return self.candidates.get(mention, [])
        if sentence not in self.candidates:
            return []
        if self.is_indexed_for_spans:
            lookup_key = f"({phrase_annotation.begin_character}, {phrase_annotation.end_character})"
        else:
            lookup_key = mention
        return self.candidates[sentence].get(lookup_key, [])

    def modify_phrase_annotation_using_candidates(self, phrase_annotation: "PhraseAnnotation", sentence: str = None):
        """
        The method post processes the :param phrase_annotation: found in a :param sentence: to make sure it is bound to
         the predefined {self.candidates} set.
        It is not possible to perform candidate look up for spans in context agnostic scenario
         so {self.is_indexed_for_spans} will only be considered where {self.is_context_agnostic} is False.

        Outcomes, all applied in place through ``set_alternative_as_resolved_annotation``:

        * nothing is changed when no candidate set is loaded or the annotation is already resolved to 0;
        * the annotation is reset to 0 when no candidates are stored for the phrase;
        * nothing is changed when candidates are stored but none of them is in ``self.mentions_vocab``;
        * otherwise the annotation becomes the highest scoring alternative that is among the candidates, or 0
          when no alternative is.
        """
        if self.candidates is None or phrase_annotation.resolved_annotation == 0:
            return
        candidates = self._fetch_candidates(phrase_annotation, sentence)
        if not candidates:
            phrase_annotation.set_alternative_as_resolved_annotation(0)
            return
        # KB+YAGO candidates are sequences starting with the entity name; PPRforNED candidates are the name itself.
        names = [candidate[0] for candidate in candidates] if self.is_kb_yago else candidates
        allowed_ids = [self.mentions_vocab[name] for name in names if name in self.mentions_vocab]
        # TODO use the prior probabilities (second element of each KB+YAGO candidate) to adjust the probabilities
        if not allowed_ids:
            return
        # Alternatives are indexed as (identifier, score); max() returns the first best one, which is the same
        # element a stable descending sort would put first.
        best = max(
            (alternative for alternative in phrase_annotation.all_possible_annotations()
             if alternative[0] in allowed_ids),
            key=lambda alternative: alternative[1], default=None)
        phrase_annotation.set_alternative_as_resolved_annotation(0 if best is None else best[0])