healthsea-pipeline / support_functions.py
edichief's picture
Init
a7a38d6
from spacy.tokens import Doc
class HealthseaPipe:
    """Support functions for docs that carry clause data in ``doc._.clauses``."""

    # Get clauses as standalone docs (entity span replaced by its blinder)
    def get_clauses(self, doc):
        """Rebuild every clause of ``doc`` as an independent ``Doc``.

        Each entry of ``doc._.clauses`` is read as a mapping with the keys:

        * ``"split_indices"`` -- start/end token indices of the clause in
          ``doc``; the clause tokens are ``doc[start:end]``.
        * ``"has_ent"`` -- truthy when the clause contains an entity span.
        * ``"ent_indices"`` -- start/end token indices of that entity span
          (only read when ``"has_ent"`` is truthy).
        * ``"blinder"`` -- replacement text for the entity span (only read
          when the entity's first token lies inside the clause).

        For a clause with an entity, the first entity token is replaced by
        the blinder text with every ``"<"`` and ``">"`` removed, and the
        remaining entity tokens are dropped. All other tokens are copied
        with their original text and trailing-whitespace flag.

        Args:
            doc: A spaCy ``Doc`` exposing the ``_.clauses`` extension.

        Returns:
            A list with one new ``Doc`` per clause, in the order of
            ``doc._.clauses``, each created with ``doc.vocab``.
        """
        clauses = []
        for clause in doc._.clauses:
            clause_slice = doc[clause["split_indices"][0] : clause["split_indices"][1]]
            words = []
            spaces = []

            if clause["has_ent"]:
                # Read the entity bounds once instead of rebuilding a range
                # object for every token in the clause.
                ent_start = clause["ent_indices"][0]
                ent_end = clause["ent_indices"][1]
                for token in clause_slice:
                    if token.i == ent_start:
                        # Collapse the whole entity span into one placeholder
                        # token; it is always followed by a space.
                        words.append(
                            clause["blinder"].replace(">", "").replace("<", "")
                        )
                        spaces.append(True)
                    elif not ent_start <= token.i < ent_end:
                        words.append(token.text)
                        # ``whitespace_`` is a str ("" or " "); ``Doc`` expects
                        # booleans in ``spaces``, so convert explicitly.
                        spaces.append(bool(token.whitespace_))
                    # Remaining tokens of the entity span are skipped.
            else:
                for token in clause_slice:
                    words.append(token.text)
                    spaces.append(bool(token.whitespace_))

            clauses.append(Doc(doc.vocab, words=words, spaces=spaces))
        return clauses