# to_passive_voice/modules/m_apvoice.py
import enum
import subprocess
import spacy
import pyinflect
from difflib import ndiff
from typing import List, Union, Tuple, Dict
# BES auxiliary “be” Let it **be**.
# HVS forms of “have” I**’ve** seen the Queen
# MD verb, modal auxiliary VerbType=mod This **could** work.
# VB verb, base form VerbForm=inf I want to **go**.
# VBD verb, past tense VerbForm=fin Tense=past This **was** a sentence.
# VBG verb, gerund or present participle VerbForm=part Tense=pres Aspect=prog I am **going**.
# VBN verb, past participle VerbForm=part Tense=past Aspect=perf The treasure was **lost**.
# VBP verb, non-3rd person singular present VerbForm=fin Tense=pres I **want** to go.
# VBZ verb, 3rd person singular present VerbForm=fin Tense=pres Number=sing Person=3 He **wants** to go.
class APVoice:

    class Tense(enum.Enum):
        simple_present = {
            'aux': [None, 'VBZ'],
            'main': ['VBZ', 'VBP', 'VB'],
            'tobe': {'NN': 'is{}', 'NNS': 'are{}'}
        }
        simple_past = {
            'aux': [None, 'VBD'],
            'main': ['VBD', 'VB'],
            'tobe': {'NN': 'was{}', 'NNS': 'were{}'}
        }
        future_simple = {
            'aux': ['MD'],
            'main': ['VB'],
            'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'}
        }
        present_cont = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBG'],
            'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'}
        }
        past_cont = {
            'aux': ['VBD'],
            'main': ['VBG'],
            'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'}
        }
        present_perfect = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBN'],
            'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'}
        }
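
    # Each Tense entry maps the POS tags expected on the auxiliary ('aux') and
    # main verb ('main') of the active sentence to the "to be" template used in
    # the passive sentence ('tobe'), keyed by the grammatical number of the new
    # passive subject. The '{}' slot receives " not" for negated sentences.
    # Illustrative example (actual tags come from the spaCy parse):
    #   "The boy eats the apple" -> aux=None, main='VBZ' -> simple_present
    #   -> singular object ('NN') -> "is" -> "The apple is eaten by the boy".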

    def __init__(
        self
    ) -> None:
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        self.parser = None
        try:
            self.parser = spacy.load(model)
        except OSError:
            # The model is not installed yet: download it, then load it.
            print(f"* Downloading {model} model...")
            _ = subprocess.Popen(
                f"python -m spacy download {model}",
                stdout=subprocess.PIPE,
                shell=True).communicate()
            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """
        Inflect a verb to its past participle (VBN) form using pyinflect.
        """
        tk = self.parser(verb)[0]
        return tk._.inflect('VBN')
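
    # For example, verb2participle("eats") is expected to return "eaten"
    # (the exact output depends on the installed spaCy/pyinflect versions).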

    def subjp2objp(
        self,
        pronoun: str
    ) -> Union[str, None]:
        """
        Convert a subject pronoun to the corresponding object pronoun.
        """
        mapping = {"i": "me", "you": "you", "we": "us", "they": "them", "he": "him", "she": "her", "it": "it"}
        return mapping.get(pronoun.lower(), None)
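
    # e.g. subjp2objp("She") -> "her"; unknown pronouns return None.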

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str, str, str]]
    ) -> Union[str, None]:
        """
        Return the grammatical number ('NN' singular, 'NNS' plural) of the
        direct object, or None if no direct object was found.
        """
        result = [tag for _, dep, tag in dobj_data if dep == 'dobj']
        if len(result) == 0:
            result = None
        else:
            # Collapse proper-noun tags (NNP/NNPS) into NN/NNS.
            result = result[0].replace('NNP', 'NN')
        return result
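
    # e.g. for "The boy ate the apples", the dobj head "apples" is typically
    # tagged 'NNS', so the passive subject is treated as plural ("were eaten").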

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str, str, str, int]]]
    ) -> Union[str, None]:
        """
        Match the (auxiliary, main verb) tag pair against the Tense enum and
        return the name of the detected tense, or None if nothing matches.
        """
        aux, neg, root = verb_data
        root = root[0][2] if len(root) > 0 else None
        aux = aux[0][2] if len(aux) > 0 else None
        tense_name = None
        for tense in self.Tense:
            if aux in tense.value['aux'] and root in tense.value['main']:
                tense_name = tense.name
                break
        return tense_name
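
    # In a typical parse, "has eaten" -> aux 'VBZ', root 'VBN' -> 'present_perfect';
    # "ate" -> aux None, root 'VBD' -> 'simple_past'.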

    def get_subj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[str, str, str]], str]:
        """
        Return the subject subtree as (token, dependency, tag) triples,
        together with its plain-text form.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "subj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break
        out_str = ' '.join([t.text for t, _, _ in out_data])
        return out_data, out_str
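
    # e.g. for "The little boy kicked the ball", the subject subtree is
    # "The little boy" (assuming the usual en_core_web_sm parse).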

    def get_verb(
        self,
        sentence: str,
    ) -> Tuple[List[List[Tuple[str, str, str, int]]], str]:
        """
        Return [aux_data, neg_data, main_data] for the root verb, where each
        entry is a list of (token, dependency, tag, index) tuples, together
        with the verb phrase as plain text (tokens sorted by position).
        """
        main_data = []
        aux_data = []
        neg_data = []
        out_data = []
        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break
        out_tokens = sorted([tup for list_ in out_data for tup in list_], key=lambda x: x[3])
        out_str = ' '.join([t.text for t, _, _, _ in out_tokens])
        return out_data, out_str
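
    # e.g. "She has not eaten the cake" is expected to yield
    # aux -> [("has", "aux", "VBZ", 1)], neg -> [("not", "neg", "RB", 2)],
    # main -> [("eaten", "ROOT", "VBN", 3)], and the string "has not eaten".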

    def get_dobj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[str, str, str]], str]:
        """
        Return the direct-object subtree as (token, dependency, tag) triples,
        together with its plain-text form.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "dobj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break
        out_str = ' '.join([t.text for t, _, _ in out_data])
        return out_data, out_str

    def get_complement(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """
        Return the words of the full sentence that are not part of the
        subject, verb or direct object (e.g. adverbials), using a word-level
        diff between "subj verb dobj" and the original sentence.
        """
        concat_sentence = subj + ' ' + verb + ' ' + dobj
        diff = ""
        for tk in ndiff(concat_sentence.split(), full_sentence.split()):
            mark, word = tk[0], tk[2:]
            if mark == '+':
                diff += word + " "
        return diff.strip()
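
    # e.g. subj="The boy", verb="ate", dobj="the apple",
    # full_sentence="The boy ate the apple in the park" -> "in the park".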

    def active2passive(
        self,
        active_sentence: str,
        debug: bool = False
    ) -> Dict[str, str]:
        active_sentence = active_sentence.strip()
        if active_sentence == "":
            raise RuntimeError(
                "Error: The sentence must not be empty!"
            )
        subj_data, subj_str = self.get_subj(active_sentence)
        if debug: print(subj_data)
        if subj_str == "":
            raise RuntimeError(
                "Error: The sentence's subject was not found, or the sentence is not in the expected format!"
            )
        verb_data, verb_str = self.get_verb(active_sentence)
        if debug: print(verb_data)
        if verb_str == "":
            raise RuntimeError(
                "Error: The sentence's verb was not found, or the sentence is not in the expected format!"
            )
        dobj_data, dobj_str = self.get_dobj(active_sentence)
        if debug: print(dobj_data)
        if dobj_str == "":
            raise RuntimeError(
                "Error: The sentence's direct object was not found, or the sentence is not in the expected format!"
            )
        complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence)
        # Get the passive subject (the active direct object)
        p_subj = dobj_str
        # Get the verbal tense and the participle of the main verb
        verbal_tense = self.get_verbal_tense(verb_data)
        if debug: print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence is not in the expected format, or its verbal tense has not been implemented yet!"
            )
        _, neg_data, main_data = verb_data
        neg = " not" if len(neg_data) > 0 else ""
        gramatical_number = self.get_gramatical_number(dobj_data)
        if debug: print(gramatical_number)
        p_tobe = self.Tense[verbal_tense].value['tobe'][gramatical_number].format(neg)
        p_verb = self.verb2participle(main_data[0][0].text)
        # Convert the active subject into the passive agent ("by ...")
        p_agent = "by "
        for tk, _, tag in subj_data:
            word = tk.text
            if tag == 'PRP':
                # Subject pronouns become object pronouns ("she" -> "her")
                word = self.subjp2objp(word)
            p_agent += word + " "
        return {
            'subject': p_subj.capitalize(),
            'tobe': p_tobe,
            'participle': p_verb,
            'agent': p_agent[0].lower() + p_agent[1:].strip(),
            'complement': complement
        }
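

# Minimal usage sketch (not part of the original interface): it assumes the
# en_core_web_sm model can be loaded or downloaded, and simply reassembles the
# returned parts into a passive sentence. Actual output depends on the parse.
if __name__ == "__main__":
    apv = APVoice()
    parts = apv.active2passive("The boy ate the apple")
    # Expected parts (approximately): subject="The apple", tobe="was",
    # participle="eaten", agent="by the boy", complement=""
    passive = " ".join(
        p for p in [
            parts['subject'],
            parts['tobe'],
            parts['participle'],
            parts['agent'],
            parts['complement'],
        ] if p
    ).strip()
    print(passive)  # e.g. "The apple was eaten by the boy"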