Spaces:
Running
Running
import enum | |
import subprocess | |
import spacy | |
import pyinflect | |
from difflib import ndiff | |
from typing import List, Union, Tuple, Dict | |
# Penn Treebank verb tags (tag | description | morphology | example):
# BES | auxiliary "be" | - | Let it **be**.
# HVS | forms of "have" | - | I**'ve** seen the Queen.
# MD  | verb, modal auxiliary | VerbType=mod | This **could** work.
# VB  | verb, base form | VerbForm=inf | I want to **go**.
# VBD | verb, past tense | VerbForm=fin Tense=past | This **was** a sentence.
# VBG | verb, gerund or present participle | VerbForm=part Tense=pres Aspect=prog | I am **going**.
# VBN | verb, past participle | VerbForm=part Tense=past Aspect=perf | The treasure was **lost**.
# VBP | verb, non-3rd person singular present | VerbForm=fin Tense=pres | I **want** to go.
# VBZ | verb, 3rd person singular present | VerbForm=fin Tense=pres Number=sing Person=3 | He **wants** to go.
class APVoice:
    """Rule-based converter of simple English active sentences to passive voice.

    The sentence is parsed with a spaCy pipeline; the subject, the root verb
    (with its auxiliary and negation children) and the direct object are
    located through their dependency labels, and the passive pieces are
    assembled from the templates stored in ``Tense``.
    """

    class Tense(enum.Enum):
        """Verbal tenses this converter recognises.

        Each member's value is a dict with three keys:

        * ``'aux'``  -- tags accepted for the first auxiliary of the root verb
          (``None`` means "no auxiliary present").
        * ``'main'`` -- tags accepted for the root verb itself.
        * ``'tobe'`` -- passive "to be" template, keyed by the direct object's
          tag (``'NN'`` or ``'NNS'``); the ``{}`` placeholder receives the
          negation (``" not"`` or ``""``).
        """

        simple_present = {
            # 'VBP' covers non-3rd-person do-support ("They do not eat ...").
            'aux': [None, 'VBZ', 'VBP'],
            'main': ['VBZ', 'VBP', 'VB'],
            'tobe': {'NN': 'is{}', 'NNS': 'are{}'}
        }
        simple_past = {
            'aux': [None, 'VBD'],
            'main': ['VBD', 'VB'],
            'tobe': {'NN': 'was{}', 'NNS': 'were{}'}
        }
        future_simple = {
            'aux': ['MD'],
            'main': ['VB'],
            'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'}
        }
        present_cont = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBG'],
            'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'}
        }
        past_cont = {
            'aux': ['VBD'],
            'main': ['VBG'],
            'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'}
        }
        present_perfect = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBN'],
            'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'}
        }

    def __init__(
        self,
        model: str = "en_core_web_sm"
    ) -> None:
        """Load the spaCy pipeline named *model* (downloading it if missing)."""
        self.parser = None
        self.__init_parser(model=model)

    def __init_parser(
        self,
        model: str
    ) -> None:
        """Set ``self.parser`` to the spaCy pipeline *model*.

        If loading fails with ``OSError`` the model is downloaded through
        ``python -m spacy download`` and loading is retried once; a second
        failure propagates to the caller.
        """
        import sys

        try:
            self.parser = spacy.load(model)
        except OSError:
            print(f"* Downloading {model} model...")
            # List-form argv (no shell) so the model name is never interpreted
            # by a shell; sys.executable targets the running interpreter
            # rather than whatever "python" happens to be on PATH.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False)
            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """Return the past participle (tag ``VBN``) of *verb*.

        The verb is parsed on its own and the first token is inflected through
        the ``inflect`` token extension.
        NOTE(review): the extension presumably comes from pyinflect and may
        yield None when no form is known -- confirm against its docs.
        """
        tk = self.parser(verb)[0]
        return tk._.inflect('VBN')

    def subjp2objp(
        self,
        pronoun: str
    ) -> str:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive; ``None`` is returned for any word that
        is not one of the mapped subject pronouns.
        """
        mapping = {"i":"me","you":"you","we":"us","they":"them","he":"him","she":"her", "it":"it"}
        return mapping.get(pronoun.lower(), None)

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str,str,str]]
    ) -> Union[str, None]:
        """Return the tag of the direct-object head, or ``None`` if absent.

        *dobj_data* holds ``(token, dep, tag)`` triples. The tag of the first
        entry whose dependency is ``'dobj'`` is returned with ``'NNP'``
        rewritten to ``'NN'`` (so ``'NNPS'`` becomes ``'NNS'``).
        """
        for _, dep, tag in dobj_data:
            if dep == 'dobj':
                return tag.replace('NNP', 'NN')
        return None

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str,str,str,int]]]
    ) -> Union[str, None]:
        """Return the name of the first ``Tense`` matching *verb_data*.

        *verb_data* is ``[aux_data, neg_data, main_data]``, each a list of
        ``(token, dep, tag, index)`` tuples. Only the first auxiliary and the
        first root entry are considered. ``None`` is returned when no tense
        accepts the (aux tag, root tag) pair.
        """
        aux, _, root = verb_data
        root_tag = root[0][2] if root else None
        aux_tag = aux[0][2] if aux else None

        for tense in self.Tense:
            if aux_tag in tense.value['aux'] and root_tag in tense.value['main']:
                return tense.name
        return None

    def get_subj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple[str,str,str]], str]:
        """Locate the subject of *sentence*.

        Returns the ``(token, dep, tag)`` triples for the subtree of the first
        token whose dependency label contains ``"subj"``, plus the subtree's
        words joined by single spaces. Both are empty when nothing matches.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "subj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_verb(
        self,
        sentence: str,
    ) -> Tuple[ List[List[Tuple[str,str,str,int]]], str]:
        """Locate the root verb of *sentence* with its auxiliaries/negation.

        Returns ``[aux_data, neg_data, main_data]`` (lists of
        ``(token, dep, tag, index)`` tuples) for the first token whose
        dependency label contains ``"ROOT"``, plus the text of those tokens
        joined in sentence order. When no root is found the data is an empty
        list and the string is empty.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break

        # Restore sentence order using the token index stored in each tuple.
        ordered = sorted(
            (tup for group in out_data for tup in group),
            key=lambda x: x[3])
        out_str = ' '.join(t.text for t, _, _, _ in ordered)
        return out_data, out_str

    def get_dobj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple[str,str,str]], str]:
        """Locate the direct object of *sentence*.

        Returns the ``(token, dep, tag)`` triples for the subtree of the first
        token whose dependency label contains ``"dobj"``, plus the subtree's
        words joined by single spaces. Both are empty when nothing matches.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "dobj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_complement(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """Return the words of *full_sentence* not covered by subj/verb/dobj.

        The whitespace-split words of ``subj + ' ' + verb + ' ' + dobj`` are
        diffed against those of *full_sentence*; the words reported as
        additions are returned joined by single spaces.
        """
        concat_sentence = subj + ' ' + verb + ' ' + dobj
        delta = ndiff(concat_sentence.split(), full_sentence.split())
        # Each ndiff entry is "<mark> <word>"; '+' marks words only present in
        # the full sentence.
        added = [entry[2:] for entry in delta if entry[0] == '+']
        return " ".join(added).strip()

    def active2passive(
        self,
        active_sentence: str,
        debug: bool=False
    ) -> Dict[str, str]:
        """Convert *active_sentence* into the pieces of its passive form.

        Args:
            active_sentence: sentence in active voice.
            debug: when true, print the intermediate parse data.

        Returns:
            A dict with the keys ``'subject'``, ``'tobe'``, ``'participle'``,
            ``'agent'`` and ``'complement'``.

        Raises:
            RuntimeError: if the sentence is empty, if its subject, verb or
                direct object cannot be found, if the verbal tense is not
                supported, or if the direct object's tag has no "to be"
                template (anything other than NN/NNS after normalisation).
        """
        active_sentence = active_sentence.strip()
        if active_sentence == "":
            raise RuntimeError(
                "Error: The sentence does not be empty!"
            )

        subj_data, subj_str = self.get_subj(active_sentence)
        if debug:
            print(subj_data)
        if subj_str == "":
            raise RuntimeError(
                "Error: The sentence's subject has not been found or the sentence does not be the correct format!"
            )

        verb_data, verb_str = self.get_verb(active_sentence)
        if debug:
            print(verb_data)
        if verb_str == "":
            raise RuntimeError(
                "Error: The sentence's verb has not been found or the sentence does not be the correct format!"
            )

        dobj_data, dobj_str = self.get_dobj(active_sentence)
        if debug:
            print(dobj_data)
        if dobj_str == "":
            raise RuntimeError(
                "Error: The sentence's direct object has not been found or the sentence does not be the correct format!"
            )

        complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence)

        # Get passive subject
        p_subj = dobj_str

        # Get tense + participle verb
        verbal_tense = self.get_verbal_tense(verb_data)
        if debug:
            print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!"
            )

        _, neg_data, main_data = verb_data
        neg = " not" if neg_data else ""

        gramatical_number = self.get_gramatical_number(dobj_data)
        if debug:
            print(gramatical_number)

        tobe_templates = self.Tense[verbal_tense].value['tobe']
        if gramatical_number not in tobe_templates:
            # e.g. a pronoun object (tag PRP): report it like the other
            # unsupported inputs instead of leaking a KeyError.
            raise RuntimeError(
                "Error: The sentence's direct object type has not been implemented yet!"
            )
        p_tobe = tobe_templates[gramatical_number].format(neg)
        p_verb = self.verb2participle(main_data[0][0].text)

        # Convert active subject to passive agent
        agent_words = []
        for position, (tk, _, tag) in enumerate(subj_data):
            word = tk.text
            if tag == 'PRP':
                # Keep the original word when the pronoun has no mapping.
                word = self.subjp2objp(word) or word
            elif position == 0 and tag not in ('NNP', 'NNPS') and not tk.ent_type_:
                # The subject usually opens the active sentence capitalised;
                # undo that unless the word is a proper noun / named entity.
                word = word[:1].lower() + word[1:]
            agent_words.append(word)
        p_agent = "by " + " ".join(agent_words)

        return {
            # Upper-case only the first character so proper nouns inside the
            # object keep their capitals (str.capitalize would lower them).
            'subject': p_subj[:1].upper() + p_subj[1:],
            'tobe': p_tobe,
            'participle': p_verb,
            'agent': p_agent.strip(),
            'complement': complement
        }