import enum
import subprocess
import sys
import spacy
import pyinflect  # noqa: F401 -- imported for its side effect: provides the `._.inflect` token extension used below
from typing import List, Union, Tuple

# Penn Treebank verb tags as emitted by spaCy's English models
# (tag, description, morphology, example):
#
# BES   auxiliary "be"                                                                 Let it **be**.
# HVS   forms of "have"                                                                I**'ve** seen the Queen
# MD    verb, modal auxiliary                VerbType=mod                              This **could** work.
# VB    verb, base form                      VerbForm=inf                              I want to **go**.
# VBD   verb, past tense                     VerbForm=fin Tense=past                   This **was** a sentence.
# VBG   verb, gerund or present participle   VerbForm=part Tense=pres Aspect=prog      I am **going**.
# VBN   verb, past participle                VerbForm=part Tense=past Aspect=perf      The treasure was **lost**.
# VBP   verb, non-3rd person singular present  VerbForm=fin Tense=pres                 I **want** to go.
# VBZ   verb, 3rd person singular present    VerbForm=fin Tense=pres Number=sing Person=3  He **wants** to go.


class Tense(enum.Enum):
    """
    Verbal tenses described by the POS tags of their auxiliary and main verb.

    Each member's value is a dict with three keys:
      * 'aux'  -- tags accepted for the auxiliary verb (None means "no auxiliary").
      * 'main' -- tags accepted for the main (root) verb.
      * 'tobe' -- "to be" templates keyed by noun tag ('NN' / 'NNS'); each
                  template holds one `{}` placeholder (presumably for an
                  optional negation -- confirm against the caller).

    Member order matters: `Parser.get_verbal_tense` returns the first match.
    """
    # NOTE(review): a 'VBP' auxiliary with a 'VB' main verb ("I do not go")
    # matches no member here -- confirm whether that is intended.
    simple_present = {
        'aux': [None, 'VBZ'],
        'main': ['VBZ', 'VBP', 'VB'],
        'tobe': {'NN': 'is{}', 'NNS': 'are{}'}
    }
    simple_past = {
        'aux': [None, 'VBD'],
        'main': ['VBD', 'VB'],
        'tobe': {'NN': 'was{}', 'NNS': 'were{}'}
    }
    future_simple = {
        'aux': ['MD'],
        'main': ['VB'],
        'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'}
    }
    present_cont = {
        'aux': ['VBP', 'VBZ'],
        'main': ['VBG'],
        'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'}
    }
    past_cont = {
        'aux': ['VBD'],
        'main': ['VBG'],
        'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'}
    }
    present_perfect = {
        'aux': ['VBP', 'VBZ'],
        'main': ['VBN'],
        'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'}
    }


class Parser:
    """Thin wrapper around a spaCy pipeline with a few grammar helpers."""

    def __init__(
        self
    ) -> None:
        """Load the `en_core_web_sm` pipeline, downloading it if necessary."""
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        """
        Load the spaCy pipeline `model` into `self.parser`.

        If the first load fails, the model is downloaded with
        `<current interpreter> -m spacy download <model>` and the load is
        retried once; a failure of the second load propagates to the caller.
        """
        self.parser = None
        try:
            self.parser = spacy.load(model)
        except Exception:
            # `Exception` rather than a bare `except`, so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            print(f"* Downloading {model} model...")
            # Use the running interpreter (not whatever `python` is on PATH)
            # so the model lands in the environment that will load it, and
            # pass an argument list so no shell is involved.
            # stdout is captured only to keep the download output quiet.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
                check=False,
            )
            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """
        Return the 'VBN' (past participle) inflection of `verb`.

        Only the first token of the parsed text is inflected; an empty
        `verb` therefore raises IndexError.
        """
        first_token = self.parser(verb)[0]
        return first_token._.inflect('VBN')

    def subj2obj(
        self,
        pronoun: str
    ) -> Union[str, None]:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive. Returns None when `pronoun` is not
        one of the known subject pronouns.
        """
        mapping = {
            "i": "me",
            "you": "you",
            "we": "us",
            "they": "them",
            "he": "him",
            "she": "her",
            "it": "it",
        }
        return mapping.get(pronoun.lower(), None)

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[str, str, str]]
    ) -> Union[str, None]:
        """
        Return the tag of the first direct object in `dobj_data`.

        `dobj_data` is a sequence of 3-item entries whose second item is the
        dependency label and whose third item is the tag. The tag of the
        first entry labelled 'dobj' is returned with 'NNP' rewritten to 'NN'
        (so 'NNP' -> 'NN' and 'NNPS' -> 'NNS'). Returns None when there is no
        'dobj' entry.
        """
        dobj_tag = next(
            (tag for _, dep, tag in dobj_data if dep == 'dobj'),
            None,
        )
        if dobj_tag is None:
            return None
        return dobj_tag.replace('NNP', 'NN')

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[str, str, str, int]]]
    ) -> Union[str, None]:
        """
        Return the name of the first `Tense` matching `verb_data`, or None.

        `verb_data` unpacks into exactly three lists: auxiliaries, negations
        and roots. Only the tag (third item) of the first auxiliary and of
        the first root is used; the negation list is ignored. A missing
        auxiliary is represented as None, which some tenses accept.
        """
        aux, _neg, root = verb_data
        root_tag = root[0][2] if len(root) > 0 else None
        aux_tag = aux[0][2] if len(aux) > 0 else None

        for tense in Tense:
            if aux_tag in tense.value['aux'] and root_tag in tense.value['main']:
                return tense.name
        return None