Spaces:

nanom
/

to_passive_voice

Running

File size: 6,201 Bytes

e5fc5ba

from modules.m_parser import Parser, Tense
from difflib import ndiff
from typing import List, Tuple, Dict


class ActiveVoice(Parser):
    """Split an active-voice sentence into the parts of its passive form.

    The sentence is analysed with ``self.parser`` (inherited from
    ``Parser``).  The tokens it yields are read through ``text``, ``dep_``,
    ``tag_``, ``i``, ``subtree`` and ``children`` -- this looks like the
    spaCy token API, TODO confirm against ``modules.m_parser``.

    The public entry point is :meth:`to_pasive`.
    """

    # Penn Treebank tags for proper nouns; an agent that starts with one of
    # these keeps its capital letter.  NOTE(review): assumes the parser emits
    # Penn Treebank tags, as the existing 'PRP' check already does.
    _PROPER_NOUN_TAGS = ('NNP', 'NNPS')

    def __init__(
        self
    ) -> None:
        """Initialise the underlying ``Parser``."""
        super().__init__()

    def __check_errors(
        self,
        sentence: str,
        debug: bool = False
    ) -> Dict:
        """Extract the sentence parts and fail early if one is missing.

        Args:
            sentence: Active-voice sentence; surrounding whitespace is
                stripped before analysis.
            debug: When true, print each extracted part.

        Returns:
            Dict with keys ``'subj'``, ``'verb'`` and ``'dobj'`` (each a
            ``[data, text]`` pair) and ``'compl'`` (text only).

        Raises:
            RuntimeError: If the sentence is empty, or its subject, verb or
                direct object cannot be found.
        """
        sentence = sentence.strip()
        if not sentence:
            raise RuntimeError(
                "Error: The sentence can not be empty!"
            )

        subj_data, subj_str = self.get_a_subj(sentence)
        if debug:
            print(subj_data)
        if not subj_str:
            raise RuntimeError(
                "Error: The subject of the sentence was not found or the sentence is not in the correct format!"
            )

        verb_data, verb_str = self.get_a_verb(sentence)
        if debug:
            print(verb_data)
        if not verb_str:
            raise RuntimeError(
                "Error: The verbs of the sentence were not found or the sentence is not in the correct format!"
            )

        dobj_data, dobj_str = self.get_a_dobj(sentence)
        if debug:
            print(dobj_data)
        if not dobj_str:
            raise RuntimeError(
                "Error: The direct object of the sentence was not found or the sentence is not in the correct format!"
            )

        compl_str = self.get_a_compl(subj_str, verb_str, dobj_str, sentence)
        if debug:
            print(compl_str)

        return {
            'subj': [subj_data, subj_str],
            'verb': [verb_data, verb_str],
            'dobj': [dobj_data, dobj_str],
            'compl': compl_str,
        }

    def __create_p_subj(
        self,
        a_dobj: str
    ) -> str:
        """Return the passive subject, which is the active direct object."""
        return a_dobj

    def __create_p_verb(
        self,
        a_verb_data: List[Tuple[str,str,str,int]],
        a_dobj_data: List[Tuple[str,str,str]],
        debug: bool = False
    ) -> Tuple[str, str]:
        """Build the "to be" form and the participle of the passive verb.

        Args:
            a_verb_data: ``[aux, neg, main]`` groups as returned by
                :meth:`get_a_verb`.
            a_dobj_data: Direct-object data as returned by
                :meth:`get_a_dobj`; used to pick the grammatical number.
            debug: When true, print the intermediate values.

        Returns:
            ``(tobe, participle)``.

        Raises:
            RuntimeError: If ``get_verbal_tense`` returns ``None``.
        """
        # Know the verbal tense
        verbal_tense = self.get_verbal_tense(a_verb_data)
        if debug:
            print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence is not in the correct format or the verbal tense has not been implemented yet!"
            )

        # Know if the sentence is affirmative or negative
        _, neg_data, main_data = a_verb_data
        neg = " not" if len(neg_data) > 0 else ""

        # Know if the noun is plural or singular
        gramatical_number = self.get_gramatical_number(a_dobj_data)
        if debug:
            print(gramatical_number)

        # Form the "to be" verb: the tense entry holds a template whose
        # placeholder receives the negation (" not" or "").
        tobe = Tense[verbal_tense].value['tobe'][gramatical_number]
        tobe = tobe.format(neg)
        if debug:
            print(tobe)

        # Know the participle of the main verb
        participle = self.verb2participle(main_data[0][0].text)
        if debug:
            print(participle)

        return tobe, participle

    def __create_p_agent(
        self,
        a_subj_data: List[Tuple[str,str,str]],
        debug: bool = False
    ) -> str:
        """Build the passive agent text from the active subject tokens.

        Tokens tagged ``'PRP'`` are passed through ``subj2obj``; every other
        token keeps its text.  ``debug`` is accepted but not used.
        """
        agent = [
            self.subj2obj(tk.text) if tag == 'PRP' else tk.text
            for tk, _, tag in a_subj_data
        ]
        return ' '.join(agent).strip()

    def _first_subtree_with_dep(
        self,
        sentence: str,
        dep_label: str,
    ) -> Tuple[ List[Tuple[str,str,str]], str]:
        """Return the subtree of the first token whose ``dep_`` contains ``dep_label``.

        Returns ``(data, text)``: ``data`` lists ``(token, dep_, tag_)`` for
        each subtree token and ``text`` joins their texts with spaces.  Both
        are empty when no token matches.
        """
        out_data = []
        for tk in self.parser(sentence):
            if dep_label in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_a_subj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple[str,str,str]], str]:
        """Return ``(data, text)`` for the first token whose ``dep_`` contains ``"subj"``.

        ``data`` holds ``(token, dep_, tag_)`` for every token of that
        token's subtree; both values are empty when there is no match.
        """
        return self._first_subtree_with_dep(sentence, "subj")

    def get_a_verb(
        self,
        sentence: str,
    ) -> Tuple[ List[List[Tuple[str,str,str,int]]], str]:
        """Return the verb group built around the first ``ROOT`` token.

        Returns:
            ``(data, text)``.  ``data`` is ``[aux, neg, main]``, each a list
            of ``(token, dep_, tag_, i)`` tuples: the root's children with
            ``dep_ == "aux"``, those with ``dep_ == "neg"``, and the root
            itself.  ``text`` joins the token texts ordered by ``i``.
            ``data`` is ``[]`` and ``text`` is ``""`` when no root is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break

        # Restore sentence order: the groups are stored as aux / neg / main.
        ordered = sorted(
            (tup for group in out_data for tup in group),
            key=lambda x: x[3],
        )
        out_str = ' '.join(t.text for t, _, _, _ in ordered)
        return out_data, out_str

    def get_a_dobj(
        self,
        sentence: str,
    ) -> Tuple[ List[Tuple[str,str,str]], str]:
        """Return ``(data, text)`` for the first token whose ``dep_`` contains ``"dobj"``.

        ``data`` holds ``(token, dep_, tag_)`` for every token of that
        token's subtree; both values are empty when there is no match.
        """
        return self._first_subtree_with_dep(sentence, "dobj")

    def get_a_compl(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """Return the words of ``full_sentence`` not covered by subject, verb and object.

        The whitespace-split words of ``"<subj> <verb> <dobj>"`` are compared
        with those of ``full_sentence`` using ``difflib.ndiff``; the words
        reported as additions (``'+'``) are joined with single spaces.
        """
        concat_sentence = subj + ' ' + verb + ' ' + dobj
        added = [
            tk[2:]
            for tk in ndiff(concat_sentence.split(), full_sentence.split())
            if tk[0] == '+'
        ]
        return ' '.join(added).strip()

    def to_pasive(
        self, 
        sentence: str, 
        debug: bool=False
    ) -> Dict[str, str]:
        """Return the components of the passive form of ``sentence``.

        Args:
            sentence: Sentence in active voice.
            debug: When true, print the intermediate analysis.

        Returns:
            Dict with keys ``'subj'`` (passive subject, first letter
            upper-cased), ``'tobe'``, ``'participle'``, ``'agent'``
            (prefixed with ``"by "``) and ``'compl'``.

        Raises:
            RuntimeError: If the sentence is empty, a required part is
                missing, or the verbal tense is not supported.
        """
        outputs = self.__check_errors(sentence, debug)
        subj_data, subj_str = outputs['subj']
        verb_data, verb_str = outputs['verb']
        dobj_data, dobj_str = outputs['dobj']
        p_compl = outputs['compl']

        # Create passive subject
        p_subj = self.__create_p_subj(
            dobj_str
        )

        # Create passive verb
        p_tobe, p_participle = self.__create_p_verb(
            verb_data, dobj_data, debug
        )

        # Create passive agent
        p_agent = self.__create_p_agent(
            subj_data, debug
        )

        # The agent moves away from the start of the sentence, so its first
        # letter is lower-cased -- unless the subject starts with a proper
        # noun.  Only the first character is touched: stripping the
        # remainder would glue a one-letter first word to the next one
        # ("A dog" -> "adog").
        starts_with_proper_noun = (
            len(subj_data) > 0 and subj_data[0][2] in self._PROPER_NOUN_TAGS
        )
        if not starts_with_proper_noun:
            p_agent = p_agent[:1].lower() + p_agent[1:]

        # Upper-case only the first character; str.capitalize() would also
        # lower-case the rest and damage proper nouns inside the subject.
        p_subj = p_subj[:1].upper() + p_subj[1:]

        return {
            'subj': p_subj,
            'tobe': p_tobe,
            'participle': p_participle,
            'agent': 'by ' + p_agent,
            'compl': p_compl
        }