nanom commited on
Commit
5d0a311
0 Parent(s):

First commit

Browse files
Files changed (6) hide show
  1. .gitignore +1 -0
  2. app.py +45 -0
  3. modules/.gitignore +1 -0
  4. modules/m_apvoice.py +253 -0
  5. modules/m_connector.py +64 -0
  6. requirements.txt +1 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from modules.m_connector import Connector
3
+
4
# Gradio front-end for the active -> passive voice converter.
# The CSS string is passed through to gr.Blocks unchanged.
iface = gr.Blocks(css="container {max-width: 78%; margin: auto;}")

# Connector wraps the converter and renders its result as HTML.
conn = Connector()

with iface:
    # Input: the active-voice sentence typed by the user.
    in_sentence = gr.Textbox(
        label="Active sentence",
        lines=2,
        placeholder="Enter here the sentence without contractions...",
    )
    btn_act2pas = gr.Button(
        value="Pass to passive!"
    )

    # Slot for the error message produced when the conversion fails.
    error = gr.HTML()

    # Slot for the rendered passive sentence.
    out_sentence = gr.HTML(
        label="Out. Passive sentences:",
    )

    # Clickable sample sentences that fill the input box.
    gr.Examples(
        inputs=in_sentence,
        examples=[
            "The teacher corrected the exams in less than an hour",
            "Christopher Columbus discovered America in 1492",
            "Michael Jackson sings Billy Jean",
            "They are painting the house",
            "My mom has prepared the dinner",
            "The man has not found the farm",
        ]
    )

    # conn.active2passive returns (error_html, output_html), in the same
    # order as the `outputs` list below.
    btn_act2pas.click(
        fn=conn.active2passive,
        inputs=in_sentence,
        outputs=[error, out_sentence]
    )

iface.launch(
    # server_port= 9090,
    # server_name = "0.0.0.0",
    share=True
)
modules/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
modules/m_apvoice.py ADDED
@@ -0,0 +1,253 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import enum
2
+ import subprocess
3
+ import spacy
4
+ import pyinflect
5
+ from difflib import ndiff
6
+ from typing import List, Union, Tuple
7
+
8
+ # BES auxiliary “be” Let it **be**.
9
+ # HVS forms of “have” I**’ve** seen the Queen
10
+ # MD verb, modal auxiliary VerbType=mod This **could** work.
11
+ # VB verb, base form VerbForm=inf I want to **go**.
12
+ # VBD verb, past tense VerbForm=fin Tense=past This **was** a sentence.
13
+ # VBG verb, gerund or present participle VerbForm=part Tense=pres Aspect=prog I am **going**.
14
+ # VBN verb, past participle VerbForm=part Tense=past Aspect=perf The treasure was **lost**.
15
+ # VBP verb, non-3rd person singular present VerbForm=fin Tense=pres I **want** to go.
16
+ # VBZ verb, 3rd person singular present VerbForm=fin Tense=pres Number=sing Person=3 He **wants** to go.
17
+
18
class APVoice:
    """Rule-based converter of English active-voice sentences to passive voice.

    The sentence is parsed with a spaCy pipeline; the subject, verb group and
    direct object are located through their dependency labels, and the passive
    sentence is assembled from those pieces.
    """

    class Tense(enum.Enum):
        """Verbal tenses the converter recognises.

        Every value is a dict with three keys:
          * 'aux'  - accepted tags of the auxiliary verb (None = no auxiliary),
          * 'main' - accepted tags of the root verb,
          * 'tobe' - passive "to be" template, keyed by the number tag of the
                     direct object ('NN' / 'NNS'); '{}' receives the negation.

        Members are tested in definition order, so the order matters.
        """
        simple_present = {
            # 'VBP' covers the non-3rd-person auxiliary ("They do not paint ...").
            'aux': [None, 'VBZ', 'VBP'],
            'main': ['VBZ', 'VBP', 'VB'],
            'tobe': {'NN': 'is{}', 'NNS': 'are{}'}
        }
        simple_past = {
            'aux': [None, 'VBD'],
            'main': ['VBD', 'VB'],
            'tobe': {'NN': 'was{}', 'NNS': 'were{}'}
        }
        # NOTE(review): every modal ('MD') is rendered as "will"; other modals
        # such as "could" are not distinguished here.
        future_simple = {
            'aux': ['MD'],
            'main': ['VB'],
            'tobe': {'NN': 'will{} be', 'NNS': 'will{} be'}
        }
        present_cont = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBG'],
            'tobe': {'NN': 'is{} being', 'NNS': 'are{} being'}
        }
        past_cont = {
            'aux': ['VBD'],
            'main': ['VBG'],
            'tobe': {'NN': 'was{} being', 'NNS': 'were{} being'}
        }
        present_perfect = {
            'aux': ['VBP', 'VBZ'],
            'main': ['VBN'],
            'tobe': {'NN': 'has{} been', 'NNS': 'have{} been'}
        }

    def __init__(
        self
    ) -> None:
        """Create the converter and load the `en_core_web_sm` spaCy model."""
        self.parser = None
        self.__init_parser(model="en_core_web_sm")

    def __init_parser(
        self,
        model: str
    ) -> None:
        """Load the spaCy pipeline `model`, downloading it first if missing.

        Raises:
            OSError: if the model still cannot be loaded after the download.
        """
        # Local import: only this method needs the interpreter path.
        import sys

        self.parser = None
        try:
            self.parser = spacy.load(model)
        except OSError:
            # spaCy signals a missing model with OSError; fetch it once.
            print(f"* Downloading {model} model...")
            # Argument list (no shell) and the running interpreter, so the
            # model lands in the environment this process actually uses.
            subprocess.run(
                [sys.executable, "-m", "spacy", "download", model],
                stdout=subprocess.PIPE,
            )
            self.parser = spacy.load(model)

    def verb2participle(
        self,
        verb: str
    ) -> str:
        """Return the past participle (VBN) of `verb`.

        Falls back to `verb` itself when no inflection is available.
        """
        tk = self.parser(verb)[0]
        participle = tk._.inflect('VBN')
        return participle if participle is not None else verb

    def subjp2objp(
        self,
        pronoun: str
    ) -> str:
        """
        Convert Subject pronouns to Object pronouns.

        The lookup is case-insensitive. A word with no mapping is returned
        unchanged, so callers can always concatenate the result.
        """
        mapping = {
            "i": "me", "you": "you", "we": "us", "they": "them",
            "he": "him", "she": "her", "it": "it",
        }
        return mapping.get(pronoun.lower(), pronoun)

    def get_gramatical_number(
        self,
        dobj_data: List[Tuple[object, str, str]]
    ) -> Union[str, None]:
        """Return the number tag of the direct object.

        Args:
            dobj_data: (token, dependency, tag) triples of the object subtree.

        Returns:
            The tag of the first triple whose dependency is 'dobj', with the
            proper-noun prefix 'NNP' folded into 'NN' (so 'NNPS' -> 'NNS'),
            or None when no such triple exists.
        """
        tag = next((tag for _, dep, tag in dobj_data if dep == 'dobj'), None)
        return None if tag is None else tag.replace('NNP', 'NN')

    def get_verbal_tense(
        self,
        verb_data: List[List[Tuple[object, str, str, int]]]
    ) -> Union[str, None]:
        """Return the name of the first `Tense` matching the verb group.

        Args:
            verb_data: [aux, neg, root] lists of (token, dep, tag, index)
                tuples, as produced by `get_verb`.

        Returns:
            The matching `Tense` member name, or None if nothing matches.
        """
        aux, _, root = verb_data

        # Only the tag of the first auxiliary / root token is considered.
        root_tag = root[0][2] if root else None
        aux_tag = aux[0][2] if aux else None

        for tense in self.Tense:
            if aux_tag in tense.value['aux'] and root_tag in tense.value['main']:
                return tense.name
        return None

    def get_subj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[object, str, str]], str]:
        """Locate the subject of `sentence`.

        Returns:
            The (token, dep, tag) triples of the subtree of the first token
            whose dependency label contains "subj", and that subtree's text
            joined with spaces. Both are empty when no subject is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "subj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_verb(
        self,
        sentence: str,
    ) -> Tuple[List[List[Tuple[object, str, str, int]]], str]:
        """Locate the verb group (auxiliaries, negation, root) of `sentence`.

        Returns:
            `[aux, neg, root]` lists of (token, dep, tag, index) tuples for
            the first ROOT token, and the group's words in sentence order.
            Both are empty when no ROOT token is found.
        """
        out_data = []

        for tk in self.parser(sentence):
            if "ROOT" in tk.dep_:
                main_data = [(tk, tk.dep_, tk.tag_, tk.i)]
                aux_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "aux"]
                neg_data = [(t, t.dep_, t.tag_, t.i) for t in tk.children if t.dep_ == "neg"]
                out_data = [aux_data, neg_data, main_data]
                break

        # Flatten the three lists and restore the original word order.
        ordered = sorted(
            (tup for group in out_data for tup in group),
            key=lambda x: x[3]
        )
        out_str = ' '.join(t.text for t, _, _, _ in ordered)
        return out_data, out_str

    def get_dobj(
        self,
        sentence: str,
    ) -> Tuple[List[Tuple[object, str, str]], str]:
        """Locate the direct object of `sentence`.

        Returns:
            The (token, dep, tag) triples of the subtree of the first token
            whose dependency label contains "dobj", and that subtree's text
            joined with spaces. Both are empty when no object is found.
        """
        out_data = []
        for tk in self.parser(sentence):
            if "dobj" in tk.dep_:
                out_data = [(t, t.dep_, t.tag_) for t in tk.subtree]
                break

        out_str = ' '.join(t.text for t, _, _ in out_data)
        return out_data, out_str

    def get_complement(
        self,
        subj: str,
        verb: str,
        dobj: str,
        full_sentence: str,
    ) -> str:
        """Return the words of `full_sentence` not covered by subj/verb/dobj.

        The sentence rebuilt from the three parts is diffed word by word
        against the full sentence; words present only in the full sentence
        form the complement.
        """
        concat_sentence = subj + ' ' + verb + ' ' + dobj
        extra_words = [
            entry[2:]
            for entry in ndiff(concat_sentence.split(), full_sentence.split())
            if entry[0] == '+'  # '+' marks words only in full_sentence
        ]
        return ' '.join(extra_words).strip()

    def active2passive(
        self,
        active_sentence: str,
        debug: bool = False
    ) -> List[str]:
        """Convert an active-voice sentence into the parts of its passive form.

        Args:
            active_sentence: sentence to convert.
            debug: when True, print the intermediate parse data.

        Returns:
            `[subject, to_be, participle, agent, complement]`.

        Raises:
            RuntimeError: if the sentence is empty, a required part (subject,
                verb, direct object) is missing, the tense is unsupported, or
                the number of the direct object cannot be determined.
        """
        active_sentence = active_sentence.strip()
        if active_sentence == "":
            raise RuntimeError(
                "Error: The sentence does not be empty!"
            )

        subj_data, subj_str = self.get_subj(active_sentence)
        if debug: print(subj_data)
        if subj_str == "":
            raise RuntimeError(
                "Error: The sentence's subject has not been found or the sentence does not be the correct format!"
            )

        verb_data, verb_str = self.get_verb(active_sentence)
        if debug: print(verb_data)
        if verb_str == "":
            raise RuntimeError(
                "Error: The sentence's verb has not been found or the sentence does not be the correct format!"
            )

        dobj_data, dobj_str = self.get_dobj(active_sentence)
        if debug: print(dobj_data)
        if dobj_str == "":
            raise RuntimeError(
                "Error: The sentence's direct object has not been found or the sentence does not be the correct format!"
            )

        complement = self.get_complement(subj_str, verb_str, dobj_str, active_sentence)

        # Passive subject: the active direct object. Only the first character
        # is upper-cased; str.capitalize() would lower-case the rest and
        # damage proper nouns ("Billy Jean" -> "Billy jean").
        p_subj = dobj_str[:1].upper() + dobj_str[1:]

        # Get tense + participle verb
        verbal_tense = self.get_verbal_tense(verb_data)
        if debug: print(verbal_tense)
        if verbal_tense is None:
            raise RuntimeError(
                "Error: The sentence does not be the correct format or the verbal tense has not been implemented yet!"
            )

        _, neg_data, main_data = verb_data
        neg = " not" if neg_data else ""
        gramatical_number = self.get_gramatical_number(dobj_data)
        if debug: print(gramatical_number)

        tobe_templates = self.Tense[verbal_tense].value['tobe']
        if gramatical_number not in tobe_templates:
            # e.g. a pronoun object (tag 'PRP'): report it instead of a KeyError.
            raise RuntimeError(
                "Error: The grammatical number of the direct object could not be determined!"
            )
        p_tobe = tobe_templates[gramatical_number].format(neg)
        p_verb = self.verb2participle(main_data[0][0].text)

        # Passive agent: the active subject, with subject pronouns turned into
        # object pronouns. Proper nouns keep their case; the rest is lowered.
        agent_words = []
        for tk, _, tag in subj_data:
            if tag == 'PRP':
                word = self.subjp2objp(tk.text).lower()
            elif tag in ('NNP', 'NNPS'):
                word = tk.text
            else:
                word = tk.text.lower()
            agent_words.append(word)
        p_agent = ("by " + ' '.join(agent_words)).strip()

        return [p_subj, p_tobe, p_verb, p_agent, complement]
modules/m_connector.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Tuple
2
+ from modules.m_apvoice import APVoice
3
+
4
class Connector:
    """Glue between the UI and `APVoice`: renders conversion results as HTML."""

    def __init__(
        self
    ) -> None:
        """Create the converter and the HTML templates used for rendering."""
        self.apvoice = APVoice()

        # Page skeleton; '{}' receives the rendered sentence parts.
        self.__out_template = """
        <html lang="en">
        <head>
        <meta charset="utf-8">
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/css/bootstrap.min.css" rel="stylesheet" integrity="sha384-GLhlTQ8iRABdZLl6O3oVMWSktQOp6b7In1Zl3/Jr59b6EGGoI1aFkw7cmDA6j6gD" crossorigin="anonymous">
        </head>
        <body>
        <center><div class="btn-group btn-group-sm" role="group">{}</div></center>
        <script src="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0-alpha1/dist/js/bootstrap.bundle.min.js" integrity="sha384-w76AqPfDkMBDXo30jS1Sgez6pr3x5MlQ1ZAGC+nuZB+EYdgRZgiwxhTBTkF7CXvN" crossorigin="anonymous"></script>
        </body>
        </html>
        """
        # '{}' receives the error message.
        self.__error_template = "<center><b>{}</b></center>"

    def create_category(
        self,
        text: str,
        category: str,
        color: str
    ) -> str:
        """Render one sentence part as a labelled, coloured HTML badge.

        Args:
            text: the words of the sentence part; derived from user input,
                so it is HTML-escaped before insertion.
            category: label shown under the text and used as tooltip.
            color: Bootstrap colour suffix for the `btn-<color>` class.

        Returns:
            The HTML snippet, or "" when `text` is empty.
        """
        # Local import keeps this block self-contained.
        from html import escape

        if not text:
            return ""

        safe_text = escape(str(text))
        safe_category = escape(str(category))
        safe_color = escape(str(color))
        return f"""
        <div type="button" title="{safe_category}" class="btn btn-{safe_color} btn-sm p-2">
        <b>{safe_text}</b><br>
        <span class="badge text-bg-light">{safe_category}</span>
        </div>
        """

    def active2passive(
        self,
        sentence: str
    ) -> Tuple[str, str]:
        """Convert `sentence` to passive voice and render the outcome as HTML.

        Returns:
            `(error_html, output_html)`. On success `error_html` is "";
            on failure `output_html` is "" and `error_html` carries the
            escaped exception message.
        """
        from html import escape

        out, error = "", ""
        try:
            subject, tobe, participle, agent, complement = self.apvoice.active2passive(sentence)
            passive_sentece = f"""
            {self.create_category(subject, 'subject', 'primary')}
            {self.create_category(tobe, 'to be', 'warning')}
            {self.create_category(participle, 'participle', 'danger')}
            {self.create_category(agent, 'agent', 'info')}
            {self.create_category(complement, 'compl.', 'dark')}
            """
            out = self.__out_template.format(passive_sentece)

        except Exception as e:
            # UI boundary: every failure is shown to the user instead of
            # propagating; the message may echo user input, so escape it.
            error = self.__error_template.format(escape(str(e)))

        return error, out
requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pyinflect