better inference
Browse files
- introduck/inference.py +16 -59
- introduck/routes.py +75 -43
- introduck/utils.py +6 -1
- requirements.txt +1 -0
introduck/inference.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import spacy
|
2 |
|
3 |
|
@@ -13,71 +14,19 @@ def _load_spacy_model() -> spacy.Language:
|
|
13 |
return _load_spacy_model.nlp
|
14 |
|
15 |
|
16 |
-
def
|
17 |
if not payload:
|
18 |
-
return
|
19 |
|
20 |
nlp: spacy.Language = _load_spacy_model()
|
21 |
doc = nlp(payload)
|
22 |
|
23 |
-
|
24 |
-
|
25 |
-
if ent.label_ != "PERSON":
|
26 |
-
continue
|
27 |
-
|
28 |
-
contacts.append([ent.text, "unknown", "unknown", "no notes"])
|
29 |
-
|
30 |
-
return contacts
|
31 |
-
|
32 |
-
|
33 |
-
def generate_acceptance_reply(payload: str) -> str:
|
34 |
-
if not payload:
|
35 |
-
return ""
|
36 |
-
|
37 |
-
msg: str = """\
|
38 |
-
Hi,
|
39 |
-
|
40 |
-
Thanks for reaching out! I will be glad to make an intro to {PERSON or COMPANY}!
|
41 |
-
|
42 |
-
Looking forward to talking to you,
|
43 |
-
{SIGNATURE}\
|
44 |
-
"""
|
45 |
-
|
46 |
-
return msg
|
47 |
-
|
48 |
-
|
49 |
-
def generate_rejection_reply(payload: str) -> str:
|
50 |
-
if not payload:
|
51 |
-
return ""
|
52 |
|
53 |
-
|
54 |
-
Hi,
|
55 |
-
|
56 |
-
Thanks for reaching out!
|
57 |
-
|
58 |
-
I'm not able to make an intro at this time,
|
59 |
-
but I'll keep your company in mind for the future.
|
60 |
-
|
61 |
-
Thanks again,
|
62 |
-
{SIGNATURE}\
|
63 |
-
"""
|
64 |
-
|
65 |
-
return msg
|
66 |
-
|
67 |
-
|
68 |
-
def highlight_named_entities(payload: str, labels: list[str] = None) -> dict:
|
69 |
-
if not payload:
|
70 |
-
return {"text": "", "entities": []}
|
71 |
-
|
72 |
-
if labels is None:
|
73 |
-
labels = ["ORG", "PERSON"]
|
74 |
-
|
75 |
-
nlp: spacy.Language = _load_spacy_model()
|
76 |
-
doc = nlp(payload)
|
77 |
-
|
78 |
-
entities: list = []
|
79 |
for ent in doc.ents:
|
80 |
-
if ent.label_
|
81 |
continue
|
82 |
|
83 |
entity: dict = {
|
@@ -87,4 +36,12 @@ def highlight_named_entities(payload: str, labels: list[str] = None) -> dict:
|
|
87 |
|
88 |
entities.append(entity)
|
89 |
|
90 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
import spacy
|
3 |
|
4 |
|
|
|
14 |
return _load_spacy_model.nlp
|
15 |
|
16 |
|
17 |
+
def extract_contacts_from_text(payload: str) -> dict:
|
18 |
if not payload:
|
19 |
+
return {}
|
20 |
|
21 |
nlp: spacy.Language = _load_spacy_model()
|
22 |
doc = nlp(payload)
|
23 |
|
24 |
+
contacts_cols: list[str] = ["name", "email"]
|
25 |
+
contacts: pd.DataFrame = pd.DataFrame(columns=contacts_cols)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
+
entities: list[dict] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
for ent in doc.ents:
|
29 |
+
if ent.label_ != "PERSON":
|
30 |
continue
|
31 |
|
32 |
entity: dict = {
|
|
|
36 |
|
37 |
entities.append(entity)
|
38 |
|
39 |
+
contact: dict = {
|
40 |
+
"name": ent.text,
|
41 |
+
"email": "unknown"}
|
42 |
+
|
43 |
+
contacts = pd.concat(
|
44 |
+
[contacts, pd.DataFrame([contact])],
|
45 |
+
ignore_index=True)
|
46 |
+
|
47 |
+
return {"contacts": contacts, "entities": entities, "payload": payload}
|
introduck/routes.py
CHANGED
@@ -1,13 +1,10 @@
|
|
1 |
import gradio as gr
|
|
|
2 |
|
3 |
from fastapi import FastAPI
|
4 |
from gradio.routes import App as GradioApp
|
5 |
-
|
6 |
-
from introduck.
|
7 |
-
from introduck.inference import generate_acceptance_reply
|
8 |
-
from introduck.inference import generate_rejection_reply
|
9 |
-
from introduck.inference import highlight_named_entities
|
10 |
-
from introduck.utils import dump_email_as_string
|
11 |
|
12 |
_INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
|
13 |
_INTRO_MESSAGE_EXAMPLE: str = """\
|
@@ -34,27 +31,70 @@ Best,
|
|
34 |
"""
|
35 |
|
36 |
|
37 |
-
def _analyze_message(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
msg_data: dict = {
|
39 |
"from": sender,
|
40 |
"to": recipients,
|
41 |
"subject": subject,
|
42 |
"body": body}
|
43 |
|
44 |
-
msg: str =
|
45 |
|
46 |
-
|
47 |
-
acceptance_reply: str = generate_acceptance_reply(payload=msg)
|
48 |
-
rejection_reply: str = generate_rejection_reply(payload=msg)
|
49 |
-
highlighted_text: dict = highlight_named_entities(payload=msg)
|
50 |
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
|
54 |
def _use_message_template() -> (str, str):
|
55 |
return _INTRO_SUBJECT_EXAMPLE, _INTRO_MESSAGE_EXAMPLE
|
56 |
|
57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
def create_playground_route() -> FastAPI:
|
59 |
# TODO: Fix once resolved https://github.com/gradio-app/gradio/issues/1683
|
60 |
# TODO: ...and remove elem_id="htxt" from HighlightedText for RFC822 output
|
@@ -109,6 +149,10 @@ def create_playground_route() -> FastAPI:
|
|
109 |
# label="Attachments:",
|
110 |
# file_count="multiple")
|
111 |
|
|
|
|
|
|
|
|
|
112 |
with gr.Row():
|
113 |
email_template_button: gr.Button = gr.Button(
|
114 |
value="Use template",
|
@@ -128,30 +172,11 @@ def create_playground_route() -> FastAPI:
|
|
128 |
|
129 |
with gr.Tabs(selected="default"):
|
130 |
with gr.TabItem(label="Contacts", id="default"):
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
"Email",
|
135 |
-
"NOTE"]
|
136 |
-
|
137 |
-
contacts_output: gr.Dataframe = gr.Dataframe(
|
138 |
-
headers=contacts_table_headers,
|
139 |
-
max_cols=len(contacts_table_headers),
|
140 |
-
max_rows=16)
|
141 |
-
|
142 |
-
with gr.TabItem(label="Replies"):
|
143 |
-
with gr.Column():
|
144 |
-
acceptance_template_output: gr.Textbox = gr.Textbox(
|
145 |
-
label="Use this message to accept an intro request:",
|
146 |
-
interactive=True)
|
147 |
-
|
148 |
-
rejection_template_output: gr.Textbox = gr.Textbox(
|
149 |
-
label="Use this message to decline an intro request:",
|
150 |
-
interactive=True)
|
151 |
-
|
152 |
-
with gr.TabItem(label="RFC822"):
|
153 |
rfc822_output: gr.HighlightedText = gr.HighlightedText(
|
154 |
-
label="RFC822 message (for debugging purposes):",
|
155 |
show_legend=False,
|
156 |
elem_id="htxt") # temporary fix, see above for more info
|
157 |
|
@@ -174,11 +199,6 @@ def create_playground_route() -> FastAPI:
|
|
174 |
|
175 |
# ui end, continue with signals and slots...
|
176 |
|
177 |
-
email_template_button.click(
|
178 |
-
fn=_use_message_template,
|
179 |
-
inputs=[],
|
180 |
-
outputs=[email_subject_input, email_body_input])
|
181 |
-
|
182 |
email_submit_button.click(
|
183 |
fn=_analyze_message,
|
184 |
inputs=[
|
@@ -188,8 +208,20 @@ def create_playground_route() -> FastAPI:
|
|
188 |
email_body_input],
|
189 |
outputs=[
|
190 |
contacts_output,
|
191 |
-
acceptance_template_output,
|
192 |
-
rejection_template_output,
|
193 |
rfc822_output])
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
return GradioApp.create_app(blocks=base_blocks)
|
|
|
1 |
import gradio as gr
|
2 |
+
import pandas as pd
|
3 |
|
4 |
from fastapi import FastAPI
|
5 |
from gradio.routes import App as GradioApp
|
6 |
+
from introduck.inference import extract_contacts_from_text
|
7 |
+
from introduck.utils import dump_email_to_string
|
|
|
|
|
|
|
|
|
8 |
|
9 |
_INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
|
10 |
_INTRO_MESSAGE_EXAMPLE: str = """\
|
|
|
31 |
"""
|
32 |
|
33 |
|
34 |
+
def _analyze_message(
|
35 |
+
sender: str,
|
36 |
+
recipients: str,
|
37 |
+
subject: str,
|
38 |
+
body: str
|
39 |
+
) -> (pd.DataFrame, dict):
|
40 |
+
default_outputs = None, None
|
41 |
+
|
42 |
+
# TODO: validate sender and recipients
|
43 |
+
|
44 |
+
if not subject:
|
45 |
+
return default_outputs
|
46 |
+
|
47 |
+
if not body:
|
48 |
+
return default_outputs
|
49 |
+
|
50 |
msg_data: dict = {
|
51 |
"from": sender,
|
52 |
"to": recipients,
|
53 |
"subject": subject,
|
54 |
"body": body}
|
55 |
|
56 |
+
msg: str = dump_email_to_string(data=msg_data)
|
57 |
|
58 |
+
output: dict = extract_contacts_from_text(payload=msg)
|
|
|
|
|
|
|
59 |
|
60 |
+
contacts: pd.DataFrame = output.get("contacts", pd.DataFrame())
|
61 |
+
highlighted_text: dict = {
|
62 |
+
"text": output.get("payload", str),
|
63 |
+
"entities": output.get("entities", [])}
|
64 |
+
|
65 |
+
return contacts, highlighted_text
|
66 |
|
67 |
|
68 |
def _use_message_template() -> tuple[str, str]:
    """Return the example intro request as a ``(subject, body)`` pair."""
    return _INTRO_SUBJECT_EXAMPLE, _INTRO_MESSAGE_EXAMPLE
|
70 |
|
71 |
|
72 |
+
def _validate_message(
    sender: str,
    recipients: str,
    subject: str,
    body: str
) -> dict:
    """Check the email form fields and build an update for the errors box.

    Args:
        sender: Value of the "from" field (accepted but not checked yet).
        recipients: Value of the "to" field (accepted but not checked yet).
        subject: Message subject; reported as an error when empty.
        body: Message body; reported as an error when empty.

    Returns:
        The result of ``gr.update`` carrying the new text and visibility:
        a fenced list of problems shown when at least one check failed,
        otherwise an empty, hidden value.
    """
    errors: list[str] = []

    # NOTE: sender and recipients are not validated here yet.

    if not subject:
        errors.append("- subject can't be empty;")

    if not body:
        errors.append("- message can't be empty;")

    new_value: str = ""
    if errors:
        errors_list: str = "\n".join(errors)
        new_value = f"```\nPlease fix these errors:\n{errors_list}\n```"

    # Show the errors box only when there is something to report.
    return gr.update(value=new_value, visible=bool(errors))
|
96 |
+
|
97 |
+
|
98 |
def create_playground_route() -> FastAPI:
|
99 |
# TODO: Fix once resolved https://github.com/gradio-app/gradio/issues/1683
|
100 |
# TODO: ...and remove elem_id="htxt" from HighlightedText for RFC822 output
|
|
|
149 |
# label="Attachments:",
|
150 |
# file_count="multiple")
|
151 |
|
152 |
+
email_errors_output: gr.Markdown = gr.Markdown(
|
153 |
+
value="",
|
154 |
+
visible=False)
|
155 |
+
|
156 |
with gr.Row():
|
157 |
email_template_button: gr.Button = gr.Button(
|
158 |
value="Use template",
|
|
|
172 |
|
173 |
with gr.Tabs(selected="default"):
|
174 |
with gr.TabItem(label="Contacts", id="default"):
|
175 |
+
contacts_output: gr.Dataframe = gr.Dataframe(max_rows=16)
|
176 |
+
|
177 |
+
with gr.TabItem(label="RFC822-ish"):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
178 |
rfc822_output: gr.HighlightedText = gr.HighlightedText(
|
179 |
+
label="RFC822-ish message (for debugging purposes):",
|
180 |
show_legend=False,
|
181 |
elem_id="htxt") # temporary fix, see above for more info
|
182 |
|
|
|
199 |
|
200 |
# ui end, continue with signals and slots...
|
201 |
|
|
|
|
|
|
|
|
|
|
|
202 |
email_submit_button.click(
|
203 |
fn=_analyze_message,
|
204 |
inputs=[
|
|
|
208 |
email_body_input],
|
209 |
outputs=[
|
210 |
contacts_output,
|
|
|
|
|
211 |
rfc822_output])
|
212 |
|
213 |
+
email_submit_button.click(
|
214 |
+
fn=_validate_message,
|
215 |
+
inputs=[
|
216 |
+
email_sender_input,
|
217 |
+
email_recipients_input,
|
218 |
+
email_subject_input,
|
219 |
+
email_body_input],
|
220 |
+
outputs=[email_errors_output])
|
221 |
+
|
222 |
+
email_template_button.click(
|
223 |
+
fn=_use_message_template,
|
224 |
+
inputs=[],
|
225 |
+
outputs=[email_subject_input, email_body_input])
|
226 |
+
|
227 |
return GradioApp.create_app(blocks=base_blocks)
|
introduck/utils.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
def
|
2 |
msg_from: str = data.get("from", "") or "*"
|
3 |
msg_to: str = data.get("to", "") or "*"
|
4 |
msg_subject: str = data.get("subject", "") or "*"
|
@@ -11,3 +11,8 @@ def dump_email_as_string(data: dict) -> str:
|
|
11 |
msg += f"{msg_body}\n"
|
12 |
|
13 |
return msg
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
def dump_email_to_string(data: dict) -> str:
|
2 |
msg_from: str = data.get("from", "") or "*"
|
3 |
msg_to: str = data.get("to", "") or "*"
|
4 |
msg_subject: str = data.get("subject", "") or "*"
|
|
|
11 |
msg += f"{msg_body}\n"
|
12 |
|
13 |
return msg
|
14 |
+
|
15 |
+
|
16 |
+
def validate_email_address(data: str) -> bool:
    """Placeholder for email address validation.

    Not implemented yet: the function ignores ``data`` and always
    returns ``False``.
    """
    # TODO: https://pypi.org/project/email-validator/
    return False
|
requirements.txt
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
fastapi[all]
|
2 |
gradio==3.4.0 # keep in sync with version from README.md metadata
|
|
|
3 |
spacy==3.4.0
|
4 |
uvicorn[standard]
|
5 |
|
|
|
1 |
fastapi[all]
|
2 |
gradio==3.4.0 # keep in sync with version from README.md metadata
|
3 |
+
pandas==1.5.0
|
4 |
spacy==3.4.0
|
5 |
uvicorn[standard]
|
6 |
|