remote inference api
Browse files
- introduck/inference.py +30 -35
- introduck/routes.py +8 -15
- introduck/utils.py +0 -15
- requirements.txt +1 -4
introduck/inference.py
CHANGED
@@ -1,47 +1,42 @@
|
|
1 |
-
import
|
2 |
-
import
|
|
|
3 |
|
|
|
4 |
|
5 |
-
def _load_spacy_model() -> spacy.Language:
|
6 |
-
spacy_model_name: str = "en_core_web_sm"
|
7 |
|
8 |
-
|
9 |
-
nlp = spacy.load(name=spacy_model_name)
|
10 |
-
print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")
|
11 |
-
|
12 |
-
_load_spacy_model.nlp = nlp
|
13 |
-
|
14 |
-
return _load_spacy_model.nlp
|
15 |
-
|
16 |
-
|
17 |
-
def extract_contacts_from_text(payload: str) -> dict:
|
18 |
if not payload:
|
19 |
-
return
|
20 |
|
21 |
-
|
22 |
-
|
|
|
23 |
|
24 |
-
|
25 |
-
|
|
|
26 |
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
entity: dict = {
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
|
|
|
|
36 |
|
37 |
entities.append(entity)
|
38 |
|
39 |
-
|
40 |
-
"name": ent.text,
|
41 |
-
"email": "unknown"}
|
42 |
-
|
43 |
-
contacts = pd.concat(
|
44 |
-
[contacts, pd.DataFrame([contact])],
|
45 |
-
ignore_index=True)
|
46 |
-
|
47 |
-
return {"contacts": contacts, "entities": entities, "payload": payload}
|
1 |
+
import json
|
2 |
+
import os
|
3 |
+
import requests
|
4 |
|
5 |
+
from typing import Any
|
6 |
|
|
|
|
|
7 |
|
8 |
+
def extract_contacts_from_text(
        payload: str, *, timeout: float = 30.0) -> list[dict[str, Any]]:
    """Extract entities from *payload* using a remote inference endpoint.

    The endpoint URL and bearer token are read from the environment
    variables ``HUGGINGFACE_INFERENCE_ENDPOINT_URL`` and
    ``HUGGINGFACE_INFERENCE_ENDPOINT_TOKEN``. The text is sent as a JSON
    body of the form ``{"inputs": payload}``.

    Args:
        payload: Text to analyze.
        timeout: Seconds to wait for the endpoint before ``requests``
            gives up; without it a stalled endpoint would block forever.

    Returns:
        One dict per item returned by the endpoint, with the keys
        ``text``, ``score``, ``entity``, ``start`` and ``end``. An empty
        list is returned when *payload* is empty, when the token or URL
        is not configured, or when the response body is not a JSON list.

    Raises:
        requests.RequestException: If the HTTP request itself fails
            (connection error, timeout, ...).
    """
    if not payload:
        return []

    api_token: str = os.environ.get("HUGGINGFACE_INFERENCE_ENDPOINT_TOKEN", "")
    api_url: str = os.environ.get("HUGGINGFACE_INFERENCE_ENDPOINT_URL", "")
    if not api_token or not api_url:
        return []

    response: requests.Response = requests.post(
        url=api_url,
        headers={
            "Authorization": f"Bearer {api_token}",
            "Content-Type": "application/json"},
        data=json.dumps({"inputs": payload}),
        timeout=timeout)

    try:
        outputs: Any = response.json()
    except ValueError:
        # Body was not valid JSON (requests' JSONDecodeError is a ValueError).
        return []

    # The loop below needs a list of mappings. Any other JSON value
    # (presumably an error object from the endpoint -- TODO confirm the
    # exact shape) would otherwise be iterated key by key and crash on
    # ``str.get``.
    if not isinstance(outputs, list):
        return []

    return [
        {
            "text": output.get("word", ""),
            "score": output.get("score", -1),
            "entity": output.get("entity_group", "UNKNOWN"),
            "start": output.get("start", -1),
            "end": output.get("end", -1),
        }
        for output in outputs
        if isinstance(output, dict)
    ]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
introduck/routes.py
CHANGED
@@ -4,9 +4,9 @@ import pandas as pd
|
|
4 |
from fastapi import FastAPI
|
5 |
from gradio.routes import App as GradioApp
|
6 |
from introduck.inference import extract_contacts_from_text
|
7 |
-
from introduck.utils import dump_email_to_string
|
8 |
from introduck.utils import validate_email
|
9 |
from introduck.utils import validate_multiple_emails
|
|
|
10 |
|
11 |
_INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
|
12 |
_INTRO_MESSAGE_EXAMPLE: str = """\
|
@@ -59,22 +59,15 @@ def _analyze_message(
|
|
59 |
if not body:
|
60 |
return default_outputs
|
61 |
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
|
68 |
-
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
contacts: pd.DataFrame = output.get("contacts", pd.DataFrame())
|
73 |
-
highlighted_text: dict = {
|
74 |
-
"text": output.get("payload", str),
|
75 |
-
"entities": output.get("entities", [])}
|
76 |
-
|
77 |
-
return contacts, highlighted_text
|
78 |
|
79 |
|
80 |
def _use_message_template() -> (str, str):
|
4 |
from fastapi import FastAPI
|
5 |
from gradio.routes import App as GradioApp
|
6 |
from introduck.inference import extract_contacts_from_text
|
|
|
7 |
from introduck.utils import validate_email
|
8 |
from introduck.utils import validate_multiple_emails
|
9 |
+
from typing import Any
|
10 |
|
11 |
_INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
|
12 |
_INTRO_MESSAGE_EXAMPLE: str = """\
|
59 |
if not body:
|
60 |
return default_outputs
|
61 |
|
62 |
+
payload: str = ""
|
63 |
+
payload += f"From: {sender or '*'}\n"
|
64 |
+
payload += f"To: {recipients or '*'}\n"
|
65 |
+
payload += f"Subject: {subject or '*'}\n\n"
|
66 |
+
payload += f"{body or '***'}\n"
|
67 |
|
68 |
+
outputs: list[dict[str, Any]] = extract_contacts_from_text(payload=payload)
|
69 |
|
70 |
+
return pd.DataFrame(outputs), {"text": payload, "entities": outputs}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
71 |
|
72 |
|
73 |
def _use_message_template() -> (str, str):
|
introduck/utils.py
CHANGED
@@ -2,21 +2,6 @@ from email_validator import EmailNotValidError
|
|
2 |
from email_validator import validate_email as validate_email_address
|
3 |
|
4 |
|
5 |
-
def dump_email_to_string(data: dict) -> str:
    """Render an email-like mapping as plain text.

    Reads the ``from``, ``to``, ``subject`` and ``body`` keys of *data*.
    A missing or falsy header is rendered as ``*`` and a missing or falsy
    body as ``***``. The headers are followed by a blank line, then the
    body and a trailing newline.
    """
    sender = data.get("from") or "*"
    recipients = data.get("to") or "*"
    subject = data.get("subject") or "*"
    body = data.get("body") or "***"

    parts = [
        f"From: {sender}\n",
        f"To: {recipients}\n",
        f"Subject: {subject}\n\n",
        f"{body}\n",
    ]
    return "".join(parts)
|
18 |
-
|
19 |
-
|
20 |
def validate_email(email: str) -> str:
|
21 |
if not email:
|
22 |
return ""
|
2 |
from email_validator import validate_email as validate_email_address
|
3 |
|
4 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
def validate_email(email: str) -> str:
|
6 |
if not email:
|
7 |
return ""
|
requirements.txt
CHANGED
@@ -2,8 +2,5 @@ email-validator==1.3.0
|
|
2 |
fastapi[all]
|
3 |
gradio==3.4.0 # keep in sync with version from README.md metadata
|
4 |
pandas==1.5.0
|
5 |
-
|
6 |
uvicorn[standard]
|
7 |
-
|
8 |
-
https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
|
9 |
-
https://huggingface.co/spacy/en_core_web_trf/resolve/main/en_core_web_trf-any-py3-none-any.whl
|
2 |
fastapi[all]
|
3 |
gradio==3.4.0 # keep in sync with version from README.md metadata
|
4 |
pandas==1.5.0
|
5 |
+
requests==2.28.1
|
6 |
uvicorn[standard]
|
|
|
|
|
|