speqtr committed on
Commit
aa1a424
1 Parent(s): 20a8c14

remote inference api

Browse files
introduck/inference.py CHANGED
@@ -1,47 +1,42 @@
1
- import pandas as pd
2
- import spacy
 
3
 
 
4
 
5
- def _load_spacy_model() -> spacy.Language:
6
- spacy_model_name: str = "en_core_web_sm"
7
 
8
- if not hasattr(_load_spacy_model, "nlp"):
9
- nlp = spacy.load(name=spacy_model_name)
10
- print(f"Loaded {nlp.meta.get('name', 'unknown')} model from {nlp.path}")
11
-
12
- _load_spacy_model.nlp = nlp
13
-
14
- return _load_spacy_model.nlp
15
-
16
-
17
- def extract_contacts_from_text(payload: str) -> dict:
18
  if not payload:
19
- return {}
20
 
21
- nlp: spacy.Language = _load_spacy_model()
22
- doc = nlp(payload)
 
23
 
24
- contacts_cols: list[str] = ["name", "email"]
25
- contacts: pd.DataFrame = pd.DataFrame(columns=contacts_cols)
 
26
 
27
- entities: list[dict] = []
28
- for ent in doc.ents:
29
- if ent.label_ != "PERSON":
30
- continue
 
 
31
 
 
 
 
 
 
 
32
  entity: dict = {
33
- "entity": ent.label_,
34
- "start": ent.start_char,
35
- "end": ent.end_char}
 
 
36
 
37
  entities.append(entity)
38
 
39
- contact: dict = {
40
- "name": ent.text,
41
- "email": "unknown"}
42
-
43
- contacts = pd.concat(
44
- [contacts, pd.DataFrame([contact])],
45
- ignore_index=True)
46
-
47
- return {"contacts": contacts, "entities": entities, "payload": payload}
1
+ import json
2
+ import os
3
+ import requests
4
 
5
+ from typing import Any
6
 
 
 
7
 
8
+ def extract_contacts_from_text(payload: str) -> list[dict[str, Any]]:
 
 
 
 
 
 
 
 
 
9
  if not payload:
10
+ return []
11
 
12
+ api_token: str = os.environ.get("HUGGINGFACE_INFERENCE_ENDPOINT_TOKEN", "")
13
+ if not api_token:
14
+ return []
15
 
16
+ api_url: str = os.environ.get("HUGGINGFACE_INFERENCE_ENDPOINT_URL", "")
17
+ if not api_url:
18
+ return []
19
 
20
+ response: requests.Response = requests.post(
21
+ url=api_url,
22
+ headers={
23
+ "Authorization": f"Bearer {api_token}",
24
+ "Content-Type": "application/json"},
25
+ data=json.dumps({"inputs": payload}))
26
 
27
+ outputs: list[dict[str, Any]] = response.json()
28
+ if not outputs:
29
+ return []
30
+
31
+ entities: list[dict[str, Any]] = []
32
+ for output in outputs:
33
  entity: dict = {
34
+ "text": output.get("word", ""),
35
+ "score": output.get("score", -1),
36
+ "entity": output.get("entity_group", "UNKNOWN"),
37
+ "start": output.get("start", -1),
38
+ "end": output.get("end", -1)}
39
 
40
  entities.append(entity)
41
 
42
+ return entities
 
 
 
 
 
 
 
 
introduck/routes.py CHANGED
@@ -4,9 +4,9 @@ import pandas as pd
4
  from fastapi import FastAPI
5
  from gradio.routes import App as GradioApp
6
  from introduck.inference import extract_contacts_from_text
7
- from introduck.utils import dump_email_to_string
8
  from introduck.utils import validate_email
9
  from introduck.utils import validate_multiple_emails
 
10
 
11
  _INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
12
  _INTRO_MESSAGE_EXAMPLE: str = """\
@@ -59,22 +59,15 @@ def _analyze_message(
59
  if not body:
60
  return default_outputs
61
 
62
- msg_data: dict = {
63
- "from": sender,
64
- "to": recipients,
65
- "subject": subject,
66
- "body": body}
67
 
68
- msg: str = dump_email_to_string(data=msg_data)
69
 
70
- output: dict = extract_contacts_from_text(payload=msg)
71
-
72
- contacts: pd.DataFrame = output.get("contacts", pd.DataFrame())
73
- highlighted_text: dict = {
74
- "text": output.get("payload", str),
75
- "entities": output.get("entities", [])}
76
-
77
- return contacts, highlighted_text
78
 
79
 
80
  def _use_message_template() -> (str, str):
4
  from fastapi import FastAPI
5
  from gradio.routes import App as GradioApp
6
  from introduck.inference import extract_contacts_from_text
 
7
  from introduck.utils import validate_email
8
  from introduck.utils import validate_multiple_emails
9
+ from typing import Any
10
 
11
  _INTRO_SUBJECT_EXAMPLE: str = "Could you make an intro?"
12
  _INTRO_MESSAGE_EXAMPLE: str = """\
59
  if not body:
60
  return default_outputs
61
 
62
+ payload: str = ""
63
+ payload += f"From: {sender or '*'}\n"
64
+ payload += f"To: {recipients or '*'}\n"
65
+ payload += f"Subject: {subject or '*'}\n\n"
66
+ payload += f"{body or '***'}\n"
67
 
68
+ outputs: list[dict[str, Any]] = extract_contacts_from_text(payload=payload)
69
 
70
+ return pd.DataFrame(outputs), {"text": payload, "entities": outputs}
 
 
 
 
 
 
 
71
 
72
 
73
  def _use_message_template() -> (str, str):
introduck/utils.py CHANGED
@@ -2,21 +2,6 @@ from email_validator import EmailNotValidError
2
  from email_validator import validate_email as validate_email_address
3
 
4
 
5
- def dump_email_to_string(data: dict) -> str:
6
- msg_from: str = data.get("from", "") or "*"
7
- msg_to: str = data.get("to", "") or "*"
8
- msg_subject: str = data.get("subject", "") or "*"
9
- msg_body: str = data.get("body", "") or "***"
10
-
11
- msg: str = ""
12
- msg += f"From: {msg_from}\n"
13
- msg += f"To: {msg_to}\n"
14
- msg += f"Subject: {msg_subject}\n\n"
15
- msg += f"{msg_body}\n"
16
-
17
- return msg
18
-
19
-
20
  def validate_email(email: str) -> str:
21
  if not email:
22
  return ""
2
  from email_validator import validate_email as validate_email_address
3
 
4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  def validate_email(email: str) -> str:
6
  if not email:
7
  return ""
requirements.txt CHANGED
@@ -2,8 +2,5 @@ email-validator==1.3.0
2
  fastapi[all]
3
  gradio==3.4.0 # keep in sync with version from README.md metadata
4
  pandas==1.5.0
5
- spacy==3.4.0
6
  uvicorn[standard]
7
-
8
- https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl
9
- https://huggingface.co/spacy/en_core_web_trf/resolve/main/en_core_web_trf-any-py3-none-any.whl
2
  fastapi[all]
3
  gradio==3.4.0 # keep in sync with version from README.md metadata
4
  pandas==1.5.0
5
+ requests==2.28.1
6
  uvicorn[standard]