yetessam committed on
Commit
0a72d15
·
verified ·
1 Parent(s): 8b05f14

Create endpoint_utils.py

Browse files
Files changed (1) hide show
  1. endpoint_utils.py +69 -0
endpoint_utils.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # endpoint_utils.py
2
+ from __future__ import annotations
3
+ from typing import Optional, Tuple, Callable, Dict, Any
4
+ from urllib.parse import urlparse
5
+ import os, time, requests
6
+
7
+
8
def _valid_uri(uri: Optional[str]) -> bool:
    """Return True when *uri* is an absolute http(s) URL that names a host.

    Args:
        uri: Candidate URL; ``None`` and the empty string are rejected.

    Returns:
        ``True`` only if the scheme is ``http`` or ``https`` and a hostname
        is present. Malformed input that ``urlparse`` refuses to parse
        yields ``False`` instead of propagating the exception.
    """
    if not uri:
        return False
    try:
        parsed = urlparse(uri)
        # A bare netloc such as ":8080" or "user@" is non-empty but names no
        # host, so check the parsed hostname rather than the raw netloc.
        host = parsed.hostname
    except ValueError:
        # urlparse raises ValueError for some malformed inputs, e.g. an
        # unbalanced IPv6 bracket ("http://[::1"); treat those as invalid.
        return False
    return parsed.scheme in {"http", "https"} and bool(host)
13
+
14
+
15
def wake_endpoint(
    uri: Optional[str],
    *,
    token: Optional[str] = None,
    max_wait: int = 180,
    poll_every: float = 5.0,
    warm_payload: Optional[Dict[str, Any]] = None,
    log: Callable[[str], None] = lambda _: None,
) -> Tuple[bool, Optional[str]]:
    """
    Nudge a scale-to-zero Hugging Face Inference Endpoint and poll until it responds.

    Args:
        uri: Endpoint URL; must pass ``_valid_uri`` (http/https).
        token: Bearer token. Falls back to the ``HF_TOKEN`` environment
            variable; when neither is set no Authorization header is sent.
        max_wait: Total polling budget in seconds.
        poll_every: Pause between polls in seconds. The final pause is
            shortened so the call does not overrun ``max_wait``; negative
            values are treated as 0.
        warm_payload: JSON body for the initial nudge and, when given, for
            the polling probes as well. Defaults to ``{"inputs": "wake"}``
            for the nudge and ``{"inputs": "ping"}`` for the probes.
        log: Callback that receives human-readable progress messages.

    Returns:
        (True, None) on success, or (False, "reason") on timeout / invalid input.

    Notes:
        - Expects endpoints that accept POST JSON bodies like {"inputs": "..."}.
        - Any non-error HTTP status (``response.ok``) counts as awake.
        - 429/503/504 are logged as "warming"; other error statuses are
          logged as unexpected. Both are retried until the deadline.
    """
    if not _valid_uri(uri):
        return False, "invalid or missing URI (expect http(s)://...)"

    headers: Dict[str, str] = {}
    tok = token or os.environ.get("HF_TOKEN")
    if tok:
        headers["Authorization"] = f"Bearer {tok}"

    # 1) Initial nudge. Deliberately best-effort: the request exists only to
    #    trigger the wake-up, and the polling loop below reports real status.
    payload = warm_payload if warm_payload is not None else {"inputs": "wake"}
    try:
        requests.post(uri, headers=headers, json=payload, timeout=5)
    except requests.RequestException:
        pass

    # Probe with the caller's payload when one was supplied, so endpoints with
    # a custom input schema are not judged by a body they would reject.
    probe = warm_payload if warm_payload is not None else {"inputs": "ping"}
    pause = max(0.0, poll_every)

    # 2) Poll until healthy or timeout. A monotonic clock keeps the deadline
    #    immune to wall-clock adjustments.
    deadline = time.monotonic() + max_wait
    while time.monotonic() < deadline:
        try:
            r = requests.post(uri, headers=headers, json=probe, timeout=5)
        except requests.RequestException as e:
            log(f"{type(e).__name__}; retrying in {poll_every:.0f}s…")
        else:
            if r.ok:
                log("✅ Endpoint is awake and responsive.")
                return True, None

            # Common warmup statuses: throttle/warming
            if r.status_code in (429, 503, 504):
                log(f"Endpoint warming (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")
            else:
                log(f"Unexpected response (HTTP {r.status_code}); retrying in {poll_every:.0f}s…")

        # Never sleep past the deadline: stop if the budget is spent,
        # otherwise wait for whichever is shorter.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(pause, remaining))

    return False, f"timed out after {max_wait}s waiting for endpoint"