File size: 1,422 Bytes
5b1f64c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
from fastapi import FastAPI, Depends, HTTPException, status
from fastapi.security import OAuth2PasswordBearer
from pydantic import BaseModel, Field
import httpx, os

# ASGI application object; the route decorators below register handlers on it.
app = FastAPI(title="Healthcare Inference Gateway")

# Security scheme used as a dependency to obtain the caller's bearer token.
# tokenUrl="token" is the relative URL advertised in the OpenAPI docs as the
# token-issuing endpoint.
# NOTE(review): no "/token" route is defined in the visible part of this
# file - confirm it is served elsewhere.
oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token")

class InferenceRequest(BaseModel):
  """Request body accepted by the ``/infer`` endpoint.

  Attributes:
    content_type: Required media type of the payload. It must match the
      pattern below, i.e. any ``image/...`` type, any ``text/...`` type, or
      exactly ``application/json``.
    payload: Required JSON object, forwarded as-is to the downstream
      services.
  """
  # NOTE(review): ``regex=`` is the Pydantic v1 keyword for this constraint;
  # Pydantic v2 spells it ``pattern=``. Confirm which major version is pinned.
  content_type: str = Field(..., regex="^(image/.*|text/.*|application/json)$")
  payload: dict

def verify_token(token: str = Depends(oauth2_scheme)) -> str:
  """Reject requests that carry an empty bearer token.

  NOTE: this is only a presence check. The token's signature, expiry and
  scopes are NOT validated here, so any non-empty string is accepted.

  Args:
    token: Bearer token supplied by the ``oauth2_scheme`` dependency.

  Returns:
    The token, unchanged, so that route handlers can depend on it.

  Raises:
    HTTPException: 401 when the token is empty. The response carries a
      ``WWW-Authenticate: Bearer`` challenge so clients know which scheme to
      retry with, matching the 401 the bearer scheme emits on its own.
  """
  if token:
    return token
  raise HTTPException(
    status_code=status.HTTP_401_UNAUTHORIZED,
    detail="Invalid token",
    headers={"WWW-Authenticate": "Bearer"},
  )

@app.get("/health")
def health():
  """Return a fixed status payload showing that the service is responding."""
  body = {"status": "ok"}
  return body

async def _post_json(client: httpx.AsyncClient, url: str, body, *, timeout: float, upstream: str):
  """POST ``body`` as JSON to ``url`` and return the decoded JSON reply.

  Upstream failures are translated into gateway-style HTTP errors instead of
  escaping as an opaque 500. Upstream response bodies are deliberately not
  echoed in the error detail, since they may contain request data.

  Args:
    client: Open HTTP client used to send the request.
    url: Absolute URL of the upstream endpoint.
    body: JSON-serialisable request body.
    timeout: Timeout in seconds for this single request.
    upstream: Human-readable upstream name used in error details.

  Returns:
    The decoded JSON body of the upstream response.

  Raises:
    HTTPException: 504 on timeout; 502 when the upstream is unreachable,
      answers with an error status, or returns a non-JSON body.
  """
  try:
    resp = await client.post(url, json=body, timeout=timeout)
    resp.raise_for_status()
    return resp.json()
  except httpx.TimeoutException as exc:
    # Must precede RequestError: timeouts are a subclass of it.
    raise HTTPException(
      status_code=status.HTTP_504_GATEWAY_TIMEOUT,
      detail=f"{upstream} timed out",
    ) from exc
  except httpx.HTTPStatusError as exc:
    raise HTTPException(
      status_code=status.HTTP_502_BAD_GATEWAY,
      detail=f"{upstream} returned HTTP {exc.response.status_code}",
    ) from exc
  except httpx.RequestError as exc:
    raise HTTPException(
      status_code=status.HTTP_502_BAD_GATEWAY,
      detail=f"{upstream} is unreachable",
    ) from exc
  except ValueError as exc:
    # resp.json() raises a ValueError subclass when the body is not JSON.
    raise HTTPException(
      status_code=status.HTTP_502_BAD_GATEWAY,
      detail=f"{upstream} returned a non-JSON response",
    ) from exc


@app.post("/infer")
async def infer(req: InferenceRequest, token: str = Depends(verify_token)):
  """Run an inference request, preprocessing image and text payloads first.

  Payloads whose content type starts with ``image/`` or ``text/`` are sent to
  the preprocessor service and its JSON reply replaces the payload; other
  payloads are forwarded unchanged. The result is then posted to the Triton
  HTTP inference endpoint and Triton's JSON reply is returned to the caller.

  Configuration is read from the environment on every call:
    PREPROCESSOR_URL: Preprocessor base URL (default
      ``http://preprocessor-svc``).
    TRITON_URL: Triton base URL (default ``http://triton-service:8000``).
    TRITON_MODEL_NAME: Name of the Triton model to invoke (required).

  Raises:
    HTTPException: 500 when ``TRITON_MODEL_NAME`` is not set; 502/504 when an
      upstream service fails or times out.
  """
  # The URL previously contained the literal placeholder "<model>", which no
  # deployment can serve; fail with a clear message when the name is missing.
  model = os.getenv("TRITON_MODEL_NAME")
  if not model:
    raise HTTPException(
      status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
      detail="TRITON_MODEL_NAME is not configured",
    )

  # Strip trailing slashes so a configured "http://host/" does not yield "//".
  preprocessor = os.getenv("PREPROCESSOR_URL", "http://preprocessor-svc").rstrip("/")
  triton = os.getenv("TRITON_URL", "http://triton-service:8000").rstrip("/")

  # One client serves both hops; each call carries its own timeout.
  async with httpx.AsyncClient() as client:
    normalized = req.payload
    if req.content_type.startswith(("image/", "text/")):
      normalized = await _post_json(
        client,
        f"{preprocessor}/preprocess",
        req.payload,
        timeout=10,
        upstream="preprocessor",
      )

    return await _post_json(
      client,
      f"{triton}/v2/models/{model}/infer",
      normalized,
      timeout=15,
      upstream="Triton",
    )