from typing import Union

from fastapi.encoders import jsonable_encoder

from inference.core.devices.utils import GLOBAL_INFERENCE_SERVER_ID
from inference.core.entities.requests.inference import InferenceRequest
from inference.core.entities.responses.inference import InferenceResponse
from inference.core.env import TINY_CACHE
from inference.core.logger import logger
from inference.core.version import __version__


def to_cachable_inference_item(
    infer_request: InferenceRequest,
    infer_response: Union[InferenceResponse, list[InferenceResponse]],
) -> dict:
    """Serialize an inference request/response pair into a JSON-safe dict for caching.

    When TINY_CACHE is disabled, the full request and response are encoded.
    When TINY_CACHE is enabled, only a small subset of request fields and a
    condensed response are kept to limit cache size.
    """
    if not TINY_CACHE:
        return {
            "inference_id": infer_request.id,
            "inference_server_version": __version__,
            "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
            "request": jsonable_encoder(infer_request),
            "response": jsonable_encoder(infer_response),
        }
    # Keep only the request fields that remain useful for later inspection.
    included_request_fields = {
        "api_key",
        "confidence",
        "model_id",
        "model_type",
        "source",
        "source_info",
    }
    request = infer_request.dict(include=included_request_fields)
    response = build_condensed_response(infer_response)
    return {
        "inference_id": infer_request.id,
        "inference_server_version": __version__,
        "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
        "request": jsonable_encoder(request),
        "response": jsonable_encoder(response),
    }


def build_condensed_response(responses):
    """Reduce one or more InferenceResponse objects to prediction confidences,
    class names, and inference time for compact caching."""
    if not isinstance(responses, list):
        responses = [responses]

    formatted_responses = []
    for response in responses:
        # Skip responses that carry no predictions (e.g. empty detections).
        if not getattr(response, "predictions", None):
            continue
        try:
            predictions = [
                {"confidence": pred.confidence, "class": pred.class_name}
                for pred in response.predictions
            ]
            formatted_responses.append(
                {
                    "predictions": predictions,
                    "time": response.time,
                }
            )
        except Exception as e:
            logger.warning(f"Error formatting response, skipping caching: {e}")

    return formatted_responses
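

# Illustrative usage sketch (an assumption for clarity; not part of the original
# module). The stand-in objects below only mimic the attributes this module
# reads (predictions, confidence, class_name, time); real InferenceResponse
# objects carry many more fields.
if __name__ == "__main__":
    from types import SimpleNamespace

    fake_prediction = SimpleNamespace(confidence=0.92, class_name="person")
    fake_response = SimpleNamespace(predictions=[fake_prediction], time=0.034)

    # Condense the hypothetical response the same way TINY_CACHE caching would.
    print(build_condensed_response(fake_response))
    # Expected: [{'predictions': [{'confidence': 0.92, 'class': 'person'}], 'time': 0.034}]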