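"""Serializers for caching inference requests and responses.

Helpers here convert a request/response pair into a JSON-safe dict; when the
TINY_CACHE flag is set, only a condensed payload is kept to save cache space.
"""
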
from typing import Union

from fastapi.encoders import jsonable_encoder

from inference.core.devices.utils import GLOBAL_INFERENCE_SERVER_ID
from inference.core.entities.requests.inference import InferenceRequest
from inference.core.entities.responses.inference import InferenceResponse
from inference.core.env import TINY_CACHE
from inference.core.logger import logger
from inference.core.version import __version__


def to_cachable_inference_item(
    infer_request: InferenceRequest,
    infer_response: Union[InferenceResponse, list[InferenceResponse]],
) -> dict:
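    """Serialize an inference request/response pair into a JSON-safe dict.

    With TINY_CACHE disabled, the full request and response are encoded.
    With TINY_CACHE enabled, only a small set of request fields and a
    condensed response are kept so that cache entries stay small.
    """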
    if not TINY_CACHE:
        return {
            "inference_id": infer_request.id,
            "inference_server_version": __version__,
            "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
            "request": jsonable_encoder(infer_request),
            "response": jsonable_encoder(infer_response),
        }

    # TINY_CACHE is enabled: keep only lightweight request metadata.
    included_request_fields = {
        "api_key",
        "confidence",
        "model_id",
        "model_type",
        "source",
        "source_info",
    }
    request = infer_request.dict(include=included_request_fields)
    response = build_condensed_response(infer_response)

    return {
        "inference_id": infer_request.id,
        "inference_server_version": __version__,
        "inference_server_id": GLOBAL_INFERENCE_SERVER_ID,
        "request": jsonable_encoder(request),
        "response": jsonable_encoder(response),
    }


def build_condensed_response(
    responses: Union[InferenceResponse, list[InferenceResponse]],
) -> list[dict]:
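    """Strip responses down to per-prediction confidence and class, plus timing.

    Responses without predictions are skipped entirely; a response that fails
    to format is logged and dropped rather than aborting the cache write.
    """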
    # Accept either a single response or a list; normalize to a list.
    if not isinstance(responses, list):
        responses = [responses]

    formatted_responses = []
    for response in responses:
        if not getattr(response, "predictions", None):
            continue
        try:
            predictions = [
                {"confidence": pred.confidence, "class": pred.class_name}
                for pred in response.predictions
            ]
            formatted_responses.append(
                {
                    "predictions": predictions,
                    "time": response.time,
                }
            )
        except Exception as e:
            logger.warning(f"Failed to format response for caching, skipping it: {e}")

    return formatted_responses
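

# Illustrative usage sketch (hedged): the constructor arguments and the
# `model.infer_from_request(...)` call below are assumptions for illustration,
# not the exact schemas or API of this repository.
#
#     infer_request = InferenceRequest(id="...", api_key="...", model_id="...")
#     infer_response = model.infer_from_request(infer_request)
#     cache_entry = to_cachable_inference_item(infer_request, infer_response)
#     # cache_entry is JSON-serializable (via jsonable_encoder); with TINY_CACHE
#     # set, its "response" holds only condensed predictions of the form
#     # [{"confidence": ..., "class": ...}] plus the inference time.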