# Fucius's picture
# Upload 422 files
# 2eafbc4 verified
import platform
import re
import socket
import time
import uuid
from inference.core.cache import cache
from inference.core.logger import logger
from inference.core.version import __version__
def get_model_metrics(
    inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
) -> dict:
    """
    Gets the metrics for a given model between a specified time range.

    Args:
        inference_server_id (str): The identifier of the inference server.
        model_id (str): The identifier of the model.
        min (float, optional): The starting timestamp of the time range. Defaults to -1.
        max (float, optional): The ending timestamp of the time range. Defaults to float("inf").

    Returns:
        dict: A dictionary containing the metrics of the model:
            - num_inferences (int): The number of inferences made.
            - avg_inference_time (float): The average inference time
              (0 when no response reported a time).
            - num_errors (int): The number of errors occurred.
    """
    # NOTE: the ``min``/``max`` parameters shadow the builtins inside this
    # function; the names are part of the public signature and are kept.
    inferences_with_times = cache.zrangebyscore(
        f"inference:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_inferences = len(inferences_with_times)

    inference_times = []
    for inference, _ in inferences_with_times:
        response = inference["response"]
        if isinstance(response, list):
            # A single cached entry may hold several responses; collect every
            # response that reports a time.
            inference_times.extend(r["time"] for r in response if "time" in r)
        elif "time" in response:
            inference_times.append(response["time"])

    # Average over the collected times only; entries without a "time" field
    # count towards num_inferences but not towards the average.
    avg_inference_time = (
        sum(inference_times) / len(inference_times) if inference_times else 0
    )

    errors_with_times = cache.zrangebyscore(
        f"error:{inference_server_id}:{model_id}", min=min, max=max, withscores=True
    )
    num_errors = len(errors_with_times)

    return {
        "num_inferences": num_inferences,
        "avg_inference_time": avg_inference_time,
        "num_errors": num_errors,
    }
def get_system_info() -> dict:
    """Collects system information such as platform, architecture, hostname, IP address, MAC address, and processor details.

    Collection is best-effort: if any lookup raises an ``Exception``, it is
    logged and the fields gathered up to that point are returned.

    Returns:
        dict: A dictionary containing detailed system information.
    """
    info = {}
    try:
        info["platform"] = platform.system()
        info["platform_release"] = platform.release()
        info["platform_version"] = platform.version()
        info["architecture"] = platform.machine()
        hostname = socket.gethostname()
        info["hostname"] = hostname
        info["ip_address"] = socket.gethostbyname(hostname)
        # uuid.getnode() is an integer; render it as 12 hex digits and group
        # them in pairs to get the usual aa:bb:cc:dd:ee:ff form.
        info["mac_address"] = ":".join(re.findall(r"..", f"{uuid.getnode():012x}"))
        info["processor"] = platform.processor()
    except Exception as e:
        logger.exception(e)
    # Return after the try/except rather than from a ``finally`` clause: a
    # ``return`` inside ``finally`` would also discard KeyboardInterrupt,
    # SystemExit and any error raised while logging.
    return info
def get_inference_results_for_model(
    inference_server_id: str, model_id: str, min: float = -1, max: float = float("inf")
):
    """Fetches cached inference records for a model, with image payloads removed.

    Args:
        inference_server_id (str): The identifier of the inference server.
        model_id (str): The identifier of the model.
        min (float, optional): Lower score bound for the cache query. Defaults to -1.
        max (float, optional): Upper score bound for the cache query. Defaults to float("inf").

    Returns:
        list: One dictionary per cached record, with keys:
            - request_time: The score the record was stored under.
            - inference: The record itself, after any non-empty "image"
              entries were deleted from its request and response(s).
              The record object is modified in place.
    """
    cache_key = f"inference:{inference_server_id}:{model_id}"
    scored_records = cache.zrangebyscore(cache_key, min=min, max=max, withscores=True)

    stripped_results = []
    for record, request_time in scored_records:
        # Images are large; strip them before handing the records back.
        request_image = record.get("request", {}).get("image")
        if request_image:
            del record["request"]["image"]

        payload = record.get("response")
        if payload:
            # A response may be a single object or a list of them.
            items = payload if isinstance(payload, list) else [payload]
            for item in items:
                if item.get("image"):
                    del item["image"]

        stripped_results.append({"request_time": request_time, "inference": record})

    return stripped_results