import functools
import gc
import io
import json
import logging
import os
import re
import time
import traceback
from datetime import datetime, timedelta, timezone
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import psutil
import torch
from PIL import Image
from pydantic import BaseModel, Field


def get_system_info():
    """Collect a snapshot of CPU, RAM and (when safe to query) GPU memory usage."""
    cpu_percent = psutil.cpu_percent(interval=0.1)
    memory = psutil.virtual_memory()

    info = {
        "cpu_percent": round(cpu_percent, 2),
        "memory_percent": round(memory.percent, 2),
        "memory_total_gb": round(memory.total / (1024**3), 2),
        "memory_available_gb": round(memory.available / (1024**3), 2),
        "memory_used_gb": round(memory.used / (1024**3), 2),
    }

    # Query CUDA memory only when not running on Hugging Face Spaces (SPACE_ID
    # unset), where touching CUDA in the main process can fail on ZeroGPU hardware.
    if torch.cuda.is_available() and os.getenv("SPACE_ID") is None:
        try:
            info["gpu_count"] = torch.cuda.device_count()
            for i in range(torch.cuda.device_count()):
                gpu_allocated = torch.cuda.memory_allocated(i) / (1024**3)
                gpu_reserved = torch.cuda.memory_reserved(i) / (1024**3)
                gpu_total = torch.cuda.get_device_properties(i).total_memory / (1024**3)

                info[f"gpu_{i}_memory_allocated_gb"] = round(gpu_allocated, 2)
                info[f"gpu_{i}_memory_reserved_gb"] = round(gpu_reserved, 2)
                info[f"gpu_{i}_memory_total_gb"] = round(gpu_total, 2)
                info[f"gpu_{i}_memory_allocated_percent"] = round(
                    (gpu_allocated / gpu_total * 100) if gpu_total > 0 else 0, 2
                )
                info[f"gpu_{i}_memory_reserved_percent"] = round(
                    (gpu_reserved / gpu_total * 100) if gpu_total > 0 else 0, 2
                )
        except Exception as e:
            logging.warning(f"GPU memory monitoring failed: {e}")
            info["gpu_count"] = 0
    elif torch.cuda.is_available():
        # On Spaces, report that a GPU exists without querying its memory.
        info["gpu_available"] = True
        info["gpu_count"] = 0

    return info
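

# Illustrative usage sketch (not part of the original module): reading selected
# keys from a snapshot. The per-GPU keys only exist when CUDA memory was
# actually queried, so they are accessed with .get() here.
def _example_report_headroom() -> str:
    info = get_system_info()
    headroom_gb = info["memory_available_gb"]
    gpu_alloc_gb = info.get("gpu_0_memory_allocated_gb", 0.0)
    return f"RAM available: {headroom_gb} GB | GPU0 allocated: {gpu_alloc_gb} GB"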


def cleanup_memory():
    """Release Python garbage and cached CUDA memory; safe on CPU-only hosts."""
    gc.collect()
    if torch.cuda.is_available():
        try:
            torch.cuda.empty_cache()
            # Synchronize only when a CUDA device is actually present, so the
            # call is skipped in environments where CUDA must stay untouched.
            if hasattr(torch._C, "_cuda_getDeviceCount") and torch._C._cuda_getDeviceCount() > 0:
                torch.cuda.synchronize()
        except RuntimeError as e:
            # Hugging Face ZeroGPU raises this when CUDA is initialized in the
            # main process; any other runtime error is re-raised.
            if "CUDA must not be initialized" not in str(e):
                raise


def force_gpu_memory_update():
    """Touch the CUDA allocator so cached memory statistics refresh."""
    if torch.cuda.is_available() and torch.cuda.device_count() > 0 and os.getenv("SPACE_ID") is None:
        try:
            # Allocating and freeing a tiny tensor forces the allocator to
            # update its bookkeeping before statistics are read.
            temp_tensor = torch.randn(1, device="cuda")
            del temp_tensor
            torch.cuda.empty_cache()
        except Exception as e:
            logging.debug(f"GPU memory update failed: {e}")
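

# Illustrative usage sketch (assumption, not part of the original module):
# wrapping an arbitrary workload so memory is reclaimed and statistics are
# refreshed afterwards. `workload` is a hypothetical placeholder callable.
def _example_run_with_cleanup(workload: Callable[[], Any]) -> Any:
    before = get_system_info()
    try:
        return workload()
    finally:
        cleanup_memory()
        force_gpu_memory_update()
        after = get_system_info()
        logging.debug(f"Memory before: {before} | after: {after}")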


LOG_LEVEL_MAP = {
    "DEBUG": logging.DEBUG,
    "INFO": logging.INFO,
    "SUCCESS": logging.INFO,
    "WARNING": logging.WARNING,
    "ERROR": logging.ERROR,
    "CRITICAL": logging.CRITICAL,
    "PROCESSING": logging.INFO,
    "RETRY": logging.WARNING,
    "JSON": logging.INFO,
    "PERFORMANCE": logging.INFO,
    "MEMORY": logging.INFO,
}

EMOJI_MAP = {
    "DEBUG": "🔍",
    "INFO": "ℹ️",
    "SUCCESS": "✅",
    "WARNING": "⚠️",
    "ERROR": "❌",
    "CRITICAL": "🔥",
    "PROCESSING": "⚙️",
    "RETRY": "🔄",
    "JSON": "📊",
    "PERFORMANCE": "⚡",
    "MEMORY": "💾",
}
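

# Illustrative usage sketch (not part of the original module): emitting a
# message at one of the custom level names by resolving it through both maps.
def _example_log_retry(attempt: int) -> None:
    logging.log(LOG_LEVEL_MAP["RETRY"], f"{EMOJI_MAP['RETRY']} Retrying request (attempt {attempt})")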


def setup_logging():
    """Configure root logging with emoji-prefixed records and quiet noisy libraries."""
    LOG_FORMAT = "%(asctime)s [%(levelname)s] %(module)s: %(message)s"
    LOG_LEVEL = logging.INFO

    if os.getenv("DEBUG", "").lower() == "true":
        LOG_LEVEL = logging.DEBUG

    # Expose the custom level names (e.g. logging.SUCCESS) as attributes on the
    # logging module so call sites can reference them directly.
    for name, level in LOG_LEVEL_MAP.items():
        if not hasattr(logging, name):
            setattr(logging, name, level)

    class EmojiFormatter(logging.Formatter):
        def formatTime(self, record, datefmt=None):
            # Timestamps are rendered from UTC with a fixed 2-hour offset applied.
            dt = datetime.fromtimestamp(record.created, tz=timezone.utc)
            dt = dt - timedelta(hours=2)
            if datefmt:
                return dt.strftime(datefmt)
            return dt.strftime("%Y-%m-%d %H:%M:%S,%f")[:-3]

        def format(self, record):
            # Skip prefixing when the message already starts with a known emoji.
            if not getattr(record, "emoji_prefixed", False):
                for emoji in EMOJI_MAP.values():
                    if str(record.msg).startswith(emoji):
                        record.emoji_prefixed = True
                        break

            # Otherwise prepend the emoji of the first level name whose numeric
            # value matches the record's level.
            if not getattr(record, "emoji_prefixed", False):
                for name, level in LOG_LEVEL_MAP.items():
                    if record.levelno == level:
                        record.msg = f"{EMOJI_MAP.get(name, '')} {record.msg}"
                        record.emoji_prefixed = True
                        break

            return super().format(record)

    formatter = EmojiFormatter(LOG_FORMAT)
    handler = logging.StreamHandler()
    handler.setFormatter(formatter)

    # Replace any handlers installed by earlier imports with the emoji handler.
    logging.root.handlers = []
    root = logging.getLogger()
    root.setLevel(LOG_LEVEL)
    root.addHandler(handler)

    # Silence uvicorn's own loggers so requests are not logged twice.
    for logger_name in ["uvicorn", "uvicorn.access", "uvicorn.error", "uvicorn.asgi"]:
        logger = logging.getLogger(logger_name)
        logger.handlers = []
        logger.propagate = False

    # Route the httpx "_client" logger through the emoji handler and clear the
    # handlers of the lower-level connection loggers.
    for logger_name in ["_client", "httpx._client", "httpcore._sync.connection_pool", "httpcore._sync.http11"]:
        logger = logging.getLogger(logger_name)
        logger.handlers = []
        if logger_name == "_client":
            logger.addHandler(handler)
            logger.setLevel(LOG_LEVEL)
            logger.propagate = False

    # Third-party libraries only surface warnings and above.
    for logger_name in ["PIL", "PIL.Image", "transformers", "accelerate"]:
        logger = logging.getLogger(logger_name)
        logger.setLevel(logging.WARNING)

    logging.info("Application logging configured successfully")

    system_info = get_system_info()
    logging.info(f"System info: {system_info}")
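

# Illustrative usage sketch (assumption, not part of the original module): a
# typical entry-point call; plain log messages get their emoji prefix from the
# formatter installed above.
def _example_startup() -> None:
    setup_logging()
    logging.info("Service starting")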


def measure_performance(func: Callable) -> Callable:
    """Decorator that logs a function's wall-clock time and memory deltas."""

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start_time = time.perf_counter()

        force_gpu_memory_update()
        start_memory = get_system_info()

        try:
            result = func(*args, **kwargs)

            end_time = time.perf_counter()

            force_gpu_memory_update()
            end_memory = get_system_info()

            duration = end_time - start_time

            memory_used_delta_gb = end_memory["memory_used_gb"] - start_memory["memory_used_gb"]
            memory_percent_delta = end_memory["memory_percent"] - start_memory["memory_percent"]
            total_memory_gb = end_memory.get("memory_total_gb", 0)

            memory_delta = {
                "memory_used_delta_gb": round(memory_used_delta_gb, 2),
                "memory_percent_delta": round(memory_percent_delta, 2),
                "memory_used_percent": round(end_memory["memory_percent"], 2),
                "memory_total_gb": round(total_memory_gb, 2),
            }

            # Add per-GPU deltas when CUDA statistics were collected.
            if torch.cuda.is_available() and torch.cuda.device_count() > 0 and not os.getenv("SPACE_ID"):
                for i in range(torch.cuda.device_count()):
                    allocated_key = f"gpu_{i}_memory_allocated_gb"
                    reserved_key = f"gpu_{i}_memory_reserved_gb"
                    total_key = f"gpu_{i}_memory_total_gb"

                    start_allocated = start_memory.get(allocated_key, 0)
                    end_allocated = end_memory.get(allocated_key, 0)
                    start_reserved = start_memory.get(reserved_key, 0)
                    end_reserved = end_memory.get(reserved_key, 0)

                    if start_allocated > 0 or end_allocated > 0 or start_reserved > 0 or end_reserved > 0:
                        allocated_delta = end_allocated - start_allocated
                        reserved_delta = end_reserved - start_reserved
                        gpu_total = end_memory.get(total_key, 0)

                        memory_delta[f"gpu_{i}_allocated_delta_gb"] = round(allocated_delta, 2)
                        memory_delta[f"gpu_{i}_reserved_delta_gb"] = round(reserved_delta, 2)
                        memory_delta[f"gpu_{i}_allocated_percent"] = round(end_memory.get(f"gpu_{i}_memory_allocated_percent", 0), 2)
                        memory_delta[f"gpu_{i}_reserved_percent"] = round(end_memory.get(f"gpu_{i}_memory_reserved_percent", 0), 2)
                        memory_delta[f"gpu_{i}_total_gb"] = round(gpu_total, 2)

            logging.log(
                LOG_LEVEL_MAP["PERFORMANCE"],
                f"{EMOJI_MAP['PERFORMANCE']} {func.__name__} completed in {duration:.3f}s | Memory delta: {memory_delta}"
            )

            return result

        except Exception as e:
            end_time = time.perf_counter()
            duration = end_time - start_time
            logging.error(f"{func.__name__} failed after {duration:.3f}s: {str(e)}")
            raise

    return wrapper
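

# Illustrative usage sketch (assumption, not part of the original module):
# decorating a function so its duration and memory deltas are logged at the
# PERFORMANCE level. `_example_resize` is a hypothetical helper.
@measure_performance
def _example_resize(image: Image.Image, size: Tuple[int, int] = (512, 512)) -> Image.Image:
    return image.resize(size)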