import gc

import torch


class GPUOptimizer:
    """Apply process-wide CUDA memory and performance settings.

    On machines without CUDA every method degrades to a cheap no-op /
    zeroed report, so the class is safe to use unconditionally.
    """

    def __init__(self, memory_fraction: float = 0.9) -> None:
        """Select the compute device.

        Args:
            memory_fraction: Cap on this process's share of each GPU's
                memory, in (0, 1]. Passed to
                ``torch.cuda.set_per_process_memory_fraction`` by
                :meth:`optimize`. Defaults to 0.9 (the original
                hard-coded value).
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.memory_fraction = memory_fraction

    def optimize(self) -> None:
        """Free cached GPU memory and enable TF32 fast paths.

        No-op when CUDA is unavailable. Returns nothing; all effects are
        process-global CUDA/backend settings.
        """
        if not torch.cuda.is_available():
            return  # nothing to tune on CPU

        # Collect Python garbage FIRST so dead tensor references hand their
        # blocks back to the caching allocator; only then can empty_cache()
        # actually return that memory to the driver. (The original did these
        # in the reverse, less effective order.)
        gc.collect()
        torch.cuda.empty_cache()

        # Limit this process's slice of GPU memory so co-tenant processes
        # are not starved.
        torch.cuda.set_per_process_memory_fraction(self.memory_fraction)

        # Allow TF32 on Ampere+ matmuls/convolutions: large speedup for a
        # small, usually acceptable precision loss.
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

        # BUGFIX: the original called torch.cuda.amp.autocast(enabled=True)
        # as a bare statement. That constructs a context manager and
        # immediately discards it — a silent no-op. Autocast cannot be
        # enabled globally here; callers must wrap their forward pass:
        #     with torch.autocast("cuda"):
        #         ...
        # The dead call has therefore been removed.

    def get_memory_usage(self) -> dict:
        """Return current CUDA memory usage in megabytes.

        Returns:
            Dict with keys ``'allocated'`` (bytes held by live tensors)
            and ``'reserved'`` (bytes held by the caching allocator),
            both converted to MB. Both are 0 when CUDA is unavailable.
        """
        if torch.cuda.is_available():
            return {
                'allocated': torch.cuda.memory_allocated() / 1024**2,  # MB
                'reserved': torch.cuda.memory_reserved() / 1024**2,    # MB
            }
        return {'allocated': 0, 'reserved': 0}