import gc

import torch


class GPUOptimizer:
    """Apply process-wide CUDA memory and performance settings.

    On machines without CUDA every method degrades to a cheap no-op /
    zeroed report, so the class is safe to use unconditionally.
    """

    def __init__(self, memory_fraction: float = 0.9) -> None:
        """Select the compute device.

        Args:
            memory_fraction: Cap on this process's share of each GPU's
                memory, in (0, 1]. Passed to
                ``torch.cuda.set_per_process_memory_fraction`` by
                :meth:`optimize`. Defaults to 0.9 (the original
                hard-coded value).
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.memory_fraction = memory_fraction

    def optimize(self) -> None:
        """Free cached GPU memory and enable TF32 fast paths.

        No-op when CUDA is unavailable. Returns nothing; all effects are
        process-global CUDA/backend settings.
        """
        if not torch.cuda.is_available():
            return  # nothing to tune on CPU

        # Collect Python garbage FIRST so dead tensor references hand their
        # blocks back to the caching allocator; only then can empty_cache()
        # actually return that memory to the driver. (The original did these
        # in the reverse, less effective order.)
        gc.collect()
        torch.cuda.empty_cache()

        # Limit this process's slice of GPU memory so co-tenant processes
        # are not starved.
        torch.cuda.set_per_process_memory_fraction(self.memory_fraction)

        # Allow TF32 on Ampere+ matmuls/convolutions: large speedup for a
        # small, usually acceptable precision loss.
        torch.backends.cuda.matmul.allow_tf32 = True
        torch.backends.cudnn.allow_tf32 = True

        # BUGFIX: the original called torch.cuda.amp.autocast(enabled=True)
        # as a bare statement. That constructs a context manager and
        # immediately discards it — a silent no-op. Autocast cannot be
        # enabled globally here; callers must wrap their forward pass:
        #     with torch.autocast("cuda"):
        #         ...
        # The dead call has therefore been removed.

    def get_memory_usage(self) -> dict:
        """Return current CUDA memory usage in megabytes.

        Returns:
            Dict with keys ``'allocated'`` (bytes held by live tensors)
            and ``'reserved'`` (bytes held by the caching allocator),
            both converted to MB. Both are 0 when CUDA is unavailable.
        """
        if torch.cuda.is_available():
            return {
                'allocated': torch.cuda.memory_allocated() / 1024**2,  # MB
                'reserved': torch.cuda.memory_reserved() / 1024**2,    # MB
            }
        return {'allocated': 0, 'reserved': 0}