Spaces:

yssszzzzzzzzy
/

FPro-Dehaze

Runtime error

App Files Files Community

yssszzzzzzzzy committed on Jul 12

Commit

6f8982e

verified ·

1 Parent(s): 77f167e

Upload 3 files

Browse files

Files changed (3) hide show

RealDehazing_FPro.yml +135 -0
RealDemoiring_FPro.yml +135 -0
model.py +232 -110

RealDehazing_FPro.yml ADDED Viewed

	@@ -0,0 +1,135 @@

+# general settings
+name: Dehazing_FPro
+model_type: ImageCleanModel
+scale: 1
+num_gpu: 8  # set num_gpu: 0 for cpu mode
+manual_seed: 100
+# dataset and data loader settings
+datasets:
+  train:
+    name: TrainSet
+    type: Dataset_PairedImage_dehazeSOT
+    dataroot_gt: /mnt/sda/zsh/dataset/haze
+    dataroot_lq: /mnt/sda/zsh/dataset/haze
+    geometric_augs: true
+    filename_tmpl: '{}'
+    io_backend:
+      type: disk
+    # data loader
+    use_shuffle: true
+    num_worker_per_gpu: 8
+    batch_size_per_gpu: 8
+         ## ------- Training on single fixed-patch size 256x256 ---------
+    mini_batch_sizes: [2]
+    iters: [300000]
+    gt_size: 256
+    gt_sizes: [256]
+    ## ------------------------------------------------------------
+    dataset_enlarge_ratio: 1
+    prefetch_mode: ~
+  val:
+    name: ValSet
+    type: Dataset_PairedImage_dehazeSOT
+    dataroot_gt: /mnt/sda/zsh/dataset/haze
+    dataroot_lq: /mnt/sda/zsh/dataset/haze
+    gt_size: 256
+    io_backend:
+      type: disk
+# network structures
+network_g:
+  type: FPro
+  inp_channels: 3
+  out_channels: 3
+  # input_res: 128
+  dim: 48
+  # num_blocks: [4,6,6,8]
+  num_blocks: [2,3,6]
+  # num_refinement_blocks: 4
+  num_refinement_blocks: 2
+  # heads: [1,2,4,8]
+  heads: [2,4,8]
+  # ffn_expansion_factor: 2.66
+  ffn_expansion_factor: 3
+  bias: False
+  LayerNorm_type: WithBias
+  dual_pixel_task: False
+# path
+path:
+  pretrain_network_g: ~
+  strict_load_g: true
+  resume_state: ~
+# training settings
+train:
+  total_iter: 300000
+  warmup_iter: -1 # no warm up
+  use_grad_clip: true
+  # Split 300k iterations into two cycles.
+  # 1st cycle: fixed 3e-4 LR for 92k iters.
+  # 2nd cycle: cosine annealing (3e-4 to 1e-6) for 208k iters.
+  scheduler:
+    type: CosineAnnealingRestartCyclicLR
+    periods: [92000, 208000]
+    restart_weights: [1,1]
+    eta_mins: [0.0003,0.000001]
+  mixing_augs:
+    mixup: true
+    mixup_beta: 1.2
+    use_identity: true
+  optim_g:
+    type: AdamW
+    lr: !!float 3e-4
+    weight_decay: !!float 1e-4
+    betas: [0.9, 0.999]
+  # losses
+  pixel_opt:
+    type: L1Loss
+    loss_weight: 1
+    reduction: mean
+  fft_loss_opt:
+    type: FFTLoss
+    loss_weight: 0.1
+    reduction: mean
+# validation settings
+val:
+  window_size: 8
+  val_freq: !!float 4e3
+  save_img: false
+  rgb2bgr: true
+  use_image: false
+  max_minibatch: 8
+  metrics:
+    psnr: # metric name, can be arbitrary
+      type: calculate_psnr
+      crop_border: 0
+      test_y_channel: false
+# logging settings
+logger:
+  print_freq: 1000
+  save_checkpoint_freq: !!float 4e3
+  use_tb_logger: true
+  wandb:
+    project: ~
+    resume_id: ~
+# dist training settings
+dist_params:
+  backend: nccl
+  port: 29500

RealDemoiring_FPro.yml ADDED Viewed

	@@ -0,0 +1,135 @@

+# general settings
+name: RealDemoiring_Restormer
+model_type: ImageCleanModel
+scale: 1
+num_gpu: 8  # set num_gpu: 0 for cpu mode
+manual_seed: 100
+# dataset and data loader settings
+datasets:
+  train:
+    name: TrainSet
+    type: Dataset_PairedImage_denseHaze
+    dataroot_gt: /home/ubuntu/zsh/datasets/TIP18/process/train/thin_target
+    dataroot_lq: /home/ubuntu/zsh/datasets/TIP18/process/train/thin_source
+    geometric_augs: False
+    filename_tmpl: '{}'
+    io_backend:
+      type: disk
+    # data loader
+    use_shuffle: true
+    num_worker_per_gpu: 8
+    batch_size_per_gpu: 8
+         ## ------- Training on single fixed-patch size 256x256 ---------
+    mini_batch_sizes: [2]
+    iters: [300000]
+    gt_size: 256
+    gt_sizes: [256]
+    ## ------------------------------------------------------------
+    dataset_enlarge_ratio: 1
+    prefetch_mode: ~
+  val:
+    name: ValSet
+    type: Dataset_PairedImage_denseHaze
+    dataroot_gt: /home/ubuntu/zsh/datasets/TIP18/process/val/thin_target
+    dataroot_lq: /home/ubuntu/zsh/datasets/TIP18/process/val/thin_source
+    gt_size: 256
+    io_backend:
+      type: disk
+# network structures
+network_g:
+  type: Restormer
+  inp_channels: 3
+  out_channels: 3
+  # input_res: 128
+  dim: 48
+  # num_blocks: [4,6,6,8]
+  num_blocks: [2,3,6]
+  # num_refinement_blocks: 4
+  num_refinement_blocks: 2
+  # heads: [1,2,4,8]
+  heads: [2,4,8]
+  # ffn_expansion_factor: 2.66
+  ffn_expansion_factor: 3
+  bias: False
+  LayerNorm_type: WithBias
+  dual_pixel_task: False
+# path
+path:
+  pretrain_network_g: ~
+  strict_load_g: true
+  resume_state: ~
+# training settings
+train:
+  total_iter: 300000
+  warmup_iter: -1 # no warm up
+  use_grad_clip: true
+  # Split 300k iterations into two cycles.
+  # 1st cycle: fixed 3e-4 LR for 92k iters.
+  # 2nd cycle: cosine annealing (3e-4 to 1e-6) for 208k iters.
+  scheduler:
+    type: CosineAnnealingRestartCyclicLR
+    periods: [92000, 208000]
+    restart_weights: [1,1]
+    eta_mins: [0.0003,0.000001]
+  mixing_augs:
+    mixup: true
+    mixup_beta: 1.2
+    use_identity: true
+  optim_g:
+    type: AdamW
+    lr: !!float 3e-4
+    weight_decay: !!float 1e-4
+    betas: [0.9, 0.999]
+  # losses
+  pixel_opt:
+    type: L1Loss
+    loss_weight: 1
+    reduction: mean
+  fft_loss_opt:
+    type: FFTLoss
+    loss_weight: 0.1
+    reduction: mean
+# validation settings
+val:
+  window_size: 8
+  val_freq: !!float 4e3
+  save_img: false
+  rgb2bgr: true
+  use_image: false
+  max_minibatch: 8
+  metrics:
+    psnr: # metric name, can be arbitrary
+      type: calculate_psnr
+      crop_border: 0
+      test_y_channel: false
+# logging settings
+logger:
+  print_freq: 1000
+  save_checkpoint_freq: !!float 4e3
+  use_tb_logger: true
+  wandb:
+    project: ~
+    resume_id: ~
+# dist training settings
+dist_params:
+  backend: nccl
+  port: 29500

model.py CHANGED Viewed

@@ -1,83 +1,165 @@
-# model.py - 整合版本：支持去雾和去摩尔纹
 import yaml, torch, math, numpy as np
 import torch.nn.functional as F
 from PIL import Image
 from io import BytesIO
 from basicsr.models.archs.FPro_arch import FPro
-# 强制使用 CPU
-device = torch.device('cpu')
-# 全局变量存储两个模型
 dehaze_model = None
 demoiring_model = None
-def splitimage(imgtensor, crop_size=256, overlap_size=158):
-    """切块函数，与原代码保持一致"""
     _, C, H, W = imgtensor.shape
-    hstarts = [x for x in range(0, H, crop_size - overlap_size)]
-    while hstarts and hstarts[-1] + crop_size >= H:
-        hstarts.pop()
-    hstarts.append(H - crop_size)
-    wstarts = [x for x in range(0, W, crop_size - overlap_size)]
-    while wstarts and wstarts[-1] + crop_size >= W:
-        wstarts.pop()
-    wstarts.append(W - crop_size)
     starts = []
-    split_data = []
-    for hs in hstarts:
-        for ws in wstarts:
-            cimgdata = imgtensor[:, :, hs:hs + crop_size, ws:ws + crop_size]
             starts.append((hs, ws))
-            split_data.append(cimgdata)
-    return split_data, starts
-def get_scoremap(H, W, C, B=1, is_mean=True):
-    """权重图生成函数，与原代码保持一致"""
-    center_h = H / 2
-    center_w = W / 2
-    score = torch.ones((B, C, H, W))
-    if not is_mean:
-        for h in range(H):
-            for w in range(W):
-                score[:, :, h, w] = 1.0 / (math.sqrt((h - center_h) ** 2 + (w - center_w) ** 2 + 1e-6))
-    return score
-def mergeimage(split_data, starts, crop_size=256, resolution=(1, 3, 128, 128)):
-    """图像合并函数，与原代码保持一致"""
-    B, C, H, W = resolution[0], resolution[1], resolution[2], resolution[3]
-    tot_score = torch.zeros((B, C, H, W))
-    merge_img = torch.zeros((B, C, H, W))
-    scoremap = get_scoremap(crop_size, crop_size, C, B=B, is_mean=True)
-    for simg, cstart in zip(split_data, starts):
-        hs, ws = cstart
-        merge_img[:, :, hs:hs + crop_size, ws:ws + crop_size] += scoremap * simg
-        tot_score[:, :, hs:hs + crop_size, ws:ws + crop_size] += scoremap
-    merge_img = merge_img / tot_score
-    return merge_img
 def init():
-    """初始化两个模型"""
     global dehaze_model, demoiring_model
     # 初始化去雾模型
     try:
-        print("正在加载去雾模型...")
-        dehaze_cfg = yaml.safe_load(open("dehaze_config.yml", "r"))['network_g']
         dehaze_cfg.pop('type', None)
         dehaze_model = FPro(**dehaze_cfg)
-        dehaze_model = dehaze_model.to(device)
         dehaze_ckpt = torch.load("dehaze.pth", map_location='cpu')
         dehaze_model.load_state_dict(dehaze_ckpt['params'])
         dehaze_model.eval()
-        dehaze_model = dehaze_model.cpu()
         print("去雾模型加载成功！")
     except Exception as e:
         print(f"去雾模型加载失败: {e}")
@@ -85,77 +167,76 @@ def init():
     # 初始化去摩尔纹模型
     try:
-        print("正在加载去摩尔纹模型...")
-        # 尝试加载YAML配置文件
         try:
-            demoiring_cfg = yaml.safe_load(open("demoiring_config.yml", "r"))['network_g']
             demoiring_cfg.pop('type', None)
         except FileNotFoundError:
-            # 如果没有单独的配置文件，使用默认配置
-            print("未找到去摩尔纹配置文件，使用默认配置")
             demoiring_cfg = {
-                'inp_channels': 3,
-                'out_channels': 3,
-                'dim': 48,
-                'num_blocks': [4, 6, 6, 8],
-                'num_refinement_blocks': 4,
-                'heads': [1, 2, 4, 8],
-                'ffn_expansion_factor': 2.66,
-                'bias': False,
-                'LayerNorm_type': 'WithBias',
-                'dual_pixel_task': False
             }
         demoiring_model = FPro(**demoiring_cfg)
-        demoiring_model = demoiring_model.to(device)
         demoiring_ckpt = torch.load("deblur.pth", map_location='cpu')
         demoiring_model.load_state_dict(demoiring_ckpt['params'])
         demoiring_model.eval()
-        demoiring_model = demoiring_model.cpu()
         print("去摩尔纹模型加载成功！")
     except Exception as e:
         print(f"去摩尔纹模型加载失败: {e}")
         demoiring_model = None
-def inference(body: bytes, task_type: str = "dehaze") -> bytes:
-    """
-    推理函数：支持去雾和去摩尔纹
-    Args:
-        body: 图像字节流
-        task_type: 任务类型，"dehaze" 或 "demoiring"
-    Returns:
-        处理后的图像字节流
-    """
-    # 选择对应的模型
     if task_type == "dehaze":
         if dehaze_model is None:
             raise Exception("去雾模型未加载")
         model = dehaze_model
-        # 去雾任务的参数
-        crop_size_arg = 256
-        overlap_size_arg = 158
     elif task_type == "demoiring":
         if demoiring_model is None:
             raise Exception("去摩尔纹模型未加载")
         model = demoiring_model
-        # 去摩尔纹任务的参数（根据第一个代码）
-        crop_size_arg = 256
-        overlap_size_arg = 200
     else:
         raise Exception(f"不支持的任务类型: {task_type}")
-    # 加载输入图
     img = Image.open(BytesIO(body)).convert("RGB")
-    arr = np.float32(img) / 255.0
-    t = torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0)
-    # 确保张量在 CPU 上
-    t = t.to(device)
-    # Padding in case images are not multiples of 8
     factor = 8
     h, w = t.shape[2], t.shape[3]
     H = ((h + factor) // factor) * factor
@@ -165,52 +246,93 @@ def inference(body: bytes, task_type: str = "dehaze") -> bytes:
     t = F.pad(t, (0, padw, 0, padh), 'reflect')
     B, C, H, W = t.shape
-    # 如果图像小于等于切块大小，直接处理
-    if H <= crop_size_arg and W <= crop_size_arg:
         with torch.no_grad():
             restored = model(t)
     else:
-        # 使用切块逻辑
-        split_data, starts = splitimage(t, crop_size=crop_size_arg, overlap_size=overlap_size_arg)
-        # 对每个切块进行推理
         with torch.no_grad():
-            for i, data in enumerate(split_data):
-                data = data.to(device)
-                split_data[i] = model(data).cpu()
         # 合并结果
-        restored = mergeimage(split_data, starts, crop_size=crop_size_arg, resolution=(B, C, H, W))
-        restored = restored.to(device)
-    # Unpad images to original dimensions
-    restored = restored[:, :, :h, :w]
-    # 输出处理
-    merged = torch.clamp(restored, 0, 1).squeeze(0).permute(1, 2, 0).numpy()
     merged = (merged * 255).astype(np.uint8)
-    # 输出 PNG
     out_img = Image.fromarray(merged)
     buf = BytesIO()
     out_img.save(buf, format="PNG")
     return buf.getvalue()
 def dehaze_inference(body: bytes) -> bytes:
-    """去雾推理的便捷函数"""
-    return inference(body, task_type="dehaze")
 def demoiring_inference(body: bytes) -> bytes:
-    """去摩尔纹推理的便捷函数"""
-    return inference(body, task_type="demoiring")
 def get_model_status():
-    """获取模型加载状态"""
     return {
         "dehaze_model_loaded": dehaze_model is not None,
-        "demoiring_model_loaded": demoiring_model is not None
-    }

+# model.py - 性能优化版本
 import yaml, torch, math, numpy as np
 import torch.nn.functional as F
 from PIL import Image
 from io import BytesIO
 from basicsr.models.archs.FPro_arch import FPro
+import time
+import cv2
+# 检测可用设备
+if torch.cuda.is_available():
+    device = torch.device('cuda')
+    print("使用 GPU 加速")
+else:
+    device = torch.device('cpu')
+    print("使用 CPU 计算")
+# 全局变量
 dehaze_model = None
 demoiring_model = None
+# 性能配置
+PERFORMANCE_CONFIG = {
+    'max_resolution': 1024,  # 最大处理分辨率
+    'min_crop_size': 128,  # 最小切块大小
+    'use_fast_mode': True,  # 快速模式
+    'enable_torch_compile': True,  # 启用模型编译(PyTorch 2.0+)
+}
def optimize_image_size(image, max_size=1024):
    """Downscale ``image`` so that its longer side is about ``max_size`` pixels.

    Images whose longer side is already ``<= max_size`` are returned untouched.
    Otherwise both sides are multiplied by ``max_size / max(w, h)``, truncated
    to an integer and rounded up to the next multiple of 8 before the image is
    resized with Lanczos resampling.

    Args:
        image: Object exposing ``size`` as ``(width, height)`` and a
            ``resize(size, resample)`` method (a PIL image in this file).
        max_size: Target length of the longer side before the rounding to a
            multiple of 8.

    Returns:
        ``(image, scale)``: the possibly resized image and the scale factor
        that was applied (``1.0`` when nothing was resized). Because of the
        rounding, the real per-axis ratio can differ slightly from ``scale``.
    """
    w, h = image.size
    longest = max(w, h)
    if longest <= max_size:
        return image, 1.0

    scale = max_size / longest

    def _scaled_side(side):
        # Truncate, then round up to a multiple of 8. A very elongated image
        # can scale its short side down to 0 px, so keep at least one 8-px
        # cell; resize() must never receive an empty dimension.
        return max(8, ((int(side * scale) + 7) // 8) * 8)

    resized_image = image.resize((_scaled_side(w), _scaled_side(h)), Image.LANCZOS)
    return resized_image, scale
def smart_crop_strategy(H, W, min_size=128, max_size=512):
    """Choose a square tile size and overlap for an ``H`` x ``W`` image.

    Args:
        H: Image height in pixels.
        W: Image width in pixels.
        min_size: Lower bound for the tile side.
        max_size: Largest side that is processed without tiling; also the
            upper bound for the tile side.

    Returns:
        ``(H, W, 0)`` when the longer side is at most ``max_size`` (no tiling
        needed). Otherwise ``(crop_size, crop_size, overlap)`` where
        ``crop_size`` is half of the shorter side clamped to
        ``[min_size, max_size]`` and aligned to a multiple of 8, and
        ``overlap`` is ``min(crop_size // 4, 64)``. Note that ``crop_size``
        can be larger than the shorter side when that side is below
        ``min_size``.
    """
    if max(H, W) <= max_size:
        return H, W, 0

    half_short_side = min(H, W) // 2
    crop_size = min(max_size, max(min_size, half_short_side))

    # Align to a multiple of 8 (rounding up).
    crop_size = ((crop_size + 7) // 8) * 8
    if crop_size > max_size:
        # Rounding up overshoots when max_size is not itself a multiple of 8;
        # align downwards instead so the documented upper bound holds.
        crop_size = max(8, (max_size // 8) * 8)

    overlap = min(crop_size // 4, 64)
    return crop_size, crop_size, overlap
def fast_splitimage(imgtensor, crop_size=256, overlap_size=64):
    """Compute the top-left corners of overlapping tiles covering an image.

    No tile data is copied: callers slice ``imgtensor`` themselves using the
    returned positions.

    Args:
        imgtensor: Object with a 4-element ``shape`` whose last two entries
            are the height and width.
        crop_size: Side length of the square tiles.
        overlap_size: Number of pixels shared by neighbouring tiles.

    Returns:
        ``([imgtensor], [(0, 0)])`` when the whole image fits inside a single
        tile; otherwise ``(None, starts)`` where ``starts`` lists ``(hs, ws)``
        tile origins in row-major order. The last tile on each axis is shifted
        back so that it ends exactly on the image border.

    Raises:
        ValueError: If tiling is needed and ``overlap_size >= crop_size``.
    """
    _, _, H, W = imgtensor.shape

    if H <= crop_size and W <= crop_size:
        return [imgtensor], [(0, 0)]

    step = crop_size - overlap_size
    if step <= 0:
        raise ValueError(
            f"overlap_size ({overlap_size}) must be smaller than crop_size ({crop_size})"
        )

    def _axis_starts(length):
        # An axis shorter than the tile gets a single tile at 0; slicing the
        # tensor clips it to the available extent. Without this guard the
        # range below is empty and indexing its last element fails.
        if length <= crop_size:
            return [0]
        positions = list(range(0, length - crop_size + 1, step))
        if positions[-1] + crop_size < length:
            positions.append(length - crop_size)
        return positions

    w_positions = _axis_starts(W)
    starts = [(hs, ws) for hs in _axis_starts(H) for ws in w_positions]
    return None, starts
def fast_mergeimage(results, starts, crop_size, H, W, C, B=1):
    """Blend processed tiles back into one ``(B, C, H, W)`` tensor.

    Each tile is weighted by a map that is 1 in the interior and fades
    linearly down to 0.1 over ``crop_size // 8`` pixels at every edge; the
    weighted tiles are accumulated and divided by the accumulated weights.

    Args:
        results: Iterable of 4-D tile tensors, in the same order as ``starts``.
        starts: Iterable of ``(hs, ws)`` top-left tile positions.
        crop_size: Side length of the tiles used when slicing the image.
        H: Height of the merged image.
        W: Width of the merged image.
        C: Number of channels.
        B: Batch size.

    Returns:
        The merged tensor, allocated on the module-level ``device``.
    """
    merge_img = torch.zeros((B, C, H, W), device=device)
    weight_sum = torch.zeros((B, C, H, W), device=device)
    edge_fade = crop_size // 8  # width of the fading border

    # The weight map depends only on the tile shape/dtype, so build it once
    # per distinct shape instead of once per tile.
    weight_cache = {}
    for result, (hs, ws) in zip(results, starts):
        result = result.to(device)
        key = (tuple(result.shape), result.dtype)
        if key not in weight_cache:
            weight_cache[key] = _edge_fade_weight(result, edge_fade)
        weight = weight_cache[key]

        merge_img[:, :, hs:hs + crop_size, ws:ws + crop_size] += weight * result
        weight_sum[:, :, hs:hs + crop_size, ws:ws + crop_size] += weight

    # Guard against division by zero where no tile contributed.
    weight_sum = torch.clamp(weight_sum, min=1e-8)
    return merge_img / weight_sum


def _edge_fade_weight(patch, edge_fade):
    """Return a weight map shaped like ``patch`` that fades towards its edges."""
    weight = torch.ones_like(patch)
    if edge_fade <= 0:
        return weight

    # Create the ramps on the patch's device; a CPU ramp cannot be multiplied
    # into a CUDA weight map.
    rising = torch.linspace(0.1, 1.0, edge_fade, device=patch.device)
    falling = torch.linspace(1.0, 0.1, edge_fade, device=patch.device)

    # Tiles clipped at the image border may be thinner than the fade width.
    rows = min(edge_fade, patch.shape[2])
    cols = min(edge_fade, patch.shape[3])
    if rows > 0:
        weight[:, :, :rows, :] *= rising[:rows].view(1, 1, -1, 1)   # top
        weight[:, :, -rows:, :] *= falling[-rows:].view(1, 1, -1, 1)  # bottom
    if cols > 0:
        weight[:, :, :, :cols] *= rising[:cols].view(1, 1, 1, -1)   # left
        weight[:, :, :, -cols:] *= falling[-cols:].view(1, 1, 1, -1)  # right
    return weight
 def init():
+    """优化的模型初始化"""
     global dehaze_model, demoiring_model
+    print("正在初始化模型...")
+    start_time = time.time()
     # 初始化去雾模型
     try:
+        print("加载去雾模型...")
+        dehaze_cfg = yaml.safe_load(open("RealDehazing_FPro.yml", "r"))['network_g']
         dehaze_cfg.pop('type', None)
         dehaze_model = FPro(**dehaze_cfg)
+        # 加载权重
         dehaze_ckpt = torch.load("dehaze.pth", map_location='cpu')
         dehaze_model.load_state_dict(dehaze_ckpt['params'])
+        dehaze_model.to(device)
         dehaze_model.eval()
+        # 模型编译优化 (PyTorch 2.0+)
+        if PERFORMANCE_CONFIG['enable_torch_compile'] and hasattr(torch, 'compile'):
+            try:
+                dehaze_model = torch.compile(dehaze_model)
+                print("去雾模型编译优化成功")
+            except Exception as e:
+                print(f"模型编译失败: {e}")
+        # 预热模型
+        with torch.no_grad():
+            dummy_input = torch.randn(1, 3, 256, 256).to(device)
+            _ = dehaze_model(dummy_input)
         print("去雾模型加载成功！")
     except Exception as e:
         print(f"去雾模型加载失败: {e}")
     # 初始化去摩尔纹模型
     try:
+        print("加载去摩尔纹模型...")
         try:
+            demoiring_cfg = yaml.safe_load(open("RealDemoiring_FPro.yml", "r"))['network_g']
             demoiring_cfg.pop('type', None)
         except FileNotFoundError:
             demoiring_cfg = {
+                'inp_channels': 3, 'out_channels': 3, 'dim': 48,
+                'num_blocks': [4, 6, 6, 8], 'num_refinement_blocks': 4,
+                'heads': [1, 2, 4, 8], 'ffn_expansion_factor': 2.66,
+                'bias': False, 'LayerNorm_type': 'WithBias', 'dual_pixel_task': False
             }
         demoiring_model = FPro(**demoiring_cfg)
         demoiring_ckpt = torch.load("deblur.pth", map_location='cpu')
         demoiring_model.load_state_dict(demoiring_ckpt['params'])
+        demoiring_model.to(device)
         demoiring_model.eval()
+        # 模型编译优化
+        if PERFORMANCE_CONFIG['enable_torch_compile'] and hasattr(torch, 'compile'):
+            try:
+                demoiring_model = torch.compile(demoiring_model)
+                print("去摩尔纹模型编译优化成功")
+            except Exception as e:
+                print(f"模型编译失败: {e}")
+        # 预热模型
+        with torch.no_grad():
+            dummy_input = torch.randn(1, 3, 256, 256).to(device)
+            _ = demoiring_model(dummy_input)
         print("去摩尔纹模型加载成功！")
     except Exception as e:
         print(f"去摩尔纹模型加载失败: {e}")
         demoiring_model = None
+    init_time = time.time() - start_time
+    print(f"模型初始化完成，耗时: {init_time:.2f}秒")
+def fast_inference(body: bytes, task_type: str = "dehaze") -> bytes:
+    """优化的推理函数"""
+    start_time = time.time()
+    # 选择模型
     if task_type == "dehaze":
         if dehaze_model is None:
             raise Exception("去雾模型未加载")
         model = dehaze_model
     elif task_type == "demoiring":
         if demoiring_model is None:
             raise Exception("去摩尔纹模型未加载")
         model = demoiring_model
     else:
         raise Exception(f"不支持的任务类型: {task_type}")
+    # 图像加载和预处理
     img = Image.open(BytesIO(body)).convert("RGB")
+    original_size = img.size
+    # 智能尺寸优化
+    if PERFORMANCE_CONFIG['use_fast_mode']:
+        img, scale_factor = optimize_image_size(img, PERFORMANCE_CONFIG['max_resolution'])
+        print(f"图像缩放比例: {scale_factor:.2f}")
+    # 转换为张量
+    arr = np.float32(img) / 255.0
+    t = torch.from_numpy(arr).permute(2, 0, 1).unsqueeze(0).to(device)
+    # Padding
     factor = 8
     h, w = t.shape[2], t.shape[3]
     H = ((h + factor) // factor) * factor
     t = F.pad(t, (0, padw, 0, padh), 'reflect')
     B, C, H, W = t.shape
+    print(f"处理图像尺寸: {H}x{W}")
+    # 智能切块策略
+    crop_size, _, overlap_size = smart_crop_strategy(H, W)
+    if crop_size == H and crop_size == W:
+        # 小图像直接处理
+        print("直接处理整图")
         with torch.no_grad():
+            torch.cuda.empty_cache() if torch.cuda.is_available() else None
             restored = model(t)
     else:
+        # 大图像切块处理
+        print(f"切块处理，切块大小: {crop_size}x{crop_size}, 重叠: {overlap_size}")
+        # 获取切块位置
+        _, starts = fast_splitimage(t, crop_size, overlap_size)
+        results = []
         with torch.no_grad():
+            for i, (hs, ws) in enumerate(starts):
+                if torch.cuda.is_available():
+                    torch.cuda.empty_cache()
+                # 动态切块
+                patch = t[:, :, hs:hs + crop_size, ws:ws + crop_size]
+                result = model(patch)
+                results.append(result.cpu())  # 立即移到CPU释放GPU内存
+                if i % 5 == 0:  # 每5个切块打印一次进度
+                    print(f"处理进度: {i + 1}/{len(starts)}")
         # 合并结果
+        print("合并结果...")
+        # 将结果移回GPU进行合并
+        results = [r.to(device) for r in results]
+        restored = fast_mergeimage(results, starts, crop_size, H, W, C, B)
+    # 后处理
+    restored = restored[:, :, :h, :w]  # 去除padding
+    merged = torch.clamp(restored, 0, 1).cpu().squeeze(0).permute(1, 2, 0).numpy()
+    # 恢复原始尺寸
+    if PERFORMANCE_CONFIG['use_fast_mode'] and scale_factor < 1.0:
+        merged_img = Image.fromarray((merged * 255).astype(np.uint8))
+        merged_img = merged_img.resize(original_size, Image.LANCZOS)
+        merged = np.array(merged_img).astype(np.float32) / 255.0
     merged = (merged * 255).astype(np.uint8)
+    # 输出
     out_img = Image.fromarray(merged)
     buf = BytesIO()
     out_img.save(buf, format="PNG")
+    total_time = time.time() - start_time
+    print(f"总处理时间: {total_time:.2f}秒")
     return buf.getvalue()
 def dehaze_inference(body: bytes) -> bytes:
     """Run ``fast_inference`` on ``body`` with the dehazing task selected."""
     restored = fast_inference(body, task_type="dehaze")
     return restored
 def demoiring_inference(body: bytes) -> bytes:
     """Run ``fast_inference`` on ``body`` with the demoireing task selected."""
     restored = fast_inference(body, task_type="demoiring")
     return restored
 def get_model_status():
     """Report which models are loaded, the active device and the speed mode.

     Returns:
         A dict with the boolean flags ``dehaze_model_loaded`` and
         ``demoiring_model_loaded``, the ``device`` as a string, and
         ``performance_mode`` set to ``"Fast"`` or ``"Quality"`` depending on
         ``PERFORMANCE_CONFIG['use_fast_mode']``.
     """
     status = {}
     status["dehaze_model_loaded"] = dehaze_model is not None
     status["demoiring_model_loaded"] = demoiring_model is not None
     status["device"] = str(device)
     if PERFORMANCE_CONFIG['use_fast_mode']:
         status["performance_mode"] = "Fast"
     else:
         status["performance_mode"] = "Quality"
     return status
def update_performance_config(max_resolution=1024, fast_mode=True):
    """Update the module-level ``PERFORMANCE_CONFIG`` in place.

    Args:
        max_resolution: New value for ``PERFORMANCE_CONFIG['max_resolution']``;
            must be positive.
        fast_mode: New value for ``PERFORMANCE_CONFIG['use_fast_mode']``.

    Raises:
        ValueError: If ``max_resolution`` is not positive. The configuration
            is left untouched in that case.
    """
    # A non-positive limit would make the downscale factor zero or negative
    # and only fail later, at inference time; reject it up front.
    if max_resolution <= 0:
        raise ValueError(f"max_resolution must be positive, got {max_resolution}")

    PERFORMANCE_CONFIG['max_resolution'] = max_resolution
    PERFORMANCE_CONFIG['use_fast_mode'] = fast_mode
    print(f"性能配置更新: 最大分辨率={max_resolution}, 快速模式={'开启' if fast_mode else '关闭'}")
+# 兼容性函数
+inference = fast_inference