Nguyễn Bá Thiêm committed on
Commit
b16ab70
1 Parent(s): 239e299

Add image super resolution functionality

.gitignore CHANGED
@@ -0,0 +1 @@
+ models/HAT/__pycache__/hat.cpython-39.pyc
app.py ADDED
@@ -0,0 +1,105 @@
+ import time
+ import streamlit as st
+ import numpy as np
+ from PIL import Image
+ import cv2  # only needed if you use OpenCV for extra image processing
+ from io import BytesIO
+ import base64
+ from models.HAT.hat import *
+
+ # Initialize session state for enhanced images
+ if 'hat_enhanced_image' not in st.session_state:
+     st.session_state['hat_enhanced_image'] = None
+
+ if 'rcan_enhanced_image' not in st.session_state:
+     st.session_state['rcan_enhanced_image'] = None
+
+ if 'hat_clicked' not in st.session_state:
+     st.session_state['hat_clicked'] = False
+ if 'rcan_clicked' not in st.session_state:
+     st.session_state['rcan_clicked'] = False
+
+ def reset_states():
+     """Clear cached results whenever a new input image is provided."""
+     st.session_state['hat_enhanced_image'] = None
+     st.session_state['rcan_enhanced_image'] = None
+     st.session_state['hat_clicked'] = False
+     st.session_state['rcan_clicked'] = False
+
+ def get_image_download_link(img, filename):
+     """Render a download button for the given PIL image."""
+     # Convert the PIL image to bytes
+     buffered = BytesIO()
+     img.save(buffered, format="PNG")
+     return st.download_button(
+         label="Download Image",
+         data=buffered.getvalue(),
+         file_name=filename,
+         mime="image/png"
+     )
+
+ st.markdown("<h1 style='text-align: center'>Image Super Resolution</h1>", unsafe_allow_html=True)
+
+ # Sidebar for navigation
+ st.sidebar.title("Options")
+ app_mode = st.sidebar.selectbox("Choose the input source",
+                                 ["Upload image", "Take a photo"])
+
+ # Depending on the choice, show the uploader widget or the webcam capture
+ if app_mode == "Upload image":
+     uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "png"], on_change=reset_states)
+     if uploaded_file is not None:
+         image = Image.open(uploaded_file).convert("RGB")
+ elif app_mode == "Take a photo":
+     # Use the browser webcam to capture a picture
+     camera_input = st.camera_input("Take a picture", on_change=reset_states)
+     if camera_input is not None:
+         # Convert the camera image to an RGB image
+         image = Image.open(camera_input).convert("RGB")
+
+ if 'image' in locals():
+     # st.image(image, caption='Uploaded Image', use_column_width=True)
+     st.write("")
+
+     if st.button('Enhance with HAT'):
+         with st.spinner('Processing using HAT... the model is enhancing the image'):
+             enhanced_image = HAT_for_deployment(image)
+             st.session_state['hat_enhanced_image'] = enhanced_image
+             st.session_state['hat_clicked'] = True
+         st.success('Done!')
+
+     # Display the low- and high-resolution images side by side
+     if st.session_state['hat_enhanced_image'] is not None:
+         col1, col2 = st.columns(2)
+         col1.header("Original")
+         col1.image(image, use_column_width=True)
+
+         col2.header("Enhanced")
+         col2.image(st.session_state['hat_enhanced_image'], use_column_width=True)
+         with col2:
+             get_image_download_link(st.session_state['hat_enhanced_image'], 'hat_enhanced.png')
+
+     if st.button('Enhance with RCAN'):
+         with st.spinner('Processing using RCAN... the model is enhancing the image'):
+             # Simulate a delay until the RCAN model is wired in
+             time.sleep(2)  # replace this with the actual model processing code
+             enhanced_image = image
+             st.session_state['rcan_enhanced_image'] = enhanced_image
+             st.session_state['rcan_clicked'] = True
+         st.success('Done!')
+
+     # Display the low- and high-resolution images side by side
+     if st.session_state['rcan_enhanced_image'] is not None:
+         col1, col2 = st.columns(2)
+         col1.header("Original")
+         col1.image(image, use_column_width=True)
+
+         col2.header("Enhanced")
+         col2.image(st.session_state['rcan_enhanced_image'], use_column_width=True)
+         with col2:
+             get_image_download_link(st.session_state['rcan_enhanced_image'], 'rcan_enhanced.png')
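Outside the Streamlit UI, the enhancement path added above reduces to a single call into `models/HAT/hat.py`. A minimal sketch of the same flow, assuming the HAT checkpoint expected by that module is already present locally (the output filename below is arbitrary):

```python
# Minimal sketch: run the HAT enhancement used by app.py without Streamlit.
# Assumes models/HAT/hat_model_checkpoint_best.pth has already been downloaded.
from PIL import Image
from models.HAT.hat import HAT_for_deployment

lr_image = Image.open("images/demo.png").convert("RGB")  # low-resolution input
sr_image = HAT_for_deployment(lr_image)                  # returns an upscaled PIL image
sr_image.save("demo_enhanced.png")                       # hypothetical output path
```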
images/{img_003_SRF_4_LR.png → demo.png} RENAMED
File without changes
models/HAT/hat.py CHANGED
@@ -1,28 +1,15 @@
 
  import gdown
-
- # url = 'https://drive.google.com/file/d/1LHIUM7YoUDk8cXWzVZhroAcA1xXi-d87/view?usp=drive_link'
- output = 'models/HAT/hat_model_checkpoint_best.pth'
- # gdown.download(url, output, quiet=False)
-
  import gc
  import os
  import random
  import time
- import wandb
- from tqdm import tqdm
-
  import matplotlib.pyplot as plt
  from PIL import Image
- from skimage.metrics import structural_similarity as ssim
-
  import torch
  from torch import nn, optim
  import torch.nn.functional as F
- from torch.utils.data import Dataset, DataLoader, ConcatDataset
  from torchvision import transforms
- from torchvision.transforms import Compose
- from torchmetrics.functional.image import structural_similarity_index_measure as ssim
-
  from basicsr.archs.arch_util import to_2tuple, trunc_normal_
  from einops import rearrange
  import math
@@ -299,6 +286,117 @@ class OCAB(nn.Module):
          x = self.proj(x) + shortcut

          x = x + self.mlp(self.norm2(x))
          return x
  class AttenBlocks(nn.Module):
      """ A series of attention blocks for one RHAG.
@@ -843,6 +941,8 @@ class HAT(nn.Module):
          x = x / self.img_range + self.mean

          return x
  # ------------------------------ HYPERPARAMS ------------------------------ #
  config = {
      "network_g": {
@@ -892,12 +992,12 @@ config = {
  }

  DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
- DEVICE
-
  class Network:
-     def __init__(self, train_dataloader=train_dataloader, valid_dataloader=valid_dataloader,
-                  config = config, device=DEVICE, run_id=None, wandb_mode = False, STOP = float('inf'), save_temp_model = True, train_model_continue = False):
          self.config = config
          self.model = HAT(
              upscale=self.config['network_g']['upscale'],
              in_chans=self.config['network_g']['in_chans'],
@@ -914,59 +1014,15 @@ class Network:
              mlp_ratio=self.config['network_g']['mlp_ratio'],
              upsampler=self.config['network_g']['upsampler'],
              resi_connection=self.config['network_g']['resi_connection']
-         ).to(device)
-         self.device = device
-         self.STOP = STOP
-         self.wandb_mode = wandb_mode
-         self.loss_fn = nn.L1Loss(reduction='mean').to(device)
-
          self.optimizer = optim.Adam(self.model.parameters(), lr=self.config['train']['optim_g']['lr'], weight_decay=config['train']['optim_g']['weight_decay'],betas=tuple(config['train']['optim_g']['betas']))
-         self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones = self.config['train']['scheduler']['milestones'], gamma=self.config['train']['scheduler']['gamma'])
-         self.train_dataloader = train_dataloader
-         self.valid_dataloader = valid_dataloader
-         self.num_epochs = self.config['train']['total_iter']
-         self.run_id = run_id
-         self.save_temp_model = save_temp_model
-         self.train_model_continue = train_model_continue
-         self.last_valid_loss = float('inf')
-         checkpoint_path = output
-         if self.save_temp_model:
-             if self.train_model_continue:
-                 # Load the network and other states from the checkpoint
-                 self.start_epoch, train_loss, valid_loss = self.load_network(checkpoint_path)
-
-                 initial_lr = self.config['train']['optim_g']['lr'] * self.config['train']['scheduler']['gamma'] # Define your initial or desired learning rate
-                 for param_group in self.optimizer.param_groups:
-                     param_group['lr'] = initial_lr # Resetting learning rate
-
-                 # Recreate the scheduler with the updated optimizer
-                 self.scheduler = optim.lr_scheduler.MultiStepLR(
-                     self.optimizer,
-                     milestones=self.config['train']['scheduler']['milestones'],
-                     gamma=self.config['train']['scheduler']['gamma'],
-                     last_epoch = self.start_epoch - 1 # Ensure to set the last_epoch to continue correctly
-                 )
-
-                 # Print the updated learning rate and scheduler state
-                 print("Updated Learning Rate is:", self.optimizer.param_groups[0]['lr'])
-                 print(self.scheduler.state_dict())
-                 self.last_valid_loss = valid_loss
-                 # self.num_epochs-= self.start_epoch
-                 print("Previous train loss: ", train_loss)
-                 print("Previous valid loss: ", self.last_valid_loss)
-
-                 # Resume training notice
-                 print("------------------- Resuming training -------------------")
-
-             self.save_network(0, 0, 0, 'temp_model_checkpoint.pth')
-
-     def del_model(self):
-         del self.model
-         del self.optimizer
-         del self.scheduler
-         gc.collect()
-         torch.cuda.empty_cache()
-
      def pre_process(self):
          # pad to multiplication of window_size
          window_size = self.config['network_g']['window_size'] * 4
@@ -986,84 +1042,11 @@ class Network:
          self.mod_pad_w = window_size - w % window_size
          for i in range(self.mod_pad_w):
              self.input_tile = F.pad(self.input_tile, (0, 1, 0, 0), 'reflect')
-
-
      def post_process(self):
          _, _, h, w = self.output_tile.size()
          self.output_tile = self.output_tile[:, :, 0:h - self.mod_pad_h * self.scale, 0:w - self.mod_pad_w * self.scale]

-
-     def save_network(self, epoch, train_loss, valid_loss, checkpoint_path):
-         checkpoint = {
-             'epoch': epoch,
-             'train_loss': train_loss,
-             'valid_loss': valid_loss,
-             'model': self.model.state_dict(),
-             'optimizer': self.optimizer.state_dict(),
-             'learning_rate_scheduler': self.scheduler.state_dict(),
-             'network': self
-         }
-         torch.save(checkpoint, checkpoint_path)
-
-     def load_network(self, checkpoint_path):
-
-         checkpoint = torch.load(checkpoint_path, map_location=self.device)
-         self.model = HAT(
-             upscale=self.config['network_g']['upscale'],
-             in_chans=self.config['network_g']['in_chans'],
-             img_size=self.config['network_g']['img_size'],
-             window_size=self.config['network_g']['window_size'],
-             compress_ratio=self.config['network_g']['compress_ratio'],
-             squeeze_factor=self.config['network_g']['squeeze_factor'],
-             conv_scale=self.config['network_g']['conv_scale'],
-             overlap_ratio=self.config['network_g']['overlap_ratio'],
-             img_range=self.config['network_g']['img_range'],
-             depths=self.config['network_g']['depths'],
-             embed_dim=self.config['network_g']['embed_dim'],
-             num_heads=self.config['network_g']['num_heads'],
-             mlp_ratio=self.config['network_g']['mlp_ratio'],
-             upsampler=self.config['network_g']['upsampler'],
-             resi_connection=self.config['network_g']['resi_connection']
-         ).to(self.device)
-         self.optimizer = optim.Adam(self.model.parameters(), lr=self.config['train']['optim_g']['lr'], weight_decay=config['train']['optim_g']['weight_decay'],betas=tuple(config['train']['optim_g']['betas']))
-         self.model.load_state_dict(checkpoint['model'])
-         self.optimizer.load_state_dict(checkpoint['optimizer']) # before create and load scheduler
-
-         self.scheduler = optim.lr_scheduler.MultiStepLR(self.optimizer, milestones = self.config['train']['scheduler']['milestones'], gamma=self.config['train']['scheduler']['gamma'])
-         self.scheduler.load_state_dict(checkpoint['learning_rate_scheduler'])
-         return checkpoint['epoch'], checkpoint['train_loss'], checkpoint['valid_loss']
-
-     def train_step(self, lr_images, hr_images):
-         lr_images, hr_images = lr_images.to(self.device), hr_images.to(self.device)
-         sr_images = self.model(lr_images)
-
-         self.optimizer.zero_grad()
-         loss = self.loss_fn(sr_images, hr_images)
-         loss.backward()
-         self.optimizer.step()
-
-         # Memory cleanup
-         del sr_images, lr_images, hr_images
-         gc.collect()
-         torch.cuda.empty_cache()
-
-         return loss.item()
-
-     def valid_step(self, lr_images, hr_images):
-         lr_images, hr_images = lr_images.to(self.device), hr_images.to(self.device)
-
-         sr_images = self.tile_valid(lr_images)
-
-         loss = self.loss_fn(sr_images, hr_images)
-
-         # Memory cleanup
-         del sr_images, lr_images, hr_images
-         gc.collect()
-         torch.cuda.empty_cache()
-
-         return loss.item()
-
-
      def tile_valid(self, lr_images):
          """
          Process all tiles of an image in a batch and then merge them back into the output image.
@@ -1167,115 +1150,8 @@ class Network:
          gc.collect()
          torch.cuda.empty_cache()
          return sr_images
-
-     def train_model(self):
-
-         if self.wandb_mode:
-             wandb.init(project='HAT-for-image-sr',
-                        resume='allow',
-                        config= self.config,
-                        id=self.run_id)
-             wandb.watch(self.model)
-         if self.train_model_continue:
-             epoch_lst = range(self.start_epoch, self.num_epochs)
-         else:
-             epoch_lst = range(self.num_epochs)
-         for epoch in epoch_lst:
-
-             start1 = time.time()
-
-             # ------------------- TRAIN -------------------
-             if self.save_temp_model:
-                 self.load_network('temp_model_checkpoint.pth')
-             self.model.train()
-             train_epoch_loss = 0
-
-             stop = 0
-             for hr_images, lr_images in tqdm(self.train_dataloader, desc=f'Epoch {epoch+1}/{self.num_epochs}'):
-
-                 if stop == self.STOP:
-                     break
-                 stop+=1
-
-                 loss = self.train_step(lr_images, hr_images)
-                 train_epoch_loss += loss
-
-                 if self.wandb_mode:
-                     wandb.log({
-                         'batch_loss': loss,
-                     })
-
-             if self.wandb_mode:
-                 wandb.log({
-                     'learning_rate': self.optimizer.param_groups[0]['lr']
-                 })
-             print("Learning Rate is:", self.optimizer.param_groups[0]['lr'])
-
-             self.scheduler.step()
-
-
-             if self.save_temp_model:
-                 self.save_network(epoch, train_epoch_loss, 0, 'temp_model_checkpoint.pth')
-                 print(self.scheduler.state_dict())
-                 self.del_model()
-
-             del hr_images
-             del lr_images
-             gc.collect()
-
-             train_epoch_loss /= len(self.train_dataloader)
-
-             end1 = time.time()
-
-
-             # ------------------- VALID -------------------
-             start2 = time.time()
-             if self.save_temp_model:
-                 self.load_network('temp_model_checkpoint.pth')
-
-             self.model.eval()
-             with torch.no_grad():
-                 valid_epoch_loss = 0
-
-                 stop = 0
-                 for hr_images, lr_images in tqdm(self.valid_dataloader, desc=f'Epoch {epoch+1}/{self.num_epochs}'):
-                     if stop == self.STOP:
-                         break
-                     stop+=1
-                     loss = self.valid_step(lr_images, hr_images)
-                     valid_epoch_loss += loss
-
-                 valid_epoch_loss /= len(self.valid_dataloader)
-
-             end2 = time.time()
-
-             # ------------------- LOG -------------------
-             if self.wandb_mode:
-                 wandb.log({
-                     'train_loss': train_epoch_loss,
-                     'valid_loss': valid_epoch_loss,
-                 })
-             # ------------------- VERBOSE -------------------
-             print(f'Epoch {epoch+1}/{self.num_epochs} | Train Loss: {train_epoch_loss:.4f} | Valid Loss: {valid_epoch_loss:.4f} | Time train: {end1-start1:.2f}s | Time valid: {end2-start2:.2f}s')
-
-             # ------------------- CHECKPOINT -------------------
-             self.save_network(epoch, train_epoch_loss, valid_epoch_loss, 'model_checkpoint_latest.pth')
-             if valid_epoch_loss < self.last_valid_loss:
-                 self.last_valid_loss = valid_epoch_loss
-                 self.save_network(epoch, train_epoch_loss, valid_epoch_loss, 'model_checkpoint_best.pth')
-                 print("New best checkpoint saved!")
-
-             if self.save_temp_model:
-                 self.del_model()
-
-             del hr_images
-             del lr_images
-             gc.collect()
-
-         if self.wandb_mode:
-             wandb.finish()

-     def inference(self, lr_image, hr_image):
          """
          - lr_image: torch.Tensor
              3D Tensor (C, H, W)
@@ -1284,80 +1160,87 @@ class Network:
          ground-truth high-res image. If used solely for inference, skip this. Default is None/
          """
          lr_image = lr_image.unsqueeze(0).to(self.device)
          self.for_inference = True
          with torch.no_grad():
              sr_image = self.tile_valid(lr_image)

-         lr_image = lr_image.squeeze(0)
-         sr_image = sr_image.squeeze(0)
-
-         print(">> Size of low-res image:", lr_image.size())
-         print(">> Size of super-res image:", sr_image.size())
-         if hr_image != None:
-             print(">> Size of high-res image:", hr_image.size())
-
-         if hr_image != None:
-             fig, axes = plt.subplots(1, 3, figsize=(10, 6))
-             axes[0].imshow(lr_image.cpu().detach().permute((1, 2, 0)))
-             axes[0].set_title('Low Resolution')
-             axes[1].imshow(sr_image.cpu().detach().permute((1, 2, 0)))
-             axes[1].set_title('Super Resolution')
-             axes[2].imshow(hr_image.cpu().detach().permute((1, 2, 0)))
-             axes[2].set_title('High Resolution')
-             for ax in axes.flat:
-                 ax.axis('off')
          else:
-             fig, axes = plt.subplots(1, 2, figsize=(10, 6))
-             axes[0].imshow(lr_image.cpu().detach().permute((1, 2, 0)))
-             axes[0].set_title('Low Resolution')
-             axes[1].imshow(sr_image.cpu().detach().permute((1, 2, 0)))
-             axes[1].set_title('Super Resolution')
-             for ax in axes.flat:
-                 ax.axis('off')

-         plt.tight_layout()
-         plt.show()

-         return sr_image
-
-
- class TestDataset(Dataset):
-     def __init__(self, lr_images_path):
-         super(TestDataset, self).__init__()
-         # hr_images_list = os.listdir(hr_images_path)
-         self.lr_images_path = lr_images_path

-     def __getitem__(self, idx):
-
-         lr_image = Image.open(self.lr_image_path)
-
-         lr_image = transforms.functional.to_tensor(lr_image)
-
-         return lr_image
-
-
  if __name__ == "__main__":
      import os
      import sys
-     # Getting to the Lambda directory
      sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../"))
-     image_path = "images/img_003_SRF_4_LR.png"
-
-     infer_dataset = TestDataset(images_path=image_path)

-     # hat = Network(run_id="hat-for-image-sr-" + str(int(1704006834)),config = config, wandb_mode = False, save_temp_model = True, train_model_continue = False) # STOP = 2
-     # num_params = sum(p.numel() for p in hat.model.parameters() if p.requires_grad)
-     # print("Number of learnable parameters: ", num_params)

-     # ---------- LOAD FROM LATEST CHECKPOINT ---------- #
-     gc.collect()
-     torch.cuda.empty_cache()
-     hat = Network()
-     hat.load_network(output)
-     num_params = sum(p.numel() for p in hat.model.parameters() if p.requires_grad)
-     print("Number of learnable parameters: ", num_params)
-     image = image.squeeze(0)
-     hat.inference(lr_image)
-
-
 
+ import numpy as np
  import gdown
  import gc
  import os
  import random
  import time
  import matplotlib.pyplot as plt
  from PIL import Image
  import torch
  from torch import nn, optim
  import torch.nn.functional as F
  from torchvision import transforms
  from basicsr.archs.arch_util import to_2tuple, trunc_normal_
  from einops import rearrange
  import math
 
          x = self.proj(x) + shortcut

          x = x + self.mlp(self.norm2(x))
+         return x
+ class HAB(nn.Module):
+     r""" Hybrid Attention Block.
+
+     Args:
+         dim (int): Number of input channels.
+         input_resolution (tuple[int]): Input resolution.
+         num_heads (int): Number of attention heads.
+         window_size (int): Window size.
+         shift_size (int): Shift size for SW-MSA.
+         mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
+         qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
+         qk_scale (float | None, optional): Override default qk scale of head_dim ** -0.5 if set.
+         drop (float, optional): Dropout rate. Default: 0.0
+         attn_drop (float, optional): Attention dropout rate. Default: 0.0
+         drop_path (float, optional): Stochastic depth rate. Default: 0.0
+         act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
+         norm_layer (nn.Module, optional): Normalization layer. Default: nn.LayerNorm
+     """
+
+     def __init__(self,
+                  dim,
+                  input_resolution,
+                  num_heads,
+                  window_size=7,
+                  shift_size=0,
+                  compress_ratio=3,
+                  squeeze_factor=30,
+                  conv_scale=0.01,
+                  mlp_ratio=4.,
+                  qkv_bias=True,
+                  qk_scale=None,
+                  drop=0.,
+                  attn_drop=0.,
+                  drop_path=0.,
+                  act_layer=nn.GELU,
+                  norm_layer=nn.LayerNorm):
+         super().__init__()
+         self.dim = dim
+         self.input_resolution = input_resolution
+         self.num_heads = num_heads
+         self.window_size = window_size
+         self.shift_size = shift_size
+         self.mlp_ratio = mlp_ratio
+         if min(self.input_resolution) <= self.window_size:
+             # if window size is larger than input resolution, we don't partition windows
+             self.shift_size = 0
+             self.window_size = min(self.input_resolution)
+         assert 0 <= self.shift_size < self.window_size, 'shift_size must in 0-window_size'
+
+         self.norm1 = norm_layer(dim)
+         self.attn = WindowAttention(
+             dim,
+             window_size=to_2tuple(self.window_size),
+             num_heads=num_heads,
+             qkv_bias=qkv_bias,
+             qk_scale=qk_scale,
+             attn_drop=attn_drop,
+             proj_drop=drop)
+
+         self.conv_scale = conv_scale
+         self.conv_block = CAB(num_feat=dim, compress_ratio=compress_ratio, squeeze_factor=squeeze_factor)
+
+         self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
+         self.norm2 = norm_layer(dim)
+         mlp_hidden_dim = int(dim * mlp_ratio)
+         self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
+
+     def forward(self, x, x_size, rpi_sa, attn_mask):
+         h, w = x_size
+         b, _, c = x.shape
+         # assert seq_len == h * w, "input feature has wrong size"
+
+         shortcut = x
+         x = self.norm1(x)
+         x = x.view(b, h, w, c)
+
+         # Conv_X
+         conv_x = self.conv_block(x.permute(0, 3, 1, 2))
+         conv_x = conv_x.permute(0, 2, 3, 1).contiguous().view(b, h * w, c)
+
+         # cyclic shift
+         if self.shift_size > 0:
+             shifted_x = torch.roll(x, shifts=(-self.shift_size, -self.shift_size), dims=(1, 2))
+             attn_mask = attn_mask
+         else:
+             shifted_x = x
+             attn_mask = None
+
+         # partition windows
+         x_windows = window_partition(shifted_x, self.window_size)  # nw*b, window_size, window_size, c
+         x_windows = x_windows.view(-1, self.window_size * self.window_size, c)  # nw*b, window_size*window_size, c
+
+         # W-MSA/SW-MSA (to be compatible for testing on images whose shapes are the multiple of window size
+         attn_windows = self.attn(x_windows, rpi=rpi_sa, mask=attn_mask)
+
+         # merge windows
+         attn_windows = attn_windows.view(-1, self.window_size, self.window_size, c)
+         shifted_x = window_reverse(attn_windows, self.window_size, h, w)  # b h' w' c
+
+         # reverse cyclic shift
+         if self.shift_size > 0:
+             attn_x = torch.roll(shifted_x, shifts=(self.shift_size, self.shift_size), dims=(1, 2))
+         else:
+             attn_x = shifted_x
+         attn_x = attn_x.view(b, h * w, c)
+
+         # FFN
+         x = shortcut + self.drop_path(attn_x) + conv_x * self.conv_scale
+         x = x + self.drop_path(self.mlp(self.norm2(x)))
+
          return x
  class AttenBlocks(nn.Module):
      """ A series of attention blocks for one RHAG.
 
          x = x / self.img_range + self.mean

          return x
+
+
  # ------------------------------ HYPERPARAMS ------------------------------ #
  config = {
      "network_g": {
992
  }
993
 
994
  DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
995
+ # DEVICE = torch.device('mps' if torch.backends.mps.is_built() else 'cpu')
996
+ print('device', DEVICE)
997
  class Network:
998
+ def __init__(self,config = config, device=DEVICE):
 
999
  self.config = config
1000
+ self.device = device
1001
  self.model = HAT(
1002
  upscale=self.config['network_g']['upscale'],
1003
  in_chans=self.config['network_g']['in_chans'],
 
1014
  mlp_ratio=self.config['network_g']['mlp_ratio'],
1015
  upsampler=self.config['network_g']['upsampler'],
1016
  resi_connection=self.config['network_g']['resi_connection']
1017
+ ).to(self.device)
 
 
 
 
 
1018
  self.optimizer = optim.Adam(self.model.parameters(), lr=self.config['train']['optim_g']['lr'], weight_decay=config['train']['optim_g']['weight_decay'],betas=tuple(config['train']['optim_g']['betas']))
1019
+
1020
+ def load_network(self, checkpoint_path):
1021
+
1022
+ checkpoint = torch.load(checkpoint_path, map_location=self.device)
1023
+ self.model.load_state_dict(checkpoint['model'])
1024
+ self.optimizer.load_state_dict(checkpoint['optimizer']) # before create and load scheduler
1025
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
      def pre_process(self):
          # pad to multiplication of window_size
          window_size = self.config['network_g']['window_size'] * 4

          self.mod_pad_w = window_size - w % window_size
          for i in range(self.mod_pad_w):
              self.input_tile = F.pad(self.input_tile, (0, 1, 0, 0), 'reflect')
+
      def post_process(self):
          _, _, h, w = self.output_tile.size()
          self.output_tile = self.output_tile[:, :, 0:h - self.mod_pad_h * self.scale, 0:w - self.mod_pad_w * self.scale]

      def tile_valid(self, lr_images):
          """
          Process all tiles of an image in a batch and then merge them back into the output image.

          gc.collect()
          torch.cuda.empty_cache()
          return sr_images

+     def inference(self, lr_image, hr_image = None, deployment = False):
          """
          - lr_image: torch.Tensor
              3D Tensor (C, H, W)

          ground-truth high-res image. If used solely for inference, skip this. Default is None/
          """
          lr_image = lr_image.unsqueeze(0).to(self.device)
+
          self.for_inference = True
          with torch.no_grad():
              sr_image = self.tile_valid(lr_image)
+             sr_image = torch.clamp(sr_image, 0, 1)

+         if deployment:
+             return sr_image.squeeze(0)
          else:
+             lr_image = lr_image.squeeze(0)
+             sr_image = sr_image.squeeze(0)

+             print(">> Size of low-res image:", lr_image.size())
+             print(">> Size of super-res image:", sr_image.size())
+             if hr_image != None:
+                 print(">> Size of high-res image:", hr_image.size())
+
+             if hr_image != None:
+                 fig, axes = plt.subplots(1, 3, figsize=(10, 6))
+                 axes[0].imshow(lr_image.cpu().detach().permute((1, 2, 0)))
+                 axes[0].set_title('Low Resolution')
+                 axes[1].imshow(sr_image.cpu().detach().permute((1, 2, 0)))
+                 axes[1].set_title('Super Resolution')
+                 axes[2].imshow(hr_image.cpu().detach().permute((1, 2, 0)))
+                 axes[2].set_title('High Resolution')
+                 for ax in axes.flat:
+                     ax.axis('off')
+             else:
+                 fig, axes = plt.subplots(1, 2, figsize=(10, 6))
+                 axes[0].imshow(lr_image.cpu().detach().permute((1, 2, 0)))
+                 axes[0].set_title('Low Resolution')
+                 axes[1].imshow(sr_image.cpu().detach().permute((1, 2, 0)))
+                 axes[1].set_title('Super Resolution')
+                 for ax in axes.flat:
+                     ax.axis('off')
+
+             plt.tight_layout()
+             plt.show()
+             return sr_image

+ def HAT_for_deployment(lr_image, model_path = 'models/HAT/hat_model_checkpoint_best.pth'):
+     lr_image = transforms.functional.to_tensor(lr_image)
+     hat = Network()
+     hat.load_network(model_path)
+     t1 = time.time()
+     sr_image = hat.inference(lr_image, deployment=True).cpu().numpy()
+     t2 = time.time()
+     print("Time taken to infer:", t2 - t1)
+     # If image is in [C, H, W] format, transpose it to [H, W, C]
+     sr_image = np.transpose(sr_image, (1, 2, 0))
+     if sr_image.max() <= 1.0:
+         sr_image = (sr_image * 255).astype(np.uint8)
+     sr_image = Image.fromarray(sr_image)
+     return sr_image

  if __name__ == "__main__":
      import os
      import sys
+     # Getting to the true directory
      sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../../"))

+     # Define the model's file path and the Google Drive link
+     model_path = 'models/HAT/hat_model_checkpoint_best.pth'
+     gdrive_id = '1LHIUM7YoUDk8cXWzVZhroAcA1xXi-d87'  # Replace with your actual Google Drive file id
+
+     # Check if the model file exists
+     if not os.path.exists(model_path):
+         print(f"Model file not found at {model_path}. Downloading from Google Drive...")
+         # Ensure the directory exists, as gdown will not automatically create directory paths
+         os.makedirs(os.path.dirname(model_path), exist_ok=True)
+         # Download the file from Google Drive
+         # gdown.download(id=gdrive_id, output=model_path, quiet=False)
+     else:
+         print(f"Model file found at {model_path}. No need to download.")

+     image_path = "images/demo.png"
+     lr_image = Image.open(image_path)
+     # lr_image = transforms.functional.to_tensor(lr_image)
+
+     # hat = Network()
+     # hat.load_network(model_path)
+     # hat.inference(lr_image)
+     print(HAT_for_deployment(lr_image, model_path))
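Note that the `__main__` block above leaves the actual gdown call commented out, so a missing checkpoint is reported but never fetched. A small sketch of how that guard could be completed, assuming a recent gdown version and that the Drive file id shown above is shared publicly:

```python
# Sketch only: fetch the HAT checkpoint if it is missing, then reuse the flow above.
import os
import gdown  # already imported at the top of models/HAT/hat.py

model_path = 'models/HAT/hat_model_checkpoint_best.pth'
gdrive_id = '1LHIUM7YoUDk8cXWzVZhroAcA1xXi-d87'

if not os.path.exists(model_path):
    # gdown does not create missing parent directories, so create them first
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    gdown.download(id=gdrive_id, output=model_path, quiet=False)
```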