import argparse
import gradio as gr
from PIL import Image
import os
import torch
import numpy as np
import yaml
from huggingface_hub import hf_hub_download
from gradio_imageslider import ImageSlider
## local code
from models import instructir
from text.models import LanguageModel, LMHead
def dict2namespace(config):
    namespace = argparse.Namespace()
    for key, value in config.items():
        if isinstance(value, dict):
            new_value = dict2namespace(value)
        else:
            new_value = value
        setattr(namespace, key, new_value)
    return namespace
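# Example: dict2namespace({"model": {"width": 32}}) returns a namespace where
# cfg.model.width == 32, so nested YAML sections become attribute lookups.

# Download the pretrained InstructIR image model and language head from the Hub.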
hf_hub_download(repo_id="marcosv/InstructIR", filename="im_instructir-7d.pt", local_dir="./")
hf_hub_download(repo_id="marcosv/InstructIR", filename="lm_instructir-7d.pt", local_dir="./")
CONFIG = "configs/eval5d.yml"
LM_MODEL = "lm_instructir-7d.pt"
MODEL_NAME = "im_instructir-7d.pt"
# parse config file
with open(CONFIG, "r") as f:
    config = yaml.safe_load(f)
cfg = dict2namespace(config)
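# configs/eval5d.yml must provide the keys accessed below:
# model.in_ch, model.width, model.enc_blks, model.middle_blk_num,
# model.dec_blks, model.textdim, plus llm.model, llm.model_dim,
# llm.embd_dim, and llm.nclasses.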
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
model = instructir.create_model(
    input_channels=cfg.model.in_ch,
    width=cfg.model.width,
    enc_blks=cfg.model.enc_blks,
    middle_blk_num=cfg.model.middle_blk_num,
    dec_blks=cfg.model.dec_blks,
    txtdim=cfg.model.textdim,
)
model = model.to(device)
print ("IMAGE MODEL CKPT:", MODEL_NAME)
model.load_state_dict(torch.load(MODEL_NAME, map_location="cpu"), strict=True)
os.environ["TOKENIZERS_PARALLELISM"] = "false"  # avoid the Hugging Face tokenizers fork warning
LMODEL = cfg.llm.model
language_model = LanguageModel(model=LMODEL)
lm_head = LMHead(embedding_dim=cfg.llm.model_dim, hidden_dim=cfg.llm.embd_dim, num_classes=cfg.llm.nclasses)
lm_head = lm_head.to(device)
print("LMHEAD MODEL CKPT:", LM_MODEL)
lm_head.load_state_dict(torch.load(LM_MODEL, map_location="cpu"), strict=True)
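# Restoration pipeline: the language model embeds the instruction, the LMHead
# projects that embedding to a text embedding (and a degradation-type
# prediction), and the image model is conditioned on the text embedding.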
def load_img(filename, norm=True):
    img = np.array(Image.open(filename).convert("RGB"))
    if norm:
        img = img / 255.
        img = img.astype(np.float32)
    return img
def process_img(image, prompt):
    img = np.array(image)
    img = img / 255.
    img = img.astype(np.float32)
    y = torch.tensor(img).permute(2, 0, 1).unsqueeze(0).to(device)

    lm_embd = language_model(prompt)
    lm_embd = lm_embd.to(device)

    with torch.no_grad():
        text_embd, deg_pred = lm_head(lm_embd)
        x_hat = model(y, text_embd)

    restored_img = x_hat.squeeze().permute(1, 2, 0).clamp_(0, 1).cpu().detach().numpy()
    restored_img = np.clip(restored_img, 0., 1.)
    restored_img = (restored_img * 255.0).round().astype(np.uint8)  # float32 -> uint8

    # Return (input, restored) so the ImageSlider can show a before/after comparison.
    return (image, Image.fromarray(restored_img))
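# Minimal usage sketch (file names and prompt are illustrative, not part of the repo):
#   before, after = process_img(Image.open("input.png").convert("RGB"),
#                               "please remove the noise from this photo")
#   after.save("restored.png")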
title = "InstructIR ✏️🖼️ 🤗"
description = ''' ## [High-Quality Image Restoration Following Human Instructions](https://github.com/mv-lab/InstructIR)
[Marcos V. Conde](https://scholar.google.com/citations?user=NtB1kjYAAAAJ&hl=en), [Gregor Geigle](https://scholar.google.com/citations?user=uIlyqRwAAAAJ&hl=en), [Radu Timofte](https://scholar.google.com/citations?user=u3MwH5kAAAAJ&hl=en)
Computer Vision Lab, University of Wuerzburg | Sony PlayStation, FTG
### TL;DR: quickstart
***InstructIR takes as input an image and a human-written instruction for how to improve that image.***
The (single) neural model performs all-in-one image restoration. InstructIR achieves state-of-the-art results on several restoration tasks including image denoising, deraining, deblurring, dehazing, and (low-light) image enhancement.
**🚀 You can start with the [demo tutorial](https://github.com/mv-lab/InstructIR/blob/main/demo.ipynb).** Check [our github](https://github.com/mv-lab/InstructIR) for more information.
**Abstract:** Image restoration is a fundamental problem that involves recovering a high-quality clean image from its degraded observation. All-In-One image restoration models can effectively restore images from various types and levels of degradation using degradation-specific information as prompts to guide the restoration model. In this work, we present the first approach that uses human-written instructions to guide the image restoration model. Given natural language prompts, our model can recover high-quality images from their degraded counterparts, considering multiple degradation types. Our method, InstructIR, achieves state-of-the-art results on several restoration tasks including image denoising, deraining, deblurring, dehazing, and (low-light) image enhancement. InstructIR improves +1dB over previous all-in-one restoration methods. Moreover, our dataset and results represent a novel benchmark for new research on text-guided image restoration and enhancement.
The model was trained mostly on synthetic data, so it may not work well on complex real-world images.
However, it works surprisingly well on real-world foggy and low-light images.
You can also try general image enhancement prompts (e.g., "retouch this image", "enhance the colors") and see how it improves the colors.
'''
article = "