LogicGoInfotechSpaces committed
Commit 5e6062c · 1 Parent(s): 8d0a1ae

Switch to FastAI GAN colorization model (Hammad712/GAN-Colorization-Model)

Files changed (4):
  1. app/colorize_model.py +91 -167
  2. app/config.py +6 -1
  3. app/main.py +3 -0
  4. requirements.txt +1 -0
app/colorize_model.py CHANGED
@@ -1,6 +1,6 @@
 """
-Colorize model wrapper replicating the behaviour of the
-`fffiloni/text-guided-image-colorization` Space.
+Colorize model wrapper using FastAI GAN Colorization Model
+Hammad712/GAN-Colorization-Model
 """
 
 from __future__ import annotations
@@ -11,15 +11,8 @@ from typing import Tuple
 
 import torch
 from PIL import Image
-from diffusers import (
-    AutoencoderKL,
-    ControlNetModel,
-    StableDiffusionXLControlNetPipeline,
-    UNet2DConditionModel,
-)
-from huggingface_hub import hf_hub_download
-from safetensors.torch import load_file
-from transformers import BlipForConditionalGeneration, BlipProcessor
+from fastai.vision.all import *
+from huggingface_hub import from_pretrained_fastai
 
 from app.config import settings
 
@@ -30,7 +23,7 @@ def _ensure_cache_dir() -> str:
     cache_dir = os.environ.get("HF_HOME") or "/tmp/hf_cache"
     try:
         os.makedirs(cache_dir, exist_ok=True)
-    except Exception as exc:  # pragma: no cover
+    except Exception as exc:
         logger.warning("Could not create cache directory %s: %s", cache_dir, exc)
     os.environ["HF_HOME"] = cache_dir
     os.environ["TRANSFORMERS_CACHE"] = cache_dir
@@ -39,167 +32,98 @@ def _ensure_cache_dir() -> str:
     return cache_dir
 
 
-def _apply_lab_merge(original_luminance: Image.Image, color_map: Image.Image) -> Image.Image:
-    base_lab = original_luminance.convert("LAB")
-    color_lab = color_map.convert("LAB")
-    l_channel, _, _ = base_lab.split()
-    _, a_channel, b_channel = color_lab.split()
-    merged = Image.merge("LAB", (l_channel, a_channel, b_channel))
-    return merged.convert("RGB")
-
-
-def _clean_caption(prompt: str) -> str:
-    remove_terms = [
-        "black and white", "black & white", "monochrome", "bw photo",
-        "historical", "restored", "low contrast", "desaturated", "overcast",
-    ]
-    cleaned = prompt
-    for term in remove_terms:
-        cleaned = cleaned.replace(term, "")
-    return cleaned.strip(" ,")
-
-
 class ColorizeModel:
-    """Colorization model that runs the SDXL + ControlNet pipeline locally."""
+    """Colorization model using FastAI GAN model."""
 
     def __init__(self, model_id: str | None = None) -> None:
         self.cache_dir = _ensure_cache_dir()
-        self.hf_token = (
-            os.getenv("HF_TOKEN")
-            or os.getenv("HUGGINGFACE_HUB_TOKEN")
-            or os.getenv("HUGGINGFACE_API_TOKEN")
-        )
-        if not self.hf_token:
-            logger.warning("HF token not provided – attempting to download public models only.")
-
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        self.dtype = torch.float16 if self.device.type == "cuda" else torch.float32
         os.environ.setdefault("OMP_NUM_THREADS", "1")
 
-        self.controlnet_id = model_id or settings.MODEL_ID
-        self.base_model_id = settings.BASE_MODEL_ID
-        self.lightning_repo = settings.LIGHTNING_REPO
-        self.lightning_weights = settings.LIGHTNING_WEIGHTS
-        self.caption_model_id = settings.CAPTION_MODEL_ID
-
-        self.num_inference_steps = settings.NUM_INFERENCE_STEPS
-        self.guidance_scale = settings.GUIDANCE_SCALE
-        self.controlnet_scale = settings.CONTROLNET_SCALE
-        self.positive_prompt = settings.POSITIVE_PROMPT
-        self.negative_prompt = settings.NEGATIVE_PROMPT
-        self.caption_prefix = settings.CAPTION_PREFIX
-        self.seed = settings.COLORIZE_SEED
-
-        self._load_caption_model()
-        self._load_pipeline()
-
-    def _load_caption_model(self) -> None:
-        logger.info("Loading BLIP captioning model: %s", self.caption_model_id)
-        self.caption_processor = BlipProcessor.from_pretrained(
-            self.caption_model_id,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-        )
-        self.caption_model = BlipForConditionalGeneration.from_pretrained(
-            self.caption_model_id,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-            torch_dtype=self.dtype if self.device.type == "cuda" else torch.float32,
-        ).to(self.device)
-
-    def _load_pipeline(self) -> None:
-        logger.info("Loading ControlNet model: %s", self.controlnet_id)
-        controlnet = ControlNetModel.from_pretrained(
-            self.controlnet_id,
-            torch_dtype=self.dtype,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-        )
-
-        logger.info("Loading SDXL base model components: %s", self.base_model_id)
-        vae = AutoencoderKL.from_pretrained(
-            self.base_model_id,
-            subfolder="vae",
-            torch_dtype=self.dtype,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-        )
-        unet = UNet2DConditionModel.from_config(
-            self.base_model_id,
-            subfolder="unet",
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-        )
-        lightning_path = hf_hub_download(
-            repo_id=self.lightning_repo,
-            filename=self.lightning_weights,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-        )
-        unet.load_state_dict(load_file(lightning_path))
-
-        self.pipe = StableDiffusionXLControlNetPipeline.from_pretrained(
-            self.base_model_id,
-            vae=vae,
-            unet=unet,
-            controlnet=controlnet,
-            torch_dtype=self.dtype,
-            cache_dir=self.cache_dir,
-            token=self.hf_token,
-            safety_checker=None,
-            requires_safety_checker=False,
-        )
-        self.pipe.set_progress_bar_config(disable=True)
-        self.pipe.to(self.device, dtype=self.dtype)
-        if self.device.type == "cuda" and hasattr(self.pipe, "enable_xformers_memory_efficient_attention"):
-            try:
-                self.pipe.enable_xformers_memory_efficient_attention()
-            except Exception as exc:  # pragma: no cover
-                logger.warning("Could not enable xFormers optimizations: %s", exc)
-
-        logger.info("Colorization pipeline ready.")
-
-    def caption_image(self, image: Image.Image) -> str:
-        inputs = self.caption_processor(
-            image,
-            self.caption_prefix,
-            return_tensors="pt",
-        ).to(self.device)
-
-        if self.device.type != "cuda":
-            inputs = {k: v.to(torch.float32) if isinstance(v, torch.Tensor) else v for k, v in inputs.items()}
-
-        with torch.inference_mode():
-            caption_ids = self.caption_model.generate(**inputs)
-        caption = self.caption_processor.decode(caption_ids[0], skip_special_tokens=True)
-        return _clean_caption(caption)
+        # Use FastAI model ID from config or default
+        self.model_id = model_id or settings.MODEL_ID
+        self.output_caption = getattr(settings, "FASTAI_OUTPUT_CAPTION", "Colorized using GAN-Colorization-Model")
+
+        logger.info("Loading FastAI GAN Colorization model: %s", self.model_id)
+        try:
+            self.learn = from_pretrained_fastai(self.model_id)
+            logger.info("FastAI GAN Colorization model loaded successfully")
+        except Exception as e:
+            error_msg = (
+                f"Failed to load FastAI model '{self.model_id}'. "
+                f"Error: {str(e)}\n"
+                f"Please check the MODEL_ID environment variable. "
+                f"Default model: 'Hammad712/GAN-Colorization-Model'"
+            )
+            logger.error(error_msg)
+            raise RuntimeError(error_msg) from e
 
     def colorize(self, image: Image.Image, num_inference_steps: int | None = None) -> Tuple[Image.Image, str]:
-        original_size = image.size
-        control_image = image.convert("L").convert("RGB").resize((512, 512), Image.Resampling.LANCZOS)
-
-        caption = self.caption_image(image)
-        prompt_components = [self.positive_prompt, caption]
-        prompt = ", ".join([p for p in prompt_components if p])
-        steps = num_inference_steps or self.num_inference_steps
-        generator = torch.Generator(device=self.device).manual_seed(self.seed)
-
-        logger.info("Running ControlNet pipeline with prompt: %s", prompt)
-        result = self.pipe(
-            prompt=prompt,
-            negative_prompt=self.negative_prompt or None,
-            image=control_image,
-            control_image=control_image,
-            num_inference_steps=steps,
-            guidance_scale=self.guidance_scale,
-            controlnet_conditioning_scale=self.controlnet_scale,
-            generator=generator,
-        )
-
-        generated = result.images[0]
-        colorized = _apply_lab_merge(control_image, generated)
-        if colorized.size != original_size:
-            colorized = colorized.resize(original_size, Image.Resampling.LANCZOS)
-
-        return colorized, caption
-
+        """
+        Colorize a grayscale or color image using FastAI GAN model.
+
+        Args:
+            image: PIL Image (grayscale or color)
+            num_inference_steps: Ignored for FastAI model (kept for API compatibility)
+
+        Returns:
+            Tuple of (colorized PIL Image, caption string)
+        """
+        try:
+            original_size = image.size
+
+            # Ensure image is RGB
+            if image.mode != "RGB":
+                image = image.convert("RGB")
+
+            # FastAI predict expects a PIL Image
+            logger.info("Running FastAI GAN colorization...")
+
+            # Use the model's predict method.
+            # FastAI predict for image models typically returns the output image
+            # directly or as the first element of a tuple
+            prediction = self.learn.predict(image)
+
+            # Extract the colorized image from prediction,
+            # handling the different return types from FastAI
+            if isinstance(prediction, (list, tuple)):
+                # If tuple/list, the first element is usually the prediction
+                colorized = prediction[0] if len(prediction) > 0 else image
+            else:
+                # Direct return
+                colorized = prediction
+
+            # Ensure we have a PIL Image
+            if not isinstance(colorized, Image.Image):
+                # If it's a tensor, convert to PIL
+                if isinstance(colorized, torch.Tensor):
+                    if colorized.dim() == 4:
+                        colorized = colorized[0]  # Remove batch dimension
+                    if colorized.dim() == 3:
+                        # Convert CHW to HWC and move to CPU
+                        colorized = colorized.permute(1, 2, 0).cpu()
+                        # Clamp float values to [0, 1] and scale to [0, 255]
+                        if colorized.dtype in (torch.float32, torch.float16):
+                            colorized = torch.clamp(colorized, 0, 1)
+                            colorized = (colorized * 255).byte()
+                        colorized = Image.fromarray(colorized.numpy(), "RGB")
+                    else:
+                        raise ValueError(f"Unexpected tensor shape: {colorized.shape}")
+                else:
+                    raise ValueError(f"Unexpected prediction type: {type(colorized)}")
+
+            # Ensure RGB mode
+            if colorized.mode != "RGB":
+                colorized = colorized.convert("RGB")
+
+            # Resize back to the original size if needed
+            if colorized.size != original_size:
+                colorized = colorized.resize(original_size, Image.Resampling.LANCZOS)
+
+            logger.info("Colorization completed successfully")
+            return colorized, self.output_caption
+
+        except Exception as e:
+            logger.error("Error during colorization: %s", str(e))
+            raise RuntimeError(f"Colorization failed: {str(e)}") from e
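For reference, a minimal sketch of what the new backend boils down to outside the wrapper class. This is illustrative, not code from this commit: it assumes the Hub repo hosts fastai weights that from_pretrained_fastai can load and that learn.predict() accepts a PIL image; the exact return shape depends on how the learner was exported, which is why the wrapper handles tuples, tensors, and plain images. The input filename is hypothetical.

    # Illustrative sketch (not part of this commit): exercising the new backend directly.
    from huggingface_hub import from_pretrained_fastai
    from PIL import Image

    learn = from_pretrained_fastai("Hammad712/GAN-Colorization-Model")

    image = Image.open("old_photo.jpg").convert("RGB")  # hypothetical input file
    prediction = learn.predict(image)

    # fastai learners commonly return a (decoded, raw, probs) tuple; like the
    # wrapper above, take the first element when a tuple comes back.
    colorized = prediction[0] if isinstance(prediction, (list, tuple)) else prediction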
app/config.py CHANGED
@@ -18,7 +18,8 @@ class Settings(BaseSettings):
     BASE_URL: str = os.getenv("BASE_URL", "http://localhost:8000")
 
     # Model / inference settings
-    MODEL_ID: str = os.getenv("MODEL_ID", "fffiloni/controlnet-colorization-sdxl")
+    MODEL_ID: str = os.getenv("MODEL_ID", "Hammad712/GAN-Colorization-Model")
+    MODEL_BACKEND: str = os.getenv("MODEL_BACKEND", "fastai")
     BASE_MODEL_ID: str = os.getenv("BASE_MODEL_ID", "stabilityai/stable-diffusion-xl-base-1.0")
     LIGHTNING_REPO: str = os.getenv("LIGHTNING_REPO", "ByteDance/SDXL-Lightning")
     LIGHTNING_WEIGHTS: str = os.getenv("LIGHTNING_WEIGHTS", "sdxl_lightning_8step_unet.safetensors")
@@ -36,6 +37,10 @@ class Settings(BaseSettings):
     CONTROLNET_SCALE: float = float(os.getenv("CONTROLNET_SCALE", "1.0"))
     CAPTION_PREFIX: str = os.getenv("CAPTION_PREFIX", "a photography of")
     COLORIZE_SEED: int = int(os.getenv("COLORIZE_SEED", "123"))
+    FASTAI_OUTPUT_CAPTION: str = os.getenv(
+        "FASTAI_OUTPUT_CAPTION",
+        "Colorized using GAN-Colorization-Model"
+    )
     INFERENCE_PROVIDER: str = os.getenv("INFERENCE_PROVIDER", "hf-inference")
     INFERENCE_TIMEOUT: int = int(os.getenv("INFERENCE_TIMEOUT", "180"))
 
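A quick sketch of how these settings resolve at runtime. Because each default calls os.getenv() at class-definition time, environment overrides must be in place before app.config is first imported in the process; the setting names mirror the commit, but the override values are illustrative.

    # Illustrative sketch: overriding the new settings via environment variables.
    import os

    # Must happen before app.config is imported anywhere in the process.
    os.environ["MODEL_ID"] = "Hammad712/GAN-Colorization-Model"
    os.environ["FASTAI_OUTPUT_CAPTION"] = "Colorized with our GAN backend"

    from app.config import settings

    assert settings.MODEL_ID == "Hammad712/GAN-Colorization-Model"
    assert settings.MODEL_BACKEND == "fastai"  # default when MODEL_BACKEND is unset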
app/main.py CHANGED
@@ -3,6 +3,9 @@ FastAPI application for image colorization using ColorizeNet model
 with Firebase App Check integration
 """
 import os
+# Set OMP_NUM_THREADS before any torch imports to avoid libgomp warnings
+os.environ.setdefault("OMP_NUM_THREADS", "1")
+
 import uuid
 import logging
 from pathlib import Path
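The new lines in app/main.py encode an ordering constraint: the OpenMP runtime reads OMP_NUM_THREADS when it initialises, which typically happens the first time torch is imported, so setting the variable after that import has no effect. A minimal sketch of the safe order, assuming torch has not been imported earlier in the process:

    import os
    os.environ.setdefault("OMP_NUM_THREADS", "1")  # must run before torch is imported

    import torch  # OpenMP typically initialises here and picks up the value
    print(torch.get_num_threads())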
requirements.txt CHANGED
@@ -14,4 +14,5 @@ firebase-admin>=6.0.0
 pydantic-settings>=2.0.0
 huggingface-hub>=0.16.0
 safetensors>=0.3.0
+fastai>=2.7.13
 
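With the new fastai pin in place, a small startup check can confirm which versions actually resolved in the Space. This snippet is illustrative and not part of the commit; it uses only the standard library.

    # Illustrative sketch: log the resolved versions of the key dependencies.
    from importlib.metadata import version

    for pkg in ("fastai", "huggingface-hub"):
        print(pkg, version(pkg))  # fastai should report >= 2.7.13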