nikunjkdtechnoland committed on
Commit 063372b
1 Parent(s): 526f250

init commit some files

Files changed (40)
  1. .gitignore +110 -0
  2. app.py +14 -0
  3. data/__init__.py +0 -0
  4. iopaint/__init__.py +23 -0
  5. iopaint/__main__.py +4 -0
  6. iopaint/api.py +396 -0
  7. iopaint/batch_processing.py +127 -0
  8. iopaint/benchmark.py +109 -0
  9. iopaint/file_manager/__init__.py +1 -0
  10. iopaint/model/__init__.py +37 -0
  11. iopaint/model/anytext/__init__.py +0 -0
  12. iopaint/model/anytext/anytext_model.py +73 -0
  13. iopaint/model/anytext/anytext_pipeline.py +403 -0
  14. iopaint/model/anytext/anytext_sd15.yaml +99 -0
  15. iopaint/model/anytext/cldm/__init__.py +0 -0
  16. iopaint/model/anytext/ldm/__init__.py +0 -0
  17. iopaint/model/anytext/ldm/models/__init__.py +0 -0
  18. iopaint/model/anytext/ldm/models/autoencoder.py +218 -0
  19. iopaint/model/anytext/ldm/models/diffusion/__init__.py +0 -0
  20. iopaint/model/anytext/ldm/models/diffusion/dpm_solver/__init__.py +1 -0
  21. iopaint/model/anytext/ldm/modules/__init__.py +0 -0
  22. iopaint/model/anytext/ldm/modules/attention.py +360 -0
  23. iopaint/model/anytext/ldm/modules/diffusionmodules/__init__.py +0 -0
  24. iopaint/model/anytext/ldm/modules/distributions/__init__.py +0 -0
  25. iopaint/model/anytext/ldm/modules/encoders/__init__.py +0 -0
  26. iopaint/model/anytext/ocr_recog/__init__.py +0 -0
  27. iopaint/model/base.py +418 -0
  28. iopaint/model/helper/__init__.py +0 -0
  29. iopaint/model/original_sd_configs/__init__.py +19 -0
  30. iopaint/model/power_paint/__init__.py +0 -0
  31. iopaint/plugins/__init__.py +74 -0
  32. iopaint/plugins/anime_seg.py +462 -0
  33. iopaint/plugins/base_plugin.py +30 -0
  34. iopaint/plugins/segment_anything/__init__.py +14 -0
  35. iopaint/plugins/segment_anything/modeling/__init__.py +11 -0
  36. iopaint/plugins/segment_anything/utils/__init__.py +5 -0
  37. iopaint/tests/.gitignore +2 -0
  38. iopaint/tests/__init__.py +0 -0
  39. model/__init__.py +0 -0
  40. utils/__init__.py +0 -0
.gitignore ADDED
@@ -0,0 +1,110 @@
1
+ ### Project ###
2
+ checkpoints/
3
+ pretrained-model/yolov8m-seg.pt
4
+
5
+
6
+ ### Python ###
7
+ # Byte-compiled / optimized / DLL files
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ *.egg-info/
30
+ .installed.cfg
31
+ *.egg
32
+ MANIFEST
33
+
34
+ # PyInstaller
35
+ # Usually these files are written by a python script from a template
36
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
37
+ *.manifest
38
+ *.spec
39
+
40
+ # Installer logs
41
+ pip-log.txt
42
+ pip-delete-this-directory.txt
43
+
44
+ # Unit test / coverage reports
45
+ htmlcov/
46
+ .tox/
47
+ .coverage
48
+ .coverage.*
49
+ .cache
50
+ nosetests.xml
51
+ coverage.xml
52
+ *.cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # pyenv
82
+ .python-version
83
+
84
+ # celery beat schedule file
85
+ celerybeat-schedule
86
+
87
+ # SageMath parsed files
88
+ *.sage.py
89
+
90
+ # Environments
91
+ .env
92
+ .venv
93
+ env/
94
+ venv/
95
+ ENV/
96
+ env.bak/
97
+ venv.bak/
98
+
99
+ # Spyder project settings
100
+ .spyderproject
101
+ .spyproject
102
+
103
+ # Rope project settings
104
+ .ropeproject
105
+
106
+ # mkdocs documentation
107
+ /site
108
+
109
+ # mypy
110
+ .mypy_cache/
app.py ADDED
@@ -0,0 +1,14 @@
+ import gradio as gr
+ from only_gradio_server import process_images
+
+ # Create Gradio interface
+ iface = gr.Interface(fn=process_images,
+                      inputs=[gr.Image(type='filepath', label='Input Image 1'),
+                              gr.Image(type='filepath', label='Input Image 2', image_mode="RGBA"),
+                              gr.Textbox(label='Replace Object Name')],
+                      outputs='image',
+                      title="Image Processing",
+                      description="Object to Object Replacement")
+
+ # Launch Gradio interface
+ iface.launch()
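Based on the interface definition above, process_images is expected to accept two image file paths and an object name and return an image. A minimal, hedged usage sketch (the file paths and the object name below are placeholders, and only_gradio_server itself is not part of this diff):

from only_gradio_server import process_images

# Illustrative inputs only; actual behaviour depends on the process_images
# implementation, which is not included in this commit.
result = process_images("scene.png", "replacement.png", "chair")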
data/__init__.py ADDED
File without changes
iopaint/__init__.py ADDED
@@ -0,0 +1,23 @@
+ import os
+
+ os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
+ # https://github.com/pytorch/pytorch/issues/27971#issuecomment-1768868068
+ os.environ["ONEDNN_PRIMITIVE_CACHE_CAPACITY"] = "1"
+ os.environ["LRU_CACHE_CAPACITY"] = "1"
+ # Prevent CPU memory leak when running the model on GPU
+ # https://github.com/pytorch/pytorch/issues/98688#issuecomment-1869288431
+ # https://github.com/pytorch/pytorch/issues/108334#issuecomment-1752763633
+ os.environ["TORCH_CUDNN_V8_API_LRU_CACHE_LIMIT"] = "1"
+
+
+ import warnings
+
+ warnings.simplefilter("ignore", UserWarning)
+
+
+ def entry_point():
+     # To make os.environ["XDG_CACHE_HOME"] = args.model_cache_dir work for diffusers
+     # https://github.com/huggingface/diffusers/blob/be99201a567c1ccd841dc16fb24e88f7f239c187/src/diffusers/utils/constants.py#L18
+     from iopaint.cli import typer_app
+
+     typer_app()
iopaint/__main__.py ADDED
@@ -0,0 +1,4 @@
+ from iopaint import entry_point
+
+ if __name__ == "__main__":
+     entry_point()
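With this __main__ module in place, the package can also be run as python -m iopaint, which simply calls entry_point() and dispatches to the Typer CLI imported from iopaint.cli in iopaint/__init__.py.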
iopaint/api.py ADDED
@@ -0,0 +1,396 @@
1
+ import asyncio
2
+ import os
3
+ import threading
4
+ import time
5
+ import traceback
6
+ from pathlib import Path
7
+ from typing import Optional, Dict, List
8
+
9
+ import cv2
10
+ import numpy as np
11
+ import socketio
12
+ import torch
13
+
14
+ try:
15
+ torch._C._jit_override_can_fuse_on_cpu(False)
16
+ torch._C._jit_override_can_fuse_on_gpu(False)
17
+ torch._C._jit_set_texpr_fuser_enabled(False)
18
+ torch._C._jit_set_nvfuser_enabled(False)
19
+ except:
20
+ pass
21
+
22
+
23
+ import uvicorn
24
+ from PIL import Image
25
+ from fastapi import APIRouter, FastAPI, Request, UploadFile
26
+ from fastapi.encoders import jsonable_encoder
27
+ from fastapi.exceptions import HTTPException
28
+ from fastapi.middleware.cors import CORSMiddleware
29
+ from fastapi.responses import JSONResponse, FileResponse, Response
30
+ from fastapi.staticfiles import StaticFiles
31
+ from loguru import logger
32
+ from socketio import AsyncServer
33
+
34
+ from iopaint.file_manager import FileManager
35
+ from iopaint.helper import (
36
+ load_img,
37
+ decode_base64_to_image,
38
+ pil_to_bytes,
39
+ numpy_to_bytes,
40
+ concat_alpha_channel,
41
+ gen_frontend_mask,
42
+ adjust_mask,
43
+ )
44
+ from iopaint.model.utils import torch_gc
45
+ from iopaint.model_manager import ModelManager
46
+ from iopaint.plugins import build_plugins, RealESRGANUpscaler, InteractiveSeg
47
+ from iopaint.plugins.base_plugin import BasePlugin
48
+ from iopaint.plugins.remove_bg import RemoveBG
49
+ from iopaint.schema import (
50
+ GenInfoResponse,
51
+ ApiConfig,
52
+ ServerConfigResponse,
53
+ SwitchModelRequest,
54
+ InpaintRequest,
55
+ RunPluginRequest,
56
+ SDSampler,
57
+ PluginInfo,
58
+ AdjustMaskRequest,
59
+ RemoveBGModel,
60
+ SwitchPluginModelRequest,
61
+ ModelInfo,
62
+ InteractiveSegModel,
63
+ RealESRGANModel,
64
+ )
65
+
66
+ CURRENT_DIR = Path(__file__).parent.absolute().resolve()
67
+ WEB_APP_DIR = CURRENT_DIR / "web_app"
68
+
69
+
70
+ def api_middleware(app: FastAPI):
71
+ rich_available = False
72
+ try:
73
+ if os.environ.get("WEBUI_RICH_EXCEPTIONS", None) is not None:
74
+ import anyio # importing just so it can be placed on silent list
75
+ import starlette # importing just so it can be placed on silent list
76
+ from rich.console import Console
77
+
78
+ console = Console()
79
+ rich_available = True
80
+ except Exception:
81
+ pass
82
+
83
+ def handle_exception(request: Request, e: Exception):
84
+ err = {
85
+ "error": type(e).__name__,
86
+ "detail": vars(e).get("detail", ""),
87
+ "body": vars(e).get("body", ""),
88
+ "errors": str(e),
89
+ }
90
+ if not isinstance(
91
+ e, HTTPException
92
+ ): # do not print backtrace on known httpexceptions
93
+ message = f"API error: {request.method}: {request.url} {err}"
94
+ if rich_available:
95
+ print(message)
96
+ console.print_exception(
97
+ show_locals=True,
98
+ max_frames=2,
99
+ extra_lines=1,
100
+ suppress=[anyio, starlette],
101
+ word_wrap=False,
102
+ width=min([console.width, 200]),
103
+ )
104
+ else:
105
+ traceback.print_exc()
106
+ return JSONResponse(
107
+ status_code=vars(e).get("status_code", 500), content=jsonable_encoder(err)
108
+ )
109
+
110
+ @app.middleware("http")
111
+ async def exception_handling(request: Request, call_next):
112
+ try:
113
+ return await call_next(request)
114
+ except Exception as e:
115
+ return handle_exception(request, e)
116
+
117
+ @app.exception_handler(Exception)
118
+ async def fastapi_exception_handler(request: Request, e: Exception):
119
+ return handle_exception(request, e)
120
+
121
+ @app.exception_handler(HTTPException)
122
+ async def http_exception_handler(request: Request, e: HTTPException):
123
+ return handle_exception(request, e)
124
+
125
+ cors_options = {
126
+ "allow_methods": ["*"],
127
+ "allow_headers": ["*"],
128
+ "allow_origins": ["*"],
129
+ "allow_credentials": True,
130
+ }
131
+ app.add_middleware(CORSMiddleware, **cors_options)
132
+
133
+
134
+ global_sio: AsyncServer = None
135
+
136
+
137
+ def diffuser_callback(pipe, step: int, timestep: int, callback_kwargs: Dict = {}):
138
+ # self: DiffusionPipeline, step: int, timestep: int, callback_kwargs: Dict
139
+ # logger.info(f"diffusion callback: step={step}, timestep={timestep}")
140
+
141
+ # We use an asyncio loop for task processing. Perhaps in the future, we can add a processing queue similar to InvokeAI,
142
+ # but for now let's just start a separate event loop. It shouldn't make a difference for single person use
143
+ asyncio.run(global_sio.emit("diffusion_progress", {"step": step}))
144
+ return {}
145
+
146
+
147
+ class Api:
148
+ def __init__(self, app: FastAPI, config: ApiConfig):
149
+ self.app = app
150
+ self.config = config
151
+ self.router = APIRouter()
152
+ self.queue_lock = threading.Lock()
153
+ api_middleware(self.app)
154
+
155
+ self.file_manager = self._build_file_manager()
156
+ self.plugins = self._build_plugins()
157
+ self.model_manager = self._build_model_manager()
158
+
159
+ # fmt: off
160
+ self.add_api_route("/api/v1/gen-info", self.api_geninfo, methods=["POST"], response_model=GenInfoResponse)
161
+ self.add_api_route("/api/v1/server-config", self.api_server_config, methods=["GET"], response_model=ServerConfigResponse)
162
+ self.add_api_route("/api/v1/model", self.api_current_model, methods=["GET"], response_model=ModelInfo)
163
+ self.add_api_route("/api/v1/model", self.api_switch_model, methods=["POST"], response_model=ModelInfo)
164
+ self.add_api_route("/api/v1/inputimage", self.api_input_image, methods=["GET"])
165
+ self.add_api_route("/api/v1/inpaint", self.api_inpaint, methods=["POST"])
166
+ self.add_api_route("/api/v1/switch_plugin_model", self.api_switch_plugin_model, methods=["POST"])
167
+ self.add_api_route("/api/v1/run_plugin_gen_mask", self.api_run_plugin_gen_mask, methods=["POST"])
168
+ self.add_api_route("/api/v1/run_plugin_gen_image", self.api_run_plugin_gen_image, methods=["POST"])
169
+ self.add_api_route("/api/v1/samplers", self.api_samplers, methods=["GET"])
170
+ self.add_api_route("/api/v1/adjust_mask", self.api_adjust_mask, methods=["POST"])
171
+ self.add_api_route("/api/v1/save_image", self.api_save_image, methods=["POST"])
172
+ self.app.mount("/", StaticFiles(directory=WEB_APP_DIR, html=True), name="assets")
173
+ # fmt: on
174
+
175
+ global global_sio
176
+ self.sio = socketio.AsyncServer(async_mode="asgi", cors_allowed_origins="*")
177
+ self.combined_asgi_app = socketio.ASGIApp(self.sio, self.app)
178
+ self.app.mount("/ws", self.combined_asgi_app)
179
+ global_sio = self.sio
180
+
181
+ def add_api_route(self, path: str, endpoint, **kwargs):
182
+ return self.app.add_api_route(path, endpoint, **kwargs)
183
+
184
+ def api_save_image(self, file: UploadFile):
185
+ filename = file.filename
186
+ origin_image_bytes = file.file.read()
187
+ with open(self.config.output_dir / filename, "wb") as fw:
188
+ fw.write(origin_image_bytes)
189
+
190
+ def api_current_model(self) -> ModelInfo:
191
+ return self.model_manager.current_model
192
+
193
+ def api_switch_model(self, req: SwitchModelRequest) -> ModelInfo:
194
+ if req.name == self.model_manager.name:
195
+ return self.model_manager.current_model
196
+ self.model_manager.switch(req.name)
197
+ return self.model_manager.current_model
198
+
199
+ def api_switch_plugin_model(self, req: SwitchPluginModelRequest):
200
+ if req.plugin_name in self.plugins:
201
+ self.plugins[req.plugin_name].switch_model(req.model_name)
202
+ if req.plugin_name == RemoveBG.name:
203
+ self.config.remove_bg_model = req.model_name
204
+ if req.plugin_name == RealESRGANUpscaler.name:
205
+ self.config.realesrgan_model = req.model_name
206
+ if req.plugin_name == InteractiveSeg.name:
207
+ self.config.interactive_seg_model = req.model_name
208
+ torch_gc()
209
+
210
+ def api_server_config(self) -> ServerConfigResponse:
211
+ plugins = []
212
+ for it in self.plugins.values():
213
+ plugins.append(
214
+ PluginInfo(
215
+ name=it.name,
216
+ support_gen_image=it.support_gen_image,
217
+ support_gen_mask=it.support_gen_mask,
218
+ )
219
+ )
220
+
221
+ return ServerConfigResponse(
222
+ plugins=plugins,
223
+ modelInfos=self.model_manager.scan_models(),
224
+ removeBGModel=self.config.remove_bg_model,
225
+ removeBGModels=RemoveBGModel.values(),
226
+ realesrganModel=self.config.realesrgan_model,
227
+ realesrganModels=RealESRGANModel.values(),
228
+ interactiveSegModel=self.config.interactive_seg_model,
229
+ interactiveSegModels=InteractiveSegModel.values(),
230
+ enableFileManager=self.file_manager is not None,
231
+ enableAutoSaving=self.config.output_dir is not None,
232
+ enableControlnet=self.model_manager.enable_controlnet,
233
+ controlnetMethod=self.model_manager.controlnet_method,
234
+ disableModelSwitch=False,
235
+ isDesktop=False,
236
+ samplers=self.api_samplers(),
237
+ )
238
+
239
+ def api_input_image(self) -> FileResponse:
240
+ if self.config.input and self.config.input.is_file():
241
+ return FileResponse(self.config.input)
242
+ raise HTTPException(status_code=404, detail="Input image not found")
243
+
244
+ def api_geninfo(self, file: UploadFile) -> GenInfoResponse:
245
+ _, _, info = load_img(file.file.read(), return_info=True)
246
+ parts = info.get("parameters", "").split("Negative prompt: ")
247
+ prompt = parts[0].strip()
248
+ negative_prompt = ""
249
+ if len(parts) > 1:
250
+ negative_prompt = parts[1].split("\n")[0].strip()
251
+ return GenInfoResponse(prompt=prompt, negative_prompt=negative_prompt)
252
+
253
+ def api_inpaint(self, req: InpaintRequest):
254
+ image, alpha_channel, infos = decode_base64_to_image(req.image)
255
+ mask, _, _ = decode_base64_to_image(req.mask, gray=True)
256
+
257
+ mask = cv2.threshold(mask, 127, 255, cv2.THRESH_BINARY)[1]
258
+ if image.shape[:2] != mask.shape[:2]:
259
+ raise HTTPException(
260
+ 400,
261
+ detail=f"Image size({image.shape[:2]}) and mask size({mask.shape[:2]}) not match.",
262
+ )
263
+
264
+ if req.paint_by_example_example_image:
265
+ paint_by_example_image, _, _ = decode_base64_to_image(
266
+ req.paint_by_example_example_image
267
+ )
268
+
269
+ start = time.time()
270
+ rgb_np_img = self.model_manager(image, mask, req)
271
+ logger.info(f"process time: {(time.time() - start) * 1000:.2f}ms")
272
+ torch_gc()
273
+
274
+ rgb_np_img = cv2.cvtColor(rgb_np_img.astype(np.uint8), cv2.COLOR_BGR2RGB)
275
+ rgb_res = concat_alpha_channel(rgb_np_img, alpha_channel)
276
+
277
+ ext = "png"
278
+ res_img_bytes = pil_to_bytes(
279
+ Image.fromarray(rgb_res),
280
+ ext=ext,
281
+ quality=self.config.quality,
282
+ infos=infos,
283
+ )
284
+
285
+ asyncio.run(self.sio.emit("diffusion_finish"))
286
+
287
+ return Response(
288
+ content=res_img_bytes,
289
+ media_type=f"image/{ext}",
290
+ headers={"X-Seed": str(req.sd_seed)},
291
+ )
292
+
293
+ def api_run_plugin_gen_image(self, req: RunPluginRequest):
294
+ ext = "png"
295
+ if req.name not in self.plugins:
296
+ raise HTTPException(status_code=422, detail="Plugin not found")
297
+ if not self.plugins[req.name].support_gen_image:
298
+ raise HTTPException(
299
+ status_code=422, detail="Plugin does not support output image"
300
+ )
301
+ rgb_np_img, alpha_channel, infos = decode_base64_to_image(req.image)
302
+ bgr_or_rgba_np_img = self.plugins[req.name].gen_image(rgb_np_img, req)
303
+ torch_gc()
304
+
305
+ if bgr_or_rgba_np_img.shape[2] == 4:
306
+ rgba_np_img = bgr_or_rgba_np_img
307
+ else:
308
+ rgba_np_img = cv2.cvtColor(bgr_or_rgba_np_img, cv2.COLOR_BGR2RGB)
309
+ rgba_np_img = concat_alpha_channel(rgba_np_img, alpha_channel)
310
+
311
+ return Response(
312
+ content=pil_to_bytes(
313
+ Image.fromarray(rgba_np_img),
314
+ ext=ext,
315
+ quality=self.config.quality,
316
+ infos=infos,
317
+ ),
318
+ media_type=f"image/{ext}",
319
+ )
320
+
321
+ def api_run_plugin_gen_mask(self, req: RunPluginRequest):
322
+ if req.name not in self.plugins:
323
+ raise HTTPException(status_code=422, detail="Plugin not found")
324
+ if not self.plugins[req.name].support_gen_mask:
325
+ raise HTTPException(
326
+ status_code=422, detail="Plugin does not support output image"
327
+ )
328
+ rgb_np_img, alpha_channel, infos = decode_base64_to_image(req.image)
329
+ bgr_or_gray_mask = self.plugins[req.name].gen_mask(rgb_np_img, req)
330
+ torch_gc()
331
+ res_mask = gen_frontend_mask(bgr_or_gray_mask)
332
+ return Response(
333
+ content=numpy_to_bytes(res_mask, "png"),
334
+ media_type="image/png",
335
+ )
336
+
337
+ def api_samplers(self) -> List[str]:
338
+ return [member.value for member in SDSampler.__members__.values()]
339
+
340
+ def api_adjust_mask(self, req: AdjustMaskRequest):
341
+ mask, _, _ = decode_base64_to_image(req.mask, gray=True)
342
+ mask = adjust_mask(mask, req.kernel_size, req.operate)
343
+ return Response(content=numpy_to_bytes(mask, "png"), media_type="image/png")
344
+
345
+ def launch(self):
346
+ self.app.include_router(self.router)
347
+ uvicorn.run(
348
+ self.combined_asgi_app,
349
+ host=self.config.host,
350
+ port=self.config.port,
351
+ timeout_keep_alive=999999999,
352
+ )
353
+
354
+ def _build_file_manager(self) -> Optional[FileManager]:
355
+ if self.config.input and self.config.input.is_dir():
356
+ logger.info(
357
+ f"Input is directory, initialize file manager {self.config.input}"
358
+ )
359
+
360
+ return FileManager(
361
+ app=self.app,
362
+ input_dir=self.config.input,
363
+ output_dir=self.config.output_dir,
364
+ )
365
+ return None
366
+
367
+ def _build_plugins(self) -> Dict[str, BasePlugin]:
368
+ return build_plugins(
369
+ self.config.enable_interactive_seg,
370
+ self.config.interactive_seg_model,
371
+ self.config.interactive_seg_device,
372
+ self.config.enable_remove_bg,
373
+ self.config.remove_bg_model,
374
+ self.config.enable_anime_seg,
375
+ self.config.enable_realesrgan,
376
+ self.config.realesrgan_device,
377
+ self.config.realesrgan_model,
378
+ self.config.enable_gfpgan,
379
+ self.config.gfpgan_device,
380
+ self.config.enable_restoreformer,
381
+ self.config.restoreformer_device,
382
+ self.config.no_half,
383
+ )
384
+
385
+ def _build_model_manager(self):
386
+ return ModelManager(
387
+ name=self.config.model,
388
+ device=torch.device(self.config.device),
389
+ no_half=self.config.no_half,
390
+ low_mem=self.config.low_mem,
391
+ disable_nsfw=self.config.disable_nsfw_checker,
392
+ sd_cpu_textencoder=self.config.cpu_textencoder,
393
+ local_files_only=self.config.local_files_only,
394
+ cpu_offload=self.config.cpu_offload,
395
+ callback=diffuser_callback,
396
+ )
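A hedged sketch of exercising one of the routes registered above once Api.launch() is serving; the host and port come from ApiConfig, so the values below are assumptions:

import requests

# /api/v1/samplers is registered with GET and returns the SDSampler values
# (see api_samplers above).
resp = requests.get("http://127.0.0.1:8080/api/v1/samplers")
print(resp.json())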
iopaint/batch_processing.py ADDED
@@ -0,0 +1,127 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Dict, Optional
4
+
5
+ import cv2
6
+ import psutil
7
+ from PIL import Image
8
+ from loguru import logger
9
+ from rich.console import Console
10
+ from rich.progress import (
11
+ Progress,
12
+ SpinnerColumn,
13
+ TimeElapsedColumn,
14
+ MofNCompleteColumn,
15
+ TextColumn,
16
+ BarColumn,
17
+ TaskProgressColumn,
18
+ )
19
+
20
+ from iopaint.helper import pil_to_bytes
21
+ from iopaint.model.utils import torch_gc
22
+ from iopaint.model_manager import ModelManager
23
+ from iopaint.schema import InpaintRequest
24
+
25
+
26
+ def glob_images(path: Path) -> Dict[str, Path]:
27
+ # png/jpg/jpeg
28
+ if path.is_file():
29
+ return {path.stem: path}
30
+ elif path.is_dir():
31
+ res = {}
32
+ for it in path.glob("*.*"):
33
+ if it.suffix.lower() in [".png", ".jpg", ".jpeg"]:
34
+ res[it.stem] = it
35
+ return res
36
+
37
+
38
+ def batch_inpaint(
39
+ model: str,
40
+ device,
41
+ image: Path,
42
+ mask: Path,
43
+ output: Path,
44
+ config: Optional[Path] = None,
45
+ concat: bool = False,
46
+ ):
47
+ if image.is_dir() and output.is_file():
48
+ logger.error(
49
+ f"invalid --output: when image is a directory, output should be a directory"
50
+ )
51
+ exit(-1)
52
+ output.mkdir(parents=True, exist_ok=True)
53
+
54
+ image_paths = glob_images(image)
55
+ mask_paths = glob_images(mask)
56
+ if len(image_paths) == 0:
57
+ logger.error(f"invalid --image: empty image folder")
58
+ exit(-1)
59
+ if len(mask_paths) == 0:
60
+ logger.error(f"invalid --mask: empty mask folder")
61
+ exit(-1)
62
+
63
+ if config is None:
64
+ inpaint_request = InpaintRequest()
65
+ logger.info(f"Using default config: {inpaint_request}")
66
+ else:
67
+ with open(config, "r", encoding="utf-8") as f:
68
+ inpaint_request = InpaintRequest(**json.load(f))
69
+
70
+ model_manager = ModelManager(name=model, device=device)
71
+ first_mask = list(mask_paths.values())[0]
72
+
73
+ console = Console()
74
+
75
+ with Progress(
76
+ SpinnerColumn(),
77
+ TextColumn("[progress.description]{task.description}"),
78
+ BarColumn(),
79
+ TaskProgressColumn(),
80
+ MofNCompleteColumn(),
81
+ TimeElapsedColumn(),
82
+ console=console,
83
+ transient=False,
84
+ ) as progress:
85
+ task = progress.add_task("Batch processing...", total=len(image_paths))
86
+ for stem, image_p in image_paths.items():
87
+ if stem not in mask_paths and mask.is_dir():
88
+ progress.log(f"mask for {image_p} not found")
89
+ progress.update(task, advance=1)
90
+ continue
91
+ mask_p = mask_paths.get(stem, first_mask)
92
+
93
+ infos = Image.open(image_p).info
94
+
95
+ img = cv2.imread(str(image_p))
96
+ img = cv2.cvtColor(img, cv2.COLOR_BGRA2RGB)
97
+ mask_img = cv2.imread(str(mask_p), cv2.IMREAD_GRAYSCALE)
98
+ if mask_img.shape[:2] != img.shape[:2]:
99
+ progress.log(
100
+ f"resize mask {mask_p.name} to image {image_p.name} size: {img.shape[:2]}"
101
+ )
102
+ mask_img = cv2.resize(
103
+ mask_img,
104
+ (img.shape[1], img.shape[0]),
105
+ interpolation=cv2.INTER_NEAREST,
106
+ )
107
+ mask_img[mask_img >= 127] = 255
108
+ mask_img[mask_img < 127] = 0
109
+
110
+ # bgr
111
+ inpaint_result = model_manager(img, mask_img, inpaint_request)
112
+ inpaint_result = cv2.cvtColor(inpaint_result, cv2.COLOR_BGR2RGB)
113
+ if concat:
114
+ mask_img = cv2.cvtColor(mask_img, cv2.COLOR_GRAY2RGB)
115
+ inpaint_result = cv2.hconcat([img, mask_img, inpaint_result])
116
+
117
+ img_bytes = pil_to_bytes(Image.fromarray(inpaint_result), "png", 100, infos)
118
+ save_p = output / f"{stem}.png"
119
+ with open(save_p, "wb") as fw:
120
+ fw.write(img_bytes)
121
+
122
+ progress.update(task, advance=1)
123
+ torch_gc()
124
+ # pid = psutil.Process().pid
125
+ # memory_info = psutil.Process(pid).memory_info()
126
+ # memory_in_mb = memory_info.rss / (1024 * 1024)
127
+ # print(f"原图大小:{img.shape},当前进程的内存占用:{memory_in_mb}MB")
iopaint/benchmark.py ADDED
@@ -0,0 +1,109 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import argparse
4
+ import os
5
+ import time
6
+
7
+ import numpy as np
8
+ import nvidia_smi
9
+ import psutil
10
+ import torch
11
+
12
+ from iopaint.model_manager import ModelManager
13
+ from iopaint.schema import InpaintRequest, HDStrategy, SDSampler
14
+
15
+ try:
16
+ torch._C._jit_override_can_fuse_on_cpu(False)
17
+ torch._C._jit_override_can_fuse_on_gpu(False)
18
+ torch._C._jit_set_texpr_fuser_enabled(False)
19
+ torch._C._jit_set_nvfuser_enabled(False)
20
+ except:
21
+ pass
22
+
23
+ NUM_THREADS = str(4)
24
+
25
+ os.environ["OMP_NUM_THREADS"] = NUM_THREADS
26
+ os.environ["OPENBLAS_NUM_THREADS"] = NUM_THREADS
27
+ os.environ["MKL_NUM_THREADS"] = NUM_THREADS
28
+ os.environ["VECLIB_MAXIMUM_THREADS"] = NUM_THREADS
29
+ os.environ["NUMEXPR_NUM_THREADS"] = NUM_THREADS
30
+ if os.environ.get("CACHE_DIR"):
31
+ os.environ["TORCH_HOME"] = os.environ["CACHE_DIR"]
32
+
33
+
34
+ def run_model(model, size):
35
+ # RGB
36
+ image = np.random.randint(0, 256, (size[0], size[1], 3)).astype(np.uint8)
37
+ mask = np.random.randint(0, 255, size).astype(np.uint8)
38
+
39
+ config = InpaintRequest(
40
+ ldm_steps=2,
41
+ hd_strategy=HDStrategy.ORIGINAL,
42
+ hd_strategy_crop_margin=128,
43
+ hd_strategy_crop_trigger_size=128,
44
+ hd_strategy_resize_limit=128,
45
+ prompt="a fox is sitting on a bench",
46
+ sd_steps=5,
47
+ sd_sampler=SDSampler.ddim,
48
+ )
49
+ model(image, mask, config)
50
+
51
+
52
+ def benchmark(model, times: int, empty_cache: bool):
53
+ sizes = [(512, 512)]
54
+
55
+ nvidia_smi.nvmlInit()
56
+ device_id = 0
57
+ handle = nvidia_smi.nvmlDeviceGetHandleByIndex(device_id)
58
+
59
+ def format(metrics):
60
+ return f"{np.mean(metrics):.2f} ± {np.std(metrics):.2f}"
61
+
62
+ process = psutil.Process(os.getpid())
63
+ # Report GPU memory and RAM usage metrics for each size
64
+ for size in sizes:
65
+ torch.cuda.empty_cache()
66
+ time_metrics = []
67
+ cpu_metrics = []
68
+ memory_metrics = []
69
+ gpu_memory_metrics = []
70
+ for _ in range(times):
71
+ start = time.time()
72
+ run_model(model, size)
73
+ torch.cuda.synchronize()
74
+
75
+ # cpu_metrics.append(process.cpu_percent())
76
+ time_metrics.append((time.time() - start) * 1000)
77
+ memory_metrics.append(process.memory_info().rss / 1024 / 1024)
78
+ gpu_memory_metrics.append(
79
+ nvidia_smi.nvmlDeviceGetMemoryInfo(handle).used / 1024 / 1024
80
+ )
81
+
82
+ print(f"size: {size}".center(80, "-"))
83
+ # print(f"cpu: {format(cpu_metrics)}")
84
+ print(f"latency: {format(time_metrics)}ms")
85
+ print(f"memory: {format(memory_metrics)} MB")
86
+ print(f"gpu memory: {format(gpu_memory_metrics)} MB")
87
+
88
+ nvidia_smi.nvmlShutdown()
89
+
90
+
91
+ def get_args_parser():
92
+ parser = argparse.ArgumentParser()
93
+ parser.add_argument("--name")
94
+ parser.add_argument("--device", default="cuda", type=str)
95
+ parser.add_argument("--times", default=10, type=int)
96
+ parser.add_argument("--empty-cache", action="store_true")
97
+ return parser.parse_args()
98
+
99
+
100
+ if __name__ == "__main__":
101
+ args = get_args_parser()
102
+ device = torch.device(args.device)
103
+ model = ModelManager(
104
+ name=args.name,
105
+ device=device,
106
+ disable_nsfw=True,
107
+ sd_cpu_textencoder=True,
108
+ )
109
+ benchmark(model, args.times, args.empty_cache)
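Going by the argument parser above, the benchmark is driven by --name (model name), --device (default "cuda"), --times (default 10), and --empty-cache; e.g. python3 iopaint/benchmark.py --name <model> --device cuda --times 10, where <model> is any name the ModelManager accepts.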
iopaint/file_manager/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .file_manager import FileManager
iopaint/model/__init__.py ADDED
@@ -0,0 +1,37 @@
+ from .anytext.anytext_model import AnyText
+ from .controlnet import ControlNet
+ from .fcf import FcF
+ from .instruct_pix2pix import InstructPix2Pix
+ from .kandinsky import Kandinsky22
+ from .lama import LaMa
+ from .ldm import LDM
+ from .manga import Manga
+ from .mat import MAT
+ from .mi_gan import MIGAN
+ from .opencv2 import OpenCV2
+ from .paint_by_example import PaintByExample
+ from .power_paint.power_paint import PowerPaint
+ from .sd import SD15, SD2, Anything4, RealisticVision14, SD
+ from .sdxl import SDXL
+ from .zits import ZITS
+
+ models = {
+     LaMa.name: LaMa,
+     LDM.name: LDM,
+     ZITS.name: ZITS,
+     MAT.name: MAT,
+     FcF.name: FcF,
+     OpenCV2.name: OpenCV2,
+     Manga.name: Manga,
+     MIGAN.name: MIGAN,
+     SD15.name: SD15,
+     Anything4.name: Anything4,
+     RealisticVision14.name: RealisticVision14,
+     SD2.name: SD2,
+     PaintByExample.name: PaintByExample,
+     InstructPix2Pix.name: InstructPix2Pix,
+     Kandinsky22.name: Kandinsky22,
+     SDXL.name: SDXL,
+     PowerPaint.name: PowerPaint,
+     AnyText.name: AnyText,
+ }
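A hedged sketch of using the models registry defined above to resolve a model class by its registered name; instantiation arguments (device, etc.) follow ModelManager's usage and are omitted here:

from iopaint.model import LaMa, models

# Look up the class registered under LaMa.name.
lama_cls = models[LaMa.name]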
iopaint/model/anytext/__init__.py ADDED
File without changes
iopaint/model/anytext/anytext_model.py ADDED
@@ -0,0 +1,73 @@
1
+ import torch
2
+ from huggingface_hub import hf_hub_download
3
+
4
+ from iopaint.const import ANYTEXT_NAME
5
+ from iopaint.model.anytext.anytext_pipeline import AnyTextPipeline
6
+ from iopaint.model.base import DiffusionInpaintModel
7
+ from iopaint.model.utils import get_torch_dtype, is_local_files_only
8
+ from iopaint.schema import InpaintRequest
9
+
10
+
11
+ class AnyText(DiffusionInpaintModel):
12
+ name = ANYTEXT_NAME
13
+ pad_mod = 64
14
+ is_erase_model = False
15
+
16
+ @staticmethod
17
+ def download(local_files_only=False):
18
+ hf_hub_download(
19
+ repo_id=ANYTEXT_NAME,
20
+ filename="model_index.json",
21
+ local_files_only=local_files_only,
22
+ )
23
+ ckpt_path = hf_hub_download(
24
+ repo_id=ANYTEXT_NAME,
25
+ filename="pytorch_model.fp16.safetensors",
26
+ local_files_only=local_files_only,
27
+ )
28
+ font_path = hf_hub_download(
29
+ repo_id=ANYTEXT_NAME,
30
+ filename="SourceHanSansSC-Medium.otf",
31
+ local_files_only=local_files_only,
32
+ )
33
+ return ckpt_path, font_path
34
+
35
+ def init_model(self, device, **kwargs):
36
+ local_files_only = is_local_files_only(**kwargs)
37
+ ckpt_path, font_path = self.download(local_files_only)
38
+ use_gpu, torch_dtype = get_torch_dtype(device, kwargs.get("no_half", False))
39
+ self.model = AnyTextPipeline(
40
+ ckpt_path=ckpt_path,
41
+ font_path=font_path,
42
+ device=device,
43
+ use_fp16=torch_dtype == torch.float16,
44
+ )
45
+ self.callback = kwargs.pop("callback", None)
46
+
47
+ def forward(self, image, mask, config: InpaintRequest):
48
+ """Input image and output image have same size
49
+ image: [H, W, C] RGB
50
+ mask: [H, W, 1] 255 means area to inpaint
51
+ return: BGR IMAGE
52
+ """
53
+ height, width = image.shape[:2]
54
+ mask = mask.astype("float32") / 255.0
55
+ masked_image = image * (1 - mask)
56
+
57
+ # list of rgb ndarray
58
+ results, rtn_code, rtn_warning = self.model(
59
+ image=image,
60
+ masked_image=masked_image,
61
+ prompt=config.prompt,
62
+ negative_prompt=config.negative_prompt,
63
+ num_inference_steps=config.sd_steps,
64
+ strength=config.sd_strength,
65
+ guidance_scale=config.sd_guidance_scale,
66
+ height=height,
67
+ width=width,
68
+ seed=config.sd_seed,
69
+ sort_priority="y",
70
+ callback=self.callback
71
+ )
72
+ inpainted_rgb_image = results[0][..., ::-1]
73
+ return inpainted_rgb_image
iopaint/model/anytext/anytext_pipeline.py ADDED
@@ -0,0 +1,403 @@
1
+ """
2
+ AnyText: Multilingual Visual Text Generation And Editing
3
+ Paper: https://arxiv.org/abs/2311.03054
4
+ Code: https://github.com/tyxsspa/AnyText
5
+ Copyright (c) Alibaba, Inc. and its affiliates.
6
+ """
7
+ import os
8
+ from pathlib import Path
9
+
10
+ from iopaint.model.utils import set_seed
11
+ from safetensors.torch import load_file
12
+
13
+ os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
14
+ import torch
15
+ import re
16
+ import numpy as np
17
+ import cv2
18
+ import einops
19
+ from PIL import ImageFont
20
+ from iopaint.model.anytext.cldm.model import create_model, load_state_dict
21
+ from iopaint.model.anytext.cldm.ddim_hacked import DDIMSampler
22
+ from iopaint.model.anytext.utils import (
23
+ check_channels,
24
+ draw_glyph,
25
+ draw_glyph2,
26
+ )
27
+
28
+
29
+ BBOX_MAX_NUM = 8
30
+ PLACE_HOLDER = "*"
31
+ max_chars = 20
32
+
33
+ ANYTEXT_CFG = os.path.join(
34
+ os.path.dirname(os.path.abspath(__file__)), "anytext_sd15.yaml"
35
+ )
36
+
37
+
38
+ def check_limits(tensor):
39
+ float16_min = torch.finfo(torch.float16).min
40
+ float16_max = torch.finfo(torch.float16).max
41
+
42
+ # Check whether any value in the tensor is below the float16 minimum or above the float16 maximum
43
+ is_below_min = (tensor < float16_min).any()
44
+ is_above_max = (tensor > float16_max).any()
45
+
46
+ return is_below_min or is_above_max
47
+
48
+
49
+ class AnyTextPipeline:
50
+ def __init__(self, ckpt_path, font_path, device, use_fp16=True):
51
+ self.cfg_path = ANYTEXT_CFG
52
+ self.font_path = font_path
53
+ self.use_fp16 = use_fp16
54
+ self.device = device
55
+
56
+ self.font = ImageFont.truetype(font_path, size=60)
57
+ self.model = create_model(
58
+ self.cfg_path,
59
+ device=self.device,
60
+ use_fp16=self.use_fp16,
61
+ )
62
+ if self.use_fp16:
63
+ self.model = self.model.half()
64
+ if Path(ckpt_path).suffix == ".safetensors":
65
+ state_dict = load_file(ckpt_path, device="cpu")
66
+ else:
67
+ state_dict = load_state_dict(ckpt_path, location="cpu")
68
+ self.model.load_state_dict(state_dict, strict=False)
69
+ self.model = self.model.eval().to(self.device)
70
+ self.ddim_sampler = DDIMSampler(self.model, device=self.device)
71
+
72
+ def __call__(
73
+ self,
74
+ prompt: str,
75
+ negative_prompt: str,
76
+ image: np.ndarray,
77
+ masked_image: np.ndarray,
78
+ num_inference_steps: int,
79
+ strength: float,
80
+ guidance_scale: float,
81
+ height: int,
82
+ width: int,
83
+ seed: int,
84
+ sort_priority: str = "y",
85
+ callback=None,
86
+ ):
87
+ """
88
+
89
+ Args:
90
+ prompt:
91
+ negative_prompt:
92
+ image:
93
+ masked_image:
94
+ num_inference_steps:
95
+ strength:
96
+ guidance_scale:
97
+ height:
98
+ width:
99
+ seed:
100
+ sort_priority: x: left-right, y: top-down
101
+
102
+ Returns:
103
+ result: list of images in numpy.ndarray format
104
+ rst_code: 0: normal -1: error 1:warning
105
+ rst_info: string of error or warning
106
+
107
+ """
108
+ set_seed(seed)
109
+ str_warning = ""
110
+
111
+ mode = "text-editing"
112
+ revise_pos = False
113
+ img_count = 1
114
+ ddim_steps = num_inference_steps
115
+ w = width
116
+ h = height
117
+ strength = strength
118
+ cfg_scale = guidance_scale
119
+ eta = 0.0
120
+
121
+ prompt, texts = self.modify_prompt(prompt)
122
+ if prompt is None and texts is None:
123
+ return (
124
+ None,
125
+ -1,
126
+ "You have input Chinese prompt but the translator is not loaded!",
127
+ "",
128
+ )
129
+ n_lines = len(texts)
130
+ if mode in ["text-generation", "gen"]:
131
+ edit_image = np.ones((h, w, 3)) * 127.5 # empty mask image
132
+ elif mode in ["text-editing", "edit"]:
133
+ if masked_image is None or image is None:
134
+ return (
135
+ None,
136
+ -1,
137
+ "Reference image and position image are needed for text editing!",
138
+ "",
139
+ )
140
+ if isinstance(image, str):
141
+ image = cv2.imread(image)[..., ::-1]
142
+ assert image is not None, f"Can't read ori_image image from{image}!"
143
+ elif isinstance(image, torch.Tensor):
144
+ image = image.cpu().numpy()
145
+ else:
146
+ assert isinstance(
147
+ image, np.ndarray
148
+ ), f"Unknown format of ori_image: {type(image)}"
149
+ edit_image = image.clip(1, 255) # for mask reason
150
+ edit_image = check_channels(edit_image)
151
+ # edit_image = resize_image(
152
+ # edit_image, max_length=768
153
+ # ) # make w h multiple of 64, resize if w or h > max_length
154
+ h, w = edit_image.shape[:2] # change h, w by input ref_img
155
+ # preprocess pos_imgs(if numpy, make sure it's white pos in black bg)
156
+ if masked_image is None:
157
+ pos_imgs = np.zeros((w, h, 1))
158
+ if isinstance(masked_image, str):
159
+ masked_image = cv2.imread(masked_image)[..., ::-1]
160
+ assert (
161
+ masked_image is not None
162
+ ), f"Can't read draw_pos image from{masked_image}!"
163
+ pos_imgs = 255 - masked_image
164
+ elif isinstance(masked_image, torch.Tensor):
165
+ pos_imgs = masked_image.cpu().numpy()
166
+ else:
167
+ assert isinstance(
168
+ masked_image, np.ndarray
169
+ ), f"Unknown format of draw_pos: {type(masked_image)}"
170
+ pos_imgs = 255 - masked_image
171
+ pos_imgs = pos_imgs[..., 0:1]
172
+ pos_imgs = cv2.convertScaleAbs(pos_imgs)
173
+ _, pos_imgs = cv2.threshold(pos_imgs, 254, 255, cv2.THRESH_BINARY)
174
+ # separate pos_imgs
175
+ pos_imgs = self.separate_pos_imgs(pos_imgs, sort_priority)
176
+ if len(pos_imgs) == 0:
177
+ pos_imgs = [np.zeros((h, w, 1))]
178
+ if len(pos_imgs) < n_lines:
179
+ if n_lines == 1 and texts[0] == " ":
180
+ pass # text-to-image without text
181
+ else:
182
+ raise RuntimeError(
183
+ f"{n_lines} text line to draw from prompt, not enough mask area({len(pos_imgs)}) on images"
184
+ )
185
+ elif len(pos_imgs) > n_lines:
186
+ str_warning = f"Warning: found {len(pos_imgs)} positions that > needed {n_lines} from prompt."
187
+ # get pre_pos, poly_list, hint that needed for anytext
188
+ pre_pos = []
189
+ poly_list = []
190
+ for input_pos in pos_imgs:
191
+ if input_pos.mean() != 0:
192
+ input_pos = (
193
+ input_pos[..., np.newaxis]
194
+ if len(input_pos.shape) == 2
195
+ else input_pos
196
+ )
197
+ poly, pos_img = self.find_polygon(input_pos)
198
+ pre_pos += [pos_img / 255.0]
199
+ poly_list += [poly]
200
+ else:
201
+ pre_pos += [np.zeros((h, w, 1))]
202
+ poly_list += [None]
203
+ np_hint = np.sum(pre_pos, axis=0).clip(0, 1)
204
+ # prepare info dict
205
+ info = {}
206
+ info["glyphs"] = []
207
+ info["gly_line"] = []
208
+ info["positions"] = []
209
+ info["n_lines"] = [len(texts)] * img_count
210
+ gly_pos_imgs = []
211
+ for i in range(len(texts)):
212
+ text = texts[i]
213
+ if len(text) > max_chars:
214
+ str_warning = (
215
+ f'"{text}" length > max_chars: {max_chars}, will be cut off...'
216
+ )
217
+ text = text[:max_chars]
218
+ gly_scale = 2
219
+ if pre_pos[i].mean() != 0:
220
+ gly_line = draw_glyph(self.font, text)
221
+ glyphs = draw_glyph2(
222
+ self.font,
223
+ text,
224
+ poly_list[i],
225
+ scale=gly_scale,
226
+ width=w,
227
+ height=h,
228
+ add_space=False,
229
+ )
230
+ gly_pos_img = cv2.drawContours(
231
+ glyphs * 255, [poly_list[i] * gly_scale], 0, (255, 255, 255), 1
232
+ )
233
+ if revise_pos:
234
+ resize_gly = cv2.resize(
235
+ glyphs, (pre_pos[i].shape[1], pre_pos[i].shape[0])
236
+ )
237
+ new_pos = cv2.morphologyEx(
238
+ (resize_gly * 255).astype(np.uint8),
239
+ cv2.MORPH_CLOSE,
240
+ kernel=np.ones(
241
+ (resize_gly.shape[0] // 10, resize_gly.shape[1] // 10),
242
+ dtype=np.uint8,
243
+ ),
244
+ iterations=1,
245
+ )
246
+ new_pos = (
247
+ new_pos[..., np.newaxis] if len(new_pos.shape) == 2 else new_pos
248
+ )
249
+ contours, _ = cv2.findContours(
250
+ new_pos, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
251
+ )
252
+ if len(contours) != 1:
253
+ str_warning = f"Fail to revise position {i} to bounding rect, remain position unchanged..."
254
+ else:
255
+ rect = cv2.minAreaRect(contours[0])
256
+ poly = np.int0(cv2.boxPoints(rect))
257
+ pre_pos[i] = (
258
+ cv2.drawContours(new_pos, [poly], -1, 255, -1) / 255.0
259
+ )
260
+ gly_pos_img = cv2.drawContours(
261
+ glyphs * 255, [poly * gly_scale], 0, (255, 255, 255), 1
262
+ )
263
+ gly_pos_imgs += [gly_pos_img] # for show
264
+ else:
265
+ glyphs = np.zeros((h * gly_scale, w * gly_scale, 1))
266
+ gly_line = np.zeros((80, 512, 1))
267
+ gly_pos_imgs += [
268
+ np.zeros((h * gly_scale, w * gly_scale, 1))
269
+ ] # for show
270
+ pos = pre_pos[i]
271
+ info["glyphs"] += [self.arr2tensor(glyphs, img_count)]
272
+ info["gly_line"] += [self.arr2tensor(gly_line, img_count)]
273
+ info["positions"] += [self.arr2tensor(pos, img_count)]
274
+ # get masked_x
275
+ masked_img = ((edit_image.astype(np.float32) / 127.5) - 1.0) * (1 - np_hint)
276
+ masked_img = np.transpose(masked_img, (2, 0, 1))
277
+ masked_img = torch.from_numpy(masked_img.copy()).float().to(self.device)
278
+ if self.use_fp16:
279
+ masked_img = masked_img.half()
280
+ encoder_posterior = self.model.encode_first_stage(masked_img[None, ...])
281
+ masked_x = self.model.get_first_stage_encoding(encoder_posterior).detach()
282
+ if self.use_fp16:
283
+ masked_x = masked_x.half()
284
+ info["masked_x"] = torch.cat([masked_x for _ in range(img_count)], dim=0)
285
+
286
+ hint = self.arr2tensor(np_hint, img_count)
287
+ cond = self.model.get_learned_conditioning(
288
+ dict(
289
+ c_concat=[hint],
290
+ c_crossattn=[[prompt] * img_count],
291
+ text_info=info,
292
+ )
293
+ )
294
+ un_cond = self.model.get_learned_conditioning(
295
+ dict(
296
+ c_concat=[hint],
297
+ c_crossattn=[[negative_prompt] * img_count],
298
+ text_info=info,
299
+ )
300
+ )
301
+ shape = (4, h // 8, w // 8)
302
+ self.model.control_scales = [strength] * 13
303
+ samples, intermediates = self.ddim_sampler.sample(
304
+ ddim_steps,
305
+ img_count,
306
+ shape,
307
+ cond,
308
+ verbose=False,
309
+ eta=eta,
310
+ unconditional_guidance_scale=cfg_scale,
311
+ unconditional_conditioning=un_cond,
312
+ callback=callback
313
+ )
314
+ if self.use_fp16:
315
+ samples = samples.half()
316
+ x_samples = self.model.decode_first_stage(samples)
317
+ x_samples = (
318
+ (einops.rearrange(x_samples, "b c h w -> b h w c") * 127.5 + 127.5)
319
+ .cpu()
320
+ .numpy()
321
+ .clip(0, 255)
322
+ .astype(np.uint8)
323
+ )
324
+ results = [x_samples[i] for i in range(img_count)]
325
+ # if (
326
+ # mode == "edit" and False
327
+ # ): # replace background in text editing but not ideal yet
328
+ # results = [r * np_hint + edit_image * (1 - np_hint) for r in results]
329
+ # results = [r.clip(0, 255).astype(np.uint8) for r in results]
330
+ # if len(gly_pos_imgs) > 0 and show_debug:
331
+ # glyph_bs = np.stack(gly_pos_imgs, axis=2)
332
+ # glyph_img = np.sum(glyph_bs, axis=2) * 255
333
+ # glyph_img = glyph_img.clip(0, 255).astype(np.uint8)
334
+ # results += [np.repeat(glyph_img, 3, axis=2)]
335
+ rst_code = 1 if str_warning else 0
336
+ return results, rst_code, str_warning
337
+
338
+ def modify_prompt(self, prompt):
339
+ prompt = prompt.replace("“", '"')
340
+ prompt = prompt.replace("”", '"')
341
+ p = '"(.*?)"'
342
+ strs = re.findall(p, prompt)
343
+ if len(strs) == 0:
344
+ strs = [" "]
345
+ else:
346
+ for s in strs:
347
+ prompt = prompt.replace(f'"{s}"', f" {PLACE_HOLDER} ", 1)
348
+ # if self.is_chinese(prompt):
349
+ # if self.trans_pipe is None:
350
+ # return None, None
351
+ # old_prompt = prompt
352
+ # prompt = self.trans_pipe(input=prompt + " .")["translation"][:-1]
353
+ # print(f"Translate: {old_prompt} --> {prompt}")
354
+ return prompt, strs
355
+
356
+ # def is_chinese(self, text):
357
+ # text = checker._clean_text(text)
358
+ # for char in text:
359
+ # cp = ord(char)
360
+ # if checker._is_chinese_char(cp):
361
+ # return True
362
+ # return False
363
+
364
+ def separate_pos_imgs(self, img, sort_priority, gap=102):
365
+ num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(img)
366
+ components = []
367
+ for label in range(1, num_labels):
368
+ component = np.zeros_like(img)
369
+ component[labels == label] = 255
370
+ components.append((component, centroids[label]))
371
+ if sort_priority == "y":
372
+ fir, sec = 1, 0 # top-down first
373
+ elif sort_priority == "x":
374
+ fir, sec = 0, 1 # left-right first
375
+ components.sort(key=lambda c: (c[1][fir] // gap, c[1][sec] // gap))
376
+ sorted_components = [c[0] for c in components]
377
+ return sorted_components
378
+
379
+ def find_polygon(self, image, min_rect=False):
380
+ contours, hierarchy = cv2.findContours(
381
+ image, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
382
+ )
383
+ max_contour = max(contours, key=cv2.contourArea) # get contour with max area
384
+ if min_rect:
385
+ # get minimum enclosing rectangle
386
+ rect = cv2.minAreaRect(max_contour)
387
+ poly = np.int0(cv2.boxPoints(rect))
388
+ else:
389
+ # get approximate polygon
390
+ epsilon = 0.01 * cv2.arcLength(max_contour, True)
391
+ poly = cv2.approxPolyDP(max_contour, epsilon, True)
392
+ n, _, xy = poly.shape
393
+ poly = poly.reshape(n, xy)
394
+ cv2.drawContours(image, [poly], -1, 255, -1)
395
+ return poly, image
396
+
397
+ def arr2tensor(self, arr, bs):
398
+ arr = np.transpose(arr, (2, 0, 1))
399
+ _arr = torch.from_numpy(arr.copy()).float().to(self.device)
400
+ if self.use_fp16:
401
+ _arr = _arr.half()
402
+ _arr = torch.stack([_arr for _ in range(bs)], dim=0)
403
+ return _arr
iopaint/model/anytext/anytext_sd15.yaml ADDED
@@ -0,0 +1,99 @@
1
+ model:
2
+ target: iopaint.model.anytext.cldm.cldm.ControlLDM
3
+ params:
4
+ linear_start: 0.00085
5
+ linear_end: 0.0120
6
+ num_timesteps_cond: 1
7
+ log_every_t: 200
8
+ timesteps: 1000
9
+ first_stage_key: "img"
10
+ cond_stage_key: "caption"
11
+ control_key: "hint"
12
+ glyph_key: "glyphs"
13
+ position_key: "positions"
14
+ image_size: 64
15
+ channels: 4
16
+ cond_stage_trainable: true # need be true when embedding_manager is valid
17
+ conditioning_key: crossattn
18
+ monitor: val/loss_simple_ema
19
+ scale_factor: 0.18215
20
+ use_ema: False
21
+ only_mid_control: False
22
+ loss_alpha: 0 # perceptual loss, 0.003
23
+ loss_beta: 0 # ctc loss
24
+ latin_weight: 1.0 # latin text line may need smaller weight
25
+ with_step_weight: true
26
+ use_vae_upsample: true
27
+ embedding_manager_config:
28
+ target: iopaint.model.anytext.cldm.embedding_manager.EmbeddingManager
29
+ params:
30
+ valid: true # v6
31
+ emb_type: ocr # ocr, vit, conv
32
+ glyph_channels: 1
33
+ position_channels: 1
34
+ add_pos: false
35
+ placeholder_string: '*'
36
+
37
+ control_stage_config:
38
+ target: iopaint.model.anytext.cldm.cldm.ControlNet
39
+ params:
40
+ image_size: 32 # unused
41
+ in_channels: 4
42
+ model_channels: 320
43
+ glyph_channels: 1
44
+ position_channels: 1
45
+ attention_resolutions: [ 4, 2, 1 ]
46
+ num_res_blocks: 2
47
+ channel_mult: [ 1, 2, 4, 4 ]
48
+ num_heads: 8
49
+ use_spatial_transformer: True
50
+ transformer_depth: 1
51
+ context_dim: 768
52
+ use_checkpoint: True
53
+ legacy: False
54
+
55
+ unet_config:
56
+ target: iopaint.model.anytext.cldm.cldm.ControlledUnetModel
57
+ params:
58
+ image_size: 32 # unused
59
+ in_channels: 4
60
+ out_channels: 4
61
+ model_channels: 320
62
+ attention_resolutions: [ 4, 2, 1 ]
63
+ num_res_blocks: 2
64
+ channel_mult: [ 1, 2, 4, 4 ]
65
+ num_heads: 8
66
+ use_spatial_transformer: True
67
+ transformer_depth: 1
68
+ context_dim: 768
69
+ use_checkpoint: True
70
+ legacy: False
71
+
72
+ first_stage_config:
73
+ target: iopaint.model.anytext.ldm.models.autoencoder.AutoencoderKL
74
+ params:
75
+ embed_dim: 4
76
+ monitor: val/rec_loss
77
+ ddconfig:
78
+ double_z: true
79
+ z_channels: 4
80
+ resolution: 256
81
+ in_channels: 3
82
+ out_ch: 3
83
+ ch: 128
84
+ ch_mult:
85
+ - 1
86
+ - 2
87
+ - 4
88
+ - 4
89
+ num_res_blocks: 2
90
+ attn_resolutions: []
91
+ dropout: 0.0
92
+ lossconfig:
93
+ target: torch.nn.Identity
94
+
95
+ cond_stage_config:
96
+ target: iopaint.model.anytext.ldm.modules.encoders.modules.FrozenCLIPEmbedderT3
97
+ params:
98
+ version: openai/clip-vit-large-patch14
99
+ use_vision: false # v6
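This YAML is the config that AnyTextPipeline loads through create_model (ANYTEXT_CFG in anytext_pipeline.py points at this file). A minimal sketch, with the device string treated as an assumption:

from iopaint.model.anytext.cldm.model import create_model

# Path mirrors ANYTEXT_CFG; create_model is called the same way in AnyTextPipeline.__init__.
model = create_model(
    "iopaint/model/anytext/anytext_sd15.yaml",
    device="cpu",
    use_fp16=False,
)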
iopaint/model/anytext/cldm/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/models/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/models/autoencoder.py ADDED
@@ -0,0 +1,218 @@
1
+ import torch
2
+ import torch.nn.functional as F
3
+ from contextlib import contextmanager
4
+
5
+ from iopaint.model.anytext.ldm.modules.diffusionmodules.model import Encoder, Decoder
6
+ from iopaint.model.anytext.ldm.modules.distributions.distributions import DiagonalGaussianDistribution
7
+
8
+ from iopaint.model.anytext.ldm.util import instantiate_from_config
9
+ from iopaint.model.anytext.ldm.modules.ema import LitEma
10
+
11
+
12
+ class AutoencoderKL(torch.nn.Module):
13
+ def __init__(self,
14
+ ddconfig,
15
+ lossconfig,
16
+ embed_dim,
17
+ ckpt_path=None,
18
+ ignore_keys=[],
19
+ image_key="image",
20
+ colorize_nlabels=None,
21
+ monitor=None,
22
+ ema_decay=None,
23
+ learn_logvar=False
24
+ ):
25
+ super().__init__()
26
+ self.learn_logvar = learn_logvar
27
+ self.image_key = image_key
28
+ self.encoder = Encoder(**ddconfig)
29
+ self.decoder = Decoder(**ddconfig)
30
+ self.loss = instantiate_from_config(lossconfig)
31
+ assert ddconfig["double_z"]
32
+ self.quant_conv = torch.nn.Conv2d(2*ddconfig["z_channels"], 2*embed_dim, 1)
33
+ self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1)
34
+ self.embed_dim = embed_dim
35
+ if colorize_nlabels is not None:
36
+ assert type(colorize_nlabels)==int
37
+ self.register_buffer("colorize", torch.randn(3, colorize_nlabels, 1, 1))
38
+ if monitor is not None:
39
+ self.monitor = monitor
40
+
41
+ self.use_ema = ema_decay is not None
42
+ if self.use_ema:
43
+ self.ema_decay = ema_decay
44
+ assert 0. < ema_decay < 1.
45
+ self.model_ema = LitEma(self, decay=ema_decay)
46
+ print(f"Keeping EMAs of {len(list(self.model_ema.buffers()))}.")
47
+
48
+ if ckpt_path is not None:
49
+ self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys)
50
+
51
+ def init_from_ckpt(self, path, ignore_keys=list()):
52
+ sd = torch.load(path, map_location="cpu")["state_dict"]
53
+ keys = list(sd.keys())
54
+ for k in keys:
55
+ for ik in ignore_keys:
56
+ if k.startswith(ik):
57
+ print("Deleting key {} from state_dict.".format(k))
58
+ del sd[k]
59
+ self.load_state_dict(sd, strict=False)
60
+ print(f"Restored from {path}")
61
+
62
+ @contextmanager
63
+ def ema_scope(self, context=None):
64
+ if self.use_ema:
65
+ self.model_ema.store(self.parameters())
66
+ self.model_ema.copy_to(self)
67
+ if context is not None:
68
+ print(f"{context}: Switched to EMA weights")
69
+ try:
70
+ yield None
71
+ finally:
72
+ if self.use_ema:
73
+ self.model_ema.restore(self.parameters())
74
+ if context is not None:
75
+ print(f"{context}: Restored training weights")
76
+
77
+ def on_train_batch_end(self, *args, **kwargs):
78
+ if self.use_ema:
79
+ self.model_ema(self)
80
+
81
+ def encode(self, x):
82
+ h = self.encoder(x)
83
+ moments = self.quant_conv(h)
84
+ posterior = DiagonalGaussianDistribution(moments)
85
+ return posterior
86
+
87
+ def decode(self, z):
88
+ z = self.post_quant_conv(z)
89
+ dec = self.decoder(z)
90
+ return dec
91
+
92
+ def forward(self, input, sample_posterior=True):
93
+ posterior = self.encode(input)
94
+ if sample_posterior:
95
+ z = posterior.sample()
96
+ else:
97
+ z = posterior.mode()
98
+ dec = self.decode(z)
99
+ return dec, posterior
100
+
101
+ def get_input(self, batch, k):
102
+ x = batch[k]
103
+ if len(x.shape) == 3:
104
+ x = x[..., None]
105
+ x = x.permute(0, 3, 1, 2).to(memory_format=torch.contiguous_format).float()
106
+ return x
107
+
108
+ def training_step(self, batch, batch_idx, optimizer_idx):
109
+ inputs = self.get_input(batch, self.image_key)
110
+ reconstructions, posterior = self(inputs)
111
+
112
+ if optimizer_idx == 0:
113
+ # train encoder+decoder+logvar
114
+ aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step,
115
+ last_layer=self.get_last_layer(), split="train")
116
+ self.log("aeloss", aeloss, prog_bar=True, logger=True, on_step=True, on_epoch=True)
117
+ self.log_dict(log_dict_ae, prog_bar=False, logger=True, on_step=True, on_epoch=False)
118
+ return aeloss
119
+
120
+ if optimizer_idx == 1:
121
+ # train the discriminator
122
+ discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, optimizer_idx, self.global_step,
123
+ last_layer=self.get_last_layer(), split="train")
124
+
125
+ self.log("discloss", discloss, prog_bar=True, logger=True, on_step=True, on_epoch=True)
126
+ self.log_dict(log_dict_disc, prog_bar=False, logger=True, on_step=True, on_epoch=False)
127
+ return discloss
128
+
129
+ def validation_step(self, batch, batch_idx):
130
+ log_dict = self._validation_step(batch, batch_idx)
131
+ with self.ema_scope():
132
+ log_dict_ema = self._validation_step(batch, batch_idx, postfix="_ema")
133
+ return log_dict
134
+
135
+ def _validation_step(self, batch, batch_idx, postfix=""):
136
+ inputs = self.get_input(batch, self.image_key)
137
+ reconstructions, posterior = self(inputs)
138
+ aeloss, log_dict_ae = self.loss(inputs, reconstructions, posterior, 0, self.global_step,
139
+ last_layer=self.get_last_layer(), split="val"+postfix)
140
+
141
+ discloss, log_dict_disc = self.loss(inputs, reconstructions, posterior, 1, self.global_step,
142
+ last_layer=self.get_last_layer(), split="val"+postfix)
143
+
144
+ self.log(f"val{postfix}/rec_loss", log_dict_ae[f"val{postfix}/rec_loss"])
145
+ self.log_dict(log_dict_ae)
146
+ self.log_dict(log_dict_disc)
147
+ return self.log_dict
148
+
149
+ def configure_optimizers(self):
150
+ lr = self.learning_rate
151
+ ae_params_list = list(self.encoder.parameters()) + list(self.decoder.parameters()) + list(
152
+ self.quant_conv.parameters()) + list(self.post_quant_conv.parameters())
153
+ if self.learn_logvar:
154
+ print(f"{self.__class__.__name__}: Learning logvar")
155
+ ae_params_list.append(self.loss.logvar)
156
+ opt_ae = torch.optim.Adam(ae_params_list,
157
+ lr=lr, betas=(0.5, 0.9))
158
+ opt_disc = torch.optim.Adam(self.loss.discriminator.parameters(),
159
+ lr=lr, betas=(0.5, 0.9))
160
+ return [opt_ae, opt_disc], []
161
+
162
+ def get_last_layer(self):
163
+ return self.decoder.conv_out.weight
164
+
165
+ @torch.no_grad()
166
+ def log_images(self, batch, only_inputs=False, log_ema=False, **kwargs):
167
+ log = dict()
168
+ x = self.get_input(batch, self.image_key)
169
+ x = x.to(self.device)
170
+ if not only_inputs:
171
+ xrec, posterior = self(x)
172
+ if x.shape[1] > 3:
173
+ # colorize with random projection
174
+ assert xrec.shape[1] > 3
175
+ x = self.to_rgb(x)
176
+ xrec = self.to_rgb(xrec)
177
+ log["samples"] = self.decode(torch.randn_like(posterior.sample()))
178
+ log["reconstructions"] = xrec
179
+ if log_ema or self.use_ema:
180
+ with self.ema_scope():
181
+ xrec_ema, posterior_ema = self(x)
182
+ if x.shape[1] > 3:
183
+ # colorize with random projection
184
+ assert xrec_ema.shape[1] > 3
185
+ xrec_ema = self.to_rgb(xrec_ema)
186
+ log["samples_ema"] = self.decode(torch.randn_like(posterior_ema.sample()))
187
+ log["reconstructions_ema"] = xrec_ema
188
+ log["inputs"] = x
189
+ return log
190
+
191
+ def to_rgb(self, x):
192
+ assert self.image_key == "segmentation"
193
+ if not hasattr(self, "colorize"):
194
+ self.register_buffer("colorize", torch.randn(3, x.shape[1], 1, 1).to(x))
195
+ x = F.conv2d(x, weight=self.colorize)
196
+ x = 2.*(x-x.min())/(x.max()-x.min()) - 1.
197
+ return x
198
+
199
+
200
+ class IdentityFirstStage(torch.nn.Module):
201
+ def __init__(self, *args, vq_interface=False, **kwargs):
202
+ self.vq_interface = vq_interface
203
+ super().__init__()
204
+
205
+ def encode(self, x, *args, **kwargs):
206
+ return x
207
+
208
+ def decode(self, x, *args, **kwargs):
209
+ return x
210
+
211
+ def quantize(self, x, *args, **kwargs):
212
+ if self.vq_interface:
213
+ return x, None, [None, None, None]
214
+ return x
215
+
216
+ def forward(self, x, *args, **kwargs):
217
+ return x
218
+
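Note: the forward pass above samples from the encoded posterior (or takes its mode) and decodes back to image space. A minimal usage sketch, assuming `model` is an already-constructed instance of the autoencoder class in this file; the helper name and input range are illustrative only:

```python
import torch

@torch.no_grad()
def roundtrip(model, images: torch.Tensor) -> torch.Tensor:
    # images: [B, 3, H, W] scaled to [-1, 1]; sample_posterior=False decodes the
    # posterior mean instead of a random sample, giving a deterministic result.
    recon, posterior = model(images, sample_posterior=False)
    return recon
```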
iopaint/model/anytext/ldm/models/diffusion/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/models/diffusion/dpm_solver/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .sampler import DPMSolverSampler
iopaint/model/anytext/ldm/modules/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/modules/attention.py ADDED
@@ -0,0 +1,360 @@
1
+ from inspect import isfunction
2
+ import math
3
+ import torch
4
+ import torch.nn.functional as F
5
+ from torch import nn, einsum
6
+ from einops import rearrange, repeat
7
+ from typing import Optional, Any
8
+
9
+ from iopaint.model.anytext.ldm.modules.diffusionmodules.util import checkpoint
10
+
11
+
12
+ # CrossAttn precision handling
13
+ import os
14
+
15
+ _ATTN_PRECISION = os.environ.get("ATTN_PRECISION", "fp32")
16
+
17
+
18
+ def exists(val):
19
+ return val is not None
20
+
21
+
22
+ def uniq(arr):
23
+ return {el: True for el in arr}.keys()
24
+
25
+
26
+ def default(val, d):
27
+ if exists(val):
28
+ return val
29
+ return d() if isfunction(d) else d
30
+
31
+
32
+ def max_neg_value(t):
33
+ return -torch.finfo(t.dtype).max
34
+
35
+
36
+ def init_(tensor):
37
+ dim = tensor.shape[-1]
38
+ std = 1 / math.sqrt(dim)
39
+ tensor.uniform_(-std, std)
40
+ return tensor
41
+
42
+
43
+ # feedforward
44
+ class GEGLU(nn.Module):
45
+ def __init__(self, dim_in, dim_out):
46
+ super().__init__()
47
+ self.proj = nn.Linear(dim_in, dim_out * 2)
48
+
49
+ def forward(self, x):
50
+ x, gate = self.proj(x).chunk(2, dim=-1)
51
+ return x * F.gelu(gate)
52
+
53
+
54
+ class FeedForward(nn.Module):
55
+ def __init__(self, dim, dim_out=None, mult=4, glu=False, dropout=0.0):
56
+ super().__init__()
57
+ inner_dim = int(dim * mult)
58
+ dim_out = default(dim_out, dim)
59
+ project_in = (
60
+ nn.Sequential(nn.Linear(dim, inner_dim), nn.GELU())
61
+ if not glu
62
+ else GEGLU(dim, inner_dim)
63
+ )
64
+
65
+ self.net = nn.Sequential(
66
+ project_in, nn.Dropout(dropout), nn.Linear(inner_dim, dim_out)
67
+ )
68
+
69
+ def forward(self, x):
70
+ return self.net(x)
71
+
72
+
73
+ def zero_module(module):
74
+ """
75
+ Zero out the parameters of a module and return it.
76
+ """
77
+ for p in module.parameters():
78
+ p.detach().zero_()
79
+ return module
80
+
81
+
82
+ def Normalize(in_channels):
83
+ return torch.nn.GroupNorm(
84
+ num_groups=32, num_channels=in_channels, eps=1e-6, affine=True
85
+ )
86
+
87
+
88
+ class SpatialSelfAttention(nn.Module):
89
+ def __init__(self, in_channels):
90
+ super().__init__()
91
+ self.in_channels = in_channels
92
+
93
+ self.norm = Normalize(in_channels)
94
+ self.q = torch.nn.Conv2d(
95
+ in_channels, in_channels, kernel_size=1, stride=1, padding=0
96
+ )
97
+ self.k = torch.nn.Conv2d(
98
+ in_channels, in_channels, kernel_size=1, stride=1, padding=0
99
+ )
100
+ self.v = torch.nn.Conv2d(
101
+ in_channels, in_channels, kernel_size=1, stride=1, padding=0
102
+ )
103
+ self.proj_out = torch.nn.Conv2d(
104
+ in_channels, in_channels, kernel_size=1, stride=1, padding=0
105
+ )
106
+
107
+ def forward(self, x):
108
+ h_ = x
109
+ h_ = self.norm(h_)
110
+ q = self.q(h_)
111
+ k = self.k(h_)
112
+ v = self.v(h_)
113
+
114
+ # compute attention
115
+ b, c, h, w = q.shape
116
+ q = rearrange(q, "b c h w -> b (h w) c")
117
+ k = rearrange(k, "b c h w -> b c (h w)")
118
+ w_ = torch.einsum("bij,bjk->bik", q, k)
119
+
120
+ w_ = w_ * (int(c) ** (-0.5))
121
+ w_ = torch.nn.functional.softmax(w_, dim=2)
122
+
123
+ # attend to values
124
+ v = rearrange(v, "b c h w -> b c (h w)")
125
+ w_ = rearrange(w_, "b i j -> b j i")
126
+ h_ = torch.einsum("bij,bjk->bik", v, w_)
127
+ h_ = rearrange(h_, "b c (h w) -> b c h w", h=h)
128
+ h_ = self.proj_out(h_)
129
+
130
+ return x + h_
131
+
132
+
133
+ class CrossAttention(nn.Module):
134
+ def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0):
135
+ super().__init__()
136
+ inner_dim = dim_head * heads
137
+ context_dim = default(context_dim, query_dim)
138
+
139
+ self.scale = dim_head**-0.5
140
+ self.heads = heads
141
+
142
+ self.to_q = nn.Linear(query_dim, inner_dim, bias=False)
143
+ self.to_k = nn.Linear(context_dim, inner_dim, bias=False)
144
+ self.to_v = nn.Linear(context_dim, inner_dim, bias=False)
145
+
146
+ self.to_out = nn.Sequential(
147
+ nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)
148
+ )
149
+
150
+ def forward(self, x, context=None, mask=None):
151
+ h = self.heads
152
+
153
+ q = self.to_q(x)
154
+ context = default(context, x)
155
+ k = self.to_k(context)
156
+ v = self.to_v(context)
157
+
158
+ q, k, v = map(lambda t: rearrange(t, "b n (h d) -> (b h) n d", h=h), (q, k, v))
159
+
160
+ # force cast to fp32 to avoid overflowing
161
+ if _ATTN_PRECISION == "fp32":
162
+ with torch.autocast(enabled=False, device_type="cuda"):
163
+ q, k = q.float(), k.float()
164
+ sim = einsum("b i d, b j d -> b i j", q, k) * self.scale
165
+ else:
166
+ sim = einsum("b i d, b j d -> b i j", q, k) * self.scale
167
+
168
+ del q, k
169
+
170
+ if exists(mask):
171
+ mask = rearrange(mask, "b ... -> b (...)")
172
+ max_neg_value = -torch.finfo(sim.dtype).max
173
+ mask = repeat(mask, "b j -> (b h) () j", h=h)
174
+ sim.masked_fill_(~mask, max_neg_value)
175
+
176
+ # attention, what we cannot get enough of
177
+ sim = sim.softmax(dim=-1)
178
+
179
+ out = einsum("b i j, b j d -> b i d", sim, v)
180
+ out = rearrange(out, "(b h) n d -> b n (h d)", h=h)
181
+ return self.to_out(out)
182
+
183
+
184
+ class SDPACrossAttention(CrossAttention):
185
+ def forward(self, x, context=None, mask=None):
186
+ batch_size, sequence_length, inner_dim = x.shape
187
+
188
+ if mask is not None:
189
+ mask = self.prepare_attention_mask(mask, sequence_length, batch_size)
190
+ mask = mask.view(batch_size, self.heads, -1, mask.shape[-1])
191
+
192
+ h = self.heads
193
+ q_in = self.to_q(x)
194
+ context = default(context, x)
195
+
196
+ k_in = self.to_k(context)
197
+ v_in = self.to_v(context)
198
+
199
+ head_dim = inner_dim // h
200
+ q = q_in.view(batch_size, -1, h, head_dim).transpose(1, 2)
201
+ k = k_in.view(batch_size, -1, h, head_dim).transpose(1, 2)
202
+ v = v_in.view(batch_size, -1, h, head_dim).transpose(1, 2)
203
+
204
+ del q_in, k_in, v_in
205
+
206
+ dtype = q.dtype
207
+ if _ATTN_PRECISION == "fp32":
208
+ q, k, v = q.float(), k.float(), v.float()
209
+
210
+ # the output of sdp = (batch, num_heads, seq_len, head_dim)
211
+ hidden_states = torch.nn.functional.scaled_dot_product_attention(
212
+ q, k, v, attn_mask=mask, dropout_p=0.0, is_causal=False
213
+ )
214
+
215
+ hidden_states = hidden_states.transpose(1, 2).reshape(
216
+ batch_size, -1, h * head_dim
217
+ )
218
+ hidden_states = hidden_states.to(dtype)
219
+
220
+ # linear proj
221
+ hidden_states = self.to_out[0](hidden_states)
222
+ # dropout
223
+ hidden_states = self.to_out[1](hidden_states)
224
+ return hidden_states
225
+
226
+
227
+ class BasicTransformerBlock(nn.Module):
228
+ def __init__(
229
+ self,
230
+ dim,
231
+ n_heads,
232
+ d_head,
233
+ dropout=0.0,
234
+ context_dim=None,
235
+ gated_ff=True,
236
+ checkpoint=True,
237
+ disable_self_attn=False,
238
+ ):
239
+ super().__init__()
240
+
241
+ if hasattr(torch.nn.functional, "scaled_dot_product_attention"):
242
+ attn_cls = SDPACrossAttention
243
+ else:
244
+ attn_cls = CrossAttention
245
+
246
+ self.disable_self_attn = disable_self_attn
247
+ self.attn1 = attn_cls(
248
+ query_dim=dim,
249
+ heads=n_heads,
250
+ dim_head=d_head,
251
+ dropout=dropout,
252
+ context_dim=context_dim if self.disable_self_attn else None,
253
+ ) # is a self-attention if not self.disable_self_attn
254
+ self.ff = FeedForward(dim, dropout=dropout, glu=gated_ff)
255
+ self.attn2 = attn_cls(
256
+ query_dim=dim,
257
+ context_dim=context_dim,
258
+ heads=n_heads,
259
+ dim_head=d_head,
260
+ dropout=dropout,
261
+ ) # is self-attn if context is none
262
+ self.norm1 = nn.LayerNorm(dim)
263
+ self.norm2 = nn.LayerNorm(dim)
264
+ self.norm3 = nn.LayerNorm(dim)
265
+ self.checkpoint = checkpoint
266
+
267
+ def forward(self, x, context=None):
268
+ return checkpoint(
269
+ self._forward, (x, context), self.parameters(), self.checkpoint
270
+ )
271
+
272
+ def _forward(self, x, context=None):
273
+ x = (
274
+ self.attn1(
275
+ self.norm1(x), context=context if self.disable_self_attn else None
276
+ )
277
+ + x
278
+ )
279
+ x = self.attn2(self.norm2(x), context=context) + x
280
+ x = self.ff(self.norm3(x)) + x
281
+ return x
282
+
283
+
284
+ class SpatialTransformer(nn.Module):
285
+ """
286
+ Transformer block for image-like data.
287
+ First, project the input (aka embedding)
288
+ and reshape to b, t, d.
289
+ Then apply standard transformer action.
290
+ Finally, reshape to image
291
+ NEW: use_linear for more efficiency instead of the 1x1 convs
292
+ """
293
+
294
+ def __init__(
295
+ self,
296
+ in_channels,
297
+ n_heads,
298
+ d_head,
299
+ depth=1,
300
+ dropout=0.0,
301
+ context_dim=None,
302
+ disable_self_attn=False,
303
+ use_linear=False,
304
+ use_checkpoint=True,
305
+ ):
306
+ super().__init__()
307
+ if exists(context_dim) and not isinstance(context_dim, list):
308
+ context_dim = [context_dim]
309
+ self.in_channels = in_channels
310
+ inner_dim = n_heads * d_head
311
+ self.norm = Normalize(in_channels)
312
+ if not use_linear:
313
+ self.proj_in = nn.Conv2d(
314
+ in_channels, inner_dim, kernel_size=1, stride=1, padding=0
315
+ )
316
+ else:
317
+ self.proj_in = nn.Linear(in_channels, inner_dim)
318
+
319
+ self.transformer_blocks = nn.ModuleList(
320
+ [
321
+ BasicTransformerBlock(
322
+ inner_dim,
323
+ n_heads,
324
+ d_head,
325
+ dropout=dropout,
326
+ context_dim=context_dim[d],
327
+ disable_self_attn=disable_self_attn,
328
+ checkpoint=use_checkpoint,
329
+ )
330
+ for d in range(depth)
331
+ ]
332
+ )
333
+ if not use_linear:
334
+ self.proj_out = zero_module(
335
+ nn.Conv2d(inner_dim, in_channels, kernel_size=1, stride=1, padding=0)
336
+ )
337
+ else:
338
+ self.proj_out = zero_module(nn.Linear(in_channels, inner_dim))
339
+ self.use_linear = use_linear
340
+
341
+ def forward(self, x, context=None):
342
+ # note: if no context is given, cross-attention defaults to self-attention
343
+ if not isinstance(context, list):
344
+ context = [context]
345
+ b, c, h, w = x.shape
346
+ x_in = x
347
+ x = self.norm(x)
348
+ if not self.use_linear:
349
+ x = self.proj_in(x)
350
+ x = rearrange(x, "b c h w -> b (h w) c").contiguous()
351
+ if self.use_linear:
352
+ x = self.proj_in(x)
353
+ for i, block in enumerate(self.transformer_blocks):
354
+ x = block(x, context=context[i])
355
+ if self.use_linear:
356
+ x = self.proj_out(x)
357
+ x = rearrange(x, "b (h w) c -> b c h w", h=h, w=w).contiguous()
358
+ if not self.use_linear:
359
+ x = self.proj_out(x)
360
+ return x + x_in
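A small smoke-test sketch for the `SpatialTransformer` defined above; it assumes the `iopaint` package is importable so the module's own imports resolve, and all sizes are illustrative (note `n_heads * d_head` matches `in_channels` here):

```python
import torch
from iopaint.model.anytext.ldm.modules.attention import SpatialTransformer

block = SpatialTransformer(in_channels=64, n_heads=8, d_head=8, depth=1, context_dim=768)
x = torch.randn(1, 64, 32, 32)    # [B, C, H, W] feature map
ctx = torch.randn(1, 77, 768)     # [B, tokens, context_dim] conditioning
out = block(x, context=ctx)       # residual output, same shape as x
assert out.shape == x.shape
```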
iopaint/model/anytext/ldm/modules/diffusionmodules/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/modules/distributions/__init__.py ADDED
File without changes
iopaint/model/anytext/ldm/modules/encoders/__init__.py ADDED
File without changes
iopaint/model/anytext/ocr_recog/__init__.py ADDED
File without changes
iopaint/model/base.py ADDED
@@ -0,0 +1,418 @@
1
+ import abc
2
+ from typing import Optional
3
+
4
+ import cv2
5
+ import torch
6
+ import numpy as np
7
+ from loguru import logger
8
+
9
+ from iopaint.helper import (
10
+ boxes_from_mask,
11
+ resize_max_size,
12
+ pad_img_to_modulo,
13
+ switch_mps_device,
14
+ )
15
+ from iopaint.schema import InpaintRequest, HDStrategy, SDSampler
16
+ from .helper.g_diffuser_bot import expand_image
17
+ from .utils import get_scheduler
18
+
19
+
20
+ class InpaintModel:
21
+ name = "base"
22
+ min_size: Optional[int] = None
23
+ pad_mod = 8
24
+ pad_to_square = False
25
+ is_erase_model = False
26
+
27
+ def __init__(self, device, **kwargs):
28
+ """
29
+
30
+ Args:
31
+ device:
32
+ """
33
+ device = switch_mps_device(self.name, device)
34
+ self.device = device
35
+ self.init_model(device, **kwargs)
36
+
37
+ @abc.abstractmethod
38
+ def init_model(self, device, **kwargs):
39
+ ...
40
+
41
+ @staticmethod
42
+ @abc.abstractmethod
43
+ def is_downloaded() -> bool:
44
+ return False
45
+
46
+ @abc.abstractmethod
47
+ def forward(self, image, mask, config: InpaintRequest):
48
+ """Input images and output images have same size
49
+ images: [H, W, C] RGB
50
+ masks: [H, W, 1], 255 means the masked area
51
+ return: BGR IMAGE
52
+ """
53
+ ...
54
+
55
+ @staticmethod
56
+ def download():
57
+ ...
58
+
59
+ def _pad_forward(self, image, mask, config: InpaintRequest):
60
+ origin_height, origin_width = image.shape[:2]
61
+ pad_image = pad_img_to_modulo(
62
+ image, mod=self.pad_mod, square=self.pad_to_square, min_size=self.min_size
63
+ )
64
+ pad_mask = pad_img_to_modulo(
65
+ mask, mod=self.pad_mod, square=self.pad_to_square, min_size=self.min_size
66
+ )
67
+
68
+ # logger.info(f"final forward pad size: {pad_image.shape}")
69
+
70
+ image, mask = self.forward_pre_process(image, mask, config)
71
+
72
+ result = self.forward(pad_image, pad_mask, config)
73
+ result = result[0:origin_height, 0:origin_width, :]
74
+
75
+ result, image, mask = self.forward_post_process(result, image, mask, config)
76
+
77
+ if config.sd_keep_unmasked_area:
78
+ mask = mask[:, :, np.newaxis]
79
+ result = result * (mask / 255) + image[:, :, ::-1] * (1 - (mask / 255))
80
+ return result
81
+
82
+ def forward_pre_process(self, image, mask, config):
83
+ return image, mask
84
+
85
+ def forward_post_process(self, result, image, mask, config):
86
+ return result, image, mask
87
+
88
+ @torch.no_grad()
89
+ def __call__(self, image, mask, config: InpaintRequest):
90
+ """
91
+ images: [H, W, C] RGB, not normalized
92
+ masks: [H, W]
93
+ return: BGR IMAGE
94
+ """
95
+ inpaint_result = None
96
+ # logger.info(f"hd_strategy: {config.hd_strategy}")
97
+ if config.hd_strategy == HDStrategy.CROP:
98
+ if max(image.shape) > config.hd_strategy_crop_trigger_size:
99
+ logger.info(f"Run crop strategy")
100
+ boxes = boxes_from_mask(mask)
101
+ crop_result = []
102
+ for box in boxes:
103
+ crop_image, crop_box = self._run_box(image, mask, box, config)
104
+ crop_result.append((crop_image, crop_box))
105
+
106
+ inpaint_result = image[:, :, ::-1]
107
+ for crop_image, crop_box in crop_result:
108
+ x1, y1, x2, y2 = crop_box
109
+ inpaint_result[y1:y2, x1:x2, :] = crop_image
110
+
111
+ elif config.hd_strategy == HDStrategy.RESIZE:
112
+ if max(image.shape) > config.hd_strategy_resize_limit:
113
+ origin_size = image.shape[:2]
114
+ downsize_image = resize_max_size(
115
+ image, size_limit=config.hd_strategy_resize_limit
116
+ )
117
+ downsize_mask = resize_max_size(
118
+ mask, size_limit=config.hd_strategy_resize_limit
119
+ )
120
+
121
+ logger.info(
122
+ f"Run resize strategy, origin size: {image.shape} forward size: {downsize_image.shape}"
123
+ )
124
+ inpaint_result = self._pad_forward(
125
+ downsize_image, downsize_mask, config
126
+ )
127
+
128
+ # only paste masked area result
129
+ inpaint_result = cv2.resize(
130
+ inpaint_result,
131
+ (origin_size[1], origin_size[0]),
132
+ interpolation=cv2.INTER_CUBIC,
133
+ )
134
+ original_pixel_indices = mask < 127
135
+ inpaint_result[original_pixel_indices] = image[:, :, ::-1][
136
+ original_pixel_indices
137
+ ]
138
+
139
+ if inpaint_result is None:
140
+ inpaint_result = self._pad_forward(image, mask, config)
141
+
142
+ return inpaint_result
143
+
144
+ def _crop_box(self, image, mask, box, config: InpaintRequest):
145
+ """
146
+
147
+ Args:
148
+ image: [H, W, C] RGB
149
+ mask: [H, W, 1]
150
+ box: [left,top,right,bottom]
151
+
152
+ Returns:
153
+ BGR IMAGE, (l, t, r, b)
154
+ """
155
+ box_h = box[3] - box[1]
156
+ box_w = box[2] - box[0]
157
+ cx = (box[0] + box[2]) // 2
158
+ cy = (box[1] + box[3]) // 2
159
+ img_h, img_w = image.shape[:2]
160
+
161
+ w = box_w + config.hd_strategy_crop_margin * 2
162
+ h = box_h + config.hd_strategy_crop_margin * 2
163
+
164
+ _l = cx - w // 2
165
+ _r = cx + w // 2
166
+ _t = cy - h // 2
167
+ _b = cy + h // 2
168
+
169
+ l = max(_l, 0)
170
+ r = min(_r, img_w)
171
+ t = max(_t, 0)
172
+ b = min(_b, img_h)
173
+
174
+ # try to get more context when crop around image edge
175
+ if _l < 0:
176
+ r += abs(_l)
177
+ if _r > img_w:
178
+ l -= _r - img_w
179
+ if _t < 0:
180
+ b += abs(_t)
181
+ if _b > img_h:
182
+ t -= _b - img_h
183
+
184
+ l = max(l, 0)
185
+ r = min(r, img_w)
186
+ t = max(t, 0)
187
+ b = min(b, img_h)
188
+
189
+ crop_img = image[t:b, l:r, :]
190
+ crop_mask = mask[t:b, l:r]
191
+
192
+ # logger.info(f"box size: ({box_h},{box_w}) crop size: {crop_img.shape}")
193
+
194
+ return crop_img, crop_mask, [l, t, r, b]
195
+
196
+ def _calculate_cdf(self, histogram):
197
+ cdf = histogram.cumsum()
198
+ normalized_cdf = cdf / float(cdf.max())
199
+ return normalized_cdf
200
+
201
+ def _calculate_lookup(self, source_cdf, reference_cdf):
202
+ lookup_table = np.zeros(256)
203
+ lookup_val = 0
204
+ for source_index, source_val in enumerate(source_cdf):
205
+ for reference_index, reference_val in enumerate(reference_cdf):
206
+ if reference_val >= source_val:
207
+ lookup_val = reference_index
208
+ break
209
+ lookup_table[source_index] = lookup_val
210
+ return lookup_table
211
+
212
+ def _match_histograms(self, source, reference, mask):
213
+ transformed_channels = []
214
+ if len(mask.shape) == 3:
215
+ mask = mask[:, :, -1]
216
+
217
+ for channel in range(source.shape[-1]):
218
+ source_channel = source[:, :, channel]
219
+ reference_channel = reference[:, :, channel]
220
+
221
+ # only calculate histograms for non-masked parts
222
+ source_histogram, _ = np.histogram(source_channel[mask == 0], 256, [0, 256])
223
+ reference_histogram, _ = np.histogram(
224
+ reference_channel[mask == 0], 256, [0, 256]
225
+ )
226
+
227
+ source_cdf = self._calculate_cdf(source_histogram)
228
+ reference_cdf = self._calculate_cdf(reference_histogram)
229
+
230
+ lookup = self._calculate_lookup(source_cdf, reference_cdf)
231
+
232
+ transformed_channels.append(cv2.LUT(source_channel, lookup))
233
+
234
+ result = cv2.merge(transformed_channels)
235
+ result = cv2.convertScaleAbs(result)
236
+
237
+ return result
238
+
239
+ def _apply_cropper(self, image, mask, config: InpaintRequest):
240
+ img_h, img_w = image.shape[:2]
241
+ l, t, w, h = (
242
+ config.croper_x,
243
+ config.croper_y,
244
+ config.croper_width,
245
+ config.croper_height,
246
+ )
247
+ r = l + w
248
+ b = t + h
249
+
250
+ l = max(l, 0)
251
+ r = min(r, img_w)
252
+ t = max(t, 0)
253
+ b = min(b, img_h)
254
+
255
+ crop_img = image[t:b, l:r, :]
256
+ crop_mask = mask[t:b, l:r]
257
+ return crop_img, crop_mask, (l, t, r, b)
258
+
259
+ def _run_box(self, image, mask, box, config: InpaintRequest):
260
+ """
261
+
262
+ Args:
263
+ image: [H, W, C] RGB
264
+ mask: [H, W, 1]
265
+ box: [left,top,right,bottom]
266
+
267
+ Returns:
268
+ BGR IMAGE
269
+ """
270
+ crop_img, crop_mask, [l, t, r, b] = self._crop_box(image, mask, box, config)
271
+
272
+ return self._pad_forward(crop_img, crop_mask, config), [l, t, r, b]
273
+
274
+
275
+ class DiffusionInpaintModel(InpaintModel):
276
+ def __init__(self, device, **kwargs):
277
+ self.model_info = kwargs["model_info"]
278
+ self.model_id_or_path = self.model_info.path
279
+ super().__init__(device, **kwargs)
280
+
281
+ @torch.no_grad()
282
+ def __call__(self, image, mask, config: InpaintRequest):
283
+ """
284
+ images: [H, W, C] RGB, not normalized
285
+ masks: [H, W]
286
+ return: BGR IMAGE
287
+ """
288
+ # boxes = boxes_from_mask(mask)
289
+ if config.use_croper:
290
+ crop_img, crop_mask, (l, t, r, b) = self._apply_cropper(image, mask, config)
291
+ crop_image = self._scaled_pad_forward(crop_img, crop_mask, config)
292
+ inpaint_result = image[:, :, ::-1]
293
+ inpaint_result[t:b, l:r, :] = crop_image
294
+ elif config.use_extender:
295
+ inpaint_result = self._do_outpainting(image, config)
296
+ else:
297
+ inpaint_result = self._scaled_pad_forward(image, mask, config)
298
+
299
+ return inpaint_result
300
+
301
+ def _do_outpainting(self, image, config: InpaintRequest):
302
+ # The cropper and the image are in the same coordinate system; croper_x/y may be negative
303
+ # Crop the outpainting region out of the image
304
+ image_h, image_w = image.shape[:2]
305
+ cropper_l = config.extender_x
306
+ cropper_t = config.extender_y
307
+ cropper_r = config.extender_x + config.extender_width
308
+ cropper_b = config.extender_y + config.extender_height
309
+ image_l = 0
310
+ image_t = 0
311
+ image_r = image_w
312
+ image_b = image_h
313
+
314
+ # Similar to computing an IoU: take the intersection of the cropper and the image
315
+ l = max(cropper_l, image_l)
316
+ t = max(cropper_t, image_t)
317
+ r = min(cropper_r, image_r)
318
+ b = min(cropper_b, image_b)
319
+
320
+ assert (
321
+ 0 <= l < r and 0 <= t < b
322
+ ), f"cropper and image not overlap, {l},{t},{r},{b}"
323
+
324
+ cropped_image = image[t:b, l:r, :]
325
+ padding_l = max(0, image_l - cropper_l)
326
+ padding_t = max(0, image_t - cropper_t)
327
+ padding_r = max(0, cropper_r - image_r)
328
+ padding_b = max(0, cropper_b - image_b)
329
+
330
+ expanded_image, mask_image = expand_image(
331
+ cropped_image,
332
+ left=padding_l,
333
+ top=padding_t,
334
+ right=padding_r,
335
+ bottom=padding_b,
336
+ softness=config.sd_outpainting_softness,
337
+ space=config.sd_outpainting_space,
338
+ )
339
+
340
+ # The final expanded image, BGR
341
+ expanded_cropped_result_image = self._scaled_pad_forward(
342
+ expanded_image, mask_image, config
343
+ )
344
+
345
+ # RGB -> BGR
346
+ outpainting_image = cv2.copyMakeBorder(
347
+ image,
348
+ left=padding_l,
349
+ top=padding_t,
350
+ right=padding_r,
351
+ bottom=padding_b,
352
+ borderType=cv2.BORDER_CONSTANT,
353
+ value=0,
354
+ )[:, :, ::-1]
355
+
356
+ # Paste cropped_result_image onto outpainting_image; no blending is needed in this step
357
+ paste_t = 0 if config.extender_y < 0 else config.extender_y
358
+ paste_l = 0 if config.extender_x < 0 else config.extender_x
359
+
360
+ outpainting_image[
361
+ paste_t : paste_t + expanded_cropped_result_image.shape[0],
362
+ paste_l : paste_l + expanded_cropped_result_image.shape[1],
363
+ :,
364
+ ] = expanded_cropped_result_image
365
+ return outpainting_image
366
+
367
+ def _scaled_pad_forward(self, image, mask, config: InpaintRequest):
368
+ longer_side_length = int(config.sd_scale * max(image.shape[:2]))
369
+ origin_size = image.shape[:2]
370
+ downsize_image = resize_max_size(image, size_limit=longer_side_length)
371
+ downsize_mask = resize_max_size(mask, size_limit=longer_side_length)
372
+ if config.sd_scale != 1:
373
+ logger.info(
374
+ f"Resize image to do sd inpainting: {image.shape} -> {downsize_image.shape}"
375
+ )
376
+ inpaint_result = self._pad_forward(downsize_image, downsize_mask, config)
377
+ # only paste masked area result
378
+ inpaint_result = cv2.resize(
379
+ inpaint_result,
380
+ (origin_size[1], origin_size[0]),
381
+ interpolation=cv2.INTER_CUBIC,
382
+ )
383
+
384
+ # blend result, copy from g_diffuser_bot
385
+ # mask_rgb = 1.0 - np_img_grey_to_rgb(mask / 255.0)
386
+ # inpaint_result = np.clip(
387
+ # inpaint_result * (1.0 - mask_rgb) + image * mask_rgb, 0.0, 255.0
388
+ # )
389
+ # original_pixel_indices = mask < 127
390
+ # inpaint_result[original_pixel_indices] = image[:, :, ::-1][
391
+ # original_pixel_indices
392
+ # ]
393
+ return inpaint_result
394
+
395
+ def set_scheduler(self, config: InpaintRequest):
396
+ scheduler_config = self.model.scheduler.config
397
+ sd_sampler = config.sd_sampler
398
+ if config.sd_lcm_lora and self.model_info.support_lcm_lora:
399
+ sd_sampler = SDSampler.lcm
400
+ logger.info(f"LCM Lora enabled, use {sd_sampler} sampler")
401
+ scheduler = get_scheduler(sd_sampler, scheduler_config)
402
+ self.model.scheduler = scheduler
403
+
404
+ def forward_pre_process(self, image, mask, config):
405
+ if config.sd_mask_blur != 0:
406
+ k = 2 * config.sd_mask_blur + 1
407
+ mask = cv2.GaussianBlur(mask, (k, k), 0)[:, :, np.newaxis]
408
+
409
+ return image, mask
410
+
411
+ def forward_post_process(self, result, image, mask, config):
412
+ if config.sd_match_histograms:
413
+ result = self._match_histograms(result, image[:, :, ::-1], mask)
414
+
415
+ if config.sd_mask_blur != 0:
416
+ k = 2 * config.sd_mask_blur + 1
417
+ mask = cv2.GaussianBlur(mask, (k, k), 0)
418
+ return result, image, mask
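For reference, a deliberately trivial `InpaintModel` subclass, only to illustrate the contract `forward()` is expected to honour ([H, W, C] RGB image plus mask in, BGR image out). The class and its fill strategy are illustrative, not part of the project:

```python
import numpy as np
from iopaint.model.base import InpaintModel
from iopaint.schema import InpaintRequest

class MeanFillModel(InpaintModel):
    name = "mean_fill"
    is_erase_model = True

    def init_model(self, device, **kwargs):
        pass

    @staticmethod
    def is_downloaded() -> bool:
        return True

    def forward(self, image, mask, config: InpaintRequest):
        m = mask[..., 0] if mask.ndim == 3 else mask
        result = image[:, :, ::-1].copy()                    # RGB -> BGR
        mean_bgr = image.reshape(-1, 3).mean(axis=0)[::-1]   # per-channel mean, BGR order
        result[m > 127] = mean_bgr                           # fill masked pixels
        return result
```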
iopaint/model/helper/__init__.py ADDED
File without changes
iopaint/model/original_sd_configs/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ from pathlib import Path
2
+ from typing import Dict
3
+
4
+ CURRENT_DIR = Path(__file__).parent.absolute()
5
+
6
+
7
+ def get_config_files() -> Dict[str, Path]:
8
+ """
9
+ - `v1`: Config file for Stable Diffusion v1
10
+ - `v2`: Config file for Stable Diffusion v2
11
+ - `xl`: Config file for Stable Diffusion XL
12
+ - `xl_refiner`: Config file for Stable Diffusion XL Refiner
13
+ """
14
+ return {
15
+ "v1": CURRENT_DIR / "v1-inference.yaml",
16
+ "v2": CURRENT_DIR / "v2-inference-v.yaml",
17
+ "xl": CURRENT_DIR / "sd_xl_base.yaml",
18
+ "xl_refiner": CURRENT_DIR / "sd_xl_refiner.yaml",
19
+ }
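Possible usage sketch: pick a config path by key when loading a single-file checkpoint.

```python
from iopaint.model.original_sd_configs import get_config_files

config_files = get_config_files()
print(config_files["v1"])   # .../original_sd_configs/v1-inference.yaml
```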
iopaint/model/power_paint/__init__.py ADDED
File without changes
iopaint/plugins/__init__.py ADDED
@@ -0,0 +1,74 @@
1
+ from typing import Dict
2
+
3
+ from loguru import logger
4
+
5
+ from .anime_seg import AnimeSeg
6
+ from .gfpgan_plugin import GFPGANPlugin
7
+ from .interactive_seg import InteractiveSeg
8
+ from .realesrgan import RealESRGANUpscaler
9
+ from .remove_bg import RemoveBG
10
+ from .restoreformer import RestoreFormerPlugin
11
+ from ..schema import InteractiveSegModel, Device, RealESRGANModel
12
+
13
+
14
+ def build_plugins(
15
+ enable_interactive_seg: bool,
16
+ interactive_seg_model: InteractiveSegModel,
17
+ interactive_seg_device: Device,
18
+ enable_remove_bg: bool,
19
+ remove_bg_model: str,
20
+ enable_anime_seg: bool,
21
+ enable_realesrgan: bool,
22
+ realesrgan_device: Device,
23
+ realesrgan_model: RealESRGANModel,
24
+ enable_gfpgan: bool,
25
+ gfpgan_device: Device,
26
+ enable_restoreformer: bool,
27
+ restoreformer_device: Device,
28
+ no_half: bool,
29
+ ) -> Dict:
30
+ plugins = {}
31
+ if enable_interactive_seg:
32
+ logger.info(f"Initialize {InteractiveSeg.name} plugin")
33
+ plugins[InteractiveSeg.name] = InteractiveSeg(
34
+ interactive_seg_model, interactive_seg_device
35
+ )
36
+
37
+ if enable_remove_bg:
38
+ logger.info(f"Initialize {RemoveBG.name} plugin")
39
+ plugins[RemoveBG.name] = RemoveBG(remove_bg_model)
40
+
41
+ if enable_anime_seg:
42
+ logger.info(f"Initialize {AnimeSeg.name} plugin")
43
+ plugins[AnimeSeg.name] = AnimeSeg()
44
+
45
+ if enable_realesrgan:
46
+ logger.info(
47
+ f"Initialize {RealESRGANUpscaler.name} plugin: {realesrgan_model}, {realesrgan_device}"
48
+ )
49
+ plugins[RealESRGANUpscaler.name] = RealESRGANUpscaler(
50
+ realesrgan_model,
51
+ realesrgan_device,
52
+ no_half=no_half,
53
+ )
54
+
55
+ if enable_gfpgan:
56
+ logger.info(f"Initialize {GFPGANPlugin.name} plugin")
57
+ if enable_realesrgan:
58
+ logger.info("Use realesrgan as GFPGAN background upscaler")
59
+ else:
60
+ logger.info(
61
+ "GFPGAN has no background upscaler; use --enable-realesrgan to enable one"
62
+ )
63
+ plugins[GFPGANPlugin.name] = GFPGANPlugin(
64
+ gfpgan_device,
65
+ upscaler=plugins.get(RealESRGANUpscaler.name, None),
66
+ )
67
+
68
+ if enable_restoreformer:
69
+ logger.info(f"Initialize {RestoreFormerPlugin.name} plugin")
70
+ plugins[RestoreFormerPlugin.name] = RestoreFormerPlugin(
71
+ restoreformer_device,
72
+ upscaler=plugins.get(RealESRGANUpscaler.name, None),
73
+ )
74
+ return plugins
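An illustrative call, not the project's actual CLI wiring; the enum members and model name below are assumptions about `iopaint.schema` and may differ:

```python
from iopaint.schema import InteractiveSegModel, Device, RealESRGANModel

plugins = build_plugins(
    enable_interactive_seg=True,
    interactive_seg_model=InteractiveSegModel.vit_b,         # assumed enum member
    interactive_seg_device=Device.cpu,
    enable_remove_bg=False,
    remove_bg_model="briaai/RMBG-1.4",                       # assumed model id
    enable_anime_seg=False,
    enable_realesrgan=False,
    realesrgan_device=Device.cpu,
    realesrgan_model=RealESRGANModel.realesr_general_x4v3,   # assumed enum member
    enable_gfpgan=False,
    gfpgan_device=Device.cpu,
    enable_restoreformer=False,
    restoreformer_device=Device.cpu,
    no_half=False,
)
print(list(plugins))   # e.g. ['InteractiveSeg']
```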
iopaint/plugins/anime_seg.py ADDED
@@ -0,0 +1,462 @@
1
+ import cv2
2
+ import torch
3
+ import torch.nn as nn
4
+ import torch.nn.functional as F
5
+ import numpy as np
6
+ from PIL import Image
7
+
8
+ from iopaint.helper import load_model
9
+ from iopaint.plugins.base_plugin import BasePlugin
10
+ from iopaint.schema import RunPluginRequest
11
+
12
+
13
+ class REBNCONV(nn.Module):
14
+ def __init__(self, in_ch=3, out_ch=3, dirate=1, stride=1):
15
+ super(REBNCONV, self).__init__()
16
+
17
+ self.conv_s1 = nn.Conv2d(
18
+ in_ch, out_ch, 3, padding=1 * dirate, dilation=1 * dirate, stride=stride
19
+ )
20
+ self.bn_s1 = nn.BatchNorm2d(out_ch)
21
+ self.relu_s1 = nn.ReLU(inplace=True)
22
+
23
+ def forward(self, x):
24
+ hx = x
25
+ xout = self.relu_s1(self.bn_s1(self.conv_s1(hx)))
26
+
27
+ return xout
28
+
29
+
30
+ ## upsample tensor 'src' to have the same spatial size as tensor 'tar'
31
+ def _upsample_like(src, tar):
32
+ src = F.interpolate(src, size=tar.shape[2:], mode="bilinear", align_corners=False)
33
+
34
+ return src
35
+
36
+
37
+ ### RSU-7 ###
38
+ class RSU7(nn.Module):
39
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3, img_size=512):
40
+ super(RSU7, self).__init__()
41
+
42
+ self.in_ch = in_ch
43
+ self.mid_ch = mid_ch
44
+ self.out_ch = out_ch
45
+
46
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1) ## 1 -> 1/2
47
+
48
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
49
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
50
+
51
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
52
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
53
+
54
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
55
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
56
+
57
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
58
+ self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
59
+
60
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
61
+ self.pool5 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
62
+
63
+ self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=1)
64
+
65
+ self.rebnconv7 = REBNCONV(mid_ch, mid_ch, dirate=2)
66
+
67
+ self.rebnconv6d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
68
+ self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
69
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
70
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
71
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
72
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
73
+
74
+ def forward(self, x):
75
+ b, c, h, w = x.shape
76
+
77
+ hx = x
78
+ hxin = self.rebnconvin(hx)
79
+
80
+ hx1 = self.rebnconv1(hxin)
81
+ hx = self.pool1(hx1)
82
+
83
+ hx2 = self.rebnconv2(hx)
84
+ hx = self.pool2(hx2)
85
+
86
+ hx3 = self.rebnconv3(hx)
87
+ hx = self.pool3(hx3)
88
+
89
+ hx4 = self.rebnconv4(hx)
90
+ hx = self.pool4(hx4)
91
+
92
+ hx5 = self.rebnconv5(hx)
93
+ hx = self.pool5(hx5)
94
+
95
+ hx6 = self.rebnconv6(hx)
96
+
97
+ hx7 = self.rebnconv7(hx6)
98
+
99
+ hx6d = self.rebnconv6d(torch.cat((hx7, hx6), 1))
100
+ hx6dup = _upsample_like(hx6d, hx5)
101
+
102
+ hx5d = self.rebnconv5d(torch.cat((hx6dup, hx5), 1))
103
+ hx5dup = _upsample_like(hx5d, hx4)
104
+
105
+ hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
106
+ hx4dup = _upsample_like(hx4d, hx3)
107
+
108
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
109
+ hx3dup = _upsample_like(hx3d, hx2)
110
+
111
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
112
+ hx2dup = _upsample_like(hx2d, hx1)
113
+
114
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
115
+
116
+ return hx1d + hxin
117
+
118
+
119
+ ### RSU-6 ###
120
+ class RSU6(nn.Module):
121
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
122
+ super(RSU6, self).__init__()
123
+
124
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
125
+
126
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
127
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
128
+
129
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
130
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
131
+
132
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
133
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
134
+
135
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
136
+ self.pool4 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
137
+
138
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=1)
139
+
140
+ self.rebnconv6 = REBNCONV(mid_ch, mid_ch, dirate=2)
141
+
142
+ self.rebnconv5d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
143
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
144
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
145
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
146
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
147
+
148
+ def forward(self, x):
149
+ hx = x
150
+
151
+ hxin = self.rebnconvin(hx)
152
+
153
+ hx1 = self.rebnconv1(hxin)
154
+ hx = self.pool1(hx1)
155
+
156
+ hx2 = self.rebnconv2(hx)
157
+ hx = self.pool2(hx2)
158
+
159
+ hx3 = self.rebnconv3(hx)
160
+ hx = self.pool3(hx3)
161
+
162
+ hx4 = self.rebnconv4(hx)
163
+ hx = self.pool4(hx4)
164
+
165
+ hx5 = self.rebnconv5(hx)
166
+
167
+ hx6 = self.rebnconv6(hx5)
168
+
169
+ hx5d = self.rebnconv5d(torch.cat((hx6, hx5), 1))
170
+ hx5dup = _upsample_like(hx5d, hx4)
171
+
172
+ hx4d = self.rebnconv4d(torch.cat((hx5dup, hx4), 1))
173
+ hx4dup = _upsample_like(hx4d, hx3)
174
+
175
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
176
+ hx3dup = _upsample_like(hx3d, hx2)
177
+
178
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
179
+ hx2dup = _upsample_like(hx2d, hx1)
180
+
181
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
182
+
183
+ return hx1d + hxin
184
+
185
+
186
+ ### RSU-5 ###
187
+ class RSU5(nn.Module):
188
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
189
+ super(RSU5, self).__init__()
190
+
191
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
192
+
193
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
194
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
195
+
196
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
197
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
198
+
199
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
200
+ self.pool3 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
201
+
202
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=1)
203
+
204
+ self.rebnconv5 = REBNCONV(mid_ch, mid_ch, dirate=2)
205
+
206
+ self.rebnconv4d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
207
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
208
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
209
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
210
+
211
+ def forward(self, x):
212
+ hx = x
213
+
214
+ hxin = self.rebnconvin(hx)
215
+
216
+ hx1 = self.rebnconv1(hxin)
217
+ hx = self.pool1(hx1)
218
+
219
+ hx2 = self.rebnconv2(hx)
220
+ hx = self.pool2(hx2)
221
+
222
+ hx3 = self.rebnconv3(hx)
223
+ hx = self.pool3(hx3)
224
+
225
+ hx4 = self.rebnconv4(hx)
226
+
227
+ hx5 = self.rebnconv5(hx4)
228
+
229
+ hx4d = self.rebnconv4d(torch.cat((hx5, hx4), 1))
230
+ hx4dup = _upsample_like(hx4d, hx3)
231
+
232
+ hx3d = self.rebnconv3d(torch.cat((hx4dup, hx3), 1))
233
+ hx3dup = _upsample_like(hx3d, hx2)
234
+
235
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
236
+ hx2dup = _upsample_like(hx2d, hx1)
237
+
238
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
239
+
240
+ return hx1d + hxin
241
+
242
+
243
+ ### RSU-4 ###
244
+ class RSU4(nn.Module):
245
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
246
+ super(RSU4, self).__init__()
247
+
248
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
249
+
250
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
251
+ self.pool1 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
252
+
253
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=1)
254
+ self.pool2 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
255
+
256
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=1)
257
+
258
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=2)
259
+
260
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
261
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=1)
262
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
263
+
264
+ def forward(self, x):
265
+ hx = x
266
+
267
+ hxin = self.rebnconvin(hx)
268
+
269
+ hx1 = self.rebnconv1(hxin)
270
+ hx = self.pool1(hx1)
271
+
272
+ hx2 = self.rebnconv2(hx)
273
+ hx = self.pool2(hx2)
274
+
275
+ hx3 = self.rebnconv3(hx)
276
+
277
+ hx4 = self.rebnconv4(hx3)
278
+
279
+ hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
280
+ hx3dup = _upsample_like(hx3d, hx2)
281
+
282
+ hx2d = self.rebnconv2d(torch.cat((hx3dup, hx2), 1))
283
+ hx2dup = _upsample_like(hx2d, hx1)
284
+
285
+ hx1d = self.rebnconv1d(torch.cat((hx2dup, hx1), 1))
286
+
287
+ return hx1d + hxin
288
+
289
+
290
+ ### RSU-4F ###
291
+ class RSU4F(nn.Module):
292
+ def __init__(self, in_ch=3, mid_ch=12, out_ch=3):
293
+ super(RSU4F, self).__init__()
294
+
295
+ self.rebnconvin = REBNCONV(in_ch, out_ch, dirate=1)
296
+
297
+ self.rebnconv1 = REBNCONV(out_ch, mid_ch, dirate=1)
298
+ self.rebnconv2 = REBNCONV(mid_ch, mid_ch, dirate=2)
299
+ self.rebnconv3 = REBNCONV(mid_ch, mid_ch, dirate=4)
300
+
301
+ self.rebnconv4 = REBNCONV(mid_ch, mid_ch, dirate=8)
302
+
303
+ self.rebnconv3d = REBNCONV(mid_ch * 2, mid_ch, dirate=4)
304
+ self.rebnconv2d = REBNCONV(mid_ch * 2, mid_ch, dirate=2)
305
+ self.rebnconv1d = REBNCONV(mid_ch * 2, out_ch, dirate=1)
306
+
307
+ def forward(self, x):
308
+ hx = x
309
+
310
+ hxin = self.rebnconvin(hx)
311
+
312
+ hx1 = self.rebnconv1(hxin)
313
+ hx2 = self.rebnconv2(hx1)
314
+ hx3 = self.rebnconv3(hx2)
315
+
316
+ hx4 = self.rebnconv4(hx3)
317
+
318
+ hx3d = self.rebnconv3d(torch.cat((hx4, hx3), 1))
319
+ hx2d = self.rebnconv2d(torch.cat((hx3d, hx2), 1))
320
+ hx1d = self.rebnconv1d(torch.cat((hx2d, hx1), 1))
321
+
322
+ return hx1d + hxin
323
+
324
+
325
+ class ISNetDIS(nn.Module):
326
+ def __init__(self, in_ch=3, out_ch=1):
327
+ super(ISNetDIS, self).__init__()
328
+
329
+ self.conv_in = nn.Conv2d(in_ch, 64, 3, stride=2, padding=1)
330
+ self.pool_in = nn.MaxPool2d(2, stride=2, ceil_mode=True)
331
+
332
+ self.stage1 = RSU7(64, 32, 64)
333
+ self.pool12 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
334
+
335
+ self.stage2 = RSU6(64, 32, 128)
336
+ self.pool23 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
337
+
338
+ self.stage3 = RSU5(128, 64, 256)
339
+ self.pool34 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
340
+
341
+ self.stage4 = RSU4(256, 128, 512)
342
+ self.pool45 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
343
+
344
+ self.stage5 = RSU4F(512, 256, 512)
345
+ self.pool56 = nn.MaxPool2d(2, stride=2, ceil_mode=True)
346
+
347
+ self.stage6 = RSU4F(512, 256, 512)
348
+
349
+ # decoder
350
+ self.stage5d = RSU4F(1024, 256, 512)
351
+ self.stage4d = RSU4(1024, 128, 256)
352
+ self.stage3d = RSU5(512, 64, 128)
353
+ self.stage2d = RSU6(256, 32, 64)
354
+ self.stage1d = RSU7(128, 16, 64)
355
+
356
+ self.side1 = nn.Conv2d(64, out_ch, 3, padding=1)
357
+
358
+ def forward(self, x):
359
+ hx = x
360
+
361
+ hxin = self.conv_in(hx)
362
+ hx = self.pool_in(hxin)
363
+
364
+ # stage 1
365
+ hx1 = self.stage1(hxin)
366
+ hx = self.pool12(hx1)
367
+
368
+ # stage 2
369
+ hx2 = self.stage2(hx)
370
+ hx = self.pool23(hx2)
371
+
372
+ # stage 3
373
+ hx3 = self.stage3(hx)
374
+ hx = self.pool34(hx3)
375
+
376
+ # stage 4
377
+ hx4 = self.stage4(hx)
378
+ hx = self.pool45(hx4)
379
+
380
+ # stage 5
381
+ hx5 = self.stage5(hx)
382
+ hx = self.pool56(hx5)
383
+
384
+ # stage 6
385
+ hx6 = self.stage6(hx)
386
+ hx6up = _upsample_like(hx6, hx5)
387
+
388
+ # -------------------- decoder --------------------
389
+ hx5d = self.stage5d(torch.cat((hx6up, hx5), 1))
390
+ hx5dup = _upsample_like(hx5d, hx4)
391
+
392
+ hx4d = self.stage4d(torch.cat((hx5dup, hx4), 1))
393
+ hx4dup = _upsample_like(hx4d, hx3)
394
+
395
+ hx3d = self.stage3d(torch.cat((hx4dup, hx3), 1))
396
+ hx3dup = _upsample_like(hx3d, hx2)
397
+
398
+ hx2d = self.stage2d(torch.cat((hx3dup, hx2), 1))
399
+ hx2dup = _upsample_like(hx2d, hx1)
400
+
401
+ hx1d = self.stage1d(torch.cat((hx2dup, hx1), 1))
402
+
403
+ # side output
404
+ d1 = self.side1(hx1d)
405
+ d1 = _upsample_like(d1, x)
406
+ return d1.sigmoid()
407
+
408
+
409
+ # 从小到大
410
+ ANIME_SEG_MODELS = {
411
+ "url": "https://github.com/Sanster/models/releases/download/isnetis/isnetis.pth",
412
+ "md5": "5f25479076b73074730ab8de9e8f2051",
413
+ }
414
+
415
+
416
+ class AnimeSeg(BasePlugin):
417
+ # Model from: https://github.com/SkyTNT/anime-segmentation
418
+ name = "AnimeSeg"
419
+ support_gen_image = True
420
+ support_gen_mask = True
421
+
422
+ def __init__(self):
423
+ super().__init__()
424
+ self.model = load_model(
425
+ ISNetDIS(),
426
+ ANIME_SEG_MODELS["url"],
427
+ "cpu",
428
+ ANIME_SEG_MODELS["md5"],
429
+ )
430
+
431
+ def gen_image(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
432
+ mask = self.forward(rgb_np_img)
433
+ mask = Image.fromarray(mask, mode="L")
434
+ h0, w0 = rgb_np_img.shape[0], rgb_np_img.shape[1]
435
+ empty = Image.new("RGBA", (w0, h0), 0)
436
+ img = Image.fromarray(rgb_np_img)
437
+ cutout = Image.composite(img, empty, mask)
438
+ return np.asarray(cutout)
439
+
440
+ def gen_mask(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
441
+ return self.forward(rgb_np_img)
442
+
443
+ @torch.inference_mode()
444
+ def forward(self, rgb_np_img):
445
+ s = 1024
446
+
447
+ h0, w0 = h, w = rgb_np_img.shape[0], rgb_np_img.shape[1]
448
+ if h > w:
449
+ h, w = s, int(s * w / h)
450
+ else:
451
+ h, w = int(s * h / w), s
452
+ ph, pw = s - h, s - w
453
+ tmpImg = np.zeros([s, s, 3], dtype=np.float32)
454
+ tmpImg[ph // 2 : ph // 2 + h, pw // 2 : pw // 2 + w] = (
455
+ cv2.resize(rgb_np_img, (w, h)) / 255
456
+ )
457
+ tmpImg = tmpImg.transpose((2, 0, 1))
458
+ tmpImg = torch.from_numpy(tmpImg).unsqueeze(0).type(torch.FloatTensor)
459
+ mask = self.model(tmpImg)
460
+ mask = mask[0, :, ph // 2 : ph // 2 + h, pw // 2 : pw // 2 + w]
461
+ mask = cv2.resize(mask.cpu().numpy().transpose((1, 2, 0)), (w0, h0))
462
+ return (mask * 255).astype("uint8")
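Rough usage sketch: calling the plugin's `forward` directly on an RGB numpy image. Constructing `AnimeSeg()` downloads the checkpoint on first use; the file names below are placeholders.

```python
import cv2
from iopaint.plugins.anime_seg import AnimeSeg

anime_seg = AnimeSeg()
bgr = cv2.imread("anime_girl.png")             # placeholder input path
rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
mask = anime_seg.forward(rgb)                  # uint8 [H, W], 255 = foreground
cv2.imwrite("anime_girl_mask.png", mask)
```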
iopaint/plugins/base_plugin.py ADDED
@@ -0,0 +1,30 @@
1
+ from loguru import logger
2
+ import numpy as np
3
+
4
+ from iopaint.schema import RunPluginRequest
5
+
6
+
7
+ class BasePlugin:
8
+ name: str
9
+ support_gen_image: bool = False
10
+ support_gen_mask: bool = False
11
+
12
+ def __init__(self):
13
+ err_msg = self.check_dep()
14
+ if err_msg:
15
+ logger.error(err_msg)
16
+ exit(-1)
17
+
18
+ def gen_image(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
19
+ # return RGBA np image or BGR np image
20
+ ...
21
+
22
+ def gen_mask(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
23
+ # return GRAY or BGR np image, 255 means foreground, 0 means background
24
+ ...
25
+
26
+ def check_dep(self):
27
+ ...
28
+
29
+ def switch_model(self, new_model_name: str):
30
+ ...
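A minimal illustrative subclass, only to show which hooks a plugin implements; it is not part of the project:

```python
import numpy as np
from iopaint.plugins.base_plugin import BasePlugin
from iopaint.schema import RunPluginRequest

class InvertPlugin(BasePlugin):
    name = "Invert"
    support_gen_image = True

    def gen_image(self, rgb_np_img, req: RunPluginRequest) -> np.ndarray:
        # invert the colors and return as BGR, per the gen_image contract above
        return (255 - rgb_np_img)[:, :, ::-1]
```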
iopaint/plugins/segment_anything/__init__.py ADDED
@@ -0,0 +1,14 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from .build_sam import (
8
+ build_sam,
9
+ build_sam_vit_h,
10
+ build_sam_vit_l,
11
+ build_sam_vit_b,
12
+ sam_model_registry,
13
+ )
14
+ from .predictor import SamPredictor
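These re-exports enable the standard segment-anything flow; a sketch, with a placeholder checkpoint path:

```python
from iopaint.plugins.segment_anything import sam_model_registry, SamPredictor

sam = sam_model_registry["vit_b"](checkpoint="sam_vit_b_01ec64.pth")  # placeholder path
predictor = SamPredictor(sam)
# predictor.set_image(rgb_np_img)            # HWC uint8 RGB image
# masks, scores, logits = predictor.predict(
#     point_coords=..., point_labels=..., multimask_output=True
# )
```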
iopaint/plugins/segment_anything/modeling/__init__.py ADDED
@@ -0,0 +1,11 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ from .sam import Sam
8
+ from .image_encoder import ImageEncoderViT
9
+ from .mask_decoder import MaskDecoder
10
+ from .prompt_encoder import PromptEncoder
11
+ from .transformer import TwoWayTransformer
iopaint/plugins/segment_anything/utils/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+
4
+ # This source code is licensed under the license found in the
5
+ # LICENSE file in the root directory of this source tree.
iopaint/tests/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *_result.png
2
+ result/
iopaint/tests/__init__.py ADDED
File without changes
model/__init__.py ADDED
File without changes
utils/__init__.py ADDED
File without changes