aka7774 committed
Commit 91b7cdf
1 Parent(s): f7d1502

Upload 11 files

Files changed (11)
  1. LICENSE +21 -0
  2. animagineXLV3_v30.safetensors +3 -0
  3. app.py +21 -0
  4. fn.py +195 -0
  5. fp12/__init__.py +8 -0
  6. fp12/convert.py +0 -0
  7. fp12/nn.py +89 -0
  8. install.bat +56 -0
  9. main.py +40 -0
  10. requirements.txt +7 -0
  11. venv.sh +7 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 hnmr293
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
animagineXLV3_v30.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1449e5b0b9de87b0f414c5f29cb11ce3b3dc61fa2b320e784c9441720bf7b766
+ size 6938218610
app.py ADDED
@@ -0,0 +1,21 @@
+ import fn
+ import gradio as gr
+
+ with gr.Blocks() as demo:
+     prompt = gr.Textbox(label='prompt')
+     negative_prompt = gr.Textbox(label='negative_prompt')
+     model = gr.Textbox(label='model')
+     guidance_scale = gr.Textbox(value=5.0, label='guidance_scale')
+     steps = gr.Textbox(value=20, label='steps')
+     seed = gr.Textbox(value=-1, label='seed')
+     run = gr.Button()
+     dst_image = gr.Image(label="Result", interactive=False)
+
+     run.click(
+         fn=fn.run,
+         inputs=[prompt, negative_prompt, model, guidance_scale, steps, seed],
+         outputs=[dst_image],
+     )
+
+ if __name__ == '__main__':
+     demo.launch()
fn.py ADDED
@@ -0,0 +1,195 @@
+ import os
+ import io
+ import base64
+ from PIL import Image
+ import contextlib
+ import torch
+ from diffusers import DiffusionPipeline, StableDiffusionXLPipeline
+
+ from fp12 import Linear, Conv2d
+
+ pipe = None
+
+ PATH_TO_MODEL = "./animagineXLV3_v30.safetensors"
+ USE_FP12 = True
+ FP12_ONLY_ATTN = True
+ FP12_APPLY_LINEAR = False
+ FP12_APPLY_CONV = False
+
+
+ # ==============================================================================
+ # Model loading
+ # ==============================================================================
+
+ def free_memory():
+     import gc
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+ def to_fp12(module: torch.nn.Module):
+     # Swap direct children for their fp12 equivalents, as enabled by the flags above.
+     target_modules = []
+
+     if FP12_APPLY_LINEAR:
+         target_modules.append((torch.nn.Linear, Linear))
+
+     if FP12_APPLY_CONV:
+         target_modules.append((torch.nn.Conv2d, Conv2d))
+
+     for name, mod in list(module.named_children()):
+         for orig_class, fp12_class in target_modules:
+             if isinstance(mod, orig_class):
+                 try:
+                     new_mod = fp12_class(mod)
+                 except Exception as e:
+                     print(f' -> failed: {name} {str(e)}')
+                     continue
+
+                 delattr(module, name)
+                 del mod
+
+                 setattr(module, name, new_mod)
+                 break
+
+
+ def load_model_cpu(path: str):
+     pipe = StableDiffusionXLPipeline.from_single_file(
+         path,
+         torch_dtype=torch.float16,
+         safety_checker=None,
+     )
+     return pipe
+
+ def replace_fp12(pipe: DiffusionPipeline):
+     for name, mod in pipe.unet.named_modules():
+         if FP12_ONLY_ATTN and 'attn' not in name:
+             continue
+         print('[fp12] REPLACE', name)
+         to_fp12(mod)
+     return pipe
+
+
+ @contextlib.contextmanager
+ def cuda_profiler(device: str):
+     cuda_start = torch.cuda.Event(enable_timing=True)
+     cuda_end = torch.cuda.Event(enable_timing=True)
+
+     obj = {}
+
+     torch.cuda.synchronize()
+     torch.cuda.reset_peak_memory_stats(device)
+     cuda_start.record()
+
+     try:
+         yield obj
+     finally:
+         # Record stats even if the profiled block raises.
+         cuda_end.record()
+         torch.cuda.synchronize()
+         obj['time'] = cuda_start.elapsed_time(cuda_end)
+         obj['memory'] = torch.cuda.max_memory_allocated(device)
+
+ # ==============================================================================
+ # Generation
+ # ==============================================================================
+
+ def generate(pipe: DiffusionPipeline, prompt: str, negative_prompt: str, seed: int, device: str, use_amp: bool = False, guidance_scale = None, steps = None):
+     import torch.amp
+
+     context = (
+         torch.amp.autocast_mode.autocast if use_amp
+         else contextlib.nullcontext
+     )
+
+     with torch.no_grad(), context(device):
+         rng = torch.Generator(device=device)
+         if 0 <= seed:
+             rng = rng.manual_seed(seed)
+
+         latents, *_ = pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=1024,
+             height=1024,
+             num_inference_steps=steps,
+             guidance_scale=guidance_scale,
+             num_images_per_prompt=1,
+             generator=rng,
+             device=device,
+             return_dict=False,
+             output_type='latent',
+         )
+
+     return latents
+
+ def save_image(pipe, latents):
+     with torch.no_grad():
+         images = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
+         images = pipe.image_processor.postprocess(images, output_type='pil')
+
+     # num_images_per_prompt=1, so a single image comes back
+     return images[0]
+
+ def load_model(model = None, device = None):
+     global pipe
+
+     model = model or PATH_TO_MODEL
+     device = device or 'cuda:0'
+
+     pipe = load_model_cpu(model)
+
+     if USE_FP12:
+         pipe = replace_fp12(pipe)
+
+     free_memory()
+     with cuda_profiler(device) as prof:
+         pipe.unet = pipe.unet.to(device)
+     print('LOAD VRAM', prof['memory'])
+     print('LOAD TIME', prof['time'])
+
+     pipe.text_encoder = pipe.text_encoder.to(device)
+     pipe.text_encoder_2 = pipe.text_encoder_2.to(device)
+
+     if torch.cuda.is_available():
+         torch.cuda.synchronize(device)
+
+ def run(prompt = None, negative_prompt = None, model = None, guidance_scale = None, steps = None, seed = None, device: str = None, use_amp: bool = False):
+     global pipe
+
+     if not pipe:
+         load_model(model)
+
+     _prompt = "masterpiece, best quality, 1girl, portrait"
+     _negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name"
+
+     prompt = prompt or _prompt
+     negative_prompt = negative_prompt or _negative_prompt
+     guidance_scale = float(guidance_scale) if guidance_scale else 5.0
+     steps = int(steps) if steps else 20
+     seed = int(seed) if seed else -1
+     device = device or 'cuda:0'
+
+     free_memory()
+     with cuda_profiler(device) as prof:
+         latents = generate(pipe, prompt, negative_prompt, seed, device, use_amp, guidance_scale, steps)
+     print('UNET VRAM', prof['memory'])
+     print('UNET TIME', prof['time'])
+
+     #pipe.unet = pipe.unet.to('cpu')
+     #pipe.text_encoder = pipe.text_encoder.to('cpu')
+     #pipe.text_encoder_2 = pipe.text_encoder_2.to('cpu')
+
+     free_memory()
+     pipe.vae = pipe.vae.to(device)
+     pipe.vae.enable_slicing()
+     return save_image(pipe, latents)
+
+ def pil_to_webp(img):
+     buffer = io.BytesIO()
+     img.save(buffer, 'webp')
+
+     return buffer.getvalue()
+
+ def bin_to_base64(bin):
+     return base64.b64encode(bin).decode('ascii')
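
Taken together, fn.py lazily loads the checkpoint on the first request, optionally (per the FP12_* flags) swaps the UNet's attention Linear/Conv2d layers for fp12 versions, and prints time/VRAM for the load and the UNet pass. A minimal sketch of driving it without the Gradio UI — the keyword names follow run()'s signature above; saving to disk is illustrative, not something run() does itself:

    import fn

    image = fn.run(
        prompt='masterpiece, best quality, 1girl, portrait',
        steps=20,
        seed=42,
    )
    image.save('out.png')  # run() returns a PIL image via save_image()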
fp12/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .convert import FP12_MAX, FP12_MIN
+ from .convert import to_fp12, fp12_to_fp16
+ from .nn import Linear, Conv2d
+
+ __all__ = [
+     'convert',
+     'nn',
+ ]
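
The package re-exports the converters and the drop-in layers. A hedged round-trip sketch, based only on how nn.py uses these names — to_fp12 packs a tensor into an (exp, frac) pair and fp12_to_fp16 reconstructs a flat fp16 tensor; convert.py itself is not rendered in this diff, so the exact packing is an assumption:

    import torch
    from fp12 import to_fp12, fp12_to_fp16, FP12_MAX

    x = torch.randn(4, 4, dtype=torch.float16).clamp(-FP12_MAX, FP12_MAX)
    exp, frac = to_fp12(x)                        # packed fp12 representation (assumed)
    y = fp12_to_fp16(exp, frac).reshape(x.shape)  # decode flat, restore shape
    print((x - y).abs().max())                    # small quantization error expected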
fp12/convert.py ADDED
The diff for this file is too large to render. See raw diff
 
fp12/nn.py ADDED
@@ -0,0 +1,89 @@
+ from typing import Optional
+ import torch
+ import torch.nn.functional as F
+
+ from fp12 import to_fp12, fp12_to_fp16, FP12_MAX
+
+
+ def get_param(data: torch.Tensor):
+     if FP12_MAX <= data.abs().max():
+         print('[WARN] max(abs(data)) >= FP12_MAX')
+
+     exp, frac = to_fp12(data)
+
+     exp.requires_grad_(False)
+     frac.requires_grad_(False)
+
+     exp = torch.nn.Parameter(exp, requires_grad=False)
+     frac = torch.nn.Parameter(frac, requires_grad=False)
+
+     return exp, frac
+
+
+ class Linear(torch.nn.Module):
+     def __init__(self, base: torch.nn.Linear) -> None:
+         super().__init__()
+         self.weight = get_param(base.weight)
+         self.weight_shape = base.weight.shape
+         if base.bias is not None:
+             self.bias = get_param(base.bias)
+             self.bias_shape = base.bias.shape
+         else:
+             self.bias = None
+             self.bias_shape = None
+         self.to(base.weight.device)
+
+     def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+         weight = fp12_to_fp16(*self.weight).reshape(self.weight_shape)
+         bias = fp12_to_fp16(*self.bias).reshape(self.bias_shape) if self.bias else None
+         return F.linear(x, weight, bias)
+
+     def _apply(self, fn, recurse=True):
+         super()._apply(fn, recurse)
+         self.weight = [fn(p) for p in self.weight]
+         if self.bias:
+             self.bias = [fn(p) for p in self.bias]
+         return self
+
+
+ class Conv2d(torch.nn.Module):
+     def __init__(self, base: torch.nn.Conv2d):
+         super().__init__()
+         self.weight = get_param(base.weight)
+         self.weight_shape = base.weight.shape
+         if base.bias is not None:
+             self.bias = get_param(base.bias)
+             self.bias_shape = base.bias.shape
+         else:
+             self.bias = None
+             self.bias_shape = None
+
+         self.padding_mode = base.padding_mode
+         self._reversed_padding_repeated_twice = base._reversed_padding_repeated_twice
+         self.stride = base.stride
+         self.dilation = base.dilation
+         self.groups = base.groups
+         self.padding = base.padding
+
+         self.to(base.weight.device)
+
+     def _conv_forward(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor]):
+         if self.padding_mode != 'zeros':
+             return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
+                             weight, bias, self.stride,
+                             (0, 0), self.dilation, self.groups)
+         return F.conv2d(input, weight, bias, self.stride,
+                         self.padding, self.dilation, self.groups)
+
+     def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+         weight = fp12_to_fp16(*self.weight).reshape(self.weight_shape)
+         bias = fp12_to_fp16(*self.bias).reshape(self.bias_shape) if self.bias else None
+         return self._conv_forward(x, weight, bias)
+
+     def _apply(self, fn, recurse=True):
+         super()._apply(fn, recurse)
+         self.weight = [fn(p) for p in self.weight]
+         if self.bias:
+             self.bias = [fn(p) for p in self.bias]
+         return self
+
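
Both wrappers hold the packed (exp, frac) pair as non-trainable Parameters inside a plain tuple (so they bypass normal module registration), decode to fp16 on every forward, and override _apply so .to()/.cuda() still move the packed tensors. A small sketch comparing a wrapped layer with its source, under the same assumptions about convert.py as above:

    import torch
    from fp12 import Linear

    base = torch.nn.Linear(64, 64).half().cuda()
    quant = Linear(base)

    x = torch.randn(1, 64, dtype=torch.float16, device='cuda')
    with torch.no_grad():
        print((base(x) - quant(x)).abs().max())  # bounded by the fp12 precision loss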
install.bat ADDED
@@ -0,0 +1,56 @@
+ @echo off
+
+ rem -------------------------------------------
+ rem NOT guaranteed to work on Windows
+
+ set REPOS=https://huggingface.co/spaces/aka7774/sdfp12
+ set APPDIR=sdfp12
+ set VENV=venv
+
+ rem -------------------------------------------
+
+ set INSTALL_DIR=%~dp0
+ cd /d %INSTALL_DIR%
+
+ :git_clone
+ set DL_URL=%REPOS%
+ set DL_DST=%APPDIR%
+ git clone %DL_URL% %APPDIR%
+ if exist %DL_DST% goto install_python
+
+ set DL_URL=https://github.com/git-for-windows/git/releases/download/v2.41.0.windows.3/PortableGit-2.41.0.3-64-bit.7z.exe
+ set DL_DST=PortableGit-2.41.0.3-64-bit.7z.exe
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %DL_DST% -y
+ del %DL_DST%
+
+ set GIT=%INSTALL_DIR%PortableGit\bin\git
+ %GIT% clone %REPOS%
+
+ :install_python
+ set DL_URL=https://github.com/indygreg/python-build-standalone/releases/download/20240107/cpython-3.10.13+20240107-i686-pc-windows-msvc-shared-install_only.tar.gz
+ set DL_DST="%INSTALL_DIR%python.tar.gz"
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ tar -xzf %DL_DST%
+
+ set PYTHON=%INSTALL_DIR%python\python.exe
+ set PATH=%PATH%;%INSTALL_DIR%python310\Scripts
+
+ :install_venv
+ cd %APPDIR%
+ %PYTHON% -m venv %VENV%
+ set PYTHON=%VENV%\Scripts\python.exe
+
+ :install_pip
+ set DL_URL=https://bootstrap.pypa.io/get-pip.py
+ set DL_DST=%INSTALL_DIR%get-pip.py
+ curl -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %PYTHON% %DL_DST%
+
+ %PYTHON% -m pip install gradio
+ %PYTHON% -m pip install -r requirements.txt
+
+ pause
main.py ADDED
@@ -0,0 +1,40 @@
+ import os
+ import sys
+ import time
+ import signal
+ import psutil
+ import io
+
+ from fastapi import FastAPI, Request, status, Form, UploadFile
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+ from fastapi.exceptions import RequestValidationError
+ from fastapi.responses import Response
+
+ import fn
+ import gradio as gr
+ from app import demo
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=['*'],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ gr.mount_gradio_app(app, demo, path="/gradio")
+
+ @app.post("/run")
+ async def api_run(prompt = None, negative_prompt = None, model = None, guidance_scale = None, steps = None, seed = None):
+     try:
+         dst_image = fn.run(prompt, negative_prompt, model, guidance_scale, steps, seed)
+         bin = fn.pil_to_webp(dst_image)
+
+         return Response(content=bin, media_type="image/webp")
+     except Exception as e:
+         return {"error": str(e)}
+
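
api_run declares its inputs as plain defaulted arguments, so FastAPI treats them as optional query parameters and fn.run() coerces the strings. A hedged client sketch — requests is not in requirements.txt, and the host/port assume a default uvicorn launch:

    import requests

    r = requests.post('http://127.0.0.1:8000/run',
                      params={'prompt': '1girl, portrait', 'steps': 20, 'seed': 42})
    if r.headers.get('content-type') == 'image/webp':
        open('out.webp', 'wb').write(r.content)
    else:
        print(r.json())  # {"error": "..."} on failure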
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ torch
+ diffusers
+ transformers
+ psutil
+ python-multipart
venv.sh ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/bash
+
+ python3 -m venv venv
+ curl -kL https://bootstrap.pypa.io/get-pip.py | venv/bin/python
+
+ venv/bin/python -m pip install gradio
+ venv/bin/python -m pip install -r requirements.txt
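
With the venv in place, the space can presumably be served either as the bare Gradio UI or through the FastAPI wrapper; a hedged example (module path per main.py above, uvicorn defaults assumed):

    venv/bin/python app.py                                 # Gradio UI only
    venv/bin/uvicorn main:app --host 0.0.0.0 --port 8000   # FastAPI /run + /gradio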