aka7774 committed
Commit 91b7cdf
1 Parent(s): f7d1502

Upload 11 files

Files changed (11)
  1. LICENSE +21 -0
  2. animagineXLV3_v30.safetensors +3 -0
  3. app.py +21 -0
  4. fn.py +195 -0
  5. fp12/__init__.py +8 -0
  6. fp12/convert.py +0 -0
  7. fp12/nn.py +89 -0
  8. install.bat +56 -0
  9. main.py +40 -0
  10. requirements.txt +7 -0
  11. venv.sh +7 -0
LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2024 hnmr293
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
animagineXLV3_v30.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1449e5b0b9de87b0f414c5f29cb11ce3b3dc61fa2b320e784c9441720bf7b766
+ size 6938218610
app.py ADDED
@@ -0,0 +1,21 @@
+ import fn
+ import gradio as gr
+
+ with gr.Blocks() as demo:
+     prompt = gr.Textbox(label='prompt')
+     negative_prompt = gr.Textbox(label='negative_prompt')
+     model = gr.Textbox(label='model')
+     guidance_scale = gr.Textbox(value=5.0, label='guidance_scale')
+     steps = gr.Textbox(value=20, label='steps')
+     seed = gr.Textbox(value=-1, label='seed')
+     run = gr.Button()
+     dst_image = gr.Image(label="Result", interactive=False)
+
+     run.click(
+         fn=fn.run,
+         inputs=[prompt, negative_prompt, model, guidance_scale, steps, seed],
+         outputs=[dst_image],
+     )
+
+ if __name__ == '__main__':
+     demo.launch()
fn.py ADDED
@@ -0,0 +1,195 @@
+ import os
+ import io
+ import base64
+ from PIL import Image
+ import contextlib
+ import torch
+ from diffusers import DiffusionPipeline, StableDiffusionXLPipeline
+
+ from fp12 import Linear, Conv2d
+
+ pipe = None
+
+ PATH_TO_MODEL = "./animagineXLV3_v30.safetensors"
+ USE_FP12 = True
+ FP12_ONLY_ATTN = True
+ FP12_APPLY_LINEAR = False
+ FP12_APPLY_CONV = False
+
+
+ # ==============================================================================
+ # Model loading
+ # ==============================================================================
+
+ def free_memory():
+     import gc
+     gc.collect()
+     if torch.cuda.is_available():
+         torch.cuda.empty_cache()
+
+ def to_fp12(module: torch.nn.Module):
+     # Swap direct children for their fp12 equivalents, as enabled by the flags above.
+     target_modules = []
+
+     if FP12_APPLY_LINEAR:
+         target_modules.append((torch.nn.Linear, Linear))
+
+     if FP12_APPLY_CONV:
+         target_modules.append((torch.nn.Conv2d, Conv2d))
+
+     for name, mod in list(module.named_children()):
+         for orig_class, fp12_class in target_modules:
+             if isinstance(mod, orig_class):
+                 try:
+                     new_mod = fp12_class(mod)
+                 except Exception as e:
+                     print(f' -> failed: {name} {str(e)}')
+                     continue
+
+                 delattr(module, name)
+                 del mod
+
+                 setattr(module, name, new_mod)
+                 break
+
+
+ def load_model_cpu(path: str):
+     pipe = StableDiffusionXLPipeline.from_single_file(
+         path,
+         torch_dtype=torch.float16,
+         safety_checker=None,
+     )
+     return pipe
+
+ def replace_fp12(pipe: DiffusionPipeline):
+     for name, mod in pipe.unet.named_modules():
+         if FP12_ONLY_ATTN and 'attn' not in name:
+             continue
+         print('[fp12] REPLACE', name)
+         to_fp12(mod)
+     return pipe
+
+
+ @contextlib.contextmanager
+ def cuda_profiler(device: str):
+     cuda_start = torch.cuda.Event(enable_timing=True)
+     cuda_end = torch.cuda.Event(enable_timing=True)
+
+     obj = {}
+
+     torch.cuda.synchronize()
+     torch.cuda.reset_peak_memory_stats(device)
+     cuda_start.record()
+
+     try:
+         yield obj
+     finally:
+         # Record stats even if the profiled block raises.
+         cuda_end.record()
+         torch.cuda.synchronize()
+         obj['time'] = cuda_start.elapsed_time(cuda_end)
+         obj['memory'] = torch.cuda.max_memory_allocated(device)
+
+ # ==============================================================================
+ # Generation
+ # ==============================================================================
+
+ def generate(pipe: DiffusionPipeline, prompt: str, negative_prompt: str, seed: int, device: str, use_amp: bool = False, guidance_scale = None, steps = None):
+     import torch.amp
+
+     context = (
+         torch.amp.autocast_mode.autocast if use_amp
+         else contextlib.nullcontext
+     )
+
+     with torch.no_grad(), context(device):
+         rng = torch.Generator(device=device)
+         if 0 <= seed:
+             rng = rng.manual_seed(seed)
+
+         latents, *_ = pipe(
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=1024,
+             height=1024,
+             num_inference_steps=steps,
+             guidance_scale=guidance_scale,
+             num_images_per_prompt=1,
+             generator=rng,
+             device=device,
+             return_dict=False,
+             output_type='latent',
+         )
+
+     return latents
+
+ def save_image(pipe, latents):
+     with torch.no_grad():
+         images = pipe.vae.decode(latents / pipe.vae.config.scaling_factor, return_dict=False)[0]
+         images = pipe.image_processor.postprocess(images, output_type='pil')
+
+     # num_images_per_prompt=1, so a single image comes back
+     return images[0]
+
+ def load_model(model = None, device = None):
+     global pipe
+
+     model = model or PATH_TO_MODEL
+     device = device or 'cuda:0'
+
+     pipe = load_model_cpu(model)
+
+     if USE_FP12:
+         pipe = replace_fp12(pipe)
+
+     free_memory()
+     with cuda_profiler(device) as prof:
+         pipe.unet = pipe.unet.to(device)
+     print('LOAD VRAM', prof['memory'])
+     print('LOAD TIME', prof['time'])
+
+     pipe.text_encoder = pipe.text_encoder.to(device)
+     pipe.text_encoder_2 = pipe.text_encoder_2.to(device)
+
+     if torch.cuda.is_available():
+         torch.cuda.synchronize(device)
+
+ def run(prompt = None, negative_prompt = None, model = None, guidance_scale = None, steps = None, seed = None, device: str = None, use_amp: bool = False):
+     global pipe
+
+     if not pipe:
+         load_model(model)
+
+     _prompt = "masterpiece, best quality, 1girl, portrait"
+     _negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry, artist name"
+
+     prompt = prompt or _prompt
+     negative_prompt = negative_prompt or _negative_prompt
+     guidance_scale = float(guidance_scale) if guidance_scale else 5.0
+     steps = int(steps) if steps else 20
+     seed = int(seed) if seed else -1
+     device = device or 'cuda:0'
+
+     free_memory()
+     with cuda_profiler(device) as prof:
+         latents = generate(pipe, prompt, negative_prompt, seed, device, use_amp, guidance_scale, steps)
+     print('UNET VRAM', prof['memory'])
+     print('UNET TIME', prof['time'])
+
+     #pipe.unet = pipe.unet.to('cpu')
+     #pipe.text_encoder = pipe.text_encoder.to('cpu')
+     #pipe.text_encoder_2 = pipe.text_encoder_2.to('cpu')
+
+     free_memory()
+     pipe.vae = pipe.vae.to(device)
+     pipe.vae.enable_slicing()
+     return save_image(pipe, latents)
+
+ def pil_to_webp(img):
+     buffer = io.BytesIO()
+     img.save(buffer, 'webp')
+
+     return buffer.getvalue()
+
+ def bin_to_base64(bin):
+     return base64.b64encode(bin).decode('ascii')
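
Taken together, fn.py lazily loads the checkpoint on the first request, optionally (per the FP12_* flags) swaps the UNet's attention Linear/Conv2d layers for fp12 versions, and prints time/VRAM for the load and the UNet pass. A minimal sketch of driving it without the Gradio UI — the keyword names follow run()'s signature above; saving to disk is illustrative, not something run() does itself:

    import fn

    image = fn.run(
        prompt='masterpiece, best quality, 1girl, portrait',
        steps=20,
        seed=42,
    )
    image.save('out.png')  # run() returns a PIL image via save_image()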
fp12/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .convert import FP12_MAX, FP12_MIN
+ from .convert import to_fp12, fp12_to_fp16
+ from .nn import Linear, Conv2d
+
+ __all__ = [
+     'convert',
+     'nn',
+ ]
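
The package re-exports the converters and the drop-in layers. A hedged round-trip sketch, based only on how nn.py uses these names — to_fp12 packs a tensor into an (exp, frac) pair and fp12_to_fp16 reconstructs a flat fp16 tensor; convert.py itself is not rendered in this diff, so the exact packing is an assumption:

    import torch
    from fp12 import to_fp12, fp12_to_fp16, FP12_MAX

    x = torch.randn(4, 4, dtype=torch.float16).clamp(-FP12_MAX, FP12_MAX)
    exp, frac = to_fp12(x)                        # packed fp12 representation (assumed)
    y = fp12_to_fp16(exp, frac).reshape(x.shape)  # decode flat, restore shape
    print((x - y).abs().max())                    # small quantization error expected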
fp12/convert.py ADDED
The diff for this file is too large to render. See raw diff
 
fp12/nn.py ADDED
@@ -0,0 +1,89 @@
+ from typing import Optional
+ import torch
+ import torch.nn.functional as F
+
+ from fp12 import to_fp12, fp12_to_fp16, FP12_MAX
+
+
+ def get_param(data: torch.Tensor):
+     if FP12_MAX <= data.abs().max():
+         print('[WARN] max(abs(data)) >= FP12_MAX')
+
+     exp, frac = to_fp12(data)
+
+     exp.requires_grad_(False)
+     frac.requires_grad_(False)
+
+     exp = torch.nn.Parameter(exp, requires_grad=False)
+     frac = torch.nn.Parameter(frac, requires_grad=False)
+
+     return exp, frac
+
+
+ class Linear(torch.nn.Module):
+     def __init__(self, base: torch.nn.Linear) -> None:
+         super().__init__()
+         self.weight = get_param(base.weight)
+         self.weight_shape = base.weight.shape
+         if base.bias is not None:
+             self.bias = get_param(base.bias)
+             self.bias_shape = base.bias.shape
+         else:
+             self.bias = None
+             self.bias_shape = None
+         self.to(base.weight.device)
+
+     def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+         weight = fp12_to_fp16(*self.weight).reshape(self.weight_shape)
+         bias = fp12_to_fp16(*self.bias).reshape(self.bias_shape) if self.bias else None
+         return F.linear(x, weight, bias)
+
+     def _apply(self, fn, recurse=True):
+         super()._apply(fn, recurse)
+         self.weight = [fn(p) for p in self.weight]
+         if self.bias:
+             self.bias = [fn(p) for p in self.bias]
+         return self
+
+
+ class Conv2d(torch.nn.Module):
+     def __init__(self, base: torch.nn.Conv2d):
+         super().__init__()
+         self.weight = get_param(base.weight)
+         self.weight_shape = base.weight.shape
+         if base.bias is not None:
+             self.bias = get_param(base.bias)
+             self.bias_shape = base.bias.shape
+         else:
+             self.bias = None
+             self.bias_shape = None
+
+         self.padding_mode = base.padding_mode
+         self._reversed_padding_repeated_twice = base._reversed_padding_repeated_twice
+         self.stride = base.stride
+         self.dilation = base.dilation
+         self.groups = base.groups
+         self.padding = base.padding
+
+         self.to(base.weight.device)
+
+     def _conv_forward(self, input: torch.Tensor, weight: torch.Tensor, bias: Optional[torch.Tensor]):
+         if self.padding_mode != 'zeros':
+             return F.conv2d(F.pad(input, self._reversed_padding_repeated_twice, mode=self.padding_mode),
+                             weight, bias, self.stride,
+                             (0, 0), self.dilation, self.groups)
+         return F.conv2d(input, weight, bias, self.stride,
+                         self.padding, self.dilation, self.groups)
+
+     def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
+         weight = fp12_to_fp16(*self.weight).reshape(self.weight_shape)
+         bias = fp12_to_fp16(*self.bias).reshape(self.bias_shape) if self.bias else None
+         return self._conv_forward(x, weight, bias)
+
+     def _apply(self, fn, recurse=True):
+         super()._apply(fn, recurse)
+         self.weight = [fn(p) for p in self.weight]
+         if self.bias:
+             self.bias = [fn(p) for p in self.bias]
+         return self
+
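
Both wrappers hold the packed (exp, frac) pair as non-trainable Parameters inside a plain tuple (so they bypass normal module registration), decode to fp16 on every forward, and override _apply so .to()/.cuda() still move the packed tensors. A small sketch comparing a wrapped layer with its source, under the same assumptions about convert.py as above:

    import torch
    from fp12 import Linear

    base = torch.nn.Linear(64, 64).half().cuda()
    quant = Linear(base)

    x = torch.randn(1, 64, dtype=torch.float16, device='cuda')
    with torch.no_grad():
        print((base(x) - quant(x)).abs().max())  # bounded by the fp12 precision loss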
install.bat ADDED
@@ -0,0 +1,56 @@
+ @echo off
+
+ rem -------------------------------------------
+ rem NOT guaranteed to work on Windows
+
+ set REPOS=https://huggingface.co/spaces/aka7774/sdfp12
+ set APPDIR=sdfp12
+ set VENV=venv
+
+ rem -------------------------------------------
+
+ set INSTALL_DIR=%~dp0
+ cd /d %INSTALL_DIR%
+
+ :git_clone
+ set DL_URL=%REPOS%
+ set DL_DST=%APPDIR%
+ git clone %DL_URL% %APPDIR%
+ if exist %DL_DST% goto install_python
+
+ set DL_URL=https://github.com/git-for-windows/git/releases/download/v2.41.0.windows.3/PortableGit-2.41.0.3-64-bit.7z.exe
+ set DL_DST=PortableGit-2.41.0.3-64-bit.7z.exe
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %DL_DST% -y
+ del %DL_DST%
+
+ set GIT=%INSTALL_DIR%PortableGit\bin\git
+ %GIT% clone %REPOS%
+
+ :install_python
+ set DL_URL=https://github.com/indygreg/python-build-standalone/releases/download/20240107/cpython-3.10.13+20240107-i686-pc-windows-msvc-shared-install_only.tar.gz
+ set DL_DST="%INSTALL_DIR%python.tar.gz"
+ curl -L -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ tar -xzf %DL_DST%
+
+ set PYTHON=%INSTALL_DIR%python\python.exe
+ set PATH=%PATH%;%INSTALL_DIR%python310\Scripts
+
+ :install_venv
+ cd %APPDIR%
+ %PYTHON% -m venv %VENV%
+ set PYTHON=%VENV%\Scripts\python.exe
+
+ :install_pip
+ set DL_URL=https://bootstrap.pypa.io/get-pip.py
+ set DL_DST=%INSTALL_DIR%get-pip.py
+ curl -o %DL_DST% %DL_URL%
+ if not exist %DL_DST% bitsadmin /transfer dl %DL_URL% %DL_DST%
+ %PYTHON% %DL_DST%
+
+ %PYTHON% -m pip install gradio
+ %PYTHON% -m pip install -r requirements.txt
+
+ pause
main.py ADDED
@@ -0,0 +1,40 @@
+ import os
+ import sys
+ import time
+ import signal
+ import psutil
+ import io
+
+ from fastapi import FastAPI, Request, status, Form, UploadFile
+ from fastapi.staticfiles import StaticFiles
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+ from fastapi.exceptions import RequestValidationError
+ from fastapi.responses import Response
+
+ import fn
+ import gradio as gr
+ from app import demo
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=['*'],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ gr.mount_gradio_app(app, demo, path="/gradio")
+
+ @app.post("/run")
+ async def api_run(prompt = None, negative_prompt = None, model = None, guidance_scale = None, steps = None, seed = None):
+     try:
+         dst_image = fn.run(prompt, negative_prompt, model, guidance_scale, steps, seed)
+         bin = fn.pil_to_webp(dst_image)
+
+         return Response(content=bin, media_type="image/webp")
+     except Exception as e:
+         return {"error": str(e)}
+
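
api_run declares its inputs as plain defaulted arguments, so FastAPI treats them as optional query parameters and fn.run() coerces the strings. A hedged client sketch — requests is not in requirements.txt, and the host/port assume a default uvicorn launch:

    import requests

    r = requests.post('http://127.0.0.1:8000/run',
                      params={'prompt': '1girl, portrait', 'steps': 20, 'seed': 42})
    if r.headers.get('content-type') == 'image/webp':
        open('out.webp', 'wb').write(r.content)
    else:
        print(r.json())  # {"error": "..."} on failure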
requirements.txt ADDED
@@ -0,0 +1,7 @@
+ fastapi
+ uvicorn
+ torch
+ diffusers
+ transformers
+ psutil
+ python-multipart
venv.sh ADDED
@@ -0,0 +1,7 @@
+ #!/usr/bin/bash
+
+ python3 -m venv venv
+ curl -kL https://bootstrap.pypa.io/get-pip.py | venv/bin/python
+
+ venv/bin/python -m pip install gradio
+ venv/bin/python -m pip install -r requirements.txt
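
With the venv in place, the space can presumably be served either as the bare Gradio UI or through the FastAPI wrapper; a hedged example (module path per main.py above, uvicorn defaults assumed):

    venv/bin/python app.py                                 # Gradio UI only
    venv/bin/uvicorn main:app --host 0.0.0.0 --port 8000   # FastAPI /run + /gradio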