Commit 3476c71 by radames (HF staff)
2 parents: 521d01d 466d3e0

Merge branch 'main' into space-txt2img
README.md CHANGED
@@ -6,6 +6,7 @@ colorTo: indigo
 sdk: docker
 pinned: false
 suggested_hardware: a10g-small
+disable_embedding: true
 ---
 
 # Real-Time Latent Consistency Model
@@ -28,8 +29,9 @@ python -m venv venv
 source venv/bin/activate
 pip3 install -r requirements.txt
 cd frontend && npm install && npm run build && cd ..
-python run.py --reload --pipeline controlnet
-```
+# fastest pipeline
+python run.py --reload --pipeline img2imgSD21Turbo
+```
 
 # Pipelines
 You can build your own pipeline following examples here [here](pipelines),
app.py CHANGED
@@ -12,6 +12,10 @@ print("TORCH_DTYPE:", torch_dtype)
 print("PIPELINE:", args.pipeline)
 print("SAFETY_CHECKER:", args.safety_checker)
 print("TORCH_COMPILE:", args.torch_compile)
+print("SFast:", args.sfast)
+print("USE_TAESD:", args.taesd)
+print("COMPEL:", args.compel)
+print("DEBUG:", args.debug)
 
 
 app = FastAPI()
app_init.py CHANGED
@@ -15,6 +15,9 @@ from types import SimpleNamespace
 from util import pil_to_frame, bytes_to_pil, is_firefox
 import asyncio
 import os
+import time
+
+THROTTLE = 1.0 / 120
 
 
 def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
@@ -26,7 +29,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
        allow_headers=["*"],
    )
 
-    @app.websocket("/ws")
+    @app.websocket("/api/ws")
    async def websocket_endpoint(websocket: WebSocket):
        await websocket.accept()
        user_count = user_data.get_user_count()
@@ -60,7 +63,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
        while True:
            data = await websocket.receive_json()
            if data["status"] != "next_frame":
-                asyncio.sleep(1.0 / 24)
+                asyncio.sleep(THROTTLE)
                continue
 
            params = await websocket.receive_json()
@@ -85,18 +88,18 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
                )
                await websocket.close()
                return
-            await asyncio.sleep(1.0 / 24)
+            await asyncio.sleep(THROTTLE)
 
        except Exception as e:
            logging.error(f"Error: {e}")
            traceback.print_exc()
 
-    @app.get("/queue_size")
+    @app.get("/api/queue")
    async def get_queue_size():
        queue_size = user_data.get_user_count()
        return JSONResponse({"queue_size": queue_size})
 
-    @app.get("/stream/{user_id}")
+    @app.get("/api/stream/{user_id}")
    async def stream(user_id: uuid.UUID, request: Request):
        try:
            print(f"New stream request: {user_id}")
@@ -105,14 +108,15 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
            websocket = user_data.get_websocket(user_id)
            last_params = SimpleNamespace()
            while True:
+                last_time = time.time()
                params = await user_data.get_latest_data(user_id)
                if not vars(params) or params.__dict__ == last_params.__dict__:
                    await websocket.send_json({"status": "send_frame"})
-                    await asyncio.sleep(0.1)
                    continue
 
                last_params = params
                image = pipeline.predict(params)
+
                if image is None:
                    await websocket.send_json({"status": "send_frame"})
                    continue
@@ -122,6 +126,8 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
                if not is_firefox(request.headers["user-agent"]):
                    yield frame
                await websocket.send_json({"status": "send_frame"})
+                if args.debug:
+                    print(f"Time taken: {time.time() - last_time}")
 
            return StreamingResponse(
                generate(),
@@ -134,7 +140,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
            return HTTPException(status_code=404, detail="User not found")
 
    # route to setup frontend
-    @app.get("/settings")
+    @app.get("/api/settings")
    async def settings():
        info_schema = pipeline.Info.schema()
        info = pipeline.Info()
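
The throttling changes above replace the hard-coded `1.0 / 24` sleeps with a shared `THROTTLE` constant and move all routes under `/api/*`. A minimal sketch of the intended receive-loop pattern, assuming FastAPI and a deliberately simplified handler body (everything inside the loop besides the sleep is illustrative):

```python
import asyncio
import time

from fastapi import FastAPI, WebSocket

app = FastAPI()
THROTTLE = 1.0 / 120  # cap polling at roughly 120 iterations per second


@app.websocket("/api/ws")
async def websocket_endpoint(websocket: WebSocket):
    await websocket.accept()
    while True:
        data = await websocket.receive_json()
        if data["status"] != "next_frame":
            # back off briefly instead of spinning on the socket
            await asyncio.sleep(THROTTLE)
            continue
        last_time = time.time()
        # ... generate and stream a frame here ...
        await websocket.send_json({"status": "send_frame"})
        print(f"Time taken: {time.time() - last_time}")
```

Note that `asyncio.sleep` only pauses the loop when it is awaited, so the `await` is what makes the throttle take effect.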
build-run.sh CHANGED
@@ -13,4 +13,4 @@ if [ -z ${PIPELINE+x} ]; then
     PIPELINE="controlnet"
 fi
 echo -e "\033[1;32m\npipeline: $PIPELINE \033[0m"
-python3 run.py --port 7860 --host 0.0.0.0 --pipeline $PIPELINE
+python3 run.py --port 7860 --host 0.0.0.0 --pipeline $PIPELINE --sfast
config.py CHANGED
@@ -12,17 +12,20 @@ class Args(NamedTuple):
    timeout: float
    safety_checker: bool
    torch_compile: bool
-    use_taesd: bool
+    taesd: bool
    pipeline: str
    ssl_certfile: str
    ssl_keyfile: str
+    sfast: bool
+    compel: bool = False
+    debug: bool = False
 
 
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None) == "True"
 TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None) == "True"
-USE_TAESD = os.environ.get("USE_TAESD", None) == "True"
+USE_TAESD = os.environ.get("USE_TAESD", "True") == "True"
 default_host = os.getenv("HOST", "0.0.0.0")
 default_port = int(os.getenv("PORT", "7860"))
 default_mode = os.getenv("MODE", "default")
@@ -36,7 +39,7 @@ parser.add_argument(
 )
 parser.add_argument(
    "--max-queue-size",
-    "--max_queue_size",
+    dest="max_queue_size",
    type=int,
    default=MAX_QUEUE_SIZE,
    help="Max Queue Size",
@@ -44,23 +47,28 @@ parser.add_argument(
 parser.add_argument("--timeout", type=float, default=TIMEOUT, help="Timeout")
 parser.add_argument(
    "--safety-checker",
-    "--safety_checker",
-    type=bool,
+    dest="safety_checker",
+    action="store_true",
    default=SAFETY_CHECKER,
    help="Safety Checker",
 )
 parser.add_argument(
    "--torch-compile",
-    "--torch_compile",
-    type=bool,
+    dest="torch_compile",
+    action="store_true",
    default=TORCH_COMPILE,
    help="Torch Compile",
 )
 parser.add_argument(
-    "--use-taesd",
-    "--use_taesd",
-    type=bool,
-    default=USE_TAESD,
+    "--taesd",
+    dest="taesd",
+    action="store_true",
+    help="Use Tiny Autoencoder",
+)
+parser.add_argument(
+    "--no-taesd",
+    dest="taesd",
+    action="store_false",
    help="Use Tiny Autoencoder",
 )
 parser.add_argument(
@@ -71,17 +79,36 @@ parser.add_argument(
 )
 parser.add_argument(
    "--ssl-certfile",
-    "--ssl_certfile",
+    dest="ssl_certfile",
    type=str,
    default=None,
    help="SSL certfile",
 )
 parser.add_argument(
    "--ssl-keyfile",
-    "--ssl_keyfile",
+    dest="ssl_keyfile",
    type=str,
    default=None,
    help="SSL keyfile",
 )
+parser.add_argument(
+    "--debug",
+    action="store_true",
+    default=False,
+    help="Debug",
+)
+parser.add_argument(
+    "--compel",
+    action="store_true",
+    default=False,
+    help="Compel",
+)
+parser.add_argument(
+    "--sfast",
+    action="store_true",
+    default=False,
+    help="Enable Stable Fast",
+)
+parser.set_defaults(taesd=USE_TAESD)
 
 args = Args(**vars(parser.parse_args()))
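
The `--taesd` / `--no-taesd` pair added above is the standard argparse idiom for an on/off switch sharing one destination, with `parser.set_defaults` supplying the fallback value. A self-contained sketch (the hard-coded `True` default stands in for the `USE_TAESD` environment value used in config.py):

```python
import argparse

parser = argparse.ArgumentParser()
# Two flags write to the same destination: one turns the feature on,
# the other turns it off.
parser.add_argument("--taesd", dest="taesd", action="store_true",
                    help="Use Tiny Autoencoder")
parser.add_argument("--no-taesd", dest="taesd", action="store_false",
                    help="Disable Tiny Autoencoder")
# The default applies when neither flag is passed (USE_TAESD in config.py).
parser.set_defaults(taesd=True)

print(parser.parse_args([]).taesd)              # True
print(parser.parse_args(["--no-taesd"]).taesd)  # False
```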
frontend/src/lib/components/ImagePlayer.svelte CHANGED
@@ -26,7 +26,11 @@
 >
 <!-- svelte-ignore a11y-missing-attribute -->
 {#if isLCMRunning}
-  <img bind:this={imageEl} class="aspect-square w-full rounded-lg" src={'/stream/' + $streamId} />
+  <img
+    bind:this={imageEl}
+    class="aspect-square w-full rounded-lg"
+    src={'/api/stream/' + $streamId}
+  />
   <div class="absolute bottom-1 right-1">
     <Button
       on:click={takeSnapshot}
frontend/src/lib/components/VideoInput.svelte CHANGED
@@ -20,7 +20,7 @@
 let videoFrameCallbackId: number;
 
 // ajust the throttle time to your needs
-const THROTTLE_TIME = 1000 / 15;
+const THROTTLE = 1000 / 120;
 let selectedDevice: string = '';
 let videoIsReady = false;
 
@@ -41,7 +41,7 @@
 }
 let lastMillis = 0;
 async function onFrameChange(now: DOMHighResTimeStamp, metadata: VideoFrameCallbackMetadata) {
-  if (now - lastMillis < THROTTLE_TIME) {
+  if (now - lastMillis < THROTTLE) {
    videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
    return;
  }
frontend/src/lib/lcmLive.ts CHANGED
@@ -20,7 +20,7 @@ export const lcmLiveActions = {
 
  try {
    const websocketURL = `${window.location.protocol === "https:" ? "wss" : "ws"
-      }:${window.location.host}/ws`;
+      }:${window.location.host}/api/ws`;
 
    websocket = new WebSocket(websocketURL);
    websocket.onopen = () => {
frontend/src/routes/+page.svelte CHANGED
@@ -17,28 +17,35 @@
  let isImageMode: boolean = false;
  let maxQueueSize: number = 0;
  let currentQueueSize: number = 0;
+  let queueCheckerRunning: boolean = false;
+
  onMount(() => {
    getSettings();
  });
 
  async function getSettings() {
-    const settings = await fetch('/settings').then((r) => r.json());
+    const settings = await fetch('/api/settings').then((r) => r.json());
    pipelineParams = settings.input_params.properties;
    pipelineInfo = settings.info.properties;
    isImageMode = pipelineInfo.input_mode.default === PipelineMode.IMAGE;
    maxQueueSize = settings.max_queue_size;
    pageContent = settings.page_content;
    console.log(pipelineParams);
-    if (maxQueueSize > 0) {
+    toggleQueueChecker(true);
+  }
+  function toggleQueueChecker(start: boolean) {
+    queueCheckerRunning = start && maxQueueSize > 0;
+    if (start) {
      getQueueSize();
-      setInterval(() => {
-        getQueueSize();
-      }, 2000);
    }
  }
  async function getQueueSize() {
-    const data = await fetch('/queue_size').then((r) => r.json());
+    if (!queueCheckerRunning) {
+      return;
+    }
+    const data = await fetch('/api/queue').then((r) => r.json());
    currentQueueSize = data.queue_size;
+    setTimeout(getQueueSize, 10000);
  }
 
  function getSreamdata() {
@@ -61,11 +68,13 @@
    disabled = true;
    await lcmLiveActions.start(getSreamdata);
    disabled = false;
+    toggleQueueChecker(false);
  } else {
    if (isImageMode) {
      mediaStreamActions.stop();
    }
    lcmLiveActions.stop();
+    toggleQueueChecker(true);
  }
 }
 </script>
frontend/vite.config.ts CHANGED
@@ -5,8 +5,8 @@ export default defineConfig({
  plugins: [sveltekit()],
  server: {
    proxy: {
-      '^/settings|/queue_size|/stream': 'http://localhost:7860',
-      '/ws': {
+      '/api': 'http://localhost:7860',
+      '/api/ws': {
        target: 'ws://localhost:7860',
        ws: true
      }
pipelines/controlnet.py CHANGED
@@ -69,18 +69,18 @@ class Pipeline:
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
-            4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+            2, min=1, max=6, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
-            768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
+            512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
-            768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
+            512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
-            0.2,
+            0.0,
            min=0,
-            max=5,
+            max=2,
            step=0.001,
            title="Guidance Scale",
            field="range",
@@ -169,20 +169,29 @@ class Pipeline:
                safety_checker=None,
                controlnet=controlnet_canny,
            )
-        if args.use_taesd:
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
+
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
+
        self.canny_torch = SobelOperator(device=device)
        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)
 
-        # check if computer has less than 64GB of RAM using sys or os
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
-
        if args.torch_compile:
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
@@ -196,16 +205,21 @@ class Pipeline:
                image=[Image.new("RGB", (768, 768))],
                control_image=[Image.new("RGB", (768, 768))],
            )
-
-        self.compel_proc = Compel(
-            tokenizer=self.pipe.tokenizer,
-            text_encoder=self.pipe.text_encoder,
-            truncate_long_prompts=False,
-        )
+        if args.compel:
+            self.compel_proc = Compel(
+                tokenizer=self.pipe.tokenizer,
+                text_encoder=self.pipe.text_encoder,
+                truncate_long_prompts=False,
+            )
 
    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
-        prompt_embeds = self.compel_proc(params.prompt)
+        prompt_embeds = None
+        prompt = params.prompt
+        if hasattr(self, "compel_proc"):
+            prompt_embeds = self.compel_proc(params.prompt)
+            prompt = None
+
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
@@ -218,6 +232,7 @@ class Pipeline:
            image=params.image,
            control_image=control_image,
            prompt_embeds=prompt_embeds,
+            prompt=prompt,
            generator=generator,
            strength=strength,
            num_inference_steps=steps,
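
The `predict()` change above is the same optional-Compel pattern repeated in the other pipelines: embeddings are built only when `--compel` is enabled, and exactly one of `prompt` / `prompt_embeds` is non-None when the diffusers pipeline is called. A small helper expressing that logic (the name `resolve_prompt` is ours, not part of the diff):

```python
from typing import Any, Optional, Tuple


def resolve_prompt(prompt: str, compel_proc: Optional[Any]) -> Tuple[Optional[str], Optional[Any]]:
    """Return (prompt, prompt_embeds) with exactly one of the two set."""
    if compel_proc is not None:
        # Compel enabled: encode once and drop the raw string.
        return None, compel_proc(prompt)
    # Compel disabled: let the pipeline tokenize the raw string itself.
    return prompt, None


# usage inside predict(), mirroring the hasattr check in the diff:
# prompt, prompt_embeds = resolve_prompt(params.prompt, getattr(self, "compel_proc", None))
# results = self.pipe(prompt=prompt, prompt_embeds=prompt_embeds, ...)
```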
pipelines/controlnetLoraSD15.py CHANGED
@@ -81,7 +81,7 @@ class Pipeline:
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
-            4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+            1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
@@ -90,7 +90,7 @@ class Pipeline:
            768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
-            0.2,
+            1.0,
            min=0,
            max=2,
            step=0.001,
@@ -180,7 +180,7 @@ class Pipeline:
                    base_model_id,
                    controlnet=controlnet_canny,
                )
-            self.pipes[base_model_id] = pipe
+                self.pipes[base_model_id] = pipe
        else:
            for base_model_id in base_models.keys():
                pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
@@ -195,25 +195,23 @@ class Pipeline:
        for pipe in self.pipes.values():
            pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
            pipe.set_progress_bar_config(disable=True)
-            pipe.to(device=device, dtype=torch_dtype).to(device)
            if device.type != "mps":
                pipe.unet.to(memory_format=torch.channels_last)
 
-            if psutil.virtual_memory().total < 64 * 1024**3:
-                pipe.enable_attention_slicing()
-
-            if args.use_taesd:
+            if args.taesd:
                pipe.vae = AutoencoderTiny.from_pretrained(
                    taesd_model, torch_dtype=torch_dtype, use_safetensors=True
                ).to(device)
 
            # Load LCM LoRA
            pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
-            pipe.compel_proc = Compel(
-                tokenizer=pipe.tokenizer,
-                text_encoder=pipe.text_encoder,
-                truncate_long_prompts=False,
-            )
+            pipe.to(device=device, dtype=torch_dtype).to(device)
+            if args.compel:
+                self.compel_proc = Compel(
+                    tokenizer=pipe.tokenizer,
+                    text_encoder=pipe.text_encoder,
+                    truncate_long_prompts=False,
+                )
            if args.torch_compile:
                pipe.unet = torch.compile(
                    pipe.unet, mode="reduce-overhead", fullgraph=True
@@ -233,7 +231,12 @@ class Pipeline:
 
        activation_token = base_models[params.base_model_id]
        prompt = f"{activation_token} {params.prompt}"
-        prompt_embeds = pipe.compel_proc(prompt)
+        prompt_embeds = None
+        prompt = params.prompt
+        if hasattr(self, "compel_proc"):
+            prompt_embeds = self.compel_proc(prompt)
+            prompt = None
+
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
@@ -245,6 +248,7 @@ class Pipeline:
        results = pipe(
            image=params.image,
            control_image=control_image,
+            prompt=prompt,
            prompt_embeds=prompt_embeds,
            generator=generator,
            strength=strength,
pipelines/controlnetLoraSDXL.py CHANGED
@@ -80,7 +80,7 @@ class Pipeline:
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
-            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
+            1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
@@ -91,7 +91,7 @@ class Pipeline:
        guidance_scale: float = Field(
            1.0,
            min=0,
-            max=20,
+            max=2.0,
            step=0.001,
            title="Guidance Scale",
            field="range",
@@ -199,19 +199,31 @@ class Pipeline:
        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype).to(device)
+
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
+
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)
 
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
+        if args.compel:
+            self.pipe.compel_proc = Compel(
+                tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+                text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                requires_pooled=[False, True],
+            )
 
-        self.pipe.compel_proc = Compel(
-            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
-            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
-            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True],
-        )
-        if args.use_taesd:
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
@@ -232,9 +244,23 @@ class Pipeline:
    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
 
-        prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
-            [params.prompt, params.negative_prompt]
-        )
+        prompt = params.prompt
+        negative_prompt = params.negative_prompt
+        prompt_embeds = None
+        pooled_prompt_embeds = None
+        negative_prompt_embeds = None
+        negative_pooled_prompt_embeds = None
+        if hasattr(self.pipe, "compel_proc"):
+            _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
+                [params.prompt, params.negative_prompt]
+            )
+            prompt = None
+            negative_prompt = None
+            prompt_embeds = _prompt_embeds[0:1]
+            pooled_prompt_embeds = pooled_prompt_embeds[0:1]
+            negative_prompt_embeds = _prompt_embeds[1:2]
+            negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
+
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
@@ -246,10 +272,12 @@ class Pipeline:
        results = self.pipe(
            image=params.image,
            control_image=control_image,
-            prompt_embeds=prompt_embeds[0:1],
-            pooled_prompt_embeds=pooled_prompt_embeds[0:1],
-            negative_prompt_embeds=prompt_embeds[1:2],
-            negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            generator=generator,
            strength=strength,
            num_inference_steps=steps,
pipelines/{controlnelSD21Turbo.py → controlnetSDTurbo.py} RENAMED
@@ -100,7 +100,7 @@ class Pipeline:
            id="strength",
        )
        controlnet_scale: float = Field(
-            0.2,
+            0.325,
            min=0,
            max=1.0,
            step=0.001,
@@ -176,10 +176,23 @@ class Pipeline:
                safety_checker=None,
            )
 
-        if args.use_taesd:
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
+
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
+
        self.canny_torch = SobelOperator(device=device)
 
        self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
@@ -188,15 +201,16 @@ class Pipeline:
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)
 
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
+        if args.compel:
+            from compel import Compel
 
-        self.pipe.compel_proc = Compel(
-            tokenizer=self.pipe.tokenizer,
-            text_encoder=self.pipe.text_encoder,
-            truncate_long_prompts=True,
-        )
-        if args.use_taesd:
+            self.pipe.compel_proc = Compel(
+                tokenizer=self.pipe.tokenizer,
+                text_encoder=self.pipe.text_encoder,
+                truncate_long_prompts=True,
+            )
+
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
@@ -216,7 +230,13 @@ class Pipeline:
 
    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
-        prompt_embeds = self.pipe.compel_proc(params.prompt)
+        prompt = params.prompt
+        prompt_embeds = None
+        if hasattr(self.pipe, "compel_proc"):
+            prompt_embeds = self.pipe.compel_proc(
+                [params.prompt, params.negative_prompt]
+            )
+            prompt = None
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
@@ -224,10 +244,10 @@ class Pipeline:
        strength = params.strength
        if int(steps * strength) < 1:
            steps = math.ceil(1 / max(0.10, strength))
-        last_time = time.time()
        results = self.pipe(
            image=params.image,
            control_image=control_image,
+            prompt=prompt,
            prompt_embeds=prompt_embeds,
            generator=generator,
            strength=strength,
@@ -240,8 +260,6 @@ class Pipeline:
            control_guidance_start=params.controlnet_start,
            control_guidance_end=params.controlnet_end,
        )
-        print(f"Time taken: {time.time() - last_time}")
-
        nsfw_content_detected = (
            results.nsfw_content_detected[0]
            if "nsfw_content_detected" in results
pipelines/controlnetSDXLTurbo.py CHANGED
@@ -185,21 +185,32 @@ class Pipeline:
        )
        self.canny_torch = SobelOperator(device=device)
 
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
+
        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype).to(device)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)
 
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
+        if args.compel:
+            self.pipe.compel_proc = Compel(
+                tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
+                text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
+                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
+                requires_pooled=[False, True],
+            )
 
-        self.pipe.compel_proc = Compel(
-            tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
-            text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
-            returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
-            requires_pooled=[False, True],
-        )
-        if args.use_taesd:
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
@@ -220,9 +231,23 @@ class Pipeline:
    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
 
-        prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
-            [params.prompt, params.negative_prompt]
-        )
+        prompt = params.prompt
+        negative_prompt = params.negative_prompt
+        prompt_embeds = None
+        pooled_prompt_embeds = None
+        negative_prompt_embeds = None
+        negative_pooled_prompt_embeds = None
+        if hasattr(self.pipe, "compel_proc"):
+            _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
+                [params.prompt, params.negative_prompt]
+            )
+            prompt = None
+            negative_prompt = None
+            prompt_embeds = _prompt_embeds[0:1]
+            pooled_prompt_embeds = pooled_prompt_embeds[0:1]
+            negative_prompt_embeds = _prompt_embeds[1:2]
+            negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
+
        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
@@ -234,10 +259,12 @@ class Pipeline:
        results = self.pipe(
            image=params.image,
            control_image=control_image,
-            prompt_embeds=prompt_embeds[0:1],
-            pooled_prompt_embeds=pooled_prompt_embeds[0:1],
-            negative_prompt_embeds=prompt_embeds[1:2],
-            negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            prompt_embeds=prompt_embeds,
+            pooled_prompt_embeds=pooled_prompt_embeds,
+            negative_prompt_embeds=negative_prompt_embeds,
+            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            generator=generator,
            strength=strength,
            num_inference_steps=steps,
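
The `--sfast` branch repeated in every pipeline above is the same stable-fast compilation step. Factored out it looks roughly like this (the import path and `CompilationConfig` fields are taken from the diff; the `enable_stable_fast` wrapper name is ours, and the xformers/triton toggles assume those packages are installed):

```python
def enable_stable_fast(pipe):
    # Imported lazily so stable-fast stays an optional dependency,
    # pulled in only when the server is started with --sfast.
    from sfast.compilers.stable_diffusion_pipeline_compiler import (
        compile,
        CompilationConfig,
    )

    config = CompilationConfig.Default()
    config.enable_xformers = True    # memory-efficient attention
    config.enable_triton = True      # fused Triton kernels
    config.enable_cuda_graph = True  # capture CUDA graphs to cut launch overhead
    return compile(pipe, config=config)


# usage, as in the pipelines above:
# if args.sfast:
#     self.pipe = enable_stable_fast(self.pipe)
```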
pipelines/controlnetSegmindVegaRT.py ADDED
@@ -0,0 +1,303 @@
from diffusers import (
    StableDiffusionXLControlNetImg2ImgPipeline,
    ControlNetModel,
    AutoencoderKL,
    AutoencoderTiny,
    LCMScheduler,
)
from compel import Compel, ReturnedEmbeddingsType
import torch
from pipelines.utils.canny_gpu import SobelOperator

try:
    import intel_extension_for_pytorch as ipex  # type: ignore
except:
    pass

import psutil
from config import Args
from pydantic import BaseModel, Field
from PIL import Image
import math

controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
base_model = "segmind/Segmind-Vega"
lora_model = "segmind/Segmind-VegaRT"
taesd_model = "madebyollin/taesdxl"

default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
page_content = """
<h1 class="text-3xl font-bold">Real-Time SegmindVegaRT</h1>
<h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
<p class="text-sm">
    This demo showcases
    <a
    href="https://huggingface.co/segmind/Segmind-VegaRT"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Segmind-VegaRT</a>
    Image to Image pipeline using
    <a
    href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Diffusers</a
    > with a MJPEG stream server.
</p>
<p class="text-sm text-gray-500">
    Change the prompt to generate different images, accepts <a
    href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
    target="_blank"
    class="text-blue-500 underline hover:no-underline">Compel</a
    > syntax.
</p>
"""


class Pipeline:
    class Info(BaseModel):
        name: str = "controlnet+SegmindVegaRT"
        title: str = "SegmindVegaRT + Controlnet"
        description: str = "Generates an image from a text prompt"
        input_mode: str = "image"
        page_content: str = page_content

    class InputParams(BaseModel):
        prompt: str = Field(
            default_prompt,
            title="Prompt",
            field="textarea",
            id="prompt",
        )
        negative_prompt: str = Field(
            default_negative_prompt,
            title="Negative Prompt",
            field="textarea",
            id="negative_prompt",
            hide=True,
        )
        seed: int = Field(
            2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
        )
        steps: int = Field(
            2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
        )
        width: int = Field(
            1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
        )
        height: int = Field(
            1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
        )
        guidance_scale: float = Field(
            0.0,
            min=0,
            max=1,
            step=0.001,
            title="Guidance Scale",
            field="range",
            hide=True,
            id="guidance_scale",
        )
        strength: float = Field(
            0.5,
            min=0.25,
            max=1.0,
            step=0.001,
            title="Strength",
            field="range",
            hide=True,
            id="strength",
        )
        controlnet_scale: float = Field(
            0.5,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Scale",
            field="range",
            hide=True,
            id="controlnet_scale",
        )
        controlnet_start: float = Field(
            0.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet Start",
            field="range",
            hide=True,
            id="controlnet_start",
        )
        controlnet_end: float = Field(
            1.0,
            min=0,
            max=1.0,
            step=0.001,
            title="Controlnet End",
            field="range",
            hide=True,
            id="controlnet_end",
        )
        canny_low_threshold: float = Field(
            0.31,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny Low Threshold",
            field="range",
            hide=True,
            id="canny_low_threshold",
        )
        canny_high_threshold: float = Field(
            0.125,
            min=0,
            max=1.0,
            step=0.001,
            title="Canny High Threshold",
            field="range",
            hide=True,
            id="canny_high_threshold",
        )
        debug_canny: bool = Field(
            False,
            title="Debug Canny",
            field="checkbox",
            hide=True,
            id="debug_canny",
        )

    def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
        controlnet_canny = ControlNetModel.from_pretrained(
            controlnet_model,
            torch_dtype=torch_dtype,
        ).to(device)
        vae = AutoencoderKL.from_pretrained(
            "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
        )
        if args.safety_checker:
            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
                base_model,
                controlnet=controlnet_canny,
                vae=vae,
            )
        else:
            self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
                base_model,
                safety_checker=None,
                controlnet=controlnet_canny,
                vae=vae,
            )
        self.canny_torch = SobelOperator(device=device)

        self.pipe.load_lora_weights(lora_model)
        self.pipe.fuse_lora()
        self.pipe.scheduler = LCMScheduler.from_pretrained(
            base_model, subfolder="scheduler"
        )

        if args.sfast:
            from sfast.compilers.stable_diffusion_pipeline_compiler import (
                compile,
                CompilationConfig,
            )

            config = CompilationConfig.Default()
            config.enable_xformers = True
            config.enable_triton = True
            config.enable_cuda_graph = True
            self.pipe = compile(self.pipe, config=config)

        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype).to(device)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)

        if args.compel:
            self.pipe.compel_proc = Compel(
                tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
                text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
                returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
                requires_pooled=[False, True],
            )
        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)

        if args.torch_compile:
            self.pipe.unet = torch.compile(
                self.pipe.unet, mode="reduce-overhead", fullgraph=True
            )
            self.pipe.vae = torch.compile(
                self.pipe.vae, mode="reduce-overhead", fullgraph=True
            )
            self.pipe(
                prompt="warmup",
                image=[Image.new("RGB", (768, 768))],
                control_image=[Image.new("RGB", (768, 768))],
            )

    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)

        prompt = params.prompt
        negative_prompt = params.negative_prompt
        prompt_embeds = None
        pooled_prompt_embeds = None
        negative_prompt_embeds = None
        negative_pooled_prompt_embeds = None
        if hasattr(self.pipe, "compel_proc"):
            _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
                [params.prompt, params.negative_prompt]
            )
            prompt = None
            negative_prompt = None
            prompt_embeds = _prompt_embeds[0:1]
            pooled_prompt_embeds = pooled_prompt_embeds[0:1]
            negative_prompt_embeds = _prompt_embeds[1:2]
            negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]

        control_image = self.canny_torch(
            params.image, params.canny_low_threshold, params.canny_high_threshold
        )
        steps = params.steps
        strength = params.strength
        if int(steps * strength) < 1:
            steps = math.ceil(1 / max(0.10, strength))

        results = self.pipe(
            image=params.image,
            control_image=control_image,
            prompt=prompt,
            negative_prompt=negative_prompt,
            prompt_embeds=prompt_embeds,
            pooled_prompt_embeds=pooled_prompt_embeds,
            negative_prompt_embeds=negative_prompt_embeds,
            negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
            generator=generator,
            strength=strength,
            num_inference_steps=steps,
            guidance_scale=params.guidance_scale,
            width=params.width,
            height=params.height,
            output_type="pil",
            controlnet_conditioning_scale=params.controlnet_scale,
            control_guidance_start=params.controlnet_start,
            control_guidance_end=params.controlnet_end,
        )

        nsfw_content_detected = (
            results.nsfw_content_detected[0]
            if "nsfw_content_detected" in results
            else False
        )
        if nsfw_content_detected:
            return None
        result_image = results.images[0]
        if params.debug_canny:
            # paste control_image on top of result_image
            w0, h0 = (200, 200)
            control_image = control_image.resize((w0, h0))
            w1, h1 = result_image.size
            result_image.paste(control_image, (w1 - w0, h1 - h0))

        return result_image
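
One detail shared by the img2img pipelines in this commit: diffusers runs roughly `int(num_inference_steps * strength)` denoising steps in image-to-image mode, so the pipelines bump `steps` back up whenever that product would round down to zero. A small worked example of that guard:

```python
import math


def effective_steps(steps: int, strength: float) -> int:
    # Ensure at least one denoising step survives img2img strength scaling.
    if int(steps * strength) < 1:
        steps = math.ceil(1 / max(0.10, strength))
    return steps


print(effective_steps(1, 0.5))  # 2 -> int(2 * 0.5) = 1 step actually runs
print(effective_steps(2, 0.3))  # 4 -> int(4 * 0.3) = 1 step actually runs
print(effective_steps(4, 0.5))  # 4 -> already fine, unchanged
```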
pipelines/img2img.py CHANGED
@@ -102,20 +102,28 @@ class Pipeline:
                base_model,
                safety_checker=None,
            )
-        if args.use_taesd:
+        if args.taesd:
            self.pipe.vae = AutoencoderTiny.from_pretrained(
                taesd_model, torch_dtype=torch_dtype, use_safetensors=True
            ).to(device)
 
+        if args.sfast:
+            from sfast.compilers.stable_diffusion_pipeline_compiler import (
+                compile,
+                CompilationConfig,
+            )
+
+            config = CompilationConfig.Default()
+            config.enable_xformers = True
+            config.enable_triton = True
+            config.enable_cuda_graph = True
+            self.pipe = compile(self.pipe, config=config)
+
        self.pipe.set_progress_bar_config(disable=True)
        self.pipe.to(device=device, dtype=torch_dtype)
        if device.type != "mps":
            self.pipe.unet.to(memory_format=torch.channels_last)
 
-        # check if computer has less than 64GB of RAM using sys or os
-        if psutil.virtual_memory().total < 64 * 1024**3:
-            self.pipe.enable_attention_slicing()
-
        if args.torch_compile:
            print("Running torch compile")
            self.pipe.unet = torch.compile(
@@ -130,15 +138,20 @@ class Pipeline:
            image=[Image.new("RGB", (768, 768))],
        )
 
-        self.compel_proc = Compel(
-            tokenizer=self.pipe.tokenizer,
-            text_encoder=self.pipe.text_encoder,
-            truncate_long_prompts=False,
-        )
+        if args.compel:
+            self.compel_proc = Compel(
+                tokenizer=self.pipe.tokenizer,
+                text_encoder=self.pipe.text_encoder,
+                truncate_long_prompts=False,
+            )
 
    def predict(self, params: "Pipeline.InputParams") -> Image.Image:
        generator = torch.manual_seed(params.seed)
-        prompt_embeds = self.compel_proc(params.prompt)
+        prompt_embeds = None
+        prompt = params.prompt
+        if hasattr(self, "compel_proc"):
+            prompt_embeds = self.compel_proc(params.prompt)
+            prompt = None
 
        steps = params.steps
        strength = params.strength
@@ -147,6 +160,7 @@ class Pipeline:
 
        results = self.pipe(
            image=params.image,
+            prompt=prompt,
            prompt_embeds=prompt_embeds,
            generator=generator,
            strength=strength,
pipelines/img2imgSDTurbo.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from diffusers import (
2
+ AutoPipelineForImage2Image,
3
+ AutoencoderTiny,
4
+ )
5
+ import torch
6
+
7
+ try:
8
+ import intel_extension_for_pytorch as ipex # type: ignore
9
+ except:
10
+ pass
11
+
12
+ import psutil
13
+ from config import Args
14
+ from pydantic import BaseModel, Field
15
+ from PIL import Image
16
+ import math
17
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
18
+ compile,
19
+ CompilationConfig,
20
+ )
21
+
22
+ base_model = "stabilityai/sd-turbo"
23
+ taesd_model = "madebyollin/taesd"
24
+
25
+ default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
26
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
27
+ page_content = """
28
+ <h1 class="text-3xl font-bold">Real-Time SD-Turbo</h1>
29
+ <h3 class="text-xl font-bold">Image-to-Image</h3>
30
+ <p class="text-sm">
31
+ This demo showcases
32
+ <a
33
+ href="https://huggingface.co/stabilityai/sdxl-turbo"
34
+ target="_blank"
35
+ class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
36
+ Image to Image pipeline using
37
+ <a
38
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
39
+ target="_blank"
40
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
41
+ > with a MJPEG stream server.
42
+ </p>
43
+ <p class="text-sm text-gray-500">
44
+ Change the prompt to generate different images, accepts <a
45
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
46
+ target="_blank"
47
+ class="text-blue-500 underline hover:no-underline">Compel</a
48
+ > syntax.
49
+ </p>
50
+ """
51
+
52
+
53
+ class Pipeline:
54
+ class Info(BaseModel):
55
+ name: str = "img2img"
56
+ title: str = "Image-to-Image SDXL"
57
+ description: str = "Generates an image from a text prompt"
58
+ input_mode: str = "image"
59
+ page_content: str = page_content
60
+
61
+ class InputParams(BaseModel):
62
+ prompt: str = Field(
63
+ default_prompt,
64
+ title="Prompt",
65
+ field="textarea",
66
+ id="prompt",
67
+ )
68
+ negative_prompt: str = Field(
69
+ default_negative_prompt,
70
+ title="Negative Prompt",
71
+ field="textarea",
72
+ id="negative_prompt",
73
+ hide=True,
74
+ )
75
+ seed: int = Field(
76
+ 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
77
+ )
78
+ steps: int = Field(
79
+ 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
80
+ )
81
+ width: int = Field(
82
+ 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
83
+ )
84
+ height: int = Field(
85
+ 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
86
+ )
87
+ strength: float = Field(
88
+ 0.5,
89
+ min=0.25,
90
+ max=1.0,
91
+ step=0.001,
92
+ title="Strength",
93
+ field="range",
94
+ hide=True,
95
+ id="strength",
96
+ )
97
+
98
+ def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
99
+ if args.safety_checker:
100
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(base_model)
101
+ else:
102
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(
103
+ base_model,
104
+ safety_checker=None,
105
+ )
106
+ if args.taesd:
107
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
108
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
109
+ ).to(device)
110
+
111
+ if args.sfast:
112
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
113
+ compile,
114
+ CompilationConfig,
115
+ )
116
+
117
+ config = CompilationConfig.Default()
118
+ config.enable_xformers = True
119
+ config.enable_triton = True
120
+ config.enable_cuda_graph = True
121
+ self.pipe = compile(self.pipe, config=config)
122
+
123
+ self.pipe.set_progress_bar_config(disable=True)
124
+ self.pipe.to(device=device, dtype=torch_dtype)
125
+ if device.type != "mps":
126
+ self.pipe.unet.to(memory_format=torch.channels_last)
127
+
128
+ if args.torch_compile:
129
+ print("Running torch compile")
130
+ self.pipe.unet = torch.compile(
131
+ self.pipe.unet, mode="reduce-overhead", fullgraph=True
132
+ )
133
+ self.pipe.vae = torch.compile(
134
+ self.pipe.vae, mode="reduce-overhead", fullgraph=True
135
+ )
136
+
137
+ self.pipe(
138
+ prompt="warmup",
139
+ image=[Image.new("RGB", (768, 768))],
140
+ )
141
+ if args.compel:
142
+ from compel import Compel
143
+
144
+ self.pipe.compel_proc = Compel(
145
+ tokenizer=self.pipe.tokenizer,
146
+ text_encoder=self.pipe.text_encoder,
147
+ truncate_long_prompts=True,
148
+ )
149
+
150
+ def predict(self, params: "Pipeline.InputParams") -> Image.Image:
151
+ generator = torch.manual_seed(params.seed)
152
+ steps = params.steps
153
+ strength = params.strength
154
+ if int(steps * strength) < 1:
155
+ steps = math.ceil(1 / max(0.10, strength))
156
+
157
+ prompt = params.prompt
158
+ prompt_embeds = None
159
+ if hasattr(self.pipe, "compel_proc"):
160
+ prompt_embeds = self.pipe.compel_proc(
161
+ [params.prompt, params.negative_prompt]
162
+ )
163
+ prompt = None
164
+
165
+ results = self.pipe(
166
+ image=params.image,
167
+ prompt_embeds=prompt_embeds,
168
+ prompt=prompt,
169
+ negative_prompt=params.negative_prompt,
170
+ generator=generator,
171
+ strength=strength,
172
+ num_inference_steps=steps,
173
+ guidance_scale=1.1,
174
+ width=params.width,
175
+ height=params.height,
176
+ output_type="pil",
177
+ )
178
+
179
+ nsfw_content_detected = (
180
+ results.nsfw_content_detected[0]
181
+ if "nsfw_content_detected" in results
182
+ else False
183
+ )
184
+ if nsfw_content_detected:
185
+ return None
186
+ result_image = results.images[0]
187
+
188
+ return result_image
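Note on the pattern above: the new SD-Turbo pipeline only constructs `compel_proc` when the `--compel` flag is set and otherwise falls back to plain string prompts. A minimal sketch of the single-encoder Compel hand-off it relies on (model id, prompts, sizes, and the CUDA device are placeholders, not taken from this commit):

```python
# Sketch only: encode positive and negative prompts with Compel in one batch,
# then hand the sliced embeddings to a diffusers img2img pipeline.
import torch
from PIL import Image
from diffusers import AutoPipelineForImage2Image
from compel import Compel

pipe = AutoPipelineForImage2Image.from_pretrained(
    "stabilityai/sd-turbo", torch_dtype=torch.float16
).to("cuda")  # assumes a CUDA device

compel_proc = Compel(
    tokenizer=pipe.tokenizer,
    text_encoder=pipe.text_encoder,
    truncate_long_prompts=True,
)

embeds = compel_proc(["a photo of a lighthouse++", "blurry, low quality"])
result = pipe(
    image=Image.new("RGB", (512, 512)),   # stand-in for the streamed frame
    prompt_embeds=embeds[0:1],            # positive prompt
    negative_prompt_embeds=embeds[1:2],   # negative prompt
    strength=0.5,
    num_inference_steps=2,
    guidance_scale=1.1,
)
image = result.images[0]
```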
pipelines/img2imgSDXLTurbo.py CHANGED
@@ -73,18 +73,18 @@ class Pipeline:
73
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
74
  )
75
  steps: int = Field(
76
- 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
77
  )
78
  width: int = Field(
79
- 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
80
  )
81
  height: int = Field(
82
- 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
83
  )
84
  guidance_scale: float = Field(
85
- 0.2,
86
  min=0,
87
- max=20,
88
  step=0.001,
89
  title="Guidance Scale",
90
  field="range",
@@ -110,20 +110,28 @@ class Pipeline:
110
  base_model,
111
  safety_checker=None,
112
  )
113
- if args.use_taesd:
114
  self.pipe.vae = AutoencoderTiny.from_pretrained(
115
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
116
  ).to(device)
117
 
118
  self.pipe.set_progress_bar_config(disable=True)
119
  self.pipe.to(device=device, dtype=torch_dtype)
120
  if device.type != "mps":
121
  self.pipe.unet.to(memory_format=torch.channels_last)
122
 
123
- # check if computer has less than 64GB of RAM using sys or os
124
- if psutil.virtual_memory().total < 64 * 1024**3:
125
- self.pipe.enable_attention_slicing()
126
-
127
  if args.torch_compile:
128
  print("Running torch compile")
129
  self.pipe.unet = torch.compile(
@@ -132,24 +140,38 @@ class Pipeline:
132
  self.pipe.vae = torch.compile(
133
  self.pipe.vae, mode="reduce-overhead", fullgraph=True
134
  )
135
-
136
  self.pipe(
137
  prompt="warmup",
138
  image=[Image.new("RGB", (768, 768))],
139
  )
140
 
141
- self.pipe.compel_proc = Compel(
142
- tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
143
- text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
144
- returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
145
- requires_pooled=[False, True],
146
- )
 
147
 
148
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
149
  generator = torch.manual_seed(params.seed)
150
- prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
151
- [params.prompt, params.negative_prompt]
152
- )
 
153
  steps = params.steps
154
  strength = params.strength
155
  if int(steps * strength) < 1:
@@ -157,10 +179,12 @@ class Pipeline:
157
 
158
  results = self.pipe(
159
  image=params.image,
160
- prompt_embeds=prompt_embeds[0:1],
161
- pooled_prompt_embeds=pooled_prompt_embeds[0:1],
162
- negative_prompt_embeds=prompt_embeds[1:2],
163
- negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
164
  generator=generator,
165
  strength=strength,
166
  num_inference_steps=steps,
 
73
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
74
  )
75
  steps: int = Field(
76
+ 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
77
  )
78
  width: int = Field(
79
+ 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
80
  )
81
  height: int = Field(
82
+ 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
83
  )
84
  guidance_scale: float = Field(
85
+ 1.0,
86
  min=0,
87
+ max=1,
88
  step=0.001,
89
  title="Guidance Scale",
90
  field="range",
 
110
  base_model,
111
  safety_checker=None,
112
  )
113
+ if args.taesd:
114
  self.pipe.vae = AutoencoderTiny.from_pretrained(
115
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
116
  ).to(device)
117
 
118
+ if args.sfast:
119
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
120
+ compile,
121
+ CompilationConfig,
122
+ )
123
+
124
+ config = CompilationConfig.Default()
125
+ config.enable_xformers = True
126
+ config.enable_triton = True
127
+ config.enable_cuda_graph = True
128
+ self.pipe = compile(self.pipe, config=config)
129
+
130
  self.pipe.set_progress_bar_config(disable=True)
131
  self.pipe.to(device=device, dtype=torch_dtype)
132
  if device.type != "mps":
133
  self.pipe.unet.to(memory_format=torch.channels_last)
134
 
 
135
  if args.torch_compile:
136
  print("Running torch compile")
137
  self.pipe.unet = torch.compile(
 
140
  self.pipe.vae = torch.compile(
141
  self.pipe.vae, mode="reduce-overhead", fullgraph=True
142
  )
 
143
  self.pipe(
144
  prompt="warmup",
145
  image=[Image.new("RGB", (768, 768))],
146
  )
147
 
148
+ if args.compel:
149
+ self.pipe.compel_proc = Compel(
150
+ tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
151
+ text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
152
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
153
+ requires_pooled=[False, True],
154
+ )
155
 
156
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
157
  generator = torch.manual_seed(params.seed)
158
+ prompt = params.prompt
159
+ negative_prompt = params.negative_prompt
160
+ prompt_embeds = None
161
+ pooled_prompt_embeds = None
162
+ negative_prompt_embeds = None
163
+ negative_pooled_prompt_embeds = None
164
+ if hasattr(self.pipe, "compel_proc"):
165
+ _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
166
+ [params.prompt, params.negative_prompt]
167
+ )
168
+ prompt = None
169
+ negative_prompt = None
170
+ prompt_embeds = _prompt_embeds[0:1]
171
+ negative_prompt_embeds = _prompt_embeds[1:2]
172
+ negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
173
+ pooled_prompt_embeds = pooled_prompt_embeds[0:1]
174
+
175
  steps = params.steps
176
  strength = params.strength
177
  if int(steps * strength) < 1:
 
179
 
180
  results = self.pipe(
181
  image=params.image,
182
+ prompt=prompt,
183
+ negative_prompt=negative_prompt,
184
+ prompt_embeds=prompt_embeds,
185
+ pooled_prompt_embeds=pooled_prompt_embeds,
186
+ negative_prompt_embeds=negative_prompt_embeds,
187
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
188
  generator=generator,
189
  strength=strength,
190
  num_inference_steps=steps,
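Both image-to-image pipelines keep the same guard against a steps/strength combination that would leave the scheduler with no work: diffusers runs roughly `int(num_inference_steps * strength)` denoising steps in img2img mode, so the code bumps `steps` whenever that product rounds down to zero. A small worked sketch (the helper name is illustrative, not from the repo):

```python
import math

def effective_steps(steps: int, strength: float) -> int:
    # diffusers img2img performs roughly int(steps * strength) denoising steps,
    # so a low strength with few steps can round down to zero actual work.
    if int(steps * strength) < 1:
        steps = math.ceil(1 / max(0.10, strength))
    return steps

# e.g. 1 step at strength 0.3 -> int(0.3) == 0, so bump to ceil(1 / 0.3) == 4,
# which yields int(4 * 0.3) == 1 real denoising step.
print(effective_steps(1, 0.3))  # 4
print(effective_steps(4, 0.5))  # 4 (unchanged)
```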
pipelines/img2imgSegmindVegaRT.py ADDED
@@ -0,0 +1,217 @@
1
+ from diffusers import (
2
+ AutoPipelineForImage2Image,
3
+ LCMScheduler,
4
+ AutoencoderTiny,
5
+ )
6
+ from compel import Compel, ReturnedEmbeddingsType
7
+ import torch
8
+
9
+ try:
10
+ import intel_extension_for_pytorch as ipex # type: ignore
11
+ except:
12
+ pass
13
+
14
+ import psutil
15
+ from config import Args
16
+ from pydantic import BaseModel, Field
17
+ from PIL import Image
18
+ import math
19
+
20
+ base_model = "segmind/Segmind-Vega"
21
+ lora_model = "segmind/Segmind-VegaRT"
22
+ taesd_model = "madebyollin/taesdxl"
23
+
24
+ default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
25
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
26
+ page_content = """
27
+ <h1 class="text-3xl font-bold">Real-Time SegmindVegaRT</h1>
28
+ <h3 class="text-xl font-bold">Image-to-Image</h3>
29
+ <p class="text-sm">
30
+ This demo showcases
31
+ <a
32
+ href="https://huggingface.co/segmind/Segmind-VegaRT"
33
+ target="_blank"
34
+ class="text-blue-500 underline hover:no-underline">SegmindVegaRT</a>
35
+ Image to Image pipeline using
36
+ <a
37
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
38
+ target="_blank"
39
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
40
+ > with a MJPEG stream server.
41
+ </p>
42
+ <p class="text-sm text-gray-500">
43
+ Change the prompt to generate different images, accepts <a
44
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
45
+ target="_blank"
46
+ class="text-blue-500 underline hover:no-underline">Compel</a
47
+ > syntax.
48
+ </p>
49
+ """
50
+
51
+
52
+ class Pipeline:
53
+ class Info(BaseModel):
54
+ name: str = "img2img"
55
+ title: str = "Image-to-Image Playground 256"
56
+ description: str = "Generates an image from a text prompt"
57
+ input_mode: str = "image"
58
+ page_content: str = page_content
59
+
60
+ class InputParams(BaseModel):
61
+ prompt: str = Field(
62
+ default_prompt,
63
+ title="Prompt",
64
+ field="textarea",
65
+ id="prompt",
66
+ )
67
+ negative_prompt: str = Field(
68
+ default_negative_prompt,
69
+ title="Negative Prompt",
70
+ field="textarea",
71
+ id="negative_prompt",
72
+ hide=True,
73
+ )
74
+ seed: int = Field(
75
+ 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
76
+ )
77
+ steps: int = Field(
78
+ 1, min=1, max=10, title="Steps", field="range", hide=True, id="steps"
79
+ )
80
+ width: int = Field(
81
+ 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
82
+ )
83
+ height: int = Field(
84
+ 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
85
+ )
86
+ guidance_scale: float = Field(
87
+ 0.0,
88
+ min=0,
89
+ max=1,
90
+ step=0.001,
91
+ title="Guidance Scale",
92
+ field="range",
93
+ hide=True,
94
+ id="guidance_scale",
95
+ )
96
+ strength: float = Field(
97
+ 0.5,
98
+ min=0.25,
99
+ max=1.0,
100
+ step=0.001,
101
+ title="Strength",
102
+ field="range",
103
+ hide=True,
104
+ id="strength",
105
+ )
106
+
107
+ def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
108
+ if args.safety_checker:
109
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(
110
+ base_model,
111
+ variant="fp16",
112
+ )
113
+ else:
114
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(
115
+ base_model,
116
+ safety_checker=None,
117
+ variant="fp16",
118
+ )
119
+ if args.taesd:
120
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
121
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
122
+ ).to(device)
123
+
124
+ self.pipe.load_lora_weights(lora_model)
125
+ self.pipe.fuse_lora()
126
+ self.pipe.scheduler = LCMScheduler.from_pretrained(
127
+ base_model, subfolder="scheduler"
128
+ )
129
+ if args.sfast:
130
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
131
+ compile,
132
+ CompilationConfig,
133
+ )
134
+
135
+ config = CompilationConfig.Default()
136
+ config.enable_xformers = True
137
+ config.enable_triton = True
138
+ config.enable_cuda_graph = True
139
+ self.pipe = compile(self.pipe, config=config)
140
+
141
+ self.pipe.set_progress_bar_config(disable=True)
142
+ self.pipe.to(device=device, dtype=torch_dtype)
143
+ if device.type != "mps":
144
+ self.pipe.unet.to(memory_format=torch.channels_last)
145
+
146
+ if args.torch_compile:
147
+ print("Running torch compile")
148
+ self.pipe.unet = torch.compile(
149
+ self.pipe.unet, mode="reduce-overhead", fullgraph=False
150
+ )
151
+ self.pipe.vae = torch.compile(
152
+ self.pipe.vae, mode="reduce-overhead", fullgraph=False
153
+ )
154
+
155
+ self.pipe(
156
+ prompt="warmup",
157
+ image=[Image.new("RGB", (768, 768))],
158
+ )
159
+ if args.compel:
160
+ self.pipe.compel_proc = Compel(
161
+ tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
162
+ text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
163
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
164
+ requires_pooled=[False, True],
165
+ )
166
+
167
+ def predict(self, params: "Pipeline.InputParams") -> Image.Image:
168
+ generator = torch.manual_seed(params.seed)
169
+ prompt = params.prompt
170
+ negative_prompt = params.negative_prompt
171
+ prompt_embeds = None
172
+ pooled_prompt_embeds = None
173
+ negative_prompt_embeds = None
174
+ negative_pooled_prompt_embeds = None
175
+ if hasattr(self.pipe, "compel_proc"):
176
+ _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
177
+ [params.prompt, params.negative_prompt]
178
+ )
179
+ prompt = None
180
+ negative_prompt = None
181
+ prompt_embeds = _prompt_embeds[0:1]
182
+ pooled_prompt_embeds = pooled_prompt_embeds[0:1]
183
+ negative_prompt_embeds = _prompt_embeds[1:2]
184
+ negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
185
+
186
+ steps = params.steps
187
+ strength = params.strength
188
+ if int(steps * strength) < 1:
189
+ steps = math.ceil(1 / max(0.10, strength))
190
+
191
+ results = self.pipe(
192
+ image=params.image,
193
+ prompt=prompt,
194
+ negative_prompt=negative_prompt,
195
+ prompt_embeds=prompt_embeds,
196
+ pooled_prompt_embeds=pooled_prompt_embeds,
197
+ negative_prompt_embeds=negative_prompt_embeds,
198
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
199
+ generator=generator,
200
+ strength=strength,
201
+ num_inference_steps=steps,
202
+ guidance_scale=params.guidance_scale,
203
+ width=params.width,
204
+ height=params.height,
205
+ output_type="pil",
206
+ )
207
+
208
+ nsfw_content_detected = (
209
+ results.nsfw_content_detected[0]
210
+ if "nsfw_content_detected" in results
211
+ else False
212
+ )
213
+ if nsfw_content_detected:
214
+ return None
215
+ result_image = results.images[0]
216
+
217
+ return result_image
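The new file wires up the published Segmind-VegaRT recipe: the Segmind-Vega base weights, the VegaRT LCM-LoRA fused in, and an LCM scheduler so 1-4 step inference is usable. A standalone sketch of the same recipe outside the server (prompt, input image, and the CUDA device are placeholders):

```python
# Sketch of the Segmind-VegaRT setup the new pipeline performs in __init__.
import torch
from PIL import Image
from diffusers import AutoPipelineForImage2Image, LCMScheduler

pipe = AutoPipelineForImage2Image.from_pretrained(
    "segmind/Segmind-Vega", torch_dtype=torch.float16, variant="fp16"
).to("cuda")  # assumes a CUDA device
pipe.load_lora_weights("segmind/Segmind-VegaRT")  # distilled LCM-LoRA
pipe.fuse_lora()
pipe.scheduler = LCMScheduler.from_pretrained(
    "segmind/Segmind-Vega", subfolder="scheduler"
)

out = pipe(
    prompt="a watercolor painting of a lighthouse",
    image=Image.new("RGB", (1024, 1024)),  # stand-in for the streamed frame
    strength=0.5,
    num_inference_steps=2,
    guidance_scale=0.0,   # LCM-style sampling typically runs without CFG
)
out.images[0].save("vega_rt.png")
```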
pipelines/txt2img.py CHANGED
@@ -85,20 +85,28 @@ class Pipeline:
85
  self.pipe = DiffusionPipeline.from_pretrained(
86
  base_model, safety_checker=None
87
  )
88
- if args.use_taesd:
89
  self.pipe.vae = AutoencoderTiny.from_pretrained(
90
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
91
  ).to(device)
92
 
 
  self.pipe.set_progress_bar_config(disable=True)
94
  self.pipe.to(device=device, dtype=torch_dtype)
95
  if device.type != "mps":
96
  self.pipe.unet.to(memory_format=torch.channels_last)
97
 
98
- # check if computer has less than 64GB of RAM using sys or os
99
- if psutil.virtual_memory().total < 64 * 1024**3:
100
- self.pipe.enable_attention_slicing()
101
-
102
  if args.torch_compile:
103
  self.pipe.unet = torch.compile(
104
  self.pipe.unet, mode="reduce-overhead", fullgraph=True
@@ -109,17 +117,24 @@ class Pipeline:
109
 
110
  self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
111
 
112
- self.compel_proc = Compel(
113
- tokenizer=self.pipe.tokenizer,
114
- text_encoder=self.pipe.text_encoder,
115
- truncate_long_prompts=False,
116
- )
 
117
 
118
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
119
  generator = torch.manual_seed(params.seed)
120
- prompt_embeds = self.compel_proc(params.prompt)
 
  results = self.pipe(
122
  prompt_embeds=prompt_embeds,
 
123
  generator=generator,
124
  num_inference_steps=params.steps,
125
  guidance_scale=params.guidance_scale,
 
85
  self.pipe = DiffusionPipeline.from_pretrained(
86
  base_model, safety_checker=None
87
  )
88
+ if args.taesd:
89
  self.pipe.vae = AutoencoderTiny.from_pretrained(
90
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
91
  ).to(device)
92
 
93
+ if args.sfast:
94
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
95
+ compile,
96
+ CompilationConfig,
97
+ )
98
+
99
+ config = CompilationConfig.Default()
100
+ config.enable_xformers = True
101
+ config.enable_triton = True
102
+ config.enable_cuda_graph = True
103
+ self.pipe = compile(self.pipe, config=config)
104
+
105
  self.pipe.set_progress_bar_config(disable=True)
106
  self.pipe.to(device=device, dtype=torch_dtype)
107
  if device.type != "mps":
108
  self.pipe.unet.to(memory_format=torch.channels_last)
109
 
 
110
  if args.torch_compile:
111
  self.pipe.unet = torch.compile(
112
  self.pipe.unet, mode="reduce-overhead", fullgraph=True
 
117
 
118
  self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
119
 
120
+ if args.compel:
121
+ self.compel_proc = Compel(
122
+ tokenizer=self.pipe.tokenizer,
123
+ text_encoder=self.pipe.text_encoder,
124
+ truncate_long_prompts=False,
125
+ )
126
 
127
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
128
  generator = torch.manual_seed(params.seed)
129
+ prompt_embeds = None
130
+ prompt = params.prompt
131
+ if hasattr(self, "compel_proc"):
132
+ prompt_embeds = self.compel_proc(params.prompt)
133
+ prompt = None
134
+
135
  results = self.pipe(
136
  prompt_embeds=prompt_embeds,
137
+ prompt=prompt,
138
  generator=generator,
139
  num_inference_steps=params.steps,
140
  guidance_scale=params.guidance_scale,
pipelines/txt2imgLora.py CHANGED
@@ -92,20 +92,19 @@ class Pipeline:
92
  self.pipe = DiffusionPipeline.from_pretrained(
93
  base_model, safety_checker=None
94
  )
95
- if args.use_taesd:
96
  self.pipe.vae = AutoencoderTiny.from_pretrained(
97
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
98
  ).to(device)
 
99
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
100
  self.pipe.set_progress_bar_config(disable=True)
 
101
  self.pipe.to(device=device, dtype=torch_dtype)
 
102
  if device.type != "mps":
103
  self.pipe.unet.to(memory_format=torch.channels_last)
104
 
105
- # check if computer has less than 64GB of RAM using sys or os
106
- if psutil.virtual_memory().total < 64 * 1024**3:
107
- self.pipe.enable_attention_slicing()
108
-
109
  if args.torch_compile:
110
  self.pipe.unet = torch.compile(
111
  self.pipe.unet, mode="reduce-overhead", fullgraph=True
@@ -116,18 +115,35 @@ class Pipeline:
116
 
117
  self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
118
 
119
- self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
120
 
121
- self.compel_proc = Compel(
122
- tokenizer=self.pipe.tokenizer,
123
- text_encoder=self.pipe.text_encoder,
124
- truncate_long_prompts=False,
125
- )
126
 
127
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
128
  generator = torch.manual_seed(params.seed)
129
- prompt_embeds = self.compel_proc(params.prompt)
130
  results = self.pipe(
 
131
  prompt_embeds=prompt_embeds,
132
  generator=generator,
133
  num_inference_steps=params.steps,
 
92
  self.pipe = DiffusionPipeline.from_pretrained(
93
  base_model, safety_checker=None
94
  )
95
+ if args.taesd:
96
  self.pipe.vae = AutoencoderTiny.from_pretrained(
97
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
98
  ).to(device)
99
+
100
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
101
  self.pipe.set_progress_bar_config(disable=True)
102
+ self.pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
103
  self.pipe.to(device=device, dtype=torch_dtype)
104
+
105
  if device.type != "mps":
106
  self.pipe.unet.to(memory_format=torch.channels_last)
107
 
 
108
  if args.torch_compile:
109
  self.pipe.unet = torch.compile(
110
  self.pipe.unet, mode="reduce-overhead", fullgraph=True
 
115
 
116
  self.pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
117
 
118
+ if args.sfast:
119
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
120
+ compile,
121
+ CompilationConfig,
122
+ )
123
 
124
+ config = CompilationConfig.Default()
125
+ config.enable_xformers = True
126
+ config.enable_triton = True
127
+ config.enable_cuda_graph = True
128
+ self.pipe = compile(self.pipe, config=config)
129
+
130
+ if args.compel:
131
+ self.compel_proc = Compel(
132
+ tokenizer=self.pipe.tokenizer,
133
+ text_encoder=self.pipe.text_encoder,
134
+ truncate_long_prompts=False,
135
+ )
136
 
137
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
138
  generator = torch.manual_seed(params.seed)
139
+ prompt_embeds = None
140
+ prompt = params.prompt
141
+ if hasattr(self, "compel_proc"):
142
+ prompt_embeds = self.compel_proc(params.prompt)
143
+ prompt = None
144
+
145
  results = self.pipe(
146
+ prompt=prompt,
147
  prompt_embeds=prompt_embeds,
148
  generator=generator,
149
  num_inference_steps=params.steps,
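txt2imgLora now loads the LCM LoRA before moving the pipeline to the device, wraps the model with stable-fast only when `--sfast` is set, and builds Compel only when `--compel` is set. A minimal standalone sketch of the underlying LCM-LoRA recipe (both model ids are placeholders, since `base_model` and `lcm_lora_id` are defined outside this hunk):

```python
# Sketch only: base SD model + LCM LoRA + LCM scheduler for few-step txt2img.
import torch
from diffusers import DiffusionPipeline, LCMScheduler

pipe = DiffusionPipeline.from_pretrained(
    "Lykon/dreamshaper-7", torch_dtype=torch.float16  # placeholder base model
)
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
pipe.load_lora_weights(
    "latent-consistency/lcm-lora-sdv1-5", adapter_name="lcm"  # placeholder LoRA id
)
pipe.to("cuda")  # assumes a CUDA device

image = pipe(
    prompt="a portrait photo, soft window light",
    num_inference_steps=4,
    guidance_scale=1.0,   # LCM LoRAs are typically run with low or no CFG
).images[0]
```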
pipelines/txt2imgLoraSDXL.py CHANGED
@@ -111,19 +111,29 @@ class Pipeline:
111
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
112
  self.pipe.set_progress_bar_config(disable=True)
113
  self.pipe.to(device=device, dtype=torch_dtype).to(device)
114
  if device.type != "mps":
115
  self.pipe.unet.to(memory_format=torch.channels_last)
116
 
117
- if psutil.virtual_memory().total < 64 * 1024**3:
118
- self.pipe.enable_attention_slicing()
119
-
120
  self.pipe.compel_proc = Compel(
121
  tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
122
  text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
123
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
124
  requires_pooled=[False, True],
125
  )
126
- if args.use_taesd:
127
  self.pipe.vae = AutoencoderTiny.from_pretrained(
128
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
129
  ).to(device)
@@ -142,14 +152,30 @@ class Pipeline:
142
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
143
  generator = torch.manual_seed(params.seed)
144
 
145
- prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
146
- [params.prompt, params.negative_prompt]
147
- )
148
  results = self.pipe(
149
- prompt_embeds=prompt_embeds[0:1],
150
- pooled_prompt_embeds=pooled_prompt_embeds[0:1],
151
- negative_prompt_embeds=prompt_embeds[1:2],
152
- negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
153
  generator=generator,
154
  num_inference_steps=params.steps,
155
  guidance_scale=params.guidance_scale,
 
111
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
112
  self.pipe.set_progress_bar_config(disable=True)
113
  self.pipe.to(device=device, dtype=torch_dtype).to(device)
114
+
115
+ if args.sfast:
116
+ from sfast.compilers.stable_diffusion_pipeline_compiler import (
117
+ compile,
118
+ CompilationConfig,
119
+ )
120
+
121
+ config = CompilationConfig.Default()
122
+ config.enable_xformers = True
123
+ config.enable_triton = True
124
+ config.enable_cuda_graph = True
125
+ self.pipe = compile(self.pipe, config=config)
126
+
127
  if device.type != "mps":
128
  self.pipe.unet.to(memory_format=torch.channels_last)
129
 
 
  self.pipe.compel_proc = Compel(
131
  tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
132
  text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
133
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
134
  requires_pooled=[False, True],
135
  )
136
+ if args.taesd:
137
  self.pipe.vae = AutoencoderTiny.from_pretrained(
138
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
139
  ).to(device)
 
152
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
153
  generator = torch.manual_seed(params.seed)
154
 
155
+ prompt = params.prompt
156
+ negative_prompt = params.negative_prompt
157
+ prompt_embeds = None
158
+ pooled_prompt_embeds = None
159
+ negative_prompt_embeds = None
160
+ negative_pooled_prompt_embeds = None
161
+ if hasattr(self.pipe, "compel_proc"):
162
+ _prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
163
+ [params.prompt, params.negative_prompt]
164
+ )
165
+ prompt = None
166
+ negative_prompt = None
167
+ prompt_embeds = _prompt_embeds[0:1]
168
+ pooled_prompt_embeds = pooled_prompt_embeds[0:1]
169
+ negative_prompt_embeds = _prompt_embeds[1:2]
170
+ negative_pooled_prompt_embeds = pooled_prompt_embeds[1:2]
171
+
172
  results = self.pipe(
173
+ prompt=prompt,
174
+ negative_prompt=negative_prompt,
175
+ prompt_embeds=prompt_embeds,
176
+ pooled_prompt_embeds=pooled_prompt_embeds,
177
+ negative_prompt_embeds=negative_prompt_embeds,
178
+ negative_pooled_prompt_embeds=negative_pooled_prompt_embeds,
179
  generator=generator,
180
  num_inference_steps=params.steps,
181
  guidance_scale=params.guidance_scale,
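The SDXL-style pipelines above all share the same dual-encoder Compel hand-off: both tokenizers and text encoders feed Compel, which returns sequence embeddings plus the pooled embeddings SDXL expects, and the positive/negative halves are sliced out afterwards. A sketch of that pattern in isolation (the SDXL base model id and the CUDA device are placeholders):

```python
# Sketch only: dual-encoder Compel embeddings for an SDXL pipeline.
import torch
from diffusers import StableDiffusionXLPipeline
from compel import Compel, ReturnedEmbeddingsType

pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0", torch_dtype=torch.float16
).to("cuda")  # assumes a CUDA device

compel_proc = Compel(
    tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
    text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
    returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
    requires_pooled=[False, True],
)

# Batch the positive and negative prompts, then slice out each half.
embeds, pooled = compel_proc(["an astronaut riding a horse", "blurry, low quality"])
image = pipe(
    prompt_embeds=embeds[0:1],
    pooled_prompt_embeds=pooled[0:1],
    negative_prompt_embeds=embeds[1:2],
    negative_pooled_prompt_embeds=pooled[1:2],
    num_inference_steps=4,
    guidance_scale=1.0,
).images[0]
```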
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- git+https://github.com/huggingface/diffusers@dadd55fb36acc862254cf935826d54349b0fcd8c
2
  transformers==4.35.2
3
  --extra-index-url https://download.pytorch.org/whl/cu121;
4
  torch==2.1.0
@@ -10,4 +10,5 @@ compel==2.0.2
10
  controlnet-aux==0.0.7
11
  peft==0.6.0
12
  xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
13
- markdown2
 
 
1
+ git+https://github.com/huggingface/diffusers@2d94c7838e273c40920ffd6d24d724357add7f2d
2
  transformers==4.35.2
3
  --extra-index-url https://download.pytorch.org/whl/cu121;
4
  torch==2.1.0
 
10
  controlnet-aux==0.0.7
11
  peft==0.6.0
12
  xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
13
+ markdown2
14
+ stable_fast @ https://github.com/chengzeyi/stable-fast/releases/download/v0.0.15.post1/stable_fast-0.0.15.post1+torch211cu121-cp310-cp310-manylinux2014_x86_64.whl