radames (HF staff) committed
Commit 521d01d
2 Parent(s): 9bed4b6 2398f39

Merge branch 'main' into space-txt2img

README.md CHANGED
@@ -127,6 +127,13 @@ docker build -t lcm-live .
 docker run -ti -p 7860:7860 --gpus all lcm-live
 ```
 
+reuse model data from the host to avoid downloading it again; you can change `~/.cache/huggingface` to any other directory, but if you use huggingface-cli locally, you can share the same cache
+
+```bash
+docker run -ti -p 7860:7860 -e HF_HOME=/data -v ~/.cache/huggingface:/data --gpus all lcm-live
+```
+
+
 or with environment variables
 
 ```bash
app_init.py CHANGED
@@ -3,6 +3,7 @@ from fastapi.responses import StreamingResponse, JSONResponse
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.staticfiles import StaticFiles
 from fastapi import Request
+import markdown2
 
 import logging
 import traceback
@@ -13,6 +14,7 @@ import time
 from types import SimpleNamespace
 from util import pil_to_frame, bytes_to_pil, is_firefox
 import asyncio
+import os
 
 
 def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
@@ -41,11 +43,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
         await websocket.send_json(
             {"status": "connected", "message": "Connected", "userId": str(user_id)}
         )
-        await websocket.send_json(
-            {
-                "status": "send_frame",
-            }
-        )
+        await websocket.send_json({"status": "send_frame"})
         await handle_websocket_data(user_id, websocket)
     except WebSocketDisconnect as e:
         logging.error(f"WebSocket Error: {e}, {user_id}")
@@ -71,13 +69,12 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
             params = SimpleNamespace(**params.dict())
             if info.input_mode == "image":
                 image_data = await websocket.receive_bytes()
+                if len(image_data) == 0:
+                    await websocket.send_json({"status": "send_frame"})
+                    continue
                 params.image = bytes_to_pil(image_data)
             await user_data.update_data(user_id, params)
-            await websocket.send_json(
-                {
-                    "status": "wait",
-                }
-            )
+            await websocket.send_json({"status": "wait"})
             if args.timeout > 0 and time.time() - last_time > args.timeout:
                 await websocket.send_json(
                     {
@@ -110,11 +107,7 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
         while True:
             params = await user_data.get_latest_data(user_id)
             if not vars(params) or params.__dict__ == last_params.__dict__:
-                await websocket.send_json(
-                    {
-                        "status": "send_frame",
-                    }
-                )
+                await websocket.send_json({"status": "send_frame"})
                 await asyncio.sleep(0.1)
                 continue
 
@@ -143,14 +136,22 @@ def init_app(app: FastAPI, user_data: UserData, args: Args, pipeline):
     # route to setup frontend
     @app.get("/settings")
     async def settings():
-        info = pipeline.Info.schema()
+        info_schema = pipeline.Info.schema()
+        info = pipeline.Info()
+        if info.page_content:
+            page_content = markdown2.markdown(info.page_content)
+
         input_params = pipeline.InputParams.schema()
         return JSONResponse(
             {
-                "info": info,
+                "info": info_schema,
                 "input_params": input_params,
                 "max_queue_size": args.max_queue_size,
+                "page_content": page_content if info.page_content else "",
             }
         )
 
+    if not os.path.exists("public"):
+        os.makedirs("public")
+
     app.mount("/", StaticFiles(directory="public", html=True), name="public")
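Note: the `/settings` change above is the server half of the new per-pipeline page content. Each pipeline can expose an optional `page_content` string on its `Info` model; the endpoint renders it to HTML with `markdown2` and returns it next to the input-parameter schema. A minimal sketch of that flow, using a stand-in `Info` model rather than a real pipeline class:

```python
# Minimal sketch of how page_content flows into the /settings response.
# The Info model below is illustrative; real pipelines define their own.
import markdown2
from pydantic import BaseModel


class Info(BaseModel):
    name: str = "demo"
    input_mode: str = "image"
    page_content: str = "# Hello\nRendered by **markdown2**."


info = Info()
page_content = markdown2.markdown(info.page_content) if info.page_content else ""
settings = {
    "info": Info.schema(),          # JSON schema of the Info model
    "page_content": page_content,   # HTML string consumed by the frontend
}
print(settings["page_content"])
```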
frontend/src/lib/components/MediaListSwitcher.svelte CHANGED
@@ -18,17 +18,19 @@
 	<div class="flex items-center justify-center text-xs">
 		<button
 			title="Share your screen"
-			class="border-1 my-1 block cursor-pointer rounded-md border-gray-500 border-opacity-50 bg-slate-100 bg-opacity-30 p-[2px] font-medium text-white"
+			class="border-1 my-1 flex cursor-pointer gap-1 rounded-md border-gray-500 border-opacity-50 bg-slate-100 bg-opacity-30 p-1 font-medium text-white"
 			on:click={() => mediaStreamActions.startScreenCapture()}
 		>
-			<Screen classList={'w-100'} />
+			<span>Share</span>
+
+			<Screen classList={''} />
 		</button>
 		{#if $mediaDevices}
 			<select
 				bind:value={deviceId}
 				on:change={() => mediaStreamActions.switchCamera(deviceId)}
 				id="devices-list"
-				class="border-1 block cursor-pointer rounded-md border-gray-800 border-opacity-50 bg-slate-100 bg-opacity-30 p-[2px] font-medium text-white"
+				class="border-1 block cursor-pointer rounded-md border-gray-800 border-opacity-50 bg-slate-100 bg-opacity-30 p-1 font-medium text-white"
 			>
 				{#each $mediaDevices as device, i}
 					<option value={device.deviceId}>{device.label}</option>
frontend/src/lib/components/PipelineOptions.svelte CHANGED
@@ -1,6 +1,5 @@
 <script lang="ts">
-	import { createEventDispatcher } from 'svelte';
-	import type { FieldProps } from '$lib/types';
+	import type { Fields } from '$lib/types';
 	import { FieldType } from '$lib/types';
 	import InputRange from './InputRange.svelte';
 	import SeedInput from './SeedInput.svelte';
@@ -9,10 +8,10 @@
 	import Selectlist from './Selectlist.svelte';
 	import { pipelineValues } from '$lib/store';
 
-	export let pipelineParams: FieldProps[];
+	export let pipelineParams: Fields;
 
-	$: advanceOptions = pipelineParams?.filter((e) => e?.hide == true);
-	$: featuredOptions = pipelineParams?.filter((e) => e?.hide !== true);
+	$: advanceOptions = Object.values(pipelineParams)?.filter((e) => e?.hide == true);
+	$: featuredOptions = Object.values(pipelineParams)?.filter((e) => e?.hide !== true);
 </script>
 
 <div class="flex flex-col gap-3">
@@ -37,7 +36,7 @@
 	<details>
 		<summary class="cursor-pointer font-medium">Advanced Options</summary>
 		<div
-			class="grid grid-cols-1 items-center gap-3 {pipelineParams.length > 5
+			class="grid grid-cols-1 items-center gap-3 {Object.values(pipelineParams).length > 5
 				? 'sm:grid-cols-2'
 				: ''}"
 		>
frontend/src/lib/components/VideoInput.svelte CHANGED
@@ -10,21 +10,24 @@
 		mediaDevices
 	} from '$lib/mediaStream';
 	import MediaListSwitcher from './MediaListSwitcher.svelte';
+	export let width = 512;
+	export let height = 512;
+	const size = { width, height };
 
 	let videoEl: HTMLVideoElement;
 	let canvasEl: HTMLCanvasElement;
 	let ctx: CanvasRenderingContext2D;
 	let videoFrameCallbackId: number;
-	const WIDTH = 768;
-	const HEIGHT = 768;
+
 	// ajust the throttle time to your needs
 	const THROTTLE_TIME = 1000 / 15;
 	let selectedDevice: string = '';
+	let videoIsReady = false;
 
 	onMount(() => {
 		ctx = canvasEl.getContext('2d') as CanvasRenderingContext2D;
-		canvasEl.width = WIDTH;
-		canvasEl.height = HEIGHT;
+		canvasEl.width = size.width;
+		canvasEl.height = size.height;
 	});
 	$: {
 		console.log(selectedDevice);
@@ -44,35 +47,34 @@
 		}
 		const videoWidth = videoEl.videoWidth;
 		const videoHeight = videoEl.videoHeight;
-		const blob = await grapCropBlobImg(
-			videoEl,
-			videoWidth / 2 - WIDTH / 2,
-			videoHeight / 2 - HEIGHT / 2,
-			WIDTH,
-			HEIGHT
-		);
-
+		let height0 = videoHeight;
+		let width0 = videoWidth;
+		let x0 = 0;
+		let y0 = 0;
+		if (videoWidth > videoHeight) {
+			width0 = videoHeight;
+			x0 = (videoWidth - videoHeight) / 2;
+		} else {
+			height0 = videoWidth;
+			y0 = (videoHeight - videoWidth) / 2;
+		}
+		ctx.drawImage(videoEl, x0, y0, width0, height0, 0, 0, size.width, size.height);
+		const blob = await new Promise<Blob>((resolve) => {
+			canvasEl.toBlob(
+				(blob) => {
+					resolve(blob as Blob);
+				},
+				'image/jpeg',
+				1
+			);
+		});
 		onFrameChangeStore.set({ blob });
 		videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 	}
 
-	$: if ($mediaStreamStatus == MediaStreamStatusEnum.CONNECTED) {
+	$: if ($mediaStreamStatus == MediaStreamStatusEnum.CONNECTED && videoIsReady) {
 		videoFrameCallbackId = videoEl.requestVideoFrameCallback(onFrameChange);
 	}
-	async function grapCropBlobImg(
-		video: HTMLVideoElement,
-		x: number,
-		y: number,
-		width: number,
-		height: number
-	) {
-		const canvas = new OffscreenCanvas(width, height);
-
-		const ctx = canvas.getContext('2d') as OffscreenCanvasRenderingContext2D;
-		ctx.drawImage(video, x, y, width, height, 0, 0, width, height);
-		const blob = await canvas.convertToBlob({ type: 'image/jpeg', quality: 1 });
-		return blob;
-	}
 </script>
 
 <div class="relative mx-auto max-w-lg overflow-hidden rounded-lg border border-slate-300">
@@ -85,6 +87,9 @@
 		<video
 			class="pointer-events-none aspect-square w-full object-cover"
 			bind:this={videoEl}
+			on:loadeddata={() => {
+				videoIsReady = true;
+			}}
 			playsinline
 			autoplay
 			muted
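Note: the new frame handler above replaces the fixed 768x768 `OffscreenCanvas` crop with a crop of the largest centered square from the camera frame, scaled to the pipeline's `width`/`height` before being encoded as JPEG. A Python/PIL analogue of the same math, for illustration only (the app does this on a `<canvas>` in the browser):

```python
# Python/PIL analogue of the VideoInput crop: take the largest centered square
# from a frame and resize it to the pipeline's requested width/height.
from PIL import Image


def center_crop_resize(frame: Image.Image, width: int = 512, height: int = 512) -> Image.Image:
    vw, vh = frame.size
    side = min(vw, vh)              # largest centered square
    x0 = (vw - side) // 2
    y0 = (vh - side) // 2
    return frame.crop((x0, y0, x0 + side, y0 + side)).resize((width, height))


if __name__ == "__main__":
    img = Image.new("RGB", (1280, 720))
    print(center_crop_resize(img).size)  # (512, 512)
```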
frontend/src/lib/types.ts CHANGED
@@ -11,6 +11,11 @@ export const enum PipelineMode {
   TEXT = "text",
 }
 
+
+export interface Fields {
+  [key: string]: FieldProps;
+}
+
 export interface FieldProps {
   default: number | string;
   max?: number;
frontend/src/routes/+page.svelte CHANGED
@@ -1,6 +1,6 @@
 <script lang="ts">
 	import { onMount } from 'svelte';
-	import type { FieldProps, PipelineInfo } from '$lib/types';
+	import type { Fields, PipelineInfo } from '$lib/types';
 	import { PipelineMode } from '$lib/types';
 	import ImagePlayer from '$lib/components/ImagePlayer.svelte';
 	import VideoInput from '$lib/components/VideoInput.svelte';
@@ -11,8 +11,9 @@
 	import { mediaStreamActions, onFrameChangeStore } from '$lib/mediaStream';
 	import { getPipelineValues, deboucedPipelineValues } from '$lib/store';
 
-	let pipelineParams: FieldProps[];
+	let pipelineParams: Fields;
 	let pipelineInfo: PipelineInfo;
+	let pageContent: string;
 	let isImageMode: boolean = false;
 	let maxQueueSize: number = 0;
 	let currentQueueSize: number = 0;
@@ -22,11 +23,12 @@
 
 	async function getSettings() {
 		const settings = await fetch('/settings').then((r) => r.json());
-		pipelineParams = Object.values(settings.input_params.properties);
+		pipelineParams = settings.input_params.properties;
 		pipelineInfo = settings.info.properties;
 		isImageMode = pipelineInfo.input_mode.default === PipelineMode.IMAGE;
 		maxQueueSize = settings.max_queue_size;
-		pipelineParams = pipelineParams.filter((e) => e?.disabled !== true);
+		pageContent = settings.page_content;
+		console.log(pipelineParams);
 		if (maxQueueSize > 0) {
 			getQueueSize();
 			setInterval(() => {
@@ -68,33 +70,17 @@
 	}
 </script>
 
+<svelte:head>
+	<script
+		src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.9/iframeResizer.contentWindow.min.js"
+	></script>
+</svelte:head>
+
 <main class="container mx-auto flex max-w-5xl flex-col gap-3 px-4 py-4">
 	<article class="text-center">
-		<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
-		{#if pipelineInfo?.title?.default}
-			<h3 class="text-xl font-bold">{pipelineInfo?.title?.default}</h3>
+		{#if pageContent}
+			{@html pageContent}
 		{/if}
-		<p class="text-sm">
-			This demo showcases
-			<a
-				href="https://huggingface.co/blog/lcm_lora"
-				target="_blank"
-				class="text-blue-500 underline hover:no-underline">LCM LoRA</a
-			>
-			Image to Image pipeline using
-			<a
-				href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
-				target="_blank"
-				class="text-blue-500 underline hover:no-underline">Diffusers</a
-			> with a MJPEG stream server.
-		</p>
-		<p class="text-sm text-gray-500">
-			Change the prompt to generate different images, accepts <a
-				href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
-				target="_blank"
-				class="text-blue-500 underline hover:no-underline">Compel</a
-			> syntax.
-		</p>
 		{#if maxQueueSize > 0}
 			<p class="text-sm">
 				There are <span id="queue_size" class="font-bold">{currentQueueSize}</span>
@@ -111,7 +97,10 @@
 	<article class="my-3 grid grid-cols-1 gap-3 sm:grid-cols-2">
 		{#if isImageMode}
 			<div class="sm:col-start-1">
-				<VideoInput></VideoInput>
+				<VideoInput
+					width={Number(pipelineParams.width.default)}
+					height={Number(pipelineParams.height.default)}
+				></VideoInput>
 			</div>
 		{/if}
 		<div class={isImageMode ? 'sm:col-start-2' : 'col-span-2'}>
frontend/tailwind.config.js CHANGED
@@ -1,6 +1,6 @@
 /** @type {import('tailwindcss').Config} */
 export default {
-  content: ['./src/**/*.{html,js,svelte,ts}'],
+  content: ['./src/**/*.{html,js,svelte,ts}', '../pipelines/**/*.py'],
   theme: {
     extend: {}
   },
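Note: adding `../pipelines/**/*.py` to the Tailwind `content` globs lets the class scanner (which does a language-agnostic scan of all matched files) see the Tailwind utility classes used inside the `page_content` HTML strings that now live in the Python pipeline files, so those classes survive the production CSS purge. A hypothetical pipeline module illustrating what gets scanned:

```python
# Hypothetical pipeline module: because Tailwind now scans ../pipelines/**/*.py,
# class names inside this Python string are kept in the generated CSS.
page_content = """
<h1 class="text-3xl font-bold">My Pipeline</h1>
<p class="text-sm text-gray-500">Rendered via the /settings page_content field.</p>
"""
```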
pipelines/controlnelSD21Turbo.py ADDED
@@ -0,0 +1,260 @@
1
+ from diffusers import (
2
+ StableDiffusionControlNetImg2ImgPipeline,
3
+ ControlNetModel,
4
+ LCMScheduler,
5
+ AutoencoderTiny,
6
+ )
7
+ from compel import Compel
8
+ import torch
9
+ from pipelines.utils.canny_gpu import SobelOperator
10
+
11
+ try:
12
+ import intel_extension_for_pytorch as ipex # type: ignore
13
+ except:
14
+ pass
15
+
16
+ import psutil
17
+ from config import Args
18
+ from pydantic import BaseModel, Field
19
+ from PIL import Image
20
+ import math
21
+ import time
22
+
23
+ #
24
+ taesd_model = "madebyollin/taesd"
25
+ controlnet_model = "thibaud/controlnet-sd21-canny-diffusers"
26
+ base_model = "stabilityai/sd-turbo"
27
+
28
+ default_prompt = "Portrait of The Joker halloween costume, face painting, with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
29
+ page_content = """
30
+ <h1 class="text-3xl font-bold">Real-Time SDv2.1 Turbo</h1>
31
+ <h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
32
+ <p class="text-sm">
33
+ This demo showcases
34
+ <a
35
+ href="https://huggingface.co/stabilityai/sd-turbo"
36
+ target="_blank"
37
+ class="text-blue-500 underline hover:no-underline">SD Turbo</a>
38
+ Image to Image pipeline using
39
+ <a
40
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
41
+ target="_blank"
42
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
43
+ > with a MJPEG stream server.
44
+ </p>
45
+ <p class="text-sm text-gray-500">
46
+ Change the prompt to generate different images, accepts <a
47
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
48
+ target="_blank"
49
+ class="text-blue-500 underline hover:no-underline">Compel</a
50
+ > syntax.
51
+ </p>
52
+ """
53
+
54
+
55
+ class Pipeline:
56
+ class Info(BaseModel):
57
+ name: str = "controlnet+sd15Turbo"
58
+ title: str = "SDv1.5 Turbo + Controlnet"
59
+ description: str = "Generates an image from a text prompt"
60
+ input_mode: str = "image"
61
+ page_content: str = page_content
62
+
63
+ class InputParams(BaseModel):
64
+ prompt: str = Field(
65
+ default_prompt,
66
+ title="Prompt",
67
+ field="textarea",
68
+ id="prompt",
69
+ )
70
+ seed: int = Field(
71
+ 4402026899276587, min=0, title="Seed", field="seed", hide=True, id="seed"
72
+ )
73
+ steps: int = Field(
74
+ 1, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
75
+ )
76
+ width: int = Field(
77
+ 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
78
+ )
79
+ height: int = Field(
80
+ 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
81
+ )
82
+ guidance_scale: float = Field(
83
+ 1.21,
84
+ min=0,
85
+ max=10,
86
+ step=0.001,
87
+ title="Guidance Scale",
88
+ field="range",
89
+ hide=True,
90
+ id="guidance_scale",
91
+ )
92
+ strength: float = Field(
93
+ 0.8,
94
+ min=0.10,
95
+ max=1.0,
96
+ step=0.001,
97
+ title="Strength",
98
+ field="range",
99
+ hide=True,
100
+ id="strength",
101
+ )
102
+ controlnet_scale: float = Field(
103
+ 0.2,
104
+ min=0,
105
+ max=1.0,
106
+ step=0.001,
107
+ title="Controlnet Scale",
108
+ field="range",
109
+ hide=True,
110
+ id="controlnet_scale",
111
+ )
112
+ controlnet_start: float = Field(
113
+ 0.0,
114
+ min=0,
115
+ max=1.0,
116
+ step=0.001,
117
+ title="Controlnet Start",
118
+ field="range",
119
+ hide=True,
120
+ id="controlnet_start",
121
+ )
122
+ controlnet_end: float = Field(
123
+ 1.0,
124
+ min=0,
125
+ max=1.0,
126
+ step=0.001,
127
+ title="Controlnet End",
128
+ field="range",
129
+ hide=True,
130
+ id="controlnet_end",
131
+ )
132
+ canny_low_threshold: float = Field(
133
+ 0.31,
134
+ min=0,
135
+ max=1.0,
136
+ step=0.001,
137
+ title="Canny Low Threshold",
138
+ field="range",
139
+ hide=True,
140
+ id="canny_low_threshold",
141
+ )
142
+ canny_high_threshold: float = Field(
143
+ 0.125,
144
+ min=0,
145
+ max=1.0,
146
+ step=0.001,
147
+ title="Canny High Threshold",
148
+ field="range",
149
+ hide=True,
150
+ id="canny_high_threshold",
151
+ )
152
+ debug_canny: bool = Field(
153
+ False,
154
+ title="Debug Canny",
155
+ field="checkbox",
156
+ hide=True,
157
+ id="debug_canny",
158
+ )
159
+
160
+ def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
161
+ controlnet_canny = ControlNetModel.from_pretrained(
162
+ controlnet_model, torch_dtype=torch_dtype
163
+ ).to(device)
164
+
165
+ self.pipes = {}
166
+
167
+ if args.safety_checker:
168
+ self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
169
+ base_model,
170
+ controlnet=controlnet_canny,
171
+ )
172
+ else:
173
+ self.pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
174
+ base_model,
175
+ controlnet=controlnet_canny,
176
+ safety_checker=None,
177
+ )
178
+
179
+ if args.use_taesd:
180
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
181
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
182
+ ).to(device)
183
+ self.canny_torch = SobelOperator(device=device)
184
+
185
+ self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
186
+ self.pipe.set_progress_bar_config(disable=True)
187
+ self.pipe.to(device=device, dtype=torch_dtype).to(device)
188
+ if device.type != "mps":
189
+ self.pipe.unet.to(memory_format=torch.channels_last)
190
+
191
+ if psutil.virtual_memory().total < 64 * 1024**3:
192
+ self.pipe.enable_attention_slicing()
193
+
194
+ self.pipe.compel_proc = Compel(
195
+ tokenizer=self.pipe.tokenizer,
196
+ text_encoder=self.pipe.text_encoder,
197
+ truncate_long_prompts=True,
198
+ )
199
+ if args.use_taesd:
200
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
201
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
202
+ ).to(device)
203
+
204
+ if args.torch_compile:
205
+ self.pipe.unet = torch.compile(
206
+ self.pipe.unet, mode="reduce-overhead", fullgraph=True
207
+ )
208
+ self.pipe.vae = torch.compile(
209
+ self.pipe.vae, mode="reduce-overhead", fullgraph=True
210
+ )
211
+ self.pipe(
212
+ prompt="warmup",
213
+ image=[Image.new("RGB", (768, 768))],
214
+ control_image=[Image.new("RGB", (768, 768))],
215
+ )
216
+
217
+ def predict(self, params: "Pipeline.InputParams") -> Image.Image:
218
+ generator = torch.manual_seed(params.seed)
219
+ prompt_embeds = self.pipe.compel_proc(params.prompt)
220
+ control_image = self.canny_torch(
221
+ params.image, params.canny_low_threshold, params.canny_high_threshold
222
+ )
223
+ steps = params.steps
224
+ strength = params.strength
225
+ if int(steps * strength) < 1:
226
+ steps = math.ceil(1 / max(0.10, strength))
227
+ last_time = time.time()
228
+ results = self.pipe(
229
+ image=params.image,
230
+ control_image=control_image,
231
+ prompt_embeds=prompt_embeds,
232
+ generator=generator,
233
+ strength=strength,
234
+ num_inference_steps=steps,
235
+ guidance_scale=params.guidance_scale,
236
+ width=params.width,
237
+ height=params.height,
238
+ output_type="pil",
239
+ controlnet_conditioning_scale=params.controlnet_scale,
240
+ control_guidance_start=params.controlnet_start,
241
+ control_guidance_end=params.controlnet_end,
242
+ )
243
+ print(f"Time taken: {time.time() - last_time}")
244
+
245
+ nsfw_content_detected = (
246
+ results.nsfw_content_detected[0]
247
+ if "nsfw_content_detected" in results
248
+ else False
249
+ )
250
+ if nsfw_content_detected:
251
+ return None
252
+ result_image = results.images[0]
253
+ if params.debug_canny:
254
+ # paste control_image on top of result_image
255
+ w0, h0 = (200, 200)
256
+ control_image = control_image.resize((w0, h0))
257
+ w1, h1 = result_image.size
258
+ result_image.paste(control_image, (w1 - w0, h1 - h0))
259
+
260
+ return result_image
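Note: the `predict()` method above (and the other updated pipelines below) guards the step count against low strength values. Diffusers img2img pipelines effectively run roughly `int(num_inference_steps * strength)` denoising steps, so a small strength could round the count down to zero; the guard bumps `steps` so at least one step runs. A small worked sketch of that logic:

```python
# Sketch of the steps/strength guard used in the updated predict() methods:
# img2img pipelines effectively run about int(steps * strength) denoising steps,
# so very low strength values would otherwise round down to zero steps.
import math


def effective_steps(steps: int, strength: float) -> int:
    if int(steps * strength) < 1:
        steps = math.ceil(1 / max(0.10, strength))
    return steps


print(effective_steps(2, 0.3))  # 2 * 0.3 = 0.6 -> bumped to ceil(1 / 0.3) = 4
print(effective_steps(4, 0.5))  # 4 * 0.5 = 2   -> unchanged, stays 4
```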
pipelines/controlnet.py CHANGED
@@ -16,12 +16,38 @@ import psutil
16
  from config import Args
17
  from pydantic import BaseModel, Field
18
  from PIL import Image
 
19
 
20
  base_model = "SimianLuo/LCM_Dreamshaper_v7"
21
  taesd_model = "madebyollin/taesd"
22
  controlnet_model = "lllyasviel/control_v11p_sd15_canny"
23
 
24
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
25
 
26
 
27
  class Pipeline:
@@ -30,6 +56,7 @@ class Pipeline:
30
  title: str = "LCM + Controlnet"
31
  description: str = "Generates an image from a text prompt"
32
  input_mode: str = "image"
 
33
 
34
  class InputParams(BaseModel):
35
  prompt: str = Field(
@@ -42,13 +69,13 @@ class Pipeline:
42
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
43
  )
44
  steps: int = Field(
45
- 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
46
  )
47
  width: int = Field(
48
- 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
49
  )
50
  height: int = Field(
51
- 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
52
  )
53
  guidance_scale: float = Field(
54
  0.2,
@@ -145,7 +172,7 @@ class Pipeline:
145
  if args.use_taesd:
146
  self.pipe.vae = AutoencoderTiny.from_pretrained(
147
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
148
- )
149
  self.canny_torch = SobelOperator(device=device)
150
  self.pipe.set_progress_bar_config(disable=True)
151
  self.pipe.to(device=device, dtype=torch_dtype)
@@ -182,14 +209,18 @@ class Pipeline:
182
  control_image = self.canny_torch(
183
  params.image, params.canny_low_threshold, params.canny_high_threshold
184
  )
 
 
 
 
185
 
186
  results = self.pipe(
187
  image=params.image,
188
  control_image=control_image,
189
  prompt_embeds=prompt_embeds,
190
  generator=generator,
191
- strength=params.strength,
192
- num_inference_steps=params.steps,
193
  guidance_scale=params.guidance_scale,
194
  width=params.width,
195
  height=params.height,
 
16
  from config import Args
17
  from pydantic import BaseModel, Field
18
  from PIL import Image
19
+ import math
20
 
21
  base_model = "SimianLuo/LCM_Dreamshaper_v7"
22
  taesd_model = "madebyollin/taesd"
23
  controlnet_model = "lllyasviel/control_v11p_sd15_canny"
24
 
25
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
26
+ page_content = """
27
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
28
+ <h3 class="text-xl font-bold">LCM + Controlnet Canny</h3>
29
+ <p class="text-sm">
30
+ This demo showcases
31
+ <a
32
+ href="https://huggingface.co/blog/lcm_lora"
33
+ target="_blank"
34
+ class="text-blue-500 underline hover:no-underline">LCM LoRA</a
35
+ >
36
+ ControlNet + Image to Image pipeline using
37
+ <a
38
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
39
+ target="_blank"
40
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
41
+ > with a MJPEG stream server.
42
+ </p>
43
+ <p class="text-sm text-gray-500">
44
+ Change the prompt to generate different images, accepts <a
45
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
46
+ target="_blank"
47
+ class="text-blue-500 underline hover:no-underline">Compel</a
48
+ > syntax.
49
+ </p>
50
+ """
51
 
52
 
53
  class Pipeline:
 
56
  title: str = "LCM + Controlnet"
57
  description: str = "Generates an image from a text prompt"
58
  input_mode: str = "image"
59
+ page_content: str = page_content
60
 
61
  class InputParams(BaseModel):
62
  prompt: str = Field(
 
69
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
70
  )
71
  steps: int = Field(
72
+ 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
73
  )
74
  width: int = Field(
75
+ 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
76
  )
77
  height: int = Field(
78
+ 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
79
  )
80
  guidance_scale: float = Field(
81
  0.2,
 
172
  if args.use_taesd:
173
  self.pipe.vae = AutoencoderTiny.from_pretrained(
174
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
175
+ ).to(device)
176
  self.canny_torch = SobelOperator(device=device)
177
  self.pipe.set_progress_bar_config(disable=True)
178
  self.pipe.to(device=device, dtype=torch_dtype)
 
209
  control_image = self.canny_torch(
210
  params.image, params.canny_low_threshold, params.canny_high_threshold
211
  )
212
+ steps = params.steps
213
+ strength = params.strength
214
+ if int(steps * strength) < 1:
215
+ steps = math.ceil(1 / max(0.10, strength))
216
 
217
  results = self.pipe(
218
  image=params.image,
219
  control_image=control_image,
220
  prompt_embeds=prompt_embeds,
221
  generator=generator,
222
+ strength=strength,
223
+ num_inference_steps=steps,
224
  guidance_scale=params.guidance_scale,
225
  width=params.width,
226
  height=params.height,
pipelines/controlnetLoraSD15.py CHANGED
@@ -2,6 +2,7 @@ from diffusers import (
2
  StableDiffusionControlNetImg2ImgPipeline,
3
  ControlNetModel,
4
  LCMScheduler,
 
5
  )
6
  from compel import Compel
7
  import torch
@@ -16,6 +17,7 @@ import psutil
16
  from config import Args
17
  from pydantic import BaseModel, Field
18
  from PIL import Image
 
19
 
20
  taesd_model = "madebyollin/taesd"
21
  controlnet_model = "lllyasviel/control_v11p_sd15_canny"
@@ -26,17 +28,40 @@ base_models = {
26
  "nitrosocke/mo-di-diffusion": "modern disney style",
27
  }
28
  lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
29
-
30
-
31
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
32
 
33
 
34
  class Pipeline:
35
  class Info(BaseModel):
36
  name: str = "controlnet+loras+sd15"
37
- title: str = "LCM + LoRA + Controlnet "
38
  description: str = "Generates an image from a text prompt"
39
  input_mode: str = "image"
 
40
 
41
  class InputParams(BaseModel):
42
  prompt: str = Field(
@@ -45,24 +70,24 @@ class Pipeline:
45
  field="textarea",
46
  id="prompt",
47
  )
48
- model_id: str = Field(
49
  "plasmo/woolitize",
50
  title="Base Model",
51
  values=list(base_models.keys()),
52
  field="select",
53
- id="model_id",
54
  )
55
  seed: int = Field(
56
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
57
  )
58
  steps: int = Field(
59
- 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
60
  )
61
  width: int = Field(
62
- 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
63
  )
64
  height: int = Field(
65
- 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
66
  )
67
  guidance_scale: float = Field(
68
  0.2,
@@ -150,20 +175,20 @@ class Pipeline:
150
  self.pipes = {}
151
 
152
  if args.safety_checker:
153
- for model_id in base_models.keys():
154
  pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
155
- model_id,
156
  controlnet=controlnet_canny,
157
  )
158
- self.pipes[model_id] = pipe
159
  else:
160
- for model_id in base_models.keys():
161
  pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
162
- model_id,
163
  safety_checker=None,
164
  controlnet=controlnet_canny,
165
  )
166
- self.pipes[model_id] = pipe
167
 
168
  self.canny_torch = SobelOperator(device=device)
169
 
@@ -177,6 +202,11 @@ class Pipeline:
177
  if psutil.virtual_memory().total < 64 * 1024**3:
178
  pipe.enable_attention_slicing()
179
 
 
 
 
 
 
180
  # Load LCM LoRA
181
  pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
182
  pipe.compel_proc = Compel(
@@ -199,23 +229,26 @@ class Pipeline:
199
 
200
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
201
  generator = torch.manual_seed(params.seed)
202
- print(f"Using model: {params.model_id}")
203
- pipe = self.pipes[params.model_id]
204
 
205
- activation_token = base_models[params.model_id]
206
  prompt = f"{activation_token} {params.prompt}"
207
  prompt_embeds = pipe.compel_proc(prompt)
208
  control_image = self.canny_torch(
209
  params.image, params.canny_low_threshold, params.canny_high_threshold
210
  )
 
 
 
 
211
 
212
  results = pipe(
213
  image=params.image,
214
  control_image=control_image,
215
  prompt_embeds=prompt_embeds,
216
  generator=generator,
217
- strength=params.strength,
218
- num_inference_steps=params.steps,
219
  guidance_scale=params.guidance_scale,
220
  width=params.width,
221
  height=params.height,
 
2
  StableDiffusionControlNetImg2ImgPipeline,
3
  ControlNetModel,
4
  LCMScheduler,
5
+ AutoencoderTiny,
6
  )
7
  from compel import Compel
8
  import torch
 
17
  from config import Args
18
  from pydantic import BaseModel, Field
19
  from PIL import Image
20
+ import math
21
 
22
  taesd_model = "madebyollin/taesd"
23
  controlnet_model = "lllyasviel/control_v11p_sd15_canny"
 
28
  "nitrosocke/mo-di-diffusion": "modern disney style",
29
  }
30
  lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
 
 
31
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
32
+ page_content = """
33
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
34
+ <h3 class="text-xl font-bold">LCM + LoRA + Controlnet + Canny</h3>
35
+ <p class="text-sm">
36
+ This demo showcases
37
+ <a
38
+ href="https://huggingface.co/blog/lcm_lora"
39
+ target="_blank"
40
+ class="text-blue-500 underline hover:no-underline">LCM LoRA</a>
41
+ + ControlNet + Image to Image pipeline using
42
+ <a
43
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
44
+ target="_blank"
45
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
46
+ > with a MJPEG stream server.
47
+ </p>
48
+ <p class="text-sm text-gray-500">
49
+ Change the prompt to generate different images, accepts <a
50
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
51
+ target="_blank"
52
+ class="text-blue-500 underline hover:no-underline">Compel</a
53
+ > syntax.
54
+ </p>
55
+ """
56
 
57
 
58
  class Pipeline:
59
  class Info(BaseModel):
60
  name: str = "controlnet+loras+sd15"
61
+ title: str = "LCM + LoRA + Controlnet"
62
  description: str = "Generates an image from a text prompt"
63
  input_mode: str = "image"
64
+ page_content: str = page_content
65
 
66
  class InputParams(BaseModel):
67
  prompt: str = Field(
 
70
  field="textarea",
71
  id="prompt",
72
  )
73
+ base_model_id: str = Field(
74
  "plasmo/woolitize",
75
  title="Base Model",
76
  values=list(base_models.keys()),
77
  field="select",
78
+ id="base_model_id",
79
  )
80
  seed: int = Field(
81
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
82
  )
83
  steps: int = Field(
84
+ 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
85
  )
86
  width: int = Field(
87
+ 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
88
  )
89
  height: int = Field(
90
+ 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
91
  )
92
  guidance_scale: float = Field(
93
  0.2,
 
175
  self.pipes = {}
176
 
177
  if args.safety_checker:
178
+ for base_model_id in base_models.keys():
179
  pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
180
+ base_model_id,
181
  controlnet=controlnet_canny,
182
  )
183
+ self.pipes[base_model_id] = pipe
184
  else:
185
+ for base_model_id in base_models.keys():
186
  pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
187
+ base_model_id,
188
  safety_checker=None,
189
  controlnet=controlnet_canny,
190
  )
191
+ self.pipes[base_model_id] = pipe
192
 
193
  self.canny_torch = SobelOperator(device=device)
194
 
 
202
  if psutil.virtual_memory().total < 64 * 1024**3:
203
  pipe.enable_attention_slicing()
204
 
205
+ if args.use_taesd:
206
+ pipe.vae = AutoencoderTiny.from_pretrained(
207
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
208
+ ).to(device)
209
+
210
  # Load LCM LoRA
211
  pipe.load_lora_weights(lcm_lora_id, adapter_name="lcm")
212
  pipe.compel_proc = Compel(
 
229
 
230
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
231
  generator = torch.manual_seed(params.seed)
232
+ pipe = self.pipes[params.base_model_id]
 
233
 
234
+ activation_token = base_models[params.base_model_id]
235
  prompt = f"{activation_token} {params.prompt}"
236
  prompt_embeds = pipe.compel_proc(prompt)
237
  control_image = self.canny_torch(
238
  params.image, params.canny_low_threshold, params.canny_high_threshold
239
  )
240
+ steps = params.steps
241
+ strength = params.strength
242
+ if int(steps * strength) < 1:
243
+ steps = math.ceil(1 / max(0.10, strength))
244
 
245
  results = pipe(
246
  image=params.image,
247
  control_image=control_image,
248
  prompt_embeds=prompt_embeds,
249
  generator=generator,
250
+ strength=strength,
251
+ num_inference_steps=steps,
252
  guidance_scale=params.guidance_scale,
253
  width=params.width,
254
  height=params.height,
pipelines/controlnetLoraSDXL.py CHANGED
@@ -3,6 +3,7 @@ from diffusers import (
3
  ControlNetModel,
4
  LCMScheduler,
5
  AutoencoderKL,
 
6
  )
7
  from compel import Compel, ReturnedEmbeddingsType
8
  import torch
@@ -17,30 +18,49 @@ import psutil
17
  from config import Args
18
  from pydantic import BaseModel, Field
19
  from PIL import Image
 
20
 
21
  controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
22
  model_id = "stabilityai/stable-diffusion-xl-base-1.0"
23
  lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
24
-
25
- # # base model with activation token, it will prepend the prompt with the activation token
26
- base_models = {
27
- "plasmo/woolitize": "woolitize",
28
- "nitrosocke/Ghibli-Diffusion": "ghibli style",
29
- "nitrosocke/mo-di-diffusion": "modern disney style",
30
- }
31
- # lcm_lora_id = "latent-consistency/lcm-lora-sdv1-5"
32
 
33
 
34
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
35
  default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
36
 
37
 
38
  class Pipeline:
39
  class Info(BaseModel):
40
  name: str = "controlnet+loras+sdxl"
41
- title: str = "SDXL + LCM + LoRA + Controlnet "
42
  description: str = "Generates an image from a text prompt"
43
  input_mode: str = "image"
 
44
 
45
  class InputParams(BaseModel):
46
  prompt: str = Field(
@@ -60,13 +80,13 @@ class Pipeline:
60
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
61
  )
62
  steps: int = Field(
63
- 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
64
  )
65
  width: int = Field(
66
- 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
67
  )
68
  height: int = Field(
69
- 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
70
  )
71
  guidance_scale: float = Field(
72
  1.0,
@@ -79,10 +99,10 @@ class Pipeline:
79
  id="guidance_scale",
80
  )
81
  strength: float = Field(
82
- 0.5,
83
  min=0.25,
84
  max=1.0,
85
- step=0.001,
86
  title="Strength",
87
  field="range",
88
  hide=True,
@@ -191,6 +211,10 @@ class Pipeline:
191
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
192
  requires_pooled=[False, True],
193
  )
 
 
 
 
194
 
195
  if args.torch_compile:
196
  self.pipe.unet = torch.compile(
@@ -214,6 +238,10 @@ class Pipeline:
214
  control_image = self.canny_torch(
215
  params.image, params.canny_low_threshold, params.canny_high_threshold
216
  )
 
 
 
 
217
 
218
  results = self.pipe(
219
  image=params.image,
@@ -223,8 +251,8 @@ class Pipeline:
223
  negative_prompt_embeds=prompt_embeds[1:2],
224
  negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
225
  generator=generator,
226
- strength=params.strength,
227
- num_inference_steps=params.steps,
228
  guidance_scale=params.guidance_scale,
229
  width=params.width,
230
  height=params.height,
 
3
  ControlNetModel,
4
  LCMScheduler,
5
  AutoencoderKL,
6
+ AutoencoderTiny,
7
  )
8
  from compel import Compel, ReturnedEmbeddingsType
9
  import torch
 
18
  from config import Args
19
  from pydantic import BaseModel, Field
20
  from PIL import Image
21
+ import math
22
 
23
  controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
24
  model_id = "stabilityai/stable-diffusion-xl-base-1.0"
25
  lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
26
+ taesd_model = "madebyollin/taesdxl"
 
 
 
 
 
 
 
27
 
28
 
29
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
30
  default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
31
+ page_content = """
32
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDXL</h1>
33
+ <h3 class="text-xl font-bold">SDXL + LCM + LoRA + Controlnet</h3>
34
+ <p class="text-sm">
35
+ This demo showcases
36
+ <a
37
+ href="https://huggingface.co/blog/lcm_lora"
38
+ target="_blank"
39
+ class="text-blue-500 underline hover:no-underline">LCM LoRA</a>
40
+ + SDXL + Controlnet + Image to Image pipeline using
41
+ <a
42
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
43
+ target="_blank"
44
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
45
+ > with a MJPEG stream server.
46
+ </p>
47
+ <p class="text-sm text-gray-500">
48
+ Change the prompt to generate different images, accepts <a
49
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
50
+ target="_blank"
51
+ class="text-blue-500 underline hover:no-underline">Compel</a
52
+ > syntax.
53
+ </p>
54
+ """
55
 
56
 
57
  class Pipeline:
58
  class Info(BaseModel):
59
  name: str = "controlnet+loras+sdxl"
60
+ title: str = "SDXL + LCM + LoRA + Controlnet"
61
  description: str = "Generates an image from a text prompt"
62
  input_mode: str = "image"
63
+ page_content: str = page_content
64
 
65
  class InputParams(BaseModel):
66
  prompt: str = Field(
 
80
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
81
  )
82
  steps: int = Field(
83
+ 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
84
  )
85
  width: int = Field(
86
+ 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
87
  )
88
  height: int = Field(
89
+ 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
90
  )
91
  guidance_scale: float = Field(
92
  1.0,
 
99
  id="guidance_scale",
100
  )
101
  strength: float = Field(
102
+ 1,
103
  min=0.25,
104
  max=1.0,
105
+ step=0.0001,
106
  title="Strength",
107
  field="range",
108
  hide=True,
 
211
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
212
  requires_pooled=[False, True],
213
  )
214
+ if args.use_taesd:
215
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
216
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
217
+ ).to(device)
218
 
219
  if args.torch_compile:
220
  self.pipe.unet = torch.compile(
 
238
  control_image = self.canny_torch(
239
  params.image, params.canny_low_threshold, params.canny_high_threshold
240
  )
241
+ steps = params.steps
242
+ strength = params.strength
243
+ if int(steps * strength) < 1:
244
+ steps = math.ceil(1 / max(0.10, strength))
245
 
246
  results = self.pipe(
247
  image=params.image,
 
251
  negative_prompt_embeds=prompt_embeds[1:2],
252
  negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
253
  generator=generator,
254
+ strength=strength,
255
+ num_inference_steps=steps,
256
  guidance_scale=params.guidance_scale,
257
  width=params.width,
258
  height=params.height,
pipelines/controlnetSDXLTurbo.py ADDED
@@ -0,0 +1,268 @@
1
+ from diffusers import (
2
+ StableDiffusionXLControlNetImg2ImgPipeline,
3
+ ControlNetModel,
4
+ AutoencoderKL,
5
+ AutoencoderTiny,
6
+ )
7
+ from compel import Compel, ReturnedEmbeddingsType
8
+ import torch
9
+ from pipelines.utils.canny_gpu import SobelOperator
10
+
11
+ try:
12
+ import intel_extension_for_pytorch as ipex # type: ignore
13
+ except:
14
+ pass
15
+
16
+ import psutil
17
+ from config import Args
18
+ from pydantic import BaseModel, Field
19
+ from PIL import Image
20
+ import math
21
+
22
+ controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
23
+ model_id = "stabilityai/sdxl-turbo"
24
+ taesd_model = "madebyollin/taesdxl"
25
+
26
+ default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
27
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
28
+ page_content = """
29
+ <h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
30
+ <h3 class="text-xl font-bold">Image-to-Image ControlNet</h3>
31
+ <p class="text-sm">
32
+ This demo showcases
33
+ <a
34
+ href="https://huggingface.co/stabilityai/sdxl-turbo"
35
+ target="_blank"
36
+ class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
37
+ Image to Image pipeline using
38
+ <a
39
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
40
+ target="_blank"
41
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
42
+ > with a MJPEG stream server.
43
+ </p>
44
+ <p class="text-sm text-gray-500">
45
+ Change the prompt to generate different images, accepts <a
46
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
47
+ target="_blank"
48
+ class="text-blue-500 underline hover:no-underline">Compel</a
49
+ > syntax.
50
+ </p>
51
+ """
52
+
53
+
54
+ class Pipeline:
55
+ class Info(BaseModel):
56
+ name: str = "controlnet+SDXL+Turbo"
57
+ title: str = "SDXL Turbo + Controlnet"
58
+ description: str = "Generates an image from a text prompt"
59
+ input_mode: str = "image"
60
+ page_content: str = page_content
61
+
62
+ class InputParams(BaseModel):
63
+ prompt: str = Field(
64
+ default_prompt,
65
+ title="Prompt",
66
+ field="textarea",
67
+ id="prompt",
68
+ )
69
+ negative_prompt: str = Field(
70
+ default_negative_prompt,
71
+ title="Negative Prompt",
72
+ field="textarea",
73
+ id="negative_prompt",
74
+ hide=True,
75
+ )
76
+ seed: int = Field(
77
+ 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
78
+ )
79
+ steps: int = Field(
80
+ 2, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
81
+ )
82
+ width: int = Field(
83
+ 1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
84
+ )
85
+ height: int = Field(
86
+ 1024, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
87
+ )
88
+ guidance_scale: float = Field(
89
+ 1.0,
90
+ min=0,
91
+ max=10,
92
+ step=0.001,
93
+ title="Guidance Scale",
94
+ field="range",
95
+ hide=True,
96
+ id="guidance_scale",
97
+ )
98
+ strength: float = Field(
99
+ 0.5,
100
+ min=0.25,
101
+ max=1.0,
102
+ step=0.001,
103
+ title="Strength",
104
+ field="range",
105
+ hide=True,
106
+ id="strength",
107
+ )
108
+ controlnet_scale: float = Field(
109
+ 0.5,
110
+ min=0,
111
+ max=1.0,
112
+ step=0.001,
113
+ title="Controlnet Scale",
114
+ field="range",
115
+ hide=True,
116
+ id="controlnet_scale",
117
+ )
118
+ controlnet_start: float = Field(
119
+ 0.0,
120
+ min=0,
121
+ max=1.0,
122
+ step=0.001,
123
+ title="Controlnet Start",
124
+ field="range",
125
+ hide=True,
126
+ id="controlnet_start",
127
+ )
128
+ controlnet_end: float = Field(
129
+ 1.0,
130
+ min=0,
131
+ max=1.0,
132
+ step=0.001,
133
+ title="Controlnet End",
134
+ field="range",
135
+ hide=True,
136
+ id="controlnet_end",
137
+ )
138
+ canny_low_threshold: float = Field(
139
+ 0.31,
140
+ min=0,
141
+ max=1.0,
142
+ step=0.001,
143
+ title="Canny Low Threshold",
144
+ field="range",
145
+ hide=True,
146
+ id="canny_low_threshold",
147
+ )
148
+ canny_high_threshold: float = Field(
149
+ 0.125,
150
+ min=0,
151
+ max=1.0,
152
+ step=0.001,
153
+ title="Canny High Threshold",
154
+ field="range",
155
+ hide=True,
156
+ id="canny_high_threshold",
157
+ )
158
+ debug_canny: bool = Field(
159
+ False,
160
+ title="Debug Canny",
161
+ field="checkbox",
162
+ hide=True,
163
+ id="debug_canny",
164
+ )
165
+
166
+ def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
167
+ controlnet_canny = ControlNetModel.from_pretrained(
168
+ controlnet_model, torch_dtype=torch_dtype
169
+ ).to(device)
170
+ vae = AutoencoderKL.from_pretrained(
171
+ "madebyollin/sdxl-vae-fp16-fix", torch_dtype=torch_dtype
172
+ )
173
+ if args.safety_checker:
174
+ self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
175
+ model_id,
176
+ controlnet=controlnet_canny,
177
+ vae=vae,
178
+ )
179
+ else:
180
+ self.pipe = StableDiffusionXLControlNetImg2ImgPipeline.from_pretrained(
181
+ model_id,
182
+ safety_checker=None,
183
+ controlnet=controlnet_canny,
184
+ vae=vae,
185
+ )
186
+ self.canny_torch = SobelOperator(device=device)
187
+
188
+ self.pipe.set_progress_bar_config(disable=True)
189
+ self.pipe.to(device=device, dtype=torch_dtype).to(device)
190
+ if device.type != "mps":
191
+ self.pipe.unet.to(memory_format=torch.channels_last)
192
+
193
+ if psutil.virtual_memory().total < 64 * 1024**3:
194
+ self.pipe.enable_attention_slicing()
195
+
196
+ self.pipe.compel_proc = Compel(
197
+ tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
198
+ text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
199
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
200
+ requires_pooled=[False, True],
201
+ )
202
+ if args.use_taesd:
203
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
204
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
205
+ ).to(device)
206
+
207
+ if args.torch_compile:
208
+ self.pipe.unet = torch.compile(
209
+ self.pipe.unet, mode="reduce-overhead", fullgraph=True
210
+ )
211
+ self.pipe.vae = torch.compile(
212
+ self.pipe.vae, mode="reduce-overhead", fullgraph=True
213
+ )
214
+ self.pipe(
215
+ prompt="warmup",
216
+ image=[Image.new("RGB", (768, 768))],
217
+ control_image=[Image.new("RGB", (768, 768))],
218
+ )
219
+
220
+ def predict(self, params: "Pipeline.InputParams") -> Image.Image:
221
+ generator = torch.manual_seed(params.seed)
222
+
223
+ prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
224
+ [params.prompt, params.negative_prompt]
225
+ )
226
+ control_image = self.canny_torch(
227
+ params.image, params.canny_low_threshold, params.canny_high_threshold
228
+ )
229
+ steps = params.steps
230
+ strength = params.strength
231
+ if int(steps * strength) < 1:
232
+ steps = math.ceil(1 / max(0.10, strength))
233
+
234
+ results = self.pipe(
235
+ image=params.image,
236
+ control_image=control_image,
237
+ prompt_embeds=prompt_embeds[0:1],
238
+ pooled_prompt_embeds=pooled_prompt_embeds[0:1],
239
+ negative_prompt_embeds=prompt_embeds[1:2],
240
+ negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
241
+ generator=generator,
242
+ strength=strength,
243
+ num_inference_steps=steps,
244
+ guidance_scale=params.guidance_scale,
245
+ width=params.width,
246
+ height=params.height,
247
+ output_type="pil",
248
+ controlnet_conditioning_scale=params.controlnet_scale,
249
+ control_guidance_start=params.controlnet_start,
250
+ control_guidance_end=params.controlnet_end,
251
+ )
252
+
253
+ nsfw_content_detected = (
254
+ results.nsfw_content_detected[0]
255
+ if "nsfw_content_detected" in results
256
+ else False
257
+ )
258
+ if nsfw_content_detected:
259
+ return None
260
+ result_image = results.images[0]
261
+ if params.debug_canny:
262
+ # paste control_image on top of result_image
263
+ w0, h0 = (200, 200)
264
+ control_image = control_image.resize((w0, h0))
265
+ w1, h1 = result_image.size
266
+ result_image.paste(control_image, (w1 - w0, h1 - h0))
267
+
268
+ return result_image
pipelines/img2img.py CHANGED
@@ -14,11 +14,36 @@ import psutil
14
  from config import Args
15
  from pydantic import BaseModel, Field
16
  from PIL import Image
 
17
 
18
  base_model = "SimianLuo/LCM_Dreamshaper_v7"
19
  taesd_model = "madebyollin/taesd"
20
 
21
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
22
 
23
 
24
  class Pipeline:
@@ -27,6 +52,7 @@ class Pipeline:
27
  title: str = "Image-to-Image LCM"
28
  description: str = "Generates an image from a text prompt"
29
  input_mode: str = "image"
 
30
 
31
  class InputParams(BaseModel):
32
  prompt: str = Field(
@@ -39,13 +65,13 @@ class Pipeline:
39
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
40
  )
41
  steps: int = Field(
42
- 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
43
  )
44
  width: int = Field(
45
- 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
46
  )
47
  height: int = Field(
48
- 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
49
  )
50
  guidance_scale: float = Field(
51
  0.2,
@@ -79,7 +105,7 @@ class Pipeline:
79
  if args.use_taesd:
80
  self.pipe.vae = AutoencoderTiny.from_pretrained(
81
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
82
- )
83
 
84
  self.pipe.set_progress_bar_config(disable=True)
85
  self.pipe.to(device=device, dtype=torch_dtype)
@@ -113,12 +139,18 @@ class Pipeline:
113
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
114
  generator = torch.manual_seed(params.seed)
115
  prompt_embeds = self.compel_proc(params.prompt)
116
  results = self.pipe(
117
  image=params.image,
118
  prompt_embeds=prompt_embeds,
119
  generator=generator,
120
- strength=params.strength,
121
- num_inference_steps=params.steps,
122
  guidance_scale=params.guidance_scale,
123
  width=params.width,
124
  height=params.height,
 
14
  from config import Args
15
  from pydantic import BaseModel, Field
16
  from PIL import Image
17
+ import math
18
 
19
  base_model = "SimianLuo/LCM_Dreamshaper_v7"
20
  taesd_model = "madebyollin/taesd"
21
 
22
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
23
+ page_content = """
24
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
25
+ <h3 class="text-xl font-bold">Image-to-Image LCM</h3>
26
+ <p class="text-sm">
27
+ This demo showcases
28
+ <a
29
+ href="https://huggingface.co/blog/lcm_lora"
30
+ target="_blank"
31
+ class="text-blue-500 underline hover:no-underline">LCM</a>
32
+ Image to Image pipeline using
33
+ <a
34
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
35
+ target="_blank"
36
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
37
+ > with a MJPEG stream server.
38
+ </p>
39
+ <p class="text-sm text-gray-500">
40
+ Change the prompt to generate different images, accepts <a
41
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
42
+ target="_blank"
43
+ class="text-blue-500 underline hover:no-underline">Compel</a
44
+ > syntax.
45
+ </p>
46
+ """
47
 
48
 
49
  class Pipeline:
 
52
  title: str = "Image-to-Image LCM"
53
  description: str = "Generates an image from a text prompt"
54
  input_mode: str = "image"
55
+ page_content: str = page_content
56
 
57
  class InputParams(BaseModel):
58
  prompt: str = Field(
 
65
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
66
  )
67
  steps: int = Field(
68
+ 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
69
  )
70
  width: int = Field(
71
+ 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
72
  )
73
  height: int = Field(
74
+ 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
75
  )
76
  guidance_scale: float = Field(
77
  0.2,
 
105
  if args.use_taesd:
106
  self.pipe.vae = AutoencoderTiny.from_pretrained(
107
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
108
+ ).to(device)
109
 
110
  self.pipe.set_progress_bar_config(disable=True)
111
  self.pipe.to(device=device, dtype=torch_dtype)
 
139
  def predict(self, params: "Pipeline.InputParams") -> Image.Image:
140
  generator = torch.manual_seed(params.seed)
141
  prompt_embeds = self.compel_proc(params.prompt)
142
+
143
+ steps = params.steps
144
+ strength = params.strength
145
+ if int(steps * strength) < 1:
146
+ steps = math.ceil(1 / max(0.10, strength))
147
+
148
  results = self.pipe(
149
  image=params.image,
150
  prompt_embeds=prompt_embeds,
151
  generator=generator,
152
+ strength=strength,
153
+ num_inference_steps=steps,
154
  guidance_scale=params.guidance_scale,
155
  width=params.width,
156
  height=params.height,
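
The img2img change above keeps the denoising step count from collapsing to zero: diffusers runs roughly `int(steps * strength)` steps for image-to-image, so when that product rounds down to 0 the code bumps `steps` until at least one step survives. A small, standalone illustration of the same guard:

```python
import math

def effective_steps(steps: int, strength: float) -> int:
    # Mirrors the guard added in predict(): img2img runs roughly
    # int(steps * strength) denoising steps, so make sure at least one survives.
    if int(steps * strength) < 1:
        steps = math.ceil(1 / max(0.10, strength))
    return steps

print(effective_steps(4, 0.5))  # 4  -> int(2.0) == 2 effective steps, unchanged
print(effective_steps(2, 0.3))  # int(0.6) == 0, so bumped to ceil(1 / 0.3) == 4
```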
pipelines/img2imgSDXLTurbo.py ADDED
@@ -0,0 +1,182 @@
1
+ from diffusers import (
2
+ AutoPipelineForImage2Image,
3
+ AutoencoderTiny,
4
+ )
5
+ from compel import Compel, ReturnedEmbeddingsType
6
+ import torch
7
+
8
+ try:
9
+ import intel_extension_for_pytorch as ipex # type: ignore
10
+ except:
11
+ pass
12
+
13
+ import psutil
14
+ from config import Args
15
+ from pydantic import BaseModel, Field
16
+ from PIL import Image
17
+ import math
18
+
19
+ base_model = "stabilityai/sdxl-turbo"
20
+ taesd_model = "madebyollin/taesdxl"
21
+
22
+ default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
23
+ default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
24
+ page_content = """
25
+ <h1 class="text-3xl font-bold">Real-Time SDXL Turbo</h1>
26
+ <h3 class="text-xl font-bold">Image-to-Image</h3>
27
+ <p class="text-sm">
28
+ This demo showcases
29
+ <a
30
+ href="https://huggingface.co/stabilityai/sdxl-turbo"
31
+ target="_blank"
32
+ class="text-blue-500 underline hover:no-underline">SDXL Turbo</a>
33
+ Image to Image pipeline using
34
+ <a
35
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/sdxl_turbo"
36
+ target="_blank"
37
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
38
+ > with a MJPEG stream server.
39
+ </p>
40
+ <p class="text-sm text-gray-500">
41
+ Change the prompt to generate different images, accepts <a
42
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
43
+ target="_blank"
44
+ class="text-blue-500 underline hover:no-underline">Compel</a
45
+ > syntax.
46
+ </p>
47
+ """
48
+
49
+
50
+ class Pipeline:
51
+ class Info(BaseModel):
52
+ name: str = "img2img"
53
+ title: str = "Image-to-Image SDXL"
54
+ description: str = "Generates an image from a text prompt"
55
+ input_mode: str = "image"
56
+ page_content: str = page_content
57
+
58
+ class InputParams(BaseModel):
59
+ prompt: str = Field(
60
+ default_prompt,
61
+ title="Prompt",
62
+ field="textarea",
63
+ id="prompt",
64
+ )
65
+ negative_prompt: str = Field(
66
+ default_negative_prompt,
67
+ title="Negative Prompt",
68
+ field="textarea",
69
+ id="negative_prompt",
70
+ hide=True,
71
+ )
72
+ seed: int = Field(
73
+ 2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
74
+ )
75
+ steps: int = Field(
76
+ 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
77
+ )
78
+ width: int = Field(
79
+ 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
80
+ )
81
+ height: int = Field(
82
+ 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
83
+ )
84
+ guidance_scale: float = Field(
85
+ 0.2,
86
+ min=0,
87
+ max=20,
88
+ step=0.001,
89
+ title="Guidance Scale",
90
+ field="range",
91
+ hide=True,
92
+ id="guidance_scale",
93
+ )
94
+ strength: float = Field(
95
+ 0.5,
96
+ min=0.25,
97
+ max=1.0,
98
+ step=0.001,
99
+ title="Strength",
100
+ field="range",
101
+ hide=True,
102
+ id="strength",
103
+ )
104
+
105
+ def __init__(self, args: Args, device: torch.device, torch_dtype: torch.dtype):
106
+ if args.safety_checker:
107
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(base_model)
108
+ else:
109
+ self.pipe = AutoPipelineForImage2Image.from_pretrained(
110
+ base_model,
111
+ safety_checker=None,
112
+ )
113
+ if args.use_taesd:
114
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
115
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
116
+ ).to(device)
117
+
118
+ self.pipe.set_progress_bar_config(disable=True)
119
+ self.pipe.to(device=device, dtype=torch_dtype)
120
+ if device.type != "mps":
121
+ self.pipe.unet.to(memory_format=torch.channels_last)
122
+
123
+ # check if the machine has less than 64GB of RAM using psutil
124
+ if psutil.virtual_memory().total < 64 * 1024**3:
125
+ self.pipe.enable_attention_slicing()
126
+
127
+ if args.torch_compile:
128
+ print("Running torch compile")
129
+ self.pipe.unet = torch.compile(
130
+ self.pipe.unet, mode="reduce-overhead", fullgraph=True
131
+ )
132
+ self.pipe.vae = torch.compile(
133
+ self.pipe.vae, mode="reduce-overhead", fullgraph=True
134
+ )
135
+
136
+ self.pipe(
137
+ prompt="warmup",
138
+ image=[Image.new("RGB", (768, 768))],
139
+ )
140
+
141
+ self.pipe.compel_proc = Compel(
142
+ tokenizer=[self.pipe.tokenizer, self.pipe.tokenizer_2],
143
+ text_encoder=[self.pipe.text_encoder, self.pipe.text_encoder_2],
144
+ returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
145
+ requires_pooled=[False, True],
146
+ )
147
+
148
+ def predict(self, params: "Pipeline.InputParams") -> Image.Image:
149
+ generator = torch.manual_seed(params.seed)
150
+ prompt_embeds, pooled_prompt_embeds = self.pipe.compel_proc(
151
+ [params.prompt, params.negative_prompt]
152
+ )
153
+ steps = params.steps
154
+ strength = params.strength
155
+ if int(steps * strength) < 1:
156
+ steps = math.ceil(1 / max(0.10, strength))
157
+
158
+ results = self.pipe(
159
+ image=params.image,
160
+ prompt_embeds=prompt_embeds[0:1],
161
+ pooled_prompt_embeds=pooled_prompt_embeds[0:1],
162
+ negative_prompt_embeds=prompt_embeds[1:2],
163
+ negative_pooled_prompt_embeds=pooled_prompt_embeds[1:2],
164
+ generator=generator,
165
+ strength=strength,
166
+ num_inference_steps=steps,
167
+ guidance_scale=params.guidance_scale,
168
+ width=params.width,
169
+ height=params.height,
170
+ output_type="pil",
171
+ )
172
+
173
+ nsfw_content_detected = (
174
+ results.nsfw_content_detected[0]
175
+ if "nsfw_content_detected" in results
176
+ else False
177
+ )
178
+ if nsfw_content_detected:
179
+ return None
180
+ result_image = results.images[0]
181
+
182
+ return result_image
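
The new SDXL Turbo module follows the same contract as the other pipelines (`Info`, `InputParams`, `__init__`, `predict`). A hedged usage sketch, not part of the commit, showing how it can be driven the way the app does, with `SimpleNamespace` objects standing in for `config.Args` and for the per-request params:

```python
import torch
from types import SimpleNamespace
from PIL import Image
from pipelines.img2imgSDXLTurbo import Pipeline  # module added in this commit

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Stand-in for config.Args; only the attributes read by this pipeline are set.
args = SimpleNamespace(safety_checker=False, use_taesd=True, torch_compile=False)
pipe = Pipeline(args, device, torch.float16)

# Build params from the InputParams defaults and attach an input image,
# mirroring how the server fills in the frame before calling predict().
params = SimpleNamespace(**Pipeline.InputParams().dict(),
                         image=Image.new("RGB", (512, 512)))
result = pipe.predict(params)  # PIL.Image, or None if NSFW content is flagged
```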
pipelines/txt2img.py CHANGED
@@ -17,6 +17,28 @@ taesd_model = "madebyollin/taesd"
17
 
18
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
19
 
 
20
 
21
  class Pipeline:
22
  class Info(BaseModel):
@@ -24,6 +46,7 @@ class Pipeline:
24
  title: str = "Text-to-Image LCM"
25
  description: str = "Generates an image from a text prompt"
26
  input_mode: str = "text"
 
27
 
28
  class InputParams(BaseModel):
29
  prompt: str = Field(
@@ -39,10 +62,10 @@ class Pipeline:
39
  4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
40
  )
41
  width: int = Field(
42
- 512, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
43
  )
44
  height: int = Field(
45
- 512, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
46
  )
47
  guidance_scale: float = Field(
48
  8.0,
@@ -65,7 +88,7 @@ class Pipeline:
65
  if args.use_taesd:
66
  self.pipe.vae = AutoencoderTiny.from_pretrained(
67
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
68
- )
69
 
70
  self.pipe.set_progress_bar_config(disable=True)
71
  self.pipe.to(device=device, dtype=torch_dtype)
 
17
 
18
  default_prompt = "Portrait of The Terminator with , glare pose, detailed, intricate, full of colour, cinematic lighting, trending on artstation, 8k, hyperrealistic, focused, extreme details, unreal engine 5 cinematic, masterpiece"
19
 
20
+ page_content = """<h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
21
+ <h3 class="text-xl font-bold">Text-to-Image</h3>
22
+ <p class="text-sm">
23
+ This demo showcases
24
+ <a
25
+ href="https://huggingface.co/SimianLuo/LCM_Dreamshaper_v7"
26
+ target="_blank"
27
+ class="text-blue-500 underline hover:no-underline">LCM</a>
28
+ Text to Image pipeline using
29
+ <a
30
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
31
+ target="_blank"
32
+ class="text-blue-500 underline hover:no-underline">Diffusers</a> with a MJPEG stream server
33
+ </p>
34
+ <p class="text-sm text-gray-500">
35
+ Change the prompt to generate different images, accepts <a
36
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
37
+ target="_blank"
38
+ class="text-blue-500 underline hover:no-underline">Compel</a
39
+ > syntax.
40
+ </p>"""
41
+
42
 
43
  class Pipeline:
44
  class Info(BaseModel):
 
46
  title: str = "Text-to-Image LCM"
47
  description: str = "Generates an image from a text prompt"
48
  input_mode: str = "text"
49
+ page_content: str = page_content
50
 
51
  class InputParams(BaseModel):
52
  prompt: str = Field(
 
62
  4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
63
  )
64
  width: int = Field(
65
+ 768, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
66
  )
67
  height: int = Field(
68
+ 768, min=2, max=15, title="Height", disabled=True, hide=True, id="height"
69
  )
70
  guidance_scale: float = Field(
71
  8.0,
 
88
  if args.use_taesd:
89
  self.pipe.vae = AutoencoderTiny.from_pretrained(
90
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
91
+ ).to(device)
92
 
93
  self.pipe.set_progress_bar_config(disable=True)
94
  self.pipe.to(device=device, dtype=torch_dtype)
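
Beyond the 512 → 768 default resolution, the txt2img change adds a `page_content` attribute to `Pipeline.Info`, matching the other pipelines. A small sketch (assuming the repo is on the import path) of reading those class-level fields the way a settings layer could:

```python
# Illustrative only: inspect the static metadata the pipeline now carries.
from pipelines.txt2img import Pipeline

info = Pipeline.Info()
print(info.title)       # "Text-to-Image LCM"
print(info.input_mode)  # "text"
print(len(info.page_content), "characters of HTML served for the demo header")
```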
pipelines/txt2imgLora.py CHANGED
@@ -18,6 +18,34 @@ taesd_model = "madebyollin/taesd"
18
 
19
  default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
20
 
 
21
 
22
  class Pipeline:
23
  class Info(BaseModel):
@@ -25,6 +53,7 @@ class Pipeline:
25
  title: str = "Text-to-Image LCM + LoRa"
26
  description: str = "Generates an image from a text prompt"
27
  input_mode: str = "text"
 
28
 
29
  class InputParams(BaseModel):
30
  prompt: str = Field(
@@ -66,7 +95,7 @@ class Pipeline:
66
  if args.use_taesd:
67
  self.pipe.vae = AutoencoderTiny.from_pretrained(
68
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
69
- )
70
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
71
  self.pipe.set_progress_bar_config(disable=True)
72
  self.pipe.to(device=device, dtype=torch_dtype)
 
18
 
19
  default_prompt = "Analog style photograph of young Harrison Ford as Han Solo, star wars behind the scenes"
20
 
21
+ page_content = """
22
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model SDv1.5</h1>
23
+ <h3 class="text-xl font-bold">Text-to-Image LCM + LoRa</h3>
24
+ <p class="text-sm">
25
+ This demo showcases
26
+ <a
27
+ href="https://huggingface.co/blog/lcm_lora"
28
+ target="_blank"
29
+ class="text-blue-500 underline hover:no-underline">LCM</a>
30
+ Text to Image pipeline using
31
+ <a
32
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
33
+ target="_blank"
34
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
35
+ > with a MJPEG stream server. Featuring <a
36
+ href="https://huggingface.co/wavymulder/Analog-Diffusion"
37
+ target="_blank"
38
+ class="text-blue-500 underline hover:no-underline">Analog-Diffusion</a>
39
+ </p>
40
+ <p class="text-sm text-gray-500">
41
+ Change the prompt to generate different images, accepts <a
42
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
43
+ target="_blank"
44
+ class="text-blue-500 underline hover:no-underline">Compel</a
45
+ > syntax.
46
+ </p>
47
+ """
48
+
49
 
50
  class Pipeline:
51
  class Info(BaseModel):
 
53
  title: str = "Text-to-Image LCM + LoRa"
54
  description: str = "Generates an image from a text prompt"
55
  input_mode: str = "text"
56
+ page_content: str = page_content
57
 
58
  class InputParams(BaseModel):
59
  prompt: str = Field(
 
95
  if args.use_taesd:
96
  self.pipe.vae = AutoencoderTiny.from_pretrained(
97
  taesd_model, torch_dtype=torch_dtype, use_safetensors=True
98
+ ).to(device)
99
  self.pipe.scheduler = LCMScheduler.from_config(self.pipe.scheduler.config)
100
  self.pipe.set_progress_bar_config(disable=True)
101
  self.pipe.to(device=device, dtype=torch_dtype)
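
The `.to(device)` appended to each `AutoencoderTiny.from_pretrained(...)` call is the same fix applied across the pipelines in this commit: the replacement VAE is loaded in the target dtype and placed on the target device explicitly. A minimal sketch of that load pattern (the device selection is illustrative):

```python
import torch
from diffusers import AutoencoderTiny

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the tiny VAE in half precision and move it onto the same device as the pipe.
vae = AutoencoderTiny.from_pretrained(
    "madebyollin/taesd", torch_dtype=torch.float16, use_safetensors=True
).to(device)
```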
pipelines/txt2imgLoraSDXL.py CHANGED
@@ -1,8 +1,4 @@
1
- from diffusers import (
2
- DiffusionPipeline,
3
- LCMScheduler,
4
- AutoencoderKL,
5
- )
6
  from compel import Compel, ReturnedEmbeddingsType
7
  import torch
8
 
@@ -16,13 +12,38 @@ from config import Args
16
  from pydantic import BaseModel, Field
17
  from PIL import Image
18
 
19
- controlnet_model = "diffusers/controlnet-canny-sdxl-1.0"
20
  model_id = "stabilityai/stable-diffusion-xl-base-1.0"
21
  lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
 
22
 
23
 
24
  default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
25
  default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
26
 
27
 
28
  class Pipeline:
@@ -30,6 +51,7 @@ class Pipeline:
30
  name: str = "LCM+Lora+SDXL"
31
  title: str = "Text-to-Image SDXL + LCM + LoRA"
32
  description: str = "Generates an image from a text prompt"
 
33
  input_mode: str = "text"
34
 
35
  class InputParams(BaseModel):
@@ -50,7 +72,7 @@ class Pipeline:
50
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
51
  )
52
  steps: int = Field(
53
- 4, min=2, max=15, title="Steps", field="range", hide=True, id="steps"
54
  )
55
  width: int = Field(
56
  1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
@@ -101,6 +123,10 @@ class Pipeline:
101
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
102
  requires_pooled=[False, True],
103
  )
104
 
105
  if args.torch_compile:
106
  self.pipe.unet = torch.compile(
 
1
+ from diffusers import DiffusionPipeline, LCMScheduler, AutoencoderKL, AutoencoderTiny
2
  from compel import Compel, ReturnedEmbeddingsType
3
  import torch
4
 
 
12
  from pydantic import BaseModel, Field
13
  from PIL import Image
14
 
 
15
  model_id = "stabilityai/stable-diffusion-xl-base-1.0"
16
  lcm_lora_id = "latent-consistency/lcm-lora-sdxl"
17
+ taesd_model = "madebyollin/taesdxl"
18
 
19
 
20
  default_prompt = "close-up photography of old man standing in the rain at night, in a street lit by lamps, leica 35mm summilux"
21
  default_negative_prompt = "blurry, low quality, render, 3D, oversaturated"
22
+ page_content = """
23
+ <h1 class="text-3xl font-bold">Real-Time Latent Consistency Model</h1>
24
+ <h3 class="text-xl font-bold">Text-to-Image SDXL + LCM + LoRA</h3>
25
+ <p class="text-sm">
26
+ This demo showcases
27
+ <a
28
+ href="https://huggingface.co/blog/lcm_lora"
29
+ target="_blank"
30
+ class="text-blue-500 underline hover:no-underline">LCM LoRA</a
31
+ >
32
+ Text to Image pipeline using
33
+ <a
34
+ href="https://huggingface.co/docs/diffusers/main/en/using-diffusers/lcm#performing-inference-with-lcm"
35
+ target="_blank"
36
+ class="text-blue-500 underline hover:no-underline">Diffusers</a
37
+ > with a MJPEG stream server.
38
+ </p>
39
+ <p class="text-sm text-gray-500">
40
+ Change the prompt to generate different images, accepts <a
41
+ href="https://github.com/damian0815/compel/blob/main/doc/syntax.md"
42
+ target="_blank"
43
+ class="text-blue-500 underline hover:no-underline">Compel</a
44
+ > syntax.
45
+ </p>
46
+ """
47
 
48
 
49
  class Pipeline:
 
51
  name: str = "LCM+Lora+SDXL"
52
  title: str = "Text-to-Image SDXL + LCM + LoRA"
53
  description: str = "Generates an image from a text prompt"
54
+ page_content: str = page_content
55
  input_mode: str = "text"
56
 
57
  class InputParams(BaseModel):
 
72
  2159232, min=0, title="Seed", field="seed", hide=True, id="seed"
73
  )
74
  steps: int = Field(
75
+ 4, min=1, max=15, title="Steps", field="range", hide=True, id="steps"
76
  )
77
  width: int = Field(
78
  1024, min=2, max=15, title="Width", disabled=True, hide=True, id="width"
 
123
  returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
124
  requires_pooled=[False, True],
125
  )
126
+ if args.use_taesd:
127
+ self.pipe.vae = AutoencoderTiny.from_pretrained(
128
+ taesd_model, torch_dtype=torch_dtype, use_safetensors=True
129
+ ).to(device)
130
 
131
  if args.torch_compile:
132
  self.pipe.unet = torch.compile(
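
Note that the SDXL + LoRA pipeline pairs its new TAESD option with `madebyollin/taesdxl` rather than the SD 1.5 `madebyollin/taesd`: the tiny decoder has to match the base model's latent space. The pairings used across the pipeline modules in this commit, gathered for reference:

```python
# Base checkpoint -> matching tiny autoencoder, as used in the pipeline modules above.
TAESD_FOR_BASE = {
    "SimianLuo/LCM_Dreamshaper_v7": "madebyollin/taesd",                # SD 1.5 latents
    "stabilityai/sdxl-turbo": "madebyollin/taesdxl",                    # SDXL latents
    "stabilityai/stable-diffusion-xl-base-1.0": "madebyollin/taesdxl",  # SDXL latents
}
```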
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- git+https://github.com/huggingface/diffusers@6f1435332bc74e286af5e88014236e4cc712b747
2
  transformers==4.35.2
3
  --extra-index-url https://download.pytorch.org/whl/cu121;
4
  torch==2.1.0
@@ -9,4 +9,5 @@ accelerate==0.24.0
9
  compel==2.0.2
10
  controlnet-aux==0.0.7
11
  peft==0.6.0
12
- xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
 
 
1
+ git+https://github.com/huggingface/diffusers@dadd55fb36acc862254cf935826d54349b0fcd8c
2
  transformers==4.35.2
3
  --extra-index-url https://download.pytorch.org/whl/cu121;
4
  torch==2.1.0
 
9
  compel==2.0.2
10
  controlnet-aux==0.0.7
11
  peft==0.6.0
12
+ xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
13
+ markdown2
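
With the diffusers pin moved to a newer commit and `markdown2` added, a quick, purely illustrative import check confirms the environment picked both up:

```python
# Illustrative sanity check after `pip install -r requirements.txt`.
import diffusers
import markdown2

print("diffusers", diffusers.__version__)
print("markdown2", markdown2.__version__)
```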