radames committed on
Commit dabf711
1 Parent(s): c7f8801
Files changed (4)
  1. app-controlnet.py +10 -4
  2. app-img2img.py +8 -2
  3. app-txt2img.py +8 -2
  4. requirements.txt +1 -1
app-controlnet.py CHANGED
@@ -35,6 +35,7 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -100,15 +101,20 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
-    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
-
 compel_proc = Compel(
     tokenizer=pipe.tokenizer,
     text_encoder=pipe.text_encoder,
     truncate_long_prompts=False,
 )
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
+    pipe(prompt="warmup", image=[Image.new("RGB", (768, 768))], control_image=[Image.new("RGB", (768, 768))])
+
+
 user_queue_map = {}
 
 
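Note: the pattern this commit introduces in app-controlnet.py (and mirrors in the other apps) is an opt-in compile step gated by an environment variable, followed by a throwaway warmup call. A minimal, self-contained sketch of that pattern is below; the tiny nn.Sequential is only a placeholder for the pipeline's UNet/VAE/text encoder, there to keep the example runnable.

    import os
    import torch

    # Opt-in flag, mirroring TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None) in the apps.
    TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)

    # Placeholder module; in the apps this would be pipe.unet, pipe.vae, and so on.
    model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

    if TORCH_COMPILE:
        # max-autotune spends longer searching for fast kernels; fullgraph=False
        # tolerates graph breaks instead of erroring on them.
        model = torch.compile(model, mode="max-autotune", fullgraph=False)

        # torch.compile is lazy: compilation happens on the first call, so a
        # warmup forward pass keeps that cost out of the first real request.
        with torch.no_grad():
            model(torch.randn(1, 8))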
app-img2img.py CHANGED
@@ -29,6 +29,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 512
 HEIGHT = 512
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
     pipe(prompt="warmup", image=[Image.new("RGB", (512, 512))])
 
 compel_proc = Compel(
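Note: since the new flag is read with os.environ.get and never parsed, it is a plain string; any non-empty value (including "0" or "false") enables compilation, and only an unset or empty variable disables it. A quick check, using nothing beyond the standard library:

    import os

    for value in ("1", "true", "0", ""):
        os.environ["TORCH_COMPILE"] = value
        enabled = bool(os.environ.get("TORCH_COMPILE", None))
        print(f"TORCH_COMPILE={value!r} -> compile enabled: {enabled}")
    # Non-empty strings all print True; only the empty string (or leaving the
    # variable unset) prints False.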
app-txt2img.py CHANGED
@@ -30,6 +30,8 @@ import psutil
 MAX_QUEUE_SIZE = int(os.environ.get("MAX_QUEUE_SIZE", 0))
 TIMEOUT = float(os.environ.get("TIMEOUT", 0))
 SAFETY_CHECKER = os.environ.get("SAFETY_CHECKER", None)
+TORCH_COMPILE = os.environ.get("TORCH_COMPILE", None)
+
 WIDTH = 768
 HEIGHT = 768
 # disable tiny autoencoder for better quality speed tradeoff
@@ -76,8 +78,12 @@ pipe.unet.to(memory_format=torch.channels_last)
 if psutil.virtual_memory().total < 64 * 1024**3:
     pipe.enable_attention_slicing()
 
-if not mps_available and not xpu_available:
-    pipe.unet = torch.compile(pipe.unet, mode="reduce-overhead", fullgraph=True)
+if TORCH_COMPILE:
+    pipe.text_encoder = torch.compile(pipe.text_encoder, mode="max-autotune", fullgraph=False)
+    pipe.tokenizer = torch.compile(pipe.tokenizer, mode="max-autotune", fullgraph=False)
+    pipe.unet = torch.compile(pipe.unet, mode="max-autotune", fullgraph=False)
+    pipe.vae = torch.compile(pipe.vae, mode="max-autotune", fullgraph=False)
+
     pipe(prompt="warmup", num_inference_steps=1, guidance_scale=8.0)
 
 compel_proc = Compel(
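Note: besides gating compilation behind TORCH_COMPILE, the commit also changes how torch.compile is called: the removed code used mode="reduce-overhead" with fullgraph=True on the UNet only, while the added code uses mode="max-autotune" with fullgraph=False on every pipeline component. A side-by-side sketch of the two variants (the Linear module is again just a placeholder):

    import torch

    m = torch.nn.Linear(16, 16)

    # Removed style: reduce-overhead targets low per-call launch overhead
    # (e.g. via CUDA graphs), and fullgraph=True turns any graph break into an error.
    old_style = torch.compile(m, mode="reduce-overhead", fullgraph=True)

    # Added style: max-autotune takes longer to compile while searching for faster
    # kernels, and fullgraph=False falls back to eager execution around graph breaks.
    new_style = torch.compile(m, mode="max-autotune", fullgraph=False)

    with torch.no_grad():
        old_style(torch.randn(2, 16))
        new_style(torch.randn(2, 16))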
requirements.txt CHANGED
@@ -1,4 +1,4 @@
-diffusers==0.22.1
+diffusers==0.22.2
 transformers==4.34.1
 gradio==3.50.2
 --extra-index-url https://download.pytorch.org/whl/cu121
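Note: the diffusers pin moves from 0.22.1 to 0.22.2, a patch-level bump. An existing environment picks it up by reinstalling from this file (for example, pip install -U -r requirements.txt).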