ford442 committed on
Commit
c3b6a89
·
verified ·
1 Parent(s): 54370eb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -15
app.py CHANGED
@@ -87,10 +87,10 @@ os.putenv("HF_HUB_ENABLE_HF_TRANSFER","1")
87
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
88
 
89
  def load_and_prepare_model():
90
- vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False).to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
91
  #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
92
- sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
93
-
94
  #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
95
  pipe = StableDiffusionXLPipeline.from_pretrained(
96
  'ford442/RealVisXL_V5.0_BF16',
@@ -99,29 +99,25 @@ def load_and_prepare_model():
99
  # low_cpu_mem_usage = False,
100
  add_watermarker=False,
101
  )
102
- pipe.vae = vaeXL #.to(torch.bfloat16)
103
  pipe.scheduler = sched
104
  #pipe.vae.do_resize=False
105
  #pipe.vae.vae_scale_factor=8
106
  #pipe.to(device=device, dtype=torch.bfloat16)
107
  pipe.to(device)
108
-
109
- #Make sure the unet is contiguous
110
- pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
111
-
112
  pipe.to(torch.bfloat16)
113
  pipe.vae.set_default_attn_processor()
114
  print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
115
  pipe.watermark=None
116
- pipe.safety_checker=None
 
 
 
117
  return pipe
118
 
119
- # Preload and compile both models
120
- pipe = load_and_prepare_model()
121
-
122
-
123
  # for compile
124
  hidet.option.parallel_build(True)
 
125
  torch._dynamo.config.suppress_errors = True
126
  torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
127
  # more search
@@ -134,9 +130,9 @@ hidet.torch.dynamo_config.use_fp16(True)
134
  hidet.torch.dynamo_config.use_fp16_reduction(True)
135
  # use tensorcore
136
  hidet.torch.dynamo_config.use_tensor_core()
137
- pipe.unet = torch.compile(pipe.unet, backend="hidet")
138
-
139
 
 
140
 
141
  MAX_SEED = np.iinfo(np.int64).max
142
 
 
87
  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
88
 
89
  def load_and_prepare_model():
90
+ vaeXL = AutoencoderKL.from_pretrained("stabilityai/sdxl-vae", safety_checker=None, use_safetensors=False) #.to(device).to(torch.bfloat16) #.to(device=device, dtype=torch.bfloat16)
91
  #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1,use_karras_sigmas=True)
92
+ #sched = DPMSolverSDEScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler')
93
+ sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear", beta_start=0.00085, beta_end=0.012, steps_offset=1) #,use_karras_sigmas=True)
94
  #sched = EulerAncestralDiscreteScheduler.from_pretrained('ford442/RealVisXL_V5.0_BF16', subfolder='scheduler',beta_schedule="scaled_linear")
95
  pipe = StableDiffusionXLPipeline.from_pretrained(
96
  'ford442/RealVisXL_V5.0_BF16',
 
99
  # low_cpu_mem_usage = False,
100
  add_watermarker=False,
101
  )
102
+ pipe.vae = vaeXL.to(torch.bfloat16)
103
  pipe.scheduler = sched
104
  #pipe.vae.do_resize=False
105
  #pipe.vae.vae_scale_factor=8
106
  #pipe.to(device=device, dtype=torch.bfloat16)
107
  pipe.to(device)
 
 
 
 
108
  pipe.to(torch.bfloat16)
109
  pipe.vae.set_default_attn_processor()
110
  print(f'init noise scale: {pipe.scheduler.init_noise_sigma}')
111
  pipe.watermark=None
112
+ pipe.safety_checker=None
113
+ pipe.unet = pipe.unet.to(memory_format=torch.contiguous_format)
114
+ pipe.unet = torch.compile(pipe.unet, backend="hidet")
115
+
116
  return pipe
117
 
 
 
 
 
118
  # for compile
119
  hidet.option.parallel_build(True)
120
+ hidet.option.parallel_tune(-1,16.0)
121
  torch._dynamo.config.suppress_errors = True
122
  torch._dynamo.disallow_in_graph(diffusers.models.attention.BasicTransformerBlock)
123
  # more search
 
130
  hidet.torch.dynamo_config.use_fp16_reduction(True)
131
  # use tensorcore
132
  hidet.torch.dynamo_config.use_tensor_core()
133
+ # Preload and compile both models
 
134
 
135
+ pipe = load_and_prepare_model()
136
 
137
  MAX_SEED = np.iinfo(np.int64).max
138