Spaces:
Runtime error
Runtime error
Update merged_app2.py
Browse files- merged_app2.py +16 -20
merged_app2.py
CHANGED
@@ -150,12 +150,9 @@ def download_models():
|
|
150 |
print(f"Error downloading {filename}: {e}")
|
151 |
|
152 |
ensure_directories()
|
153 |
-
|
154 |
download_models()
|
155 |
|
156 |
|
157 |
-
|
158 |
-
|
159 |
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Redux-dev", filename="flux1-redux-dev.safetensors", local_dir="models/style_models")
|
160 |
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Depth-dev", filename="flux1-depth-dev.safetensors", local_dir="models/diffusion_models")
|
161 |
hf_hub_download(repo_id="Comfy-Org/sigclip_vision_384", filename="sigclip_vision_patch14_384.safetensors", local_dir="models/clip_vision")
|
@@ -165,7 +162,6 @@ hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="clip_l.sa
|
|
165 |
t5_path = hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="t5xxl_fp16.safetensors", local_dir="models/text_encoders/t5")
|
166 |
|
167 |
|
168 |
-
|
169 |
sd15_name = 'stablediffusionapi/realistic-vision-v51'
|
170 |
tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
|
171 |
text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
|
@@ -173,23 +169,10 @@ vae = AutoencoderKL.from_pretrained(sd15_name, subfolder="vae")
|
|
173 |
unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
|
174 |
|
175 |
|
176 |
-
|
177 |
-
# fill_pipe = FluxFillPipeline.from_single_file(
|
178 |
-
# "https://huggingface.co/SporkySporkness/FLUX.1-Fill-dev-GGUF/flux1-fill-dev-fp16-Q5_0-GGUF.gguf",
|
179 |
-
# text_encoder= text_encoder,
|
180 |
-
# text_encoder_2 = t5_path,
|
181 |
-
# ignore_mismatched_sizes=True,
|
182 |
-
# low_cpu_mem_usage=False,
|
183 |
-
# torch_dtype=torch.bfloat16
|
184 |
-
# ).to("cuda")
|
185 |
-
|
186 |
from diffusers import FluxTransformer2DModel, FluxFillPipeline, GGUFQuantizationConfig
|
187 |
from transformers import T5EncoderModel
|
188 |
import torch
|
189 |
|
190 |
-
# transformer = FluxTransformer2DModel.from_pretrained("AlekseyCalvin/FluxFillDev_fp8_Diffusers", subfolder="transformer", torch_dtype=torch.bfloat16).to("cuda")
|
191 |
-
# text_encoder_2 = T5EncoderModel.from_pretrained("AlekseyCalvin/FluxFillDev_fp8_Diffusers", subfolder="text_encoder_2", torch_dtype=torch.bfloat16).to("cuda")
|
192 |
-
# fill_pipe = FluxFillPipeline.from_pretrained("black-forest-labs/FLUX.1-Fill-dev", transformer=transformer, text_encoder_2=text_encoder_2, torch_dtype=torch.bfloat16).to("cuda")
|
193 |
|
194 |
|
195 |
ckpt_path = (
|
@@ -209,8 +192,6 @@ fill_pipe = FluxFillPipeline.from_pretrained(
|
|
209 |
torch_dtype=torch.bfloat16,
|
210 |
)
|
211 |
|
212 |
-
fill_pipe.enable_model_cpu_offload()
|
213 |
-
|
214 |
|
215 |
try:
|
216 |
import xformers
|
@@ -221,6 +202,11 @@ except ImportError:
|
|
221 |
XFORMERS_AVAILABLE = False
|
222 |
print("xformers not available - Using default attention")
|
223 |
|
|
|
|
|
|
|
|
|
|
|
224 |
# Memory optimizations for RTX 2070
|
225 |
torch.backends.cudnn.benchmark = True
|
226 |
if torch.cuda.is_available():
|
@@ -505,6 +491,9 @@ pipe = prepare_pipeline(
|
|
505 |
dtype=dtype,
|
506 |
)
|
507 |
|
|
|
|
|
|
|
508 |
|
509 |
# Move models to device with consistent dtype
|
510 |
text_encoder = text_encoder.to(device=device, dtype=dtype)
|
@@ -554,6 +543,10 @@ t2i_pipe = StableDiffusionPipeline(
|
|
554 |
image_encoder=None
|
555 |
)
|
556 |
|
|
|
|
|
|
|
|
|
557 |
i2i_pipe = StableDiffusionImg2ImgPipeline(
|
558 |
vae=vae,
|
559 |
text_encoder=text_encoder,
|
@@ -566,6 +559,9 @@ i2i_pipe = StableDiffusionImg2ImgPipeline(
|
|
566 |
image_encoder=None
|
567 |
)
|
568 |
|
|
|
|
|
|
|
569 |
|
570 |
@torch.inference_mode()
|
571 |
def encode_prompt_inner(txt: str):
|
@@ -1054,7 +1050,7 @@ def use_orientation(selected_image:gr.SelectData):
|
|
1054 |
|
1055 |
|
1056 |
def generate_description(object_description,image, detail="high", max_tokens=250):
|
1057 |
-
openai_api_key = os.getenv
|
1058 |
client = OpenAI(api_key=openai_api_key)
|
1059 |
|
1060 |
if image is not None:
|
|
|
150 |
print(f"Error downloading {filename}: {e}")
|
151 |
|
152 |
ensure_directories()
|
|
|
153 |
download_models()
|
154 |
|
155 |
|
|
|
|
|
156 |
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Redux-dev", filename="flux1-redux-dev.safetensors", local_dir="models/style_models")
|
157 |
hf_hub_download(repo_id="black-forest-labs/FLUX.1-Depth-dev", filename="flux1-depth-dev.safetensors", local_dir="models/diffusion_models")
|
158 |
hf_hub_download(repo_id="Comfy-Org/sigclip_vision_384", filename="sigclip_vision_patch14_384.safetensors", local_dir="models/clip_vision")
|
|
|
162 |
t5_path = hf_hub_download(repo_id="comfyanonymous/flux_text_encoders", filename="t5xxl_fp16.safetensors", local_dir="models/text_encoders/t5")
|
163 |
|
164 |
|
|
|
165 |
sd15_name = 'stablediffusionapi/realistic-vision-v51'
|
166 |
tokenizer = CLIPTokenizer.from_pretrained(sd15_name, subfolder="tokenizer")
|
167 |
text_encoder = CLIPTextModel.from_pretrained(sd15_name, subfolder="text_encoder")
|
|
|
169 |
unet = UNet2DConditionModel.from_pretrained(sd15_name, subfolder="unet")
|
170 |
|
171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
172 |
from diffusers import FluxTransformer2DModel, FluxFillPipeline, GGUFQuantizationConfig
|
173 |
from transformers import T5EncoderModel
|
174 |
import torch
|
175 |
|
|
|
|
|
|
|
176 |
|
177 |
|
178 |
ckpt_path = (
|
|
|
192 |
torch_dtype=torch.bfloat16,
|
193 |
)
|
194 |
|
|
|
|
|
195 |
|
196 |
try:
|
197 |
import xformers
|
|
|
202 |
XFORMERS_AVAILABLE = False
|
203 |
print("xformers not available - Using default attention")
|
204 |
|
205 |
+
fill_pipe.enable_model_cpu_offload()
|
206 |
+
fill_pipe.enable_vae_slicing()
|
207 |
+
fill_pipe.enable_xformers_memory_efficient_attention()
|
208 |
+
|
209 |
+
|
210 |
# Memory optimizations for RTX 2070
|
211 |
torch.backends.cudnn.benchmark = True
|
212 |
if torch.cuda.is_available():
|
|
|
491 |
dtype=dtype,
|
492 |
)
|
493 |
|
494 |
+
pipe.enable_model_cpu_offload()
|
495 |
+
pipe.enable_vae_slicing()
|
496 |
+
pipe.enable_xformers_memory_efficient_attention()
|
497 |
|
498 |
# Move models to device with consistent dtype
|
499 |
text_encoder = text_encoder.to(device=device, dtype=dtype)
|
|
|
543 |
image_encoder=None
|
544 |
)
|
545 |
|
546 |
+
t2i_pipe.enable_model_cpu_offload()
|
547 |
+
t2i_pipe.enable_vae_slicing()
|
548 |
+
t2i_pipe.enable_xformers_memory_efficient_attention()
|
549 |
+
|
550 |
i2i_pipe = StableDiffusionImg2ImgPipeline(
|
551 |
vae=vae,
|
552 |
text_encoder=text_encoder,
|
|
|
559 |
image_encoder=None
|
560 |
)
|
561 |
|
562 |
+
i2i_pipe.enable_model_cpu_offload()
|
563 |
+
i2i_pipe.enable_vae_slicing()
|
564 |
+
i2i_pipe.enable_xformers_memory_efficient_attention()
|
565 |
|
566 |
@torch.inference_mode()
|
567 |
def encode_prompt_inner(txt: str):
|
|
|
1050 |
|
1051 |
|
1052 |
def generate_description(object_description,image, detail="high", max_tokens=250):
|
1053 |
+
openai_api_key = os.getenv("OPENAI_API_KEY")
|
1054 |
client = OpenAI(api_key=openai_api_key)
|
1055 |
|
1056 |
if image is not None:
|