"""
Model loading and initialization for Pixagram AI Pixel Art Generator
FIXED VERSION - Uses correct InstantID pipeline and Compel encoder
"""
import torch
import time
import os
from diffusers import (
ControlNetModel,
AutoencoderKL,
LCMScheduler
)
from transformers import (
CLIPTokenizer, CLIPTextModel, CLIPTextModelWithProjection
)
from insightface.app import FaceAnalysis
from controlnet_aux import ZoeDetector, OpenposeDetector, LeresDetector, MidasDetector, MediapipeFaceDetector
from huggingface_hub import hf_hub_download, snapshot_download
# --- START FIX: Import correct pipeline and Compel ---
from pipeline_stable_diffusion_xl_instantid_img2img import StableDiffusionXLInstantIDImg2ImgPipeline
from compel import Compel, ReturnedEmbeddingsType
# --- END FIX ---
from config import (
device, dtype, MODEL_REPO, MODEL_FILES, HUGGINGFACE_TOKEN,
FACE_DETECTION_CONFIG, CLIP_SKIP, DOWNLOAD_CONFIG
)
# download_model_with_retry, load_face_analysis, load_depth_detector,
# load_openpose_detector, and load_mediapipe_face_detector are kept as they were.
def download_model_with_retry(repo_id, filename, max_retries=None, **kwargs):
"""Download model with retry logic and proper token handling."""
if max_retries is None:
max_retries = DOWNLOAD_CONFIG['max_retries']
# Ensure token is passed if available
if HUGGINGFACE_TOKEN and "token" not in kwargs:
kwargs["token"] = HUGGINGFACE_TOKEN
for attempt in range(max_retries):
try:
print(f" Attempting to download {filename} (attempt {attempt + 1}/{max_retries})...")
return hf_hub_download(
repo_id=repo_id,
filename=filename,
**kwargs
)
except Exception as e:
print(f" [WARNING] Download attempt {attempt + 1} failed: {e}")
if attempt < max_retries - 1:
print(f" Retrying in {DOWNLOAD_CONFIG['retry_delay']} seconds...")
time.sleep(DOWNLOAD_CONFIG['retry_delay'])
else:
print(f" [ERROR] Failed to download {filename} after {max_retries} attempts")
raise
return None
def load_face_analysis():
"""
Load face analysis model with proper model downloading from HuggingFace.
Downloads from DIAMONIK7777/antelopev2 which has the correct model structure.
"""
print("Loading face analysis model...")
try:
antelope_download = snapshot_download(repo_id="DIAMONIK7777/antelopev2", local_dir="/data/models/antelopev2")
# --- FIX: Load InsightFace on CPU to save VRAM ---
face_app = FaceAnalysis(name='antelopev2', root='/data', providers=['CPUExecutionProvider'])
face_app.prepare(ctx_id=0, det_size=(640, 640))
print(" [OK] Face analysis loaded (on CPU)")
return face_app, True
except Exception as e:
print(f" [ERROR] Face detection not available: {e}")
import traceback
traceback.print_exc()
return None, False
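# Illustrative only (not part of the original module): a rough sketch of how the
# returned FaceAnalysis app is typically queried for InstantID-style embeddings.
# `pil_image` and `face_info` are placeholder names; the dict-style access follows
# insightface's Face objects as used in InstantID reference code.
#
#   import cv2, numpy as np
#   bgr = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
#   faces = face_app.get(bgr)  # list of detected faces
#   face_info = max(faces, key=lambda f: (f['bbox'][2] - f['bbox'][0]) * (f['bbox'][3] - f['bbox'][1]))
#   face_emb, face_kps = face_info['embedding'], face_info['kps']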
def load_depth_detector():
"""
Load depth detector with fallback hierarchy: Leres → Zoe → Midas.
Returns (detector, detector_type, success).
"""
print("Loading depth detector with fallback hierarchy...")
# Try LeresDetector first (best quality)
try:
print(" Attempting LeresDetector (highest quality)...")
# --- FIX: Load on CPU ---
leres_depth = LeresDetector.from_pretrained("lllyasviel/Annotators")
# leres_depth.to(device) # Removed
print(" [OK] LeresDetector loaded successfully (on CPU)")
return leres_depth, 'leres', True
except Exception as e:
print(f" [INFO] LeresDetector not available: {e}")
# Fallback to ZoeDetector
try:
print(" Attempting ZoeDetector (fallback #1)...")
# --- FIX: Load on CPU ---
zoe_depth = ZoeDetector.from_pretrained("lllyasviel/Annotators")
# zoe_depth.to(device) # Removed
print(" [OK] ZoeDetector loaded successfully (on CPU)")
return zoe_depth, 'zoe', True
except Exception as e:
print(f" [INFO] ZoeDetector not available: {e}")
# Final fallback to MidasDetector
try:
print(" Attempting MidasDetector (fallback #2)...")
# --- FIX: Load on CPU ---
midas_depth = MidasDetector.from_pretrained("lllyasviel/Annotators")
# midas_depth.to(device) # Removed
print(" [OK] MidasDetector loaded successfully (on CPU)")
return midas_depth, 'midas', True
except Exception as e:
print(f" [WARNING] MidasDetector not available: {e}")
print(" [ERROR] No depth detector available")
return None, None, False
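# Usage note (assumption, not in the original file): whichever controlnet_aux detector
# is returned above is callable on a PIL image and yields a PIL depth map, e.g.
# `depth_image = detector(input_image)`, so callers can treat all three fallbacks alike.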
# --- NEW FUNCTION ---
def load_openpose_detector():
"""Load OpenPose detector."""
print("Loading OpenPose detector...")
try:
# --- FIX: Load on CPU ---
openpose = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
# openpose.to(device) # Removed
print(" [OK] OpenPose loaded successfully (on CPU)")
return openpose, True
except Exception as e:
print(f" [WARNING] OpenPose not available: {e}")
return None, False
# --- END NEW FUNCTION ---
# --- NEW FUNCTION ---
def load_mediapipe_face_detector():
"""Load MediapipeFaceDetector for advanced face detection."""
print("Loading MediapipeFaceDetector...")
try:
face_detector = MediapipeFaceDetector()
print(" [OK] MediapipeFaceDetector loaded successfully")
return face_detector, True
except Exception as e:
print(f" [WARNING] MediapipeFaceDetector not available: {e}")
return None, False
# --- END NEW FUNCTION ---
def load_controlnets():
"""Load ControlNet models."""
print("Loading ControlNet Zoe Depth model...")
# --- FIX: Load core models on GPU ---
controlnet_depth = ControlNetModel.from_pretrained(
"xinsir/controlnet-depth-sdxl-1.0",
torch_dtype=dtype
).to(device)
print(" [OK] ControlNet Depth loaded (on GPU)")
# --- NEW: Load OpenPose ControlNet ---
print("Loading ControlNet OpenPose model...")
try:
# --- FIX: Load core models on GPU ---
controlnet_openpose = ControlNetModel.from_pretrained(
"xinsir/controlnet-openpose-sdxl-1.0",
torch_dtype=dtype
).to(device)
print(" [OK] ControlNet OpenPose loaded (on GPU)")
except Exception as e:
print(f" [WARNING] ControlNet OpenPose not available: {e}")
controlnet_openpose = None
# --- END NEW ---
print("Loading InstantID ControlNet...")
try:
# --- FIX: Load core models on GPU ---
controlnet_instantid = ControlNetModel.from_pretrained(
"InstantX/InstantID",
subfolder="ControlNetModel",
torch_dtype=dtype
).to(device)
print(" [OK] InstantID ControlNet loaded successfully (on GPU)")
# Return all three models
return controlnet_depth, controlnet_instantid, controlnet_openpose, True
except Exception as e:
print(f" [WARNING] InstantID ControlNet not available: {e}")
# Return models, indicating InstantID failure
return controlnet_depth, None, controlnet_openpose, False
# --- START: REMOVED load_image_encoder ---
# (The new pipeline handles this internally)
# --- END: REMOVED load_image_encoder ---
def load_sdxl_pipeline(controlnets):
"""Load SDXL checkpoint from HuggingFace Hub."""
print("Loading SDXL checkpoint (horizon) with bundled VAE from HuggingFace Hub...")
# --- START FIX: Load base text models for Compel (from previous fix) ---
print(" Loading base tokenizers and text encoders...")
BASE_MODEL = "stabilityai/stable-diffusion-xl-base-1.0"
tokenizer = CLIPTokenizer.from_pretrained(BASE_MODEL, subfolder="tokenizer")
tokenizer_2 = CLIPTokenizer.from_pretrained(BASE_MODEL, subfolder="tokenizer_2")
text_encoder = CLIPTextModel.from_pretrained(
BASE_MODEL, subfolder="text_encoder", torch_dtype=dtype
).to(device)
text_encoder_2 = CLIPTextModelWithProjection.from_pretrained(
BASE_MODEL, subfolder="text_encoder_2", torch_dtype=dtype
).to(device)
print(" [OK] Base text/token models loaded")
# --- END FIX ---
try:
model_path = download_model_with_retry(MODEL_REPO, MODEL_FILES['checkpoint'], repo_type="model")
# --- START FIX: Load the CORRECT pipeline ---
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_single_file(
model_path,
controlnet=controlnets,
torch_dtype=dtype,
use_safetensors=True,
# Pass components
tokenizer=tokenizer,
tokenizer_2=tokenizer_2,
text_encoder=text_encoder,
text_encoder_2=text_encoder_2,
).to(device)
# --- END FIX ---
print(" [OK] Custom checkpoint loaded successfully (VAE bundled)")
return pipe, True
except Exception as e:
print(f" [WARNING] Could not load custom checkpoint: {e}")
print(" Using default SDXL base model")
# --- START FIX: Fallback to the CORRECT pipeline ---
pipe = StableDiffusionXLInstantIDImg2ImgPipeline.from_pretrained(
"stabilityai/stable-diffusion-xl-base-1.0",
controlnet=controlnets,
torch_dtype=dtype,
use_safetensors=True,
# Pass components
tokenizer=tokenizer,
tokenizer_2=tokenizer_2,
text_encoder=text_encoder,
text_encoder_2=text_encoder_2,
).to(device)
# --- END FIX ---
return pipe, False
def load_loras(pipe):
"""Load all LORAs from HuggingFace Hub."""
print("Loading all LORAs from HuggingFace Hub...")
loaded_loras = {}
lora_files = {
"retroart": MODEL_FILES.get("lora_retroart"),
"vga": MODEL_FILES.get("lora_vga"),
"lucasart": MODEL_FILES.get("lora_lucasart")
}
for adapter_name, filename in lora_files.items():
if not filename:
print(f" [INFO] No file specified for LORA '{adapter_name}', skipping.")
loaded_loras[adapter_name] = False
continue
try:
lora_path = download_model_with_retry(MODEL_REPO, filename, repo_type="model")
pipe.load_lora_weights(lora_path, adapter_name=adapter_name)
print(f" [OK] LORA loaded successfully: {filename} as '{adapter_name}'")
loaded_loras[adapter_name] = True
except Exception as e:
print(f" [WARNING] Could not load LORA {filename}: {e}")
loaded_loras[adapter_name] = False
success = any(loaded_loras.values())
if not success:
print(" [WARNING] No LORAs were loaded successfully.")
return loaded_loras, success
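# Sketch only (not part of the original module): once load_lora_weights has registered
# the adapters above, they are typically activated through diffusers' PEFT-backed
# adapter API. The adapter names and weights below are placeholders.
#
#   pipe.set_adapters(["retroart"], adapter_weights=[0.8])
#   # or blend several styles:
#   pipe.set_adapters(["retroart", "vga"], adapter_weights=[0.6, 0.4])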
# --- START FIX: Replace setup_ip_adapter ---
def setup_ip_adapter(pipe):
"""
Setup IP-Adapter for InstantID face embeddings using the pipeline's method.
"""
print("Setting up IP-Adapter for InstantID face embeddings...")
try:
# Download InstantID weights
ip_adapter_path = download_model_with_retry(
"InstantX/InstantID",
"ip-adapter.bin",
repo_type="model"
)
# Use the pipeline's built-in loader
pipe.load_ip_adapter_instantid(ip_adapter_path)
print(" [OK] IP-Adapter fully loaded via pipeline")
return None, True # We don't need to return a model
except Exception as e:
print(f" [ERROR] Could not setup IP-Adapter: {e}")
import traceback
traceback.print_exc()
return None, False
# --- END FIX ---
# --- START FIX: Replace setup_cappella with setup_compel ---
def setup_compel(pipe):
"""Setup Compel for robust prompt encoding."""
print("Setting up Compel (prompt encoder)...")
try:
compel = Compel(
tokenizer=[pipe.tokenizer, pipe.tokenizer_2],
text_encoder=[pipe.text_encoder, pipe.text_encoder_2],
returned_embeddings_type=ReturnedEmbeddingsType.PENULTIMATE_HIDDEN_STATES_NON_NORMALIZED,
requires_pooled=[False, True]
)
print(" [OK] Compel loaded successfully.")
return compel, True
except Exception as e:
print(f" [WARNING] Compel not available: {e}")
return None, False
# --- END FIX ---
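# Usage sketch (assumption, not in the original file): with requires_pooled=[False, True],
# calling the Compel object returns SDXL conditioning plus pooled embeddings, which are
# passed to the pipeline in place of raw prompt strings.
#
#   conditioning, pooled = compel("pixel art portrait, detailed++")
#   # e.g. pipe(..., prompt_embeds=conditioning, pooled_prompt_embeds=pooled)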
def setup_scheduler(pipe):
"""Setup LCM scheduler."""
print("Setting up LCM scheduler...")
pipe.scheduler = LCMScheduler.from_config(pipe.scheduler.config)
print(" [OK] LCM scheduler configured")
def optimize_pipeline(pipe):
"""Apply optimizations to pipeline."""
if device == "cuda":
try:
pipe.enable_xformers_memory_efficient_attention()
print(" [OK] xformers enabled")
except Exception as e:
print(f" [INFO] xformers not available: {e}")
def load_caption_model():
"""
Load caption model with proper error handling.
Tries multiple models in order of quality.
"""
print("Loading caption model...")
# Try GIT-Large first (good balance of quality and compatibility)
try:
from transformers import AutoProcessor, AutoModelForCausalLM
print(" Attempting GIT-Large (recommended)...")
caption_processor = AutoProcessor.from_pretrained("microsoft/git-large-coco")
caption_model = AutoModelForCausalLM.from_pretrained(
"microsoft/git-large-coco",
torch_dtype=dtype
)
print(" [OK] GIT-Large model loaded (produces detailed captions, on CPU)")
return caption_processor, caption_model, True, 'git'
except Exception as e1:
print(f" [INFO] GIT-Large not available: {e1}")
# Try BLIP base as fallback
try:
from transformers import BlipProcessor, BlipForConditionalGeneration
print(" Attempting BLIP base (fallback)...")
caption_processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
caption_model = BlipForConditionalGeneration.from_pretrained(
"Salesforce/blip-image-captioning-base",
torch_dtype=dtype
)
print(" [OK] BLIP base model loaded (standard captions, on CPU)")
return caption_processor, caption_model, True, 'blip'
except Exception as e2:
print(f" [WARNING] Caption models not available: {e2}")
print(" Caption generation will be disabled")
return None, None, False, 'none'
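# Captioning sketch (not part of the original module): rough usage for the GIT path
# loaded above; `pil_image` is a placeholder, and the BLIP fallback is analogous with
# its own processor/model pair.
#
#   inputs = caption_processor(images=pil_image, return_tensors="pt")
#   ids = caption_model.generate(pixel_values=inputs.pixel_values, max_length=50)
#   caption = caption_processor.batch_decode(ids, skip_special_tokens=True)[0]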
def set_clip_skip(pipe):
    """Report the configured CLIP skip value (applied at prompt-encoding time)."""
    if hasattr(pipe, 'text_encoder'):
        # NOTE: nothing on the pipeline is modified here; CLIP_SKIP is expected to be
        # honored when prompts are encoded (e.g. by Compel) rather than by this call.
        print(f" [OK] CLIP skip configured: {CLIP_SKIP}")
print("[OK] Model loading functions ready")