Can you provide the README.md, boss?

#2
by hugxd - opened

"The idea of this model feels intriguing, but I've been trying for half a day and still can't get this model to run. I'm not sure how to properly load the Denoiser model into the Diffuser's pipeline:

import torch
from diffusers import AutoencoderTiny, UNet2DConditionModel, EulerDiscreteScheduler, DDIMScheduler, DDPMScheduler
import numpy as np
from PIL import Image

from tld.denoiser import Denoiser
from tld.diffusion import DiffusionGenerator, DiffusionTransformer
from tld.configs import LTDConfig, DenoiserConfig, TrainConfig
from dataclasses import dataclass, asdict

# Load the models: the autoencoder can be loaded on its own from the SD weights by pointing at its subfolder
autoencoder = AutoencoderTiny.from_pretrained(
    "/data/xiedong/MobileDiffusionStable", subfolder="vae")

# denoiser_cfg = DenoiserConfig(n_channels=4)
denoiser_cfg = DenoiserConfig()
cfg = LTDConfig(denoiser_cfg=denoiser_cfg)
denoiser = Denoiser(**asdict(cfg.denoiser_cfg))
# state_dict = torch.load("/data/xiedong/MobileDiffusionStable/unet/diffusion_pytorch_model.safetensors",
#                         map_location=torch.device("cpu"))

from safetensors import safe_open

# Empty dict to collect the tensors
tensors = {}

# Read every tensor out of the safetensors checkpoint with safe_open
with safe_open("/data/xiedong/MobileDiffusionStable/unet/diffusion_pytorch_model.safetensors", framework="pt",
               device="cpu") as f:
    for key in f.keys():
        # copy each tensor into the dict
        tensors[key] = f.get_tensor(key)



denoiser.load_state_dict(tensors)
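# (I suspect this is where it breaks: the SD UNet checkpoint keys almost
#  certainly don't match the tld transformer Denoiser. A quick way to see
#  the mismatch -- and a one-liner for loading safetensors -- would be:)
# from safetensors.torch import load_file
# tensors = load_file("/data/xiedong/MobileDiffusionStable/unet/diffusion_pytorch_model.safetensors")
# missing, unexpected = denoiser.load_state_dict(tensors, strict=False)
# print(len(missing), len(unexpected))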

unet = denoiser

from transformers import CLIPTextModel, CLIPTokenizer

text_encoder = CLIPTextModel.from_pretrained(
    "/data/xiedong/MobileDiffusionStable", subfolder="text_encoder")

tokenizer = CLIPTokenizer.from_pretrained(
    "/data/xiedong/MobileDiffusionStable", subfolder="tokenizer")

print(tokenizer.model_max_length)

prompt = "a photograph of an astronaut riding a horse"
text_input_ids = tokenizer(
    prompt,
    padding="max_length",
    max_length=tokenizer.model_max_length,
    truncation=True,
    return_tensors="pt"
).input_ids
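
# The denoiser presumably conditions on the CLIP text embeddings rather than
# raw token ids, so I also compute those (not sure this is the expected shape):
with torch.no_grad():
    text_embeddings = text_encoder(text_input_ids).last_hidden_state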

from diffusers import AutoPipelineForText2Image

pipeline_text2image = AutoPipelineForText2Image.from_pretrained(
    "./majicmixRealistic_v7.torchscript", torch_dtype=torch.float16, variant="fp16", use_safetensors=True)

scheduler = DDPMScheduler.from_pretrained("/data/xiedong/MobileDiffusionStable", subfolder="scheduler")

pipeline_text2image.vae = autoencoder
pipeline_text2image.unet = unet
pipeline_text2image.text_encoder = text_encoder
pipeline_text2image.tokenizer = tokenizer
pipeline_text2image.scheduler = scheduler
pipeline_text2image.safety_checker = None

pipeline_text2image = pipeline_text2image.to("cuda")

prompt = "a cat "
image = pipeline_text2image(prompt=prompt, num_inference_steps=20).images[0]

image.save("1.png")

