Spaces: Running on Zero
NAME: ACE_0.6B_512 | |
IS_DEFAULT: False | |
USE_DYNAMIC_MODEL: False | |
DEFAULT_PARAS: | |
PARAS: | |
# | |
INPUT: | |
INPUT_IMAGE: | |
INPUT_MASK: | |
TASK: | |
PROMPT: "" | |
NEGATIVE_PROMPT: "" | |
OUTPUT_HEIGHT: 512 | |
OUTPUT_WIDTH: 512 | |
SAMPLER: ddim | |
SAMPLE_STEPS: 20 | |
GUIDE_SCALE: 4.5 | |
GUIDE_RESCALE: 0.5 | |
SEED: -1 | |
TAR_INDEX: 0 | |
OUTPUT: | |
LATENT: | |
IMAGES: | |
SEED: | |
MODULES_PARAS: | |
FIRST_STAGE_MODEL: | |
FUNCTION: | |
- NAME: encode | |
DTYPE: float16 | |
INPUT: ["IMAGE"] | |
- NAME: decode | |
DTYPE: float16 | |
INPUT: ["LATENT"] | |
# | |
DIFFUSION_MODEL: | |
FUNCTION: | |
- NAME: forward | |
DTYPE: float16 | |
INPUT: ["SAMPLE_STEPS", "SAMPLE", "GUIDE_SCALE"] | |
# | |
COND_STAGE_MODEL: | |
FUNCTION: | |
- NAME: encode_list_of_list | |
DTYPE: bfloat16 | |
INPUT: ["PROMPT"] | |
# | |
MODEL: | |
NAME: LatentDiffusionACE | |
PRETRAINED_MODEL: | |
IGNORE_KEYS: [ ] | |
SCALE_FACTOR: 0.18215 | |
SIZE_FACTOR: 8 | |
DECODER_BIAS: 0.5 | |
DEFAULT_N_PROMPT: "" | |
TEXT_IDENTIFIER: [ '{image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ] | |
USE_TEXT_POS_EMBEDDINGS: True | |
# | |
DIFFUSION: | |
NAME: BaseDiffusion | |
PREDICTION_TYPE: eps | |
MIN_SNR_GAMMA: | |
NOISE_SCHEDULER: | |
NAME: LinearScheduler | |
NUM_TIMESTEPS: 1000 | |
BETA_MIN: 0.0001 | |
BETA_MAX: 0.02 | |
# | |
DIFFUSION_MODEL: | |
NAME: ACE | |
PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-512px@models/dit/ace_0.6b_512px.pth | |
IGNORE_KEYS: [ ] | |
PATCH_SIZE: 2 | |
IN_CHANNELS: 4 | |
HIDDEN_SIZE: 1152 | |
DEPTH: 28 | |
NUM_HEADS: 16 | |
MLP_RATIO: 4.0 | |
PRED_SIGMA: True | |
DROP_PATH: 0.0 | |
WINDOW_DIZE: 0 | |
Y_CHANNELS: 4096 | |
MAX_SEQ_LEN: 1024 | |
QK_NORM: True | |
USE_GRAD_CHECKPOINT: True | |
ATTENTION_BACKEND: flash_attn | |
# | |
FIRST_STAGE_MODEL: | |
NAME: AutoencoderKL | |
EMBED_DIM: 4 | |
PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-512px@models/vae/vae.bin | |
IGNORE_KEYS: [] | |
# | |
ENCODER: | |
NAME: Encoder | |
CH: 128 | |
OUT_CH: 3 | |
NUM_RES_BLOCKS: 2 | |
IN_CHANNELS: 3 | |
ATTN_RESOLUTIONS: [ ] | |
CH_MULT: [ 1, 2, 4, 4 ] | |
Z_CHANNELS: 4 | |
DOUBLE_Z: True | |
DROPOUT: 0.0 | |
RESAMP_WITH_CONV: True | |
# | |
DECODER: | |
NAME: Decoder | |
CH: 128 | |
OUT_CH: 3 | |
NUM_RES_BLOCKS: 2 | |
IN_CHANNELS: 3 | |
ATTN_RESOLUTIONS: [ ] | |
CH_MULT: [ 1, 2, 4, 4 ] | |
Z_CHANNELS: 4 | |
DROPOUT: 0.0 | |
RESAMP_WITH_CONV: True | |
GIVE_PRE_END: False | |
TANH_OUT: False | |
# | |
COND_STAGE_MODEL: | |
NAME: T5EmbedderHF | |
PRETRAINED_MODEL: hf://scepter-studio/ACE-0.6B-512px@models/text_encoder/t5-v1_1-xxl/ | |
TOKENIZER_PATH: hf://scepter-studio/ACE-0.6B-512px@models/tokenizer/t5-v1_1-xxl | |
LENGTH: 120 | |
T5_DTYPE: bfloat16 | |
ADDED_IDENTIFIER: [ '{image}', '{caption}', '{mask}', '{ref_image}', '{image1}', '{image2}', '{image3}', '{image4}', '{image5}', '{image6}', '{image7}', '{image8}', '{image9}' ] | |
CLEAN: whitespace | |
USE_GRAD: False | |