init
Browse files
configs/models/qwen2_5_1_5b_radio_sd3_dynamic_puffin.py
CHANGED
|
@@ -7,7 +7,7 @@ from diffusers import AutoencoderKL, FlowMatchEulerDiscreteScheduler
|
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
|
| 9 |
llm_name_or_path = 'Qwen/Qwen2.5-1.5B-Instruct'
|
| 10 |
-
sd3_model_name_or_path = "
|
| 11 |
|
| 12 |
prompt_template = dict(
|
| 13 |
SYSTEM=('<|im_start|>system\n{system}<|im_end|>\n'),
|
|
@@ -40,26 +40,26 @@ model = dict(type=Qwen2p5RadioStableDiffusion3HFDynamic,
|
|
| 40 |
num_attention_heads=16,
|
| 41 |
),
|
| 42 |
transformer=dict(
|
| 43 |
-
type=SD3Transformer2DModel.from_pretrained,
|
| 44 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 45 |
subfolder="transformer",
|
| 46 |
torch_dtype=torch.bfloat16,
|
| 47 |
#local_files_only=True,
|
| 48 |
),
|
| 49 |
test_scheduler=dict(
|
| 50 |
-
type=FlowMatchEulerDiscreteScheduler.from_pretrained,
|
| 51 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 52 |
subfolder="scheduler",
|
| 53 |
#local_files_only=True,
|
| 54 |
),
|
| 55 |
train_scheduler=dict(
|
| 56 |
-
type=FlowMatchEulerDiscreteScheduler.from_pretrained,
|
| 57 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 58 |
subfolder="scheduler",
|
| 59 |
#local_files_only=True,
|
| 60 |
),
|
| 61 |
vae=dict(
|
| 62 |
-
type=AutoencoderKL.from_pretrained,
|
| 63 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 64 |
subfolder="vae",
|
| 65 |
torch_dtype=torch.bfloat16,
|
|
|
|
| 7 |
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 8 |
|
| 9 |
llm_name_or_path = 'Qwen/Qwen2.5-1.5B-Instruct'
|
| 10 |
+
sd3_model_name_or_path = "configs/sd3"
|
| 11 |
|
| 12 |
prompt_template = dict(
|
| 13 |
SYSTEM=('<|im_start|>system\n{system}<|im_end|>\n'),
|
|
|
|
| 40 |
num_attention_heads=16,
|
| 41 |
),
|
| 42 |
transformer=dict(
|
| 43 |
+
type=SD3Transformer2DModel.from_config,
|
| 44 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 45 |
subfolder="transformer",
|
| 46 |
torch_dtype=torch.bfloat16,
|
| 47 |
#local_files_only=True,
|
| 48 |
),
|
| 49 |
test_scheduler=dict(
|
| 50 |
+
type=FlowMatchEulerDiscreteScheduler.from_config,
|
| 51 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 52 |
subfolder="scheduler",
|
| 53 |
#local_files_only=True,
|
| 54 |
),
|
| 55 |
train_scheduler=dict(
|
| 56 |
+
type=FlowMatchEulerDiscreteScheduler.from_config,
|
| 57 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 58 |
subfolder="scheduler",
|
| 59 |
#local_files_only=True,
|
| 60 |
),
|
| 61 |
vae=dict(
|
| 62 |
+
type=AutoencoderKL.from_config,
|
| 63 |
pretrained_model_name_or_path=sd3_model_name_or_path,
|
| 64 |
subfolder="vae",
|
| 65 |
torch_dtype=torch.bfloat16,
|
configs/sd3/scheduler/scheduler_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "FlowMatchEulerDiscreteScheduler",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"num_train_timesteps": 1000,
|
| 5 |
+
"shift": 3.0
|
| 6 |
+
}
|
configs/sd3/transformer/config.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "SD3Transformer2DModel",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"attention_head_dim": 64,
|
| 5 |
+
"caption_projection_dim": 1536,
|
| 6 |
+
"in_channels": 16,
|
| 7 |
+
"joint_attention_dim": 4096,
|
| 8 |
+
"num_attention_heads": 24,
|
| 9 |
+
"num_layers": 24,
|
| 10 |
+
"out_channels": 16,
|
| 11 |
+
"patch_size": 2,
|
| 12 |
+
"pooled_projection_dim": 2048,
|
| 13 |
+
"pos_embed_max_size": 192,
|
| 14 |
+
"sample_size": 128
|
| 15 |
+
}
|
configs/sd3/vae/config.json
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_class_name": "AutoencoderKL",
|
| 3 |
+
"_diffusers_version": "0.29.0.dev0",
|
| 4 |
+
"act_fn": "silu",
|
| 5 |
+
"block_out_channels": [
|
| 6 |
+
128,
|
| 7 |
+
256,
|
| 8 |
+
512,
|
| 9 |
+
512
|
| 10 |
+
],
|
| 11 |
+
"down_block_types": [
|
| 12 |
+
"DownEncoderBlock2D",
|
| 13 |
+
"DownEncoderBlock2D",
|
| 14 |
+
"DownEncoderBlock2D",
|
| 15 |
+
"DownEncoderBlock2D"
|
| 16 |
+
],
|
| 17 |
+
"force_upcast": true,
|
| 18 |
+
"in_channels": 3,
|
| 19 |
+
"latent_channels": 16,
|
| 20 |
+
"latents_mean": null,
|
| 21 |
+
"latents_std": null,
|
| 22 |
+
"layers_per_block": 2,
|
| 23 |
+
"norm_num_groups": 32,
|
| 24 |
+
"out_channels": 3,
|
| 25 |
+
"sample_size": 1024,
|
| 26 |
+
"scaling_factor": 1.5305,
|
| 27 |
+
"shift_factor": 0.0609,
|
| 28 |
+
"up_block_types": [
|
| 29 |
+
"UpDecoderBlock2D",
|
| 30 |
+
"UpDecoderBlock2D",
|
| 31 |
+
"UpDecoderBlock2D",
|
| 32 |
+
"UpDecoderBlock2D"
|
| 33 |
+
],
|
| 34 |
+
"use_post_quant_conv": false,
|
| 35 |
+
"use_quant_conv": false
|
| 36 |
+
}
|