kelseye commited on
Commit
580dfb0
·
verified ·
1 Parent(s): 42e60c1

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ ---
4
+ # Templates - Meme Panda (FLUX.2-klein-base-4B)
5
+
6
+ This model is part of the first batch of Diffusion Templates series models open-sourced by [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio). It's an Easter egg model capable of generating various meme-style panda head expression images.
7
+
8
+ ## Demo
9
+
10
+ |Prompt: A meme with a happy expression.|Prompt: A meme with a sleepy expression.|Prompt: A meme with a surprised expression.|
11
+ |-|-|-|
12
+ |![](./assets/image_PandaMeme_happy.jpg)|![](./assets/image_PandaMeme_sleepy.jpg)|![](./assets/image_PandaMeme_surprised.jpg)|
13
+
14
+ ## Inference Code
15
+
16
+ * Install [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
17
+
18
+ ```
19
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
20
+ cd DiffSynth-Studio
21
+ pip install -e .
22
+ ```
23
+
24
+ * Direct inference, requires 40GB VRAM
25
+
26
+ ```python
27
+ from diffsynth.diffusion.template import TemplatePipeline
28
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
29
+ import torch
30
+
31
+
32
+ pipe = Flux2ImagePipeline.from_pretrained(
33
+ torch_dtype=torch.bfloat16,
34
+ device="cuda",
35
+ model_configs=[
36
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
37
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
38
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
39
+ ],
40
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
41
+ )
42
+ template = TemplatePipeline.from_pretrained(
43
+ torch_dtype=torch.bfloat16,
44
+ device="cuda",
45
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
46
+ )
47
+ image = template(
48
+ pipe,
49
+ prompt="A meme with a sleepy expression.",
50
+ seed=0, cfg_scale=4, num_inference_steps=50,
51
+ template_inputs = [{}],
52
+ negative_template_inputs = [{}],
53
+ )
54
+ image.save("image_PandaMeme_sleepy.jpg")
55
+ image = template(
56
+ pipe,
57
+ prompt="A meme with a happy expression.",
58
+ seed=0, cfg_scale=4, num_inference_steps=50,
59
+ template_inputs = [{}],
60
+ negative_template_inputs = [{}],
61
+ )
62
+ image.save("image_PandaMeme_happy.jpg")
63
+ image = template(
64
+ pipe,
65
+ prompt="A meme with a surprised expression.",
66
+ seed=0, cfg_scale=4, num_inference_steps=50,
67
+ template_inputs = [{}],
68
+ negative_template_inputs = [{}],
69
+ )
70
+ image.save("image_PandaMeme_surprised.jpg")
71
+ ```
72
+
73
+ * Enable lazy loading and memory management, requires 24GB VRAM
74
+
75
+ ```python
76
+ from diffsynth.diffusion.template import TemplatePipeline
77
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
78
+ import torch
79
+
80
+
81
+ vram_config = {
82
+ "offload_dtype": "disk",
83
+ "offload_device": "disk",
84
+ "onload_dtype": torch.float8_e4m3fn,
85
+ "onload_device": "cpu",
86
+ "preparing_dtype": torch.float8_e4m3fn,
87
+ "preparing_device": "cuda",
88
+ "computation_dtype": torch.bfloat16,
89
+ "computation_device": "cuda",
90
+ }
91
+ pipe = Flux2ImagePipeline.from_pretrained(
92
+ torch_dtype=torch.bfloat16,
93
+ device="cuda",
94
+ model_configs=[
95
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
96
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
97
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
98
+ ],
99
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
100
+ vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
101
+ )
102
+ template = TemplatePipeline.from_pretrained(
103
+ torch_dtype=torch.bfloat16,
104
+ device="cuda",
105
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
106
+ lazy_loading=True,
107
+ )
108
+ image = template(
109
+ pipe,
110
+ prompt="A meme with a sleepy expression.",
111
+ seed=0, cfg_scale=4, num_inference_steps=50,
112
+ template_inputs=[{}],
113
+ negative_template_inputs=[{}],
114
+ )
115
+ image.save("image_PandaMeme_sleepy.jpg")
116
+ image = template(
117
+ pipe,
118
+ prompt="A meme with a happy expression.",
119
+ seed=0, cfg_scale=4, num_inference_steps=50,
120
+ template_inputs=[{}],
121
+ negative_template_inputs=[{}],
122
+ )
123
+ image.save("image_PandaMeme_happy.jpg")
124
+ image = template(
125
+ pipe,
126
+ prompt="A meme with a surprised expression.",
127
+ seed=0, cfg_scale=4, num_inference_steps=50,
128
+ template_inputs=[{}],
129
+ negative_template_inputs=[{}],
130
+ )
131
+ image.save("image_PandaMeme_surprised.jpg")
132
+ ```
133
+
134
+ ## Training Code
135
+
136
+ After installing DiffSynth-Studio, use the following script to start training. For more information, please refer to the [DiffSynth-Studio Documentation](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/).
137
+
138
+ ```shell
139
+ modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset
140
+
141
+ accelerate launch examples/flux2/model_training/train.py \
142
+ --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \
143
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \
144
+ --extra_inputs "template_inputs" \
145
+ --max_pixels 1048576 \
146
+ --dataset_repeat 50 \
147
+ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
148
+ --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \
149
+ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
150
+ --learning_rate 1e-4 \
151
+ --num_epochs 2 \
152
+ --remove_prefix_in_ckpt "pipe.template_model." \
153
+ --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \
154
+ --trainable_models "template_model" \
155
+ --use_gradient_checkpointing \
156
+ --find_unused_parameters
157
+ ```
README_from_modelscope.md ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ frameworks:
3
+ - Pytorch
4
+ license: Apache License 2.0
5
+ tags: []
6
+ tasks:
7
+ - text-to-video-synthesis
8
+
9
+ #model-type:
10
+ ##如 gpt、phi、llama、chatglm、baichuan 等
11
+ #- gpt
12
+
13
+ #domain:
14
+ ##如 nlp、cv、audio、multi-modal
15
+ #- nlp
16
+
17
+ #language:
18
+ ##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
19
+ #- cn
20
+
21
+ #metrics:
22
+ ##如 CIDEr、Blue、ROUGE 等
23
+ #- CIDEr
24
+
25
+ #tags:
26
+ ##各种自定义,包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
27
+ #- pretrained
28
+
29
+ #tools:
30
+ ##如 vllm、fastchat、llamacpp、AdaSeq 等
31
+ #- vllm
32
+ ---
33
+ # Templates-魔性熊猫(FLUX.2-klein-base-4B)
34
+
35
+ 本模型是 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) 开源的首批 Diffusion Templates 系列模型。这是一个彩蛋模型,能够生成各种魔性的熊猫头表情包。
36
+
37
+ ## 效果展示
38
+
39
+ |Prompt: A meme with a happy expression.|Prompt: A meme with a sleepy expression.|Prompt: A meme with a surprised expression.|
40
+ |-|-|-|
41
+ |![](./assets/image_PandaMeme_happy.jpg)|![](./assets/image_PandaMeme_sleepy.jpg)|![](./assets/image_PandaMeme_surprised.jpg)|
42
+
43
+ ## 推理代码
44
+
45
+ * 安装 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
46
+
47
+ ```
48
+ git clone https://github.com/modelscope/DiffSynth-Studio.git
49
+ cd DiffSynth-Studio
50
+ pip install -e .
51
+ ```
52
+
53
+ * 直接推理,需 40G 显存
54
+
55
+ ```python
56
+ from diffsynth.diffusion.template import TemplatePipeline
57
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
58
+ import torch
59
+
60
+ pipe = Flux2ImagePipeline.from_pretrained(
61
+ torch_dtype=torch.bfloat16,
62
+ device="cuda",
63
+ model_configs=[
64
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
65
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
66
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
67
+ ],
68
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
69
+ )
70
+ template = TemplatePipeline.from_pretrained(
71
+ torch_dtype=torch.bfloat16,
72
+ device="cuda",
73
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
74
+ )
75
+ image = template(
76
+ pipe,
77
+ prompt="A meme with a sleepy expression.",
78
+ seed=0, cfg_scale=4, num_inference_steps=50,
79
+ template_inputs = [{}],
80
+ negative_template_inputs = [{}],
81
+ )
82
+ image.save("image_PandaMeme_sleepy.jpg")
83
+ image = template(
84
+ pipe,
85
+ prompt="A meme with a happy expression.",
86
+ seed=0, cfg_scale=4, num_inference_steps=50,
87
+ template_inputs = [{}],
88
+ negative_template_inputs = [{}],
89
+ )
90
+ image.save("image_PandaMeme_happy.jpg")
91
+ image = template(
92
+ pipe,
93
+ prompt="A meme with a surprised expression.",
94
+ seed=0, cfg_scale=4, num_inference_steps=50,
95
+ template_inputs = [{}],
96
+ negative_template_inputs = [{}],
97
+ )
98
+ image.save("image_PandaMeme_surprised.jpg")
99
+ ```
100
+
101
+ * 开启惰性加载和显存管理,需 24G 显存
102
+
103
+ ```python
104
+ from diffsynth.diffusion.template import TemplatePipeline
105
+ from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
106
+ import torch
107
+
108
+ vram_config = {
109
+ "offload_dtype": "disk",
110
+ "offload_device": "disk",
111
+ "onload_dtype": torch.float8_e4m3fn,
112
+ "onload_device": "cpu",
113
+ "preparing_dtype": torch.float8_e4m3fn,
114
+ "preparing_device": "cuda",
115
+ "computation_dtype": torch.bfloat16,
116
+ "computation_device": "cuda",
117
+ }
118
+ pipe = Flux2ImagePipeline.from_pretrained(
119
+ torch_dtype=torch.bfloat16,
120
+ device="cuda",
121
+ model_configs=[
122
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
123
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
124
+ ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
125
+ ],
126
+ tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
127
+ vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
128
+ )
129
+ template = TemplatePipeline.from_pretrained(
130
+ torch_dtype=torch.bfloat16,
131
+ device="cuda",
132
+ model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
133
+ lazy_loading=True,
134
+ )
135
+ image = template(
136
+ pipe,
137
+ prompt="A meme with a sleepy expression.",
138
+ seed=0, cfg_scale=4, num_inference_steps=50,
139
+ template_inputs = [{}],
140
+ negative_template_inputs = [{}],
141
+ )
142
+ image.save("image_PandaMeme_sleepy.jpg")
143
+ image = template(
144
+ pipe,
145
+ prompt="A meme with a happy expression.",
146
+ seed=0, cfg_scale=4, num_inference_steps=50,
147
+ template_inputs = [{}],
148
+ negative_template_inputs = [{}],
149
+ )
150
+ image.save("image_PandaMeme_happy.jpg")
151
+ image = template(
152
+ pipe,
153
+ prompt="A meme with a surprised expression.",
154
+ seed=0, cfg_scale=4, num_inference_steps=50,
155
+ template_inputs = [{}],
156
+ negative_template_inputs = [{}],
157
+ )
158
+ image.save("image_PandaMeme_surprised.jpg")
159
+ ```
160
+
161
+ ## 训练代码
162
+
163
+ 安装 DiffSynth-Studio 后,使用以下脚本可开启训练,更多信息请参考 [DiffSynth-Studio 文档](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/)。
164
+
165
+ ```shell
166
+ modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset
167
+
168
+ accelerate launch examples/flux2/model_training/train.py \
169
+ --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \
170
+ --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \
171
+ --extra_inputs "template_inputs" \
172
+ --max_pixels 1048576 \
173
+ --dataset_repeat 50 \
174
+ --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
175
+ --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \
176
+ --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
177
+ --learning_rate 1e-4 \
178
+ --num_epochs 2 \
179
+ --remove_prefix_in_ckpt "pipe.template_model." \
180
+ --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \
181
+ --trainable_models "template_model" \
182
+ --use_gradient_checkpointing \
183
+ --find_unused_parameters
184
+ ```
assets/image_PandaMeme_happy.jpg ADDED
assets/image_PandaMeme_sleepy.jpg ADDED
assets/image_PandaMeme_surprised.jpg ADDED
configuration.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"framework":"Pytorch","task":"text-to-video-synthesis"}
model.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from diffsynth import load_state_dict
3
+ from safetensors.torch import save_file
4
+
5
+
6
class SingleKVCacheModel(torch.nn.Module):
    """Hold one trainable key/value tensor pair.

    Both tensors are registered as ``torch.nn.Parameter`` objects filled with
    zeros, so they show up in ``state_dict()`` under the names ``k`` and ``v``.

    Args:
        shape: Shape handed to ``torch.zeros`` for both ``k`` and ``v``.
    """

    def __init__(self, shape):
        super().__init__()
        self.k = torch.nn.Parameter(torch.zeros(shape))
        self.v = torch.nn.Parameter(torch.zeros(shape))

    def forward(self):
        """Return the stored parameters as the tuple ``(k, v)``."""
        return (self.k, self.v)
14
+
15
+
16
class StaticKVCacheModel(torch.nn.Module):
    """Template model storing one trainable ``(k, v)`` pair per named block.

    Block names are ``double_0 .. double_{D-1}`` followed by
    ``single_0 .. single_{S-1}``; entry ``i`` of ``self.cache`` belongs to
    ``self.block_names[i]``.

    Args:
        num_double_blocks: Number of ``double_*`` entries. Defaults to 5.
        num_single_blocks: Number of ``single_*`` entries. Defaults to 20.
        cache_shape: Shape of every ``k`` and ``v`` tensor. Defaults to
            ``(1, 4608, 24, 128)``.

    Calling ``StaticKVCacheModel()`` with no arguments yields 25 blocks of
    shape ``(1, 4608, 24, 128)``.
    NOTE(review): the meaning of the four axes of ``cache_shape`` is not
    established in this file -- confirm against the consuming pipeline.
    """

    def __init__(self, num_double_blocks=5, num_single_blocks=20, cache_shape=(1, 4608, 24, 128)):
        super().__init__()
        double_names = [f"double_{i}" for i in range(num_double_blocks)]
        single_names = [f"single_{i}" for i in range(num_single_blocks)]
        self.block_names = double_names + single_names
        # One SingleKVCacheModel per block name, in the same order as block_names.
        self.cache = torch.nn.ModuleList(
            [SingleKVCacheModel(cache_shape) for _ in self.block_names]
        )

    def load_from_kv_cache(self, kv_cache):
        """Copy an existing per-block cache into this model's parameters.

        Args:
            kv_cache: Mapping indexed by block name; element ``[0]`` of each
                entry is loaded as ``k`` and element ``[1]`` as ``v``.

        Raises:
            KeyError: If ``kv_cache`` lacks one of ``self.block_names``.
            RuntimeError: Raised by ``load_state_dict`` when a tensor does not
                match the shape of the parameter it is loaded into.
        """
        state_dict = {}
        for block_id, block_name in enumerate(self.block_names):
            entry = kv_cache[block_name]
            # Keys follow the ModuleList layout: "cache.<index>.<k|v>".
            state_dict[f"cache.{block_id}.k"] = entry[0]
            state_dict[f"cache.{block_id}.v"] = entry[1]
        self.load_state_dict(state_dict)

    @torch.no_grad()
    def process_inputs(self, **kwargs):
        """Ignore all keyword arguments and return an empty dict."""
        return {}

    def forward(self, **kwargs):
        """Return ``{"kv_cache": {block_name: (k, v)}}`` for every block.

        Keyword arguments are accepted but not used.
        """
        kv_cache = {
            block_name: cache()
            for block_name, cache in zip(self.block_names, self.cache)
        }
        return {"kv_cache": kv_cache}
38
+
39
+
40
def convert_from_kv_cache(kv_cache, path):
    """Pack a per-block KV cache into a ``StaticKVCacheModel`` and save it.

    A default-sized ``StaticKVCacheModel`` is created, cast to
    ``torch.bfloat16``, filled through ``load_from_kv_cache`` and its
    ``state_dict()`` is written with safetensors' ``save_file``.

    Args:
        kv_cache: Mapping indexed by block name whose entries provide ``k`` at
            index 0 and ``v`` at index 1 (see
            ``StaticKVCacheModel.load_from_kv_cache``).
        path: Destination passed to ``save_file``.
    """
    model = StaticKVCacheModel().to(torch.bfloat16)
    model.load_from_kv_cache(kv_cache)
    save_file(model.state_dict(), path)
44
+
45
+
46
class DataAnnotator:
    """Pass-through callable: hands back whatever keyword arguments it gets."""

    def __call__(self, **kwargs):
        """Return the received keyword arguments, unchanged, as a dict."""
        return kwargs
49
+
50
+
51
# Module-level entry points.
# NOTE(review): presumably looked up by name by the DiffSynth-Studio template
# loader -- confirm against TemplatePipeline before renaming any of them.
TEMPLATE_MODEL = StaticKVCacheModel
TEMPLATE_MODEL_PATH = "model.safetensors"
TEMPLATE_DATA_PROCESSOR = DataAnnotator
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb27bbf85da1474756864e4f823ce0b66811caaa9cd41d3bd0f4898fa1699c11
3
+ size 1415582160