Upload folder using huggingface_hub

Browse files

Files changed (8) hide show

README.md +157 -0
README_from_modelscope.md +184 -0
assets/image_PandaMeme_happy.jpg +0 -0
assets/image_PandaMeme_sleepy.jpg +0 -0
assets/image_PandaMeme_surprised.jpg +0 -0
configuration.json +1 -0
model.py +53 -0
model.safetensors +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,157 @@

+---
+license: apache-2.0
+---
+# Templates - Meme Panda (FLUX.2-klein-base-4B)
+This model belongs to the first batch of models in the Diffusion Templates series open-sourced by [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio). It is an Easter egg model that generates meme-style panda-head images with a variety of expressions.
+## Demo
+|Prompt: A meme with a happy expression.|Prompt: A meme with a sleepy expression.|Prompt: A meme with a surprised expression.|
+|-|-|-|
+|![](./assets/image_PandaMeme_happy.jpg)|![](./assets/image_PandaMeme_sleepy.jpg)|![](./assets/image_PandaMeme_surprised.jpg)|
+## Inference Code
+* Install [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
+```
+git clone https://github.com/modelscope/DiffSynth-Studio.git
+cd DiffSynth-Studio
+pip install -e .
+```
+* Direct inference (requires 40 GB of GPU memory)
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
+)
+image = template(
+    pipe,
+    prompt="A meme with a sleepy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_sleepy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a happy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_happy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a surprised expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_surprised.jpg")
+```
+* Enable lazy loading and memory management (requires 24 GB of GPU memory)
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+vram_config = {
+    "offload_dtype": "disk",
+    "offload_device": "disk",
+    "onload_dtype": torch.float8_e4m3fn,
+    "onload_device": "cpu",
+    "preparing_dtype": torch.float8_e4m3fn,
+    "preparing_device": "cuda",
+    "computation_dtype": torch.bfloat16,
+    "computation_device": "cuda",
+}
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
+)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
+    lazy_loading=True,
+)
+image = template(
+    pipe,
+    prompt="A meme with a sleepy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs=[{}],
+    negative_template_inputs=[{}],
+)
+image.save("image_PandaMeme_sleepy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a happy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs=[{}],
+    negative_template_inputs=[{}],
+)
+image.save("image_PandaMeme_happy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a surprised expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs=[{}],
+    negative_template_inputs=[{}],
+)
+image.save("image_PandaMeme_surprised.jpg")
+```
+## Training Code
+After installing DiffSynth-Studio, use the following script to start training. For more information, please refer to the [DiffSynth-Studio Documentation](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/).
+```shell
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset
+accelerate launch examples/flux2/model_training/train.py \
+  --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \
+  --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \
+  --extra_inputs "template_inputs" \
+  --max_pixels 1048576 \
+  --dataset_repeat 50 \
+  --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
+  --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \
+  --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
+  --learning_rate 1e-4 \
+  --num_epochs 2 \
+  --remove_prefix_in_ckpt "pipe.template_model." \
+  --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \
+  --trainable_models "template_model" \
+  --use_gradient_checkpointing \
+  --find_unused_parameters
+```

README_from_modelscope.md ADDED Viewed

	@@ -0,0 +1,184 @@

+---
+frameworks:
+- Pytorch
+license: Apache License 2.0
+tags: []
+tasks:
+- text-to-video-synthesis
+#model-type:
+##如 gpt、phi、llama、chatglm、baichuan 等
+#- gpt
+#domain:
+##如 nlp、cv、audio、multi-modal
+#- nlp
+#language:
+##语言代码列表 https://help.aliyun.com/document_detail/215387.html?spm=a2c4g.11186623.0.0.9f8d7467kni6Aa
+#- cn
+#metrics:
+##如 CIDEr、Blue、ROUGE 等
+#- CIDEr
+#tags:
+##各种自定义，包括 pretrained、fine-tuned、instruction-tuned、RL-tuned 等训练方法和其他
+#- pretrained
+#tools:
+##如 vllm、fastchat、llamacpp、AdaSeq 等
+#- vllm
+---
+# Templates-魔性熊猫（FLUX.2-klein-base-4B）
+本模型是 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio) 开源的首批 Diffusion Templates 系列模型。这是一个彩蛋模型，能够生成各种魔性的熊猫头表情包。
+## 效果展示
+|Prompt: A meme with a happy expression.|Prompt: A meme with a sleepy expression.|Prompt: A meme with a surprised expression.|
+|-|-|-|
+|![](./assets/image_PandaMeme_happy.jpg)|![](./assets/image_PandaMeme_sleepy.jpg)|![](./assets/image_PandaMeme_surprised.jpg)|
+## 推理代码
+* 安装 [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio)
+```
+git clone https://github.com/modelscope/DiffSynth-Studio.git
+cd DiffSynth-Studio
+pip install -e .
+```
+* 直接推理，需 40G 显存
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors"),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
+)
+image = template(
+    pipe,
+    prompt="A meme with a sleepy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_sleepy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a happy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_happy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a surprised expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_surprised.jpg")
+```
+* 开启惰性加载和显存管理，需 24G 显存
+```python
+from diffsynth.diffusion.template import TemplatePipeline
+from diffsynth.pipelines.flux2_image import Flux2ImagePipeline, ModelConfig
+import torch
+vram_config = {
+    "offload_dtype": "disk",
+    "offload_device": "disk",
+    "onload_dtype": torch.float8_e4m3fn,
+    "onload_device": "cpu",
+    "preparing_dtype": torch.float8_e4m3fn,
+    "preparing_device": "cuda",
+    "computation_dtype": torch.bfloat16,
+    "computation_device": "cuda",
+}
+pipe = Flux2ImagePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-base-4B", origin_file_pattern="transformer/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="text_encoder/*.safetensors", **vram_config),
+        ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="vae/diffusion_pytorch_model.safetensors"),
+    ],
+    tokenizer_config=ModelConfig(model_id="black-forest-labs/FLUX.2-klein-4B", origin_file_pattern="tokenizer/"),
+    vram_limit=torch.cuda.mem_get_info("cuda")[1] / (1024 ** 3) - 0.5,
+)
+template = TemplatePipeline.from_pretrained(
+    torch_dtype=torch.bfloat16,
+    device="cuda",
+    model_configs=[ModelConfig(model_id="DiffSynth-Studio/Template-KleinBase4B-PandaMeme")],
+    lazy_loading=True,
+)
+image = template(
+    pipe,
+    prompt="A meme with a sleepy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_sleepy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a happy expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_happy.jpg")
+image = template(
+    pipe,
+    prompt="A meme with a surprised expression.",
+    seed=0, cfg_scale=4, num_inference_steps=50,
+    template_inputs = [{}],
+    negative_template_inputs = [{}],
+)
+image.save("image_PandaMeme_surprised.jpg")
+```
+## 训练代码
+安装 DiffSynth-Studio 后，使用以下脚本可开启训练，更多信息请参考 [DiffSynth-Studio 文档](https://diffsynth-studio-doc.readthedocs.io/zh-cn/latest/)。
+```shell
+modelscope download --dataset DiffSynth-Studio/diffsynth_example_dataset --include "flux2/Template-KleinBase4B-PandaMeme/*" --local_dir ./data/diffsynth_example_dataset
+accelerate launch examples/flux2/model_training/train.py \
+  --dataset_base_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme \
+  --dataset_metadata_path data/diffsynth_example_dataset/flux2/Template-KleinBase4B-PandaMeme/metadata.jsonl \
+  --extra_inputs "template_inputs" \
+  --max_pixels 1048576 \
+  --dataset_repeat 50 \
+  --model_id_with_origin_paths "black-forest-labs/FLUX.2-klein-4B:text_encoder/*.safetensors,black-forest-labs/FLUX.2-klein-base-4B:transformer/*.safetensors,black-forest-labs/FLUX.2-klein-4B:vae/diffusion_pytorch_model.safetensors" \
+  --template_model_id_or_path "DiffSynth-Studio/Template-KleinBase4B-PandaMeme:" \
+  --tokenizer_path "black-forest-labs/FLUX.2-klein-4B:tokenizer/" \
+  --learning_rate 1e-4 \
+  --num_epochs 2 \
+  --remove_prefix_in_ckpt "pipe.template_model." \
+  --output_path "./models/train/Template-KleinBase4B-PandaMeme_full" \
+  --trainable_models "template_model" \
+  --use_gradient_checkpointing \
+  --find_unused_parameters
+```

assets/image_PandaMeme_happy.jpg ADDED Viewed

assets/image_PandaMeme_sleepy.jpg ADDED Viewed

assets/image_PandaMeme_surprised.jpg ADDED Viewed

configuration.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"framework":"Pytorch","task":"text-to-video-synthesis"}

model.py ADDED Viewed

	@@ -0,0 +1,53 @@

+import torch
+from diffsynth import load_state_dict
+from safetensors.torch import save_file
+class SingleKVCacheModel(torch.nn.Module):  # Holds one learnable key tensor and one learnable value tensor.
+    def __init__(self, shape):  # shape: size handed to torch.zeros for both k and v
+        super().__init__()
+        self.k = torch.nn.Parameter(torch.zeros(shape))  # zero-initialized key parameter
+        self.v = torch.nn.Parameter(torch.zeros(shape))  # zero-initialized value parameter
+    def forward(self):  # takes no inputs; returns the stored parameters as a (k, v) tuple
+        return (self.k, self.v)
+class StaticKVCacheModel(torch.nn.Module):  # Input-independent set of (k, v) parameter pairs, one pair per named block.
+    def __init__(self):
+        super().__init__()
+        self.block_names = [f"double_{i}" for i in range(5)] + [f"single_{i}" for i in range(20)]  # 25 names: double_0..double_4, then single_0..single_19
+        self.cache = torch.nn.ModuleList([SingleKVCacheModel((1, 4608, 24, 128)) for _ in self.block_names])  # one pair per name, in the same order; NOTE(review): axis meaning of (1, 4608, 24, 128) is not shown here - confirm
+    def load_from_kv_cache(self, kv_cache):  # kv_cache: mapping from block name to something indexable, with k at [0] and v at [1]
+        state_dict = {}
+        for block_id, block_name in enumerate(self.block_names):
+            state_dict[f"cache.{block_id}.k"] = kv_cache[block_name][0]  # key names follow the ModuleList layout: cache.<index>.k
+            state_dict[f"cache.{block_id}.v"] = kv_cache[block_name][1]  # and cache.<index>.v
+        self.load_state_dict(state_dict)  # loads the collected tensors into this module's parameters
+    @torch.no_grad()
+    def process_inputs(self, **kwargs):  # ignores every keyword argument and always returns an empty dict
+        return {}
+    def forward(self, **kwargs):  # kwargs are ignored; the output depends only on the stored parameters
+        kv_cache = {}
+        for block_name, cache in zip(self.block_names, self.cache):
+            kv_cache[block_name] = cache()  # (k, v) tuple for this block
+        return {"kv_cache": kv_cache}  # single-key dict: "kv_cache" maps block name -> (k, v)
+def convert_from_kv_cache(kv_cache, path):  # Pack a kv_cache mapping into a StaticKVCacheModel and write its state dict to path.
+    model = StaticKVCacheModel().to(torch.bfloat16)  # parameters are cast to bfloat16 before the cache is loaded
+    model.load_from_kv_cache(kv_cache)
+    save_file(model.state_dict(), path)  # written with safetensors' save_file; the function returns nothing
+class DataAnnotator:  # Pass-through callable: adds no annotations of its own.
+    def __call__(self, **kwargs):  # returns the received keyword arguments unchanged, as a dict
+        return kwargs
+TEMPLATE_MODEL = StaticKVCacheModel  # bound to the class itself, not an instance
+TEMPLATE_MODEL_PATH = "model.safetensors"  # NOTE(review): presumably the weights file shipped next to this module - confirm against the loader
+TEMPLATE_DATA_PROCESSOR = DataAnnotator  # bound to the class itself, not an instance

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb27bbf85da1474756864e4f823ce0b66811caaa9cd41d3bd0f4898fa1699c11
+size 1415582160