Adapter committed on
Commit
2254a67
1 Parent(s): ee11c4c

rebuild+depth

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. .gitattributes +0 -0
  2. .gitignore +128 -0
  3. LICENSE +0 -0
  4. README.md +0 -0
  5. app.py +4 -2
  6. configs/stable-diffusion/app.yaml +0 -0
  7. configs/stable-diffusion/test_keypose.yaml +0 -87
  8. configs/stable-diffusion/test_mask.yaml +0 -87
  9. configs/stable-diffusion/test_mask_sketch.yaml +0 -87
  10. configs/stable-diffusion/test_sketch.yaml +0 -87
  11. configs/stable-diffusion/test_sketch_edit.yaml +0 -87
  12. configs/stable-diffusion/train_keypose.yaml +0 -87
  13. configs/stable-diffusion/train_mask.yaml +0 -87
  14. configs/stable-diffusion/train_sketch.yaml +0 -87
  15. dataset_coco.py +0 -138
  16. demo/demos.py +26 -1
  17. demo/model.py +69 -6
  18. dist_util.py +0 -91
  19. environment.yaml +0 -0
  20. examples/edit_cat/edge.png +0 -0
  21. examples/edit_cat/edge_2.png +0 -0
  22. examples/edit_cat/im.png +0 -0
  23. examples/edit_cat/mask.png +0 -0
  24. examples/keypose/iron.png +0 -0
  25. examples/seg/dinner.png +0 -0
  26. examples/seg/motor.png +0 -0
  27. examples/seg_sketch/edge.png +0 -0
  28. examples/seg_sketch/mask.png +0 -0
  29. examples/sketch/car.png +0 -0
  30. examples/sketch/girl.jpeg +0 -0
  31. examples/sketch/human.png +0 -0
  32. examples/sketch/scenery.jpg +0 -0
  33. examples/sketch/scenery2.jpg +0 -0
  34. gradio_keypose.py +0 -254
  35. gradio_sketch.py +0 -147
  36. ldm/data/__init__.py +0 -0
  37. ldm/data/base.py +0 -0
  38. ldm/data/imagenet.py +0 -0
  39. ldm/data/lsun.py +0 -0
  40. ldm/lr_scheduler.py +0 -0
  41. ldm/models/autoencoder.py +0 -0
  42. ldm/models/diffusion/__init__.py +0 -0
  43. ldm/models/diffusion/classifier.py +0 -0
  44. ldm/models/diffusion/ddim.py +0 -0
  45. ldm/models/diffusion/ddpm.py +0 -0
  46. ldm/models/diffusion/dpm_solver/__init__.py +0 -0
  47. ldm/models/diffusion/dpm_solver/dpm_solver.py +0 -0
  48. ldm/models/diffusion/dpm_solver/sampler.py +0 -0
  49. ldm/models/diffusion/plms.py +0 -0
  50. ldm/modules/attention.py +0 -0
.gitattributes CHANGED
File without changes
.gitignore ADDED
@@ -0,0 +1,128 @@
+ # ignored folders
+ models
+
+ # ignored folders
+ tmp/*
+
+ *.DS_Store
+ .idea
+
+ # ignored files
+ version.py
+
+ # ignored files with suffix
+ # *.html
+ # *.png
+ # *.jpeg
+ # *.jpg
+ # *.gif
+ # *.pth
+ # *.zip
+
+ # template
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.pyc
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ .hypothesis/
+ .pytest_cache/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # pyenv
+ .python-version
+
+ # celery beat schedule file
+ celerybeat-schedule
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
LICENSE CHANGED
File without changes
README.md CHANGED
File without changes
app.py CHANGED
@@ -8,14 +8,14 @@ os.system('mim install mmcv-full==1.7.0')
 
from demo.model import Model_all
import gradio as gr
- from demo.demos import create_demo_keypose, create_demo_sketch, create_demo_draw, create_demo_seg
+ from demo.demos import create_demo_keypose, create_demo_sketch, create_demo_draw, create_demo_seg, create_demo_depth
import torch
import subprocess
import shlex
from huggingface_hub import hf_hub_url
 
urls = {
- 'TencentARC/T2I-Adapter':['models/t2iadapter_keypose_sd14v1.pth', 'models/t2iadapter_seg_sd14v1.pth', 'models/t2iadapter_sketch_sd14v1.pth'],
+ 'TencentARC/T2I-Adapter':['models/t2iadapter_keypose_sd14v1.pth', 'models/t2iadapter_seg_sd14v1.pth', 'models/t2iadapter_sketch_sd14v1.pth', 'models/t2iadapter_depth_sd14v1.pth'],
'CompVis/stable-diffusion-v-1-4-original':['sd-v1-4.ckpt'],
'andite/anything-v4.0':['anything-v4.0-pruned.ckpt', 'anything-v4.0.vae.pt'],
}
@@ -72,5 +72,7 @@ with gr.Blocks(css='style.css') as demo:
create_demo_draw(model.process_draw)
with gr.TabItem('Segmentation'):
create_demo_seg(model.process_seg)
+ with gr.TabItem('Depth'):
+ create_demo_depth(model.process_depth)
 
demo.queue().launch(debug=True, server_name='0.0.0.0')
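Note on the new checkpoint entry: the hunks above only show the `urls` dict and the `hf_hub_url` import; the download loop that consumes the dict is outside this diff. As a hedged sketch (the `hf_hub_download` call, the loop, and the `models/` target directory are illustrative assumptions, not code from this commit), resolving the dict to local files could look like:

```python
# Hypothetical sketch: resolving the `urls` dict to local checkpoint files.
# The loop and the copy into models/ are assumptions; only the dict itself
# appears in the hunks above.
import os
import shutil
from huggingface_hub import hf_hub_download

urls = {
    'TencentARC/T2I-Adapter': [
        'models/t2iadapter_keypose_sd14v1.pth',
        'models/t2iadapter_seg_sd14v1.pth',
        'models/t2iadapter_sketch_sd14v1.pth',
        'models/t2iadapter_depth_sd14v1.pth',  # new in this commit
    ],
}

os.makedirs('models', exist_ok=True)
for repo_id, filenames in urls.items():
    for filename in filenames:
        # Download (or reuse the local HF cache) and place the weight under models/,
        # which is where demo/model.py loads the adapters from.
        cached_path = hf_hub_download(repo_id=repo_id, filename=filename)
        shutil.copy(cached_path, os.path.join('models', os.path.basename(filename)))
```

Because `hf_hub_download` reuses the local cache, the newly added `t2iadapter_depth_sd14v1.pth` would only be fetched once per environment.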
configs/stable-diffusion/app.yaml CHANGED
File without changes
configs/stable-diffusion/test_keypose.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: test_keypose
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/test_mask.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: test_mask
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/test_mask_sketch.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: test_mask_sketch
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/test_sketch.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: test_sketch
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/test_sketch_edit.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: test_sketch_edit
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/train_keypose.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: train_keypose
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/train_mask.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: train_mask
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
configs/stable-diffusion/train_sketch.yaml DELETED
@@ -1,87 +0,0 @@
1
- name: train_sketch
2
- model:
3
- base_learning_rate: 1.0e-04
4
- target: ldm.models.diffusion.ddpm.LatentDiffusion
5
- params:
6
- linear_start: 0.00085
7
- linear_end: 0.0120
8
- num_timesteps_cond: 1
9
- log_every_t: 200
10
- timesteps: 1000
11
- first_stage_key: "jpg"
12
- cond_stage_key: "txt"
13
- image_size: 64
14
- channels: 4
15
- cond_stage_trainable: false # Note: different from the one we trained before
16
- conditioning_key: crossattn
17
- monitor: val/loss_simple_ema
18
- scale_factor: 0.18215
19
- use_ema: False
20
-
21
- scheduler_config: # 10000 warmup steps
22
- target: ldm.lr_scheduler.LambdaLinearScheduler
23
- params:
24
- warm_up_steps: [ 10000 ]
25
- cycle_lengths: [ 10000000000000 ] # incredibly large number to prevent corner cases
26
- f_start: [ 1.e-6 ]
27
- f_max: [ 1. ]
28
- f_min: [ 1. ]
29
-
30
- unet_config:
31
- target: ldm.modules.diffusionmodules.openaimodel.UNetModel
32
- params:
33
- image_size: 32 # unused
34
- in_channels: 4
35
- out_channels: 4
36
- model_channels: 320
37
- attention_resolutions: [ 4, 2, 1 ]
38
- num_res_blocks: 2
39
- channel_mult: [ 1, 2, 4, 4 ]
40
- num_heads: 8
41
- use_spatial_transformer: True
42
- transformer_depth: 1
43
- context_dim: 768
44
- use_checkpoint: True
45
- legacy: False
46
-
47
- first_stage_config:
48
- target: ldm.models.autoencoder.AutoencoderKL
49
- params:
50
- embed_dim: 4
51
- monitor: val/rec_loss
52
- ddconfig:
53
- double_z: true
54
- z_channels: 4
55
- resolution: 256
56
- in_channels: 3
57
- out_ch: 3
58
- ch: 128
59
- ch_mult:
60
- - 1
61
- - 2
62
- - 4
63
- - 4
64
- num_res_blocks: 2
65
- attn_resolutions: []
66
- dropout: 0.0
67
- lossconfig:
68
- target: torch.nn.Identity
69
-
70
- cond_stage_config: #__is_unconditional__
71
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
72
- params:
73
- version: models/clip-vit-large-patch14
74
-
75
- logger:
76
- print_freq: 100
77
- save_checkpoint_freq: !!float 1e4
78
- use_tb_logger: true
79
- wandb:
80
- project: ~
81
- resume_id: ~
82
- dist_params:
83
- backend: nccl
84
- port: 29500
85
- training:
86
- lr: !!float 1e-5
87
- save_freq: 1e4
dataset_coco.py DELETED
@@ -1,138 +0,0 @@
1
- import torch
2
- import json
3
- import cv2
4
- import torch
5
- import os
6
- from basicsr.utils import img2tensor, tensor2img
7
- import random
8
-
9
- class dataset_coco():
10
- def __init__(self, path_json, root_path, image_size, mode='train'):
11
- super(dataset_coco, self).__init__()
12
- with open(path_json, 'r', encoding='utf-8') as fp:
13
- data = json.load(fp)
14
- data = data['images']
15
- self.paths = []
16
- self.root_path = root_path
17
- for file in data:
18
- input_path = file['filepath']
19
- if mode == 'train':
20
- if 'val' not in input_path:
21
- self.paths.append(file)
22
- else:
23
- if 'val' in input_path:
24
- self.paths.append(file)
25
-
26
- def __getitem__(self, idx):
27
- file = self.paths[idx]
28
- input_path = file['filepath']
29
- input_name = file['filename']
30
- path = os.path.join(self.root_path, input_path, input_name)
31
- im = cv2.imread(path)
32
- im = cv2.resize(im, (512,512))
33
- im = img2tensor(im, bgr2rgb=True, float32=True)/255.
34
- sentences = file['sentences']
35
- sentence = sentences[int(random.random()*len(sentences))]['raw'].strip('.')
36
- return {'im':im, 'sentence':sentence}
37
-
38
- def __len__(self):
39
- return len(self.paths)
40
-
41
-
42
- class dataset_coco_mask():
43
- def __init__(self, path_json, root_path_im, root_path_mask, image_size):
44
- super(dataset_coco_mask, self).__init__()
45
- with open(path_json, 'r', encoding='utf-8') as fp:
46
- data = json.load(fp)
47
- data = data['annotations']
48
- self.files = []
49
- self.root_path_im = root_path_im
50
- self.root_path_mask = root_path_mask
51
- for file in data:
52
- name = "%012d.png"%file['image_id']
53
- self.files.append({'name':name, 'sentence':file['caption']})
54
-
55
- def __getitem__(self, idx):
56
- file = self.files[idx]
57
- name = file['name']
58
- # print(os.path.join(self.root_path_im, name))
59
- im = cv2.imread(os.path.join(self.root_path_im, name.replace('.png','.jpg')))
60
- im = cv2.resize(im, (512,512))
61
- im = img2tensor(im, bgr2rgb=True, float32=True)/255.
62
-
63
- mask = cv2.imread(os.path.join(self.root_path_mask, name))#[:,:,0]
64
- mask = cv2.resize(mask, (512,512))
65
- mask = img2tensor(mask, bgr2rgb=True, float32=True)[0].unsqueeze(0)#/255.
66
-
67
- sentence = file['sentence']
68
- return {'im':im, 'mask':mask, 'sentence':sentence}
69
-
70
- def __len__(self):
71
- return len(self.files)
72
-
73
-
74
- class dataset_coco_mask_color():
75
- def __init__(self, path_json, root_path_im, root_path_mask, image_size):
76
- super(dataset_coco_mask_color, self).__init__()
77
- with open(path_json, 'r', encoding='utf-8') as fp:
78
- data = json.load(fp)
79
- data = data['annotations']
80
- self.files = []
81
- self.root_path_im = root_path_im
82
- self.root_path_mask = root_path_mask
83
- for file in data:
84
- name = "%012d.png"%file['image_id']
85
- self.files.append({'name':name, 'sentence':file['caption']})
86
-
87
- def __getitem__(self, idx):
88
- file = self.files[idx]
89
- name = file['name']
90
- # print(os.path.join(self.root_path_im, name))
91
- im = cv2.imread(os.path.join(self.root_path_im, name.replace('.png','.jpg')))
92
- im = cv2.resize(im, (512,512))
93
- im = img2tensor(im, bgr2rgb=True, float32=True)/255.
94
-
95
- mask = cv2.imread(os.path.join(self.root_path_mask, name))#[:,:,0]
96
- mask = cv2.resize(mask, (512,512))
97
- mask = img2tensor(mask, bgr2rgb=True, float32=True)/255.#[0].unsqueeze(0)#/255.
98
-
99
- sentence = file['sentence']
100
- return {'im':im, 'mask':mask, 'sentence':sentence}
101
-
102
- def __len__(self):
103
- return len(self.files)
104
-
105
- class dataset_coco_mask_color_sig():
106
- def __init__(self, path_json, root_path_im, root_path_mask, image_size):
107
- super(dataset_coco_mask_color_sig, self).__init__()
108
- with open(path_json, 'r', encoding='utf-8') as fp:
109
- data = json.load(fp)
110
- data = data['annotations']
111
- self.files = []
112
- self.root_path_im = root_path_im
113
- self.root_path_mask = root_path_mask
114
- reg = {}
115
- for file in data:
116
- name = "%012d.png"%file['image_id']
117
- if name in reg:
118
- continue
119
- self.files.append({'name':name, 'sentence':file['caption']})
120
- reg[name] = name
121
-
122
- def __getitem__(self, idx):
123
- file = self.files[idx]
124
- name = file['name']
125
- # print(os.path.join(self.root_path_im, name))
126
- im = cv2.imread(os.path.join(self.root_path_im, name.replace('.png','.jpg')))
127
- im = cv2.resize(im, (512,512))
128
- im = img2tensor(im, bgr2rgb=True, float32=True)/255.
129
-
130
- mask = cv2.imread(os.path.join(self.root_path_mask, name))#[:,:,0]
131
- mask = cv2.resize(mask, (512,512))
132
- mask = img2tensor(mask, bgr2rgb=True, float32=True)/255.#[0].unsqueeze(0)#/255.
133
-
134
- sentence = file['sentence']
135
- return {'im':im, 'mask':mask, 'sentence':sentence, 'name': name}
136
-
137
- def __len__(self):
138
- return len(self.files)
demo/demos.py CHANGED
@@ -85,7 +85,32 @@ def create_demo_seg(process):
with gr.Row():
type_in = gr.inputs.Radio(['Segmentation', 'Image'], type="value", default='Image', label='You can input an image or a segmentation. If you choose to input a segmentation, it must correspond to the coco-stuff')
run_button = gr.Button(label="Run")
- con_strength = gr.Slider(label="Controling Strength (The guidance strength of the segmentation to the result)", minimum=0, maximum=1, value=0.4, step=0.1)
+ con_strength = gr.Slider(label="Controling Strength (The guidance strength of the segmentation to the result)", minimum=0, maximum=1, value=1, step=0.1)
+ scale = gr.Slider(label="Guidance Scale (Classifier free guidance)", minimum=0.1, maximum=30.0, value=7.5, step=0.1)
+ fix_sample = gr.inputs.Radio(['True', 'False'], type="value", default='False', label='Fix Sampling\n (Fix the random seed)')
+ base_model = gr.inputs.Radio(['sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='sd-v1-4.ckpt', label='The base model you want to use')
+ with gr.Column():
+ result = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
+ ips = [input_img, type_in, prompt, neg_prompt, pos_prompt, fix_sample, scale, con_strength, base_model]
+ run_button.click(fn=process, inputs=ips, outputs=[result])
+ return demo
+
+ def create_demo_depth(process):
+ with gr.Blocks() as demo:
+ with gr.Row():
+ gr.Markdown('## T2I-Adapter (Depth)')
+ with gr.Row():
+ with gr.Column():
+ input_img = gr.Image(source='upload', type="numpy")
+ prompt = gr.Textbox(label="Prompt")
+ neg_prompt = gr.Textbox(label="Negative Prompt",
+ value='ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face')
+ pos_prompt = gr.Textbox(label="Positive Prompt",
+ value = 'crafted, elegant, meticulous, magnificent, maximum details, extremely hyper aesthetic, intricately detailed')
+ with gr.Row():
+ type_in = gr.inputs.Radio(['Depth', 'Image'], type="value", default='Image', label='You can input an image or a depth map')
+ run_button = gr.Button(label="Run")
+ con_strength = gr.Slider(label="Controling Strength (The guidance strength of the depth map to the result)", minimum=0, maximum=1, value=1, step=0.1)
scale = gr.Slider(label="Guidance Scale (Classifier free guidance)", minimum=0.1, maximum=30.0, value=7.5, step=0.1)
fix_sample = gr.inputs.Radio(['True', 'False'], type="value", default='False', label='Fix Sampling\n (Fix the random seed)')
base_model = gr.inputs.Radio(['sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='sd-v1-4.ckpt', label='The base model you want to use')
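For context, `create_demo_depth` mirrors the existing `create_demo_seg` builder. A minimal sketch of launching the depth tab on its own (assuming, as with the sibling builders, that `create_demo_depth` returns the `gr.Blocks` object and that `Model_all` takes a device string; neither is shown in full in this hunk):

```python
# Hypothetical standalone launch of the new depth demo; mirrors how app.py
# mounts it inside a TabItem. Model_all('cuda') and the returned Blocks are
# assumptions based on the sibling builders.
from demo.model import Model_all
from demo.demos import create_demo_depth

model = Model_all('cuda')                      # loads SD-v1.4, the adapters and MiDaS
demo = create_demo_depth(model.process_depth)  # wires the UI to the new depth pipeline
demo.queue().launch(server_name='0.0.0.0')
```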
demo/model.py CHANGED
@@ -4,7 +4,9 @@ from pytorch_lightning import seed_everything
from ldm.models.diffusion.plms import PLMSSampler
from ldm.modules.encoders.adapter import Adapter
from ldm.util import instantiate_from_config
- from model_edge import pidinet
+ from ldm.modules.structure_condition.model_edge import pidinet
+ from ldm.modules.structure_condition.model_seg import seger, Colorize
+ from ldm.modules.structure_condition.midas.api import MiDaSInference
import gradio as gr
from omegaconf import OmegaConf
import mmcv
@@ -13,7 +15,6 @@ from mmpose.apis import (inference_top_down_pose_model, init_pose_model, process
import os
import cv2
import numpy as np
- from seger import seger, Colorize
import torch.nn.functional as F
 
def preprocessing(image, device):
@@ -136,10 +137,8 @@
self.model_sketch = Adapter(channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True,
use_conv=False).to(device)
self.model_sketch.load_state_dict(torch.load("models/t2iadapter_sketch_sd14v1.pth", map_location=device))
- self.model_edge = pidinet()
- ckp = torch.load('models/table5_pidinet.pth', map_location='cpu')['state_dict']
- self.model_edge.load_state_dict({k.replace('module.', ''): v for k, v in ckp.items()})
- self.model_edge.to(device)
+ self.model_edge = pidinet().to(device)
+ self.model_edge.load_state_dict({k.replace('module.', ''): v for k, v in torch.load('models/table5_pidinet.pth', map_location=device)['state_dict'].items()})
 
# segmentation part
self.model_seger = seger().to(device)
@@ -147,6 +146,11 @@
self.coler = Colorize(n=182)
self.model_seg = Adapter(cin=int(3*64), channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False).to(device)
self.model_seg.load_state_dict(torch.load("models/t2iadapter_seg_sd14v1.pth", map_location=device))
+ self.depth_model = MiDaSInference(model_type='dpt_hybrid').to(device)
+
+ # depth part
+ self.model_depth = Adapter(cin=3*64, channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False).to(device)
+ self.model_depth.load_state_dict(torch.load("models/t2iadapter_depth_sd14v1.pth", map_location=device))
 
# keypose part
self.model_pose = Adapter(cin=int(3 * 64), channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True,
@@ -248,6 +252,65 @@
 
return [im_edge, x_samples_ddim]
 
+ @torch.no_grad()
+ def process_depth(self, input_img, type_in, prompt, neg_prompt, pos_prompt, fix_sample, scale,
+ con_strength, base_model):
+ if self.current_base != base_model:
+ ckpt = os.path.join("models", base_model)
+ pl_sd = torch.load(ckpt, map_location="cuda")
+ if "state_dict" in pl_sd:
+ sd = pl_sd["state_dict"]
+ else:
+ sd = pl_sd
+ self.base_model.load_state_dict(sd, strict=False)
+ self.current_base = base_model
+ if 'anything' in base_model.lower():
+ self.load_vae()
+
+ con_strength = int((1 - con_strength) * 50)
+ if fix_sample == 'True':
+ seed_everything(42)
+ im = cv2.resize(input_img, (512, 512))
+
+ if type_in == 'Depth':
+ im_depth = im.copy()
+ depth = img2tensor(im).unsqueeze(0) / 255.
+ elif type_in == 'Image':
+ im = img2tensor(im).unsqueeze(0) / 127.5 - 1.0
+ depth = self.depth_model(im.to(self.device)).repeat(1, 3, 1, 1)
+ depth -= torch.min(depth)
+ depth /= torch.max(depth)
+ im_depth = tensor2img(depth)
+
+ # extract condition features
+ c = self.base_model.get_learned_conditioning([prompt + ', ' + pos_prompt])
+ nc = self.base_model.get_learned_conditioning([neg_prompt])
+ features_adapter = self.model_depth(depth.to(self.device))
+ shape = [4, 64, 64]
+
+ # sampling
+ samples_ddim, _ = self.sampler.sample(S=50,
+ conditioning=c,
+ batch_size=1,
+ shape=shape,
+ verbose=False,
+ unconditional_guidance_scale=scale,
+ unconditional_conditioning=nc,
+ eta=0.0,
+ x_T=None,
+ features_adapter1=features_adapter,
+ mode='sketch',
+ con_strength=con_strength)
+
+ x_samples_ddim = self.base_model.decode_first_stage(samples_ddim)
+ x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
+ x_samples_ddim = x_samples_ddim.to('cpu')
+ x_samples_ddim = x_samples_ddim.permute(0, 2, 3, 1).numpy()[0]
+ x_samples_ddim = 255. * x_samples_ddim
+ x_samples_ddim = x_samples_ddim.astype(np.uint8)
+
+ return [im_depth, x_samples_ddim]
+
@torch.no_grad()
def process_seg(self, input_img, type_in, prompt, neg_prompt, pos_prompt, fix_sample, scale,
con_strength, base_model):
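The core of the new `process_depth` path is the MiDaS-based conditioning step: the input image is resized to 512x512, mapped to [-1, 1], run through `MiDaSInference(model_type='dpt_hybrid')`, min-max normalised and repeated to three channels before being handed to the depth adapter. A standalone sketch of just that step (a simplified paraphrase of the code above with a hypothetical helper name, not the exact demo function):

```python
# Sketch of the depth conditioning used by process_depth above. MiDaSInference
# and the 'dpt_hybrid' weights are taken from the diff; the helper wrapper
# itself is an illustrative assumption.
import cv2
import torch
from basicsr.utils import img2tensor, tensor2img
from ldm.modules.structure_condition.midas.api import MiDaSInference

@torch.no_grad()
def image_to_depth_condition(img_bgr, depth_model, device='cuda'):
    """Turn an HxWx3 image into the 3-channel depth map fed to the depth adapter."""
    im = cv2.resize(img_bgr, (512, 512))
    im = img2tensor(im).unsqueeze(0) / 127.5 - 1.0          # scale to [-1, 1]
    depth = depth_model(im.to(device)).repeat(1, 3, 1, 1)   # 1-channel MiDaS output -> 3 channels
    depth -= torch.min(depth)                                # min-max normalise to [0, 1]
    depth /= torch.max(depth)
    return depth, tensor2img(depth)                          # tensor for the adapter, uint8 preview

# Usage (assumed): depth_model = MiDaSInference(model_type='dpt_hybrid').to('cuda')
# depth, preview = image_to_depth_condition(input_img, depth_model)
```

If the user supplies a ready-made depth map instead of a photo, the method skips MiDaS and simply scales the uploaded map to [0, 1], as the `type_in == 'Depth'` branch above shows.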
dist_util.py DELETED
@@ -1,91 +0,0 @@
1
- # Modified from https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/dist_utils.py # noqa: E501
2
- import functools
3
- import os
4
- import subprocess
5
- import torch
6
- import torch.distributed as dist
7
- import torch.multiprocessing as mp
8
- from torch.nn.parallel import DataParallel, DistributedDataParallel
9
-
10
-
11
- def init_dist(launcher, backend='nccl', **kwargs):
12
- if mp.get_start_method(allow_none=True) is None:
13
- mp.set_start_method('spawn')
14
- if launcher == 'pytorch':
15
- _init_dist_pytorch(backend, **kwargs)
16
- elif launcher == 'slurm':
17
- _init_dist_slurm(backend, **kwargs)
18
- else:
19
- raise ValueError(f'Invalid launcher type: {launcher}')
20
-
21
-
22
- def _init_dist_pytorch(backend, **kwargs):
23
- rank = int(os.environ['RANK'])
24
- num_gpus = torch.cuda.device_count()
25
- torch.cuda.set_device(rank % num_gpus)
26
- dist.init_process_group(backend=backend, **kwargs)
27
-
28
-
29
- def _init_dist_slurm(backend, port=None):
30
- """Initialize slurm distributed training environment.
31
-
32
- If argument ``port`` is not specified, then the master port will be system
33
- environment variable ``MASTER_PORT``. If ``MASTER_PORT`` is not in system
34
- environment variable, then a default port ``29500`` will be used.
35
-
36
- Args:
37
- backend (str): Backend of torch.distributed.
38
- port (int, optional): Master port. Defaults to None.
39
- """
40
- proc_id = int(os.environ['SLURM_PROCID'])
41
- ntasks = int(os.environ['SLURM_NTASKS'])
42
- node_list = os.environ['SLURM_NODELIST']
43
- num_gpus = torch.cuda.device_count()
44
- torch.cuda.set_device(proc_id % num_gpus)
45
- addr = subprocess.getoutput(f'scontrol show hostname {node_list} | head -n1')
46
- # specify master port
47
- if port is not None:
48
- os.environ['MASTER_PORT'] = str(port)
49
- elif 'MASTER_PORT' in os.environ:
50
- pass # use MASTER_PORT in the environment variable
51
- else:
52
- # 29500 is torch.distributed default port
53
- os.environ['MASTER_PORT'] = '29500'
54
- os.environ['MASTER_ADDR'] = addr
55
- os.environ['WORLD_SIZE'] = str(ntasks)
56
- os.environ['LOCAL_RANK'] = str(proc_id % num_gpus)
57
- os.environ['RANK'] = str(proc_id)
58
- dist.init_process_group(backend=backend)
59
-
60
-
61
- def get_dist_info():
62
- if dist.is_available():
63
- initialized = dist.is_initialized()
64
- else:
65
- initialized = False
66
- if initialized:
67
- rank = dist.get_rank()
68
- world_size = dist.get_world_size()
69
- else:
70
- rank = 0
71
- world_size = 1
72
- return rank, world_size
73
-
74
-
75
- def master_only(func):
76
-
77
- @functools.wraps(func)
78
- def wrapper(*args, **kwargs):
79
- rank, _ = get_dist_info()
80
- if rank == 0:
81
- return func(*args, **kwargs)
82
-
83
- return wrapper
84
-
85
- def get_bare_model(net):
86
- """Get bare model, especially under wrapping with
87
- DistributedDataParallel or DataParallel.
88
- """
89
- if isinstance(net, (DataParallel, DistributedDataParallel)):
90
- net = net.module
91
- return net
environment.yaml CHANGED
File without changes
examples/edit_cat/edge.png DELETED
Binary file (5.98 kB)
examples/edit_cat/edge_2.png DELETED
Binary file (13.3 kB)
examples/edit_cat/im.png DELETED
Binary file (508 kB)
examples/edit_cat/mask.png DELETED
Binary file (4.65 kB)
examples/keypose/iron.png DELETED
Binary file (15.6 kB)
examples/seg/dinner.png DELETED
Binary file (17.8 kB)
examples/seg/motor.png DELETED
Binary file (20.9 kB)
examples/seg_sketch/edge.png DELETED
Binary file (12.9 kB)
examples/seg_sketch/mask.png DELETED
Binary file (22.2 kB)
examples/sketch/car.png DELETED
Binary file (13.2 kB)
examples/sketch/girl.jpeg DELETED
Binary file (214 kB)
examples/sketch/human.png DELETED
Binary file (768 kB)
examples/sketch/scenery.jpg DELETED
Binary file (99.8 kB)
examples/sketch/scenery2.jpg DELETED
Binary file (144 kB)
gradio_keypose.py DELETED
@@ -1,254 +0,0 @@
1
- import os
2
- import os.path as osp
3
-
4
- import cv2
5
- import numpy as np
6
- import torch
7
- from basicsr.utils import img2tensor, tensor2img
8
- from pytorch_lightning import seed_everything
9
- from ldm.models.diffusion.plms import PLMSSampler
10
- from ldm.modules.encoders.adapter import Adapter
11
- from ldm.util import instantiate_from_config
12
- from model_edge import pidinet
13
- import gradio as gr
14
- from omegaconf import OmegaConf
15
- import mmcv
16
- from mmdet.apis import inference_detector, init_detector
17
- from mmpose.apis import (inference_top_down_pose_model, init_pose_model, process_mmdet_results, vis_pose_result)
18
-
19
- skeleton = [[15, 13], [13, 11], [16, 14], [14, 12], [11, 12], [5, 11], [6, 12], [5, 6], [5, 7], [6, 8], [7, 9], [8, 10],
20
- [1, 2], [0, 1], [0, 2], [1, 3], [2, 4], [3, 5], [4, 6]]
21
-
22
- pose_kpt_color = [[51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [0, 255, 0],
23
- [255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0],
24
- [0, 255, 0], [255, 128, 0], [0, 255, 0], [255, 128, 0]]
25
-
26
- pose_link_color = [[0, 255, 0], [0, 255, 0], [255, 128, 0], [255, 128, 0],
27
- [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255], [0, 255, 0], [255, 128, 0],
28
- [0, 255, 0], [255, 128, 0], [51, 153, 255], [51, 153, 255], [51, 153, 255], [51, 153, 255],
29
- [51, 153, 255], [51, 153, 255], [51, 153, 255]]
30
-
31
- def imshow_keypoints(img,
32
- pose_result,
33
- skeleton=None,
34
- kpt_score_thr=0.1,
35
- pose_kpt_color=None,
36
- pose_link_color=None,
37
- radius=4,
38
- thickness=1):
39
- """Draw keypoints and links on an image.
40
-
41
- Args:
42
- img (ndarry): The image to draw poses on.
43
- pose_result (list[kpts]): The poses to draw. Each element kpts is
44
- a set of K keypoints as an Kx3 numpy.ndarray, where each
45
- keypoint is represented as x, y, score.
46
- kpt_score_thr (float, optional): Minimum score of keypoints
47
- to be shown. Default: 0.3.
48
- pose_kpt_color (np.array[Nx3]`): Color of N keypoints. If None,
49
- the keypoint will not be drawn.
50
- pose_link_color (np.array[Mx3]): Color of M links. If None, the
51
- links will not be drawn.
52
- thickness (int): Thickness of lines.
53
- """
54
-
55
- img_h, img_w, _ = img.shape
56
- img = np.zeros(img.shape)
57
-
58
- for idx, kpts in enumerate(pose_result):
59
- if idx > 1:
60
- continue
61
- kpts = kpts['keypoints']
62
- # print(kpts)
63
- kpts = np.array(kpts, copy=False)
64
-
65
- # draw each point on image
66
- if pose_kpt_color is not None:
67
- assert len(pose_kpt_color) == len(kpts)
68
-
69
- for kid, kpt in enumerate(kpts):
70
- x_coord, y_coord, kpt_score = int(kpt[0]), int(kpt[1]), kpt[2]
71
-
72
- if kpt_score < kpt_score_thr or pose_kpt_color[kid] is None:
73
- # skip the point that should not be drawn
74
- continue
75
-
76
- color = tuple(int(c) for c in pose_kpt_color[kid])
77
- cv2.circle(img, (int(x_coord), int(y_coord)), radius, color, -1)
78
-
79
- # draw links
80
- if skeleton is not None and pose_link_color is not None:
81
- assert len(pose_link_color) == len(skeleton)
82
-
83
- for sk_id, sk in enumerate(skeleton):
84
- pos1 = (int(kpts[sk[0], 0]), int(kpts[sk[0], 1]))
85
- pos2 = (int(kpts[sk[1], 0]), int(kpts[sk[1], 1]))
86
-
87
- if (pos1[0] <= 0 or pos1[0] >= img_w or pos1[1] <= 0 or pos1[1] >= img_h or pos2[0] <= 0
88
- or pos2[0] >= img_w or pos2[1] <= 0 or pos2[1] >= img_h or kpts[sk[0], 2] < kpt_score_thr
89
- or kpts[sk[1], 2] < kpt_score_thr or pose_link_color[sk_id] is None):
90
- # skip the link that should not be drawn
91
- continue
92
- color = tuple(int(c) for c in pose_link_color[sk_id])
93
- cv2.line(img, pos1, pos2, color, thickness=thickness)
94
-
95
- return img
96
-
97
- def load_model_from_config(config, ckpt, verbose=False):
98
- print(f"Loading model from {ckpt}")
99
- pl_sd = torch.load(ckpt, map_location="cpu")
100
- if "global_step" in pl_sd:
101
- print(f"Global Step: {pl_sd['global_step']}")
102
- if "state_dict" in pl_sd:
103
- sd = pl_sd["state_dict"]
104
- else:
105
- sd = pl_sd
106
- model = instantiate_from_config(config.model)
107
- m, u = model.load_state_dict(sd, strict=False)
108
-
109
- model.cuda()
110
- model.eval()
111
- return model
112
-
113
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
114
- config = OmegaConf.load("configs/stable-diffusion/test_keypose.yaml")
115
- config.model.params.cond_stage_config.params.device = device
116
- model = load_model_from_config(config, "models/sd-v1-4.ckpt").to(device)
117
- current_base = 'sd-v1-4.ckpt'
118
- model_ad = Adapter(cin=int(3*64), channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False).to(device)
119
- model_ad.load_state_dict(torch.load("models/t2iadapter_keypose_sd14v1.pth"))
120
- sampler = PLMSSampler(model)
121
- ## mmpose
122
- det_config = 'models/faster_rcnn_r50_fpn_coco.py'
123
- det_checkpoint = 'models/faster_rcnn_r50_fpn_1x_coco_20200130-047c8118.pth'
124
- pose_config = 'models/hrnet_w48_coco_256x192.py'
125
- pose_checkpoint = 'models/hrnet_w48_coco_256x192-b9e0b3ab_20200708.pth'
126
- det_cat_id = 1
127
- bbox_thr = 0.2
128
- ## detector
129
- det_config_mmcv = mmcv.Config.fromfile(det_config)
130
- det_model = init_detector(det_config_mmcv, det_checkpoint, device=device)
131
- pose_config_mmcv = mmcv.Config.fromfile(pose_config)
132
- pose_model = init_pose_model(pose_config_mmcv, pose_checkpoint, device=device)
133
- W, H = 512, 512
134
-
135
-
136
- def process(input_img, type_in, prompt, neg_prompt, fix_sample, scale, con_strength, base_model):
137
- global current_base
138
- if current_base != base_model:
139
- ckpt = os.path.join("models", base_model)
140
- pl_sd = torch.load(ckpt, map_location="cpu")
141
- if "state_dict" in pl_sd:
142
- sd = pl_sd["state_dict"]
143
- else:
144
- sd = pl_sd
145
- model.load_state_dict(sd, strict=False)
146
- current_base = base_model
147
- con_strength = int((1-con_strength)*50)
148
- if fix_sample == 'True':
149
- seed_everything(42)
150
- im = cv2.resize(input_img,(W,H))
151
-
152
- if type_in == 'Keypose':
153
- im_pose = im.copy()
154
- im = img2tensor(im).unsqueeze(0)/255.
155
- elif type_in == 'Image':
156
- image = im.copy()
157
- im = img2tensor(im).unsqueeze(0)/255.
158
- mmdet_results = inference_detector(det_model, image)
159
- # keep the person class bounding boxes.
160
- person_results = process_mmdet_results(mmdet_results, det_cat_id)
161
-
162
- # optional
163
- return_heatmap = False
164
- dataset = pose_model.cfg.data['test']['type']
165
-
166
- # e.g. use ('backbone', ) to return backbone feature
167
- output_layer_names = None
168
- pose_results, returned_outputs = inference_top_down_pose_model(
169
- pose_model,
170
- image,
171
- person_results,
172
- bbox_thr=bbox_thr,
173
- format='xyxy',
174
- dataset=dataset,
175
- dataset_info=None,
176
- return_heatmap=return_heatmap,
177
- outputs=output_layer_names)
178
-
179
- # show the results
180
- im_pose = imshow_keypoints(
181
- image,
182
- pose_results,
183
- skeleton=skeleton,
184
- pose_kpt_color=pose_kpt_color,
185
- pose_link_color=pose_link_color,
186
- radius=2,
187
- thickness=2)
188
- im_pose = cv2.resize(im_pose,(W,H))
189
-
190
- with torch.no_grad():
191
- c = model.get_learned_conditioning([prompt])
192
- nc = model.get_learned_conditioning([neg_prompt])
193
- # extract condition features
194
- pose = img2tensor(im_pose, bgr2rgb=True, float32=True)/255.
195
- pose = pose.unsqueeze(0)
196
- features_adapter = model_ad(pose.to(device))
197
-
198
- shape = [4, W//8, H//8]
199
-
200
- # sampling
201
- samples_ddim, _ = sampler.sample(S=50,
202
- conditioning=c,
203
- batch_size=1,
204
- shape=shape,
205
- verbose=False,
206
- unconditional_guidance_scale=scale,
207
- unconditional_conditioning=nc,
208
- eta=0.0,
209
- x_T=None,
210
- features_adapter1=features_adapter,
211
- mode = 'sketch',
212
- con_strength = con_strength)
213
-
214
- x_samples_ddim = model.decode_first_stage(samples_ddim)
215
- x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
216
- x_samples_ddim = x_samples_ddim.to('cpu')
217
- x_samples_ddim = x_samples_ddim.permute(0, 2, 3, 1).numpy()[0]
218
- x_samples_ddim = 255.*x_samples_ddim
219
- x_samples_ddim = x_samples_ddim.astype(np.uint8)
220
-
221
- return [im_pose[:,:,::-1].astype(np.uint8), x_samples_ddim]
222
-
223
- DESCRIPTION = '''# T2I-Adapter (Keypose)
224
- [Paper](https://arxiv.org/abs/2302.08453) [GitHub](https://github.com/TencentARC/T2I-Adapter)
225
-
226
- This gradio demo is for keypose-guided generation. The current functions include:
227
- - Keypose to Image Generation
228
- - Image to Image Generation
229
- - Generation with **Anything** setting
230
- '''
231
- block = gr.Blocks().queue()
232
- with block:
233
- with gr.Row():
234
- gr.Markdown(DESCRIPTION)
235
- with gr.Row():
236
- with gr.Column():
237
- input_img = gr.Image(source='upload', type="numpy")
238
- prompt = gr.Textbox(label="Prompt")
239
- neg_prompt = gr.Textbox(label="Negative Prompt",
240
- value='ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face')
241
- with gr.Row():
242
- type_in = gr.inputs.Radio(['Keypose', 'Image'], type="value", default='Image', label='Input Types\n (You can input an image or a keypose map)')
243
- fix_sample = gr.inputs.Radio(['True', 'False'], type="value", default='False', label='Fix Sampling\n (Fix the random seed to produce a fixed output)')
244
- run_button = gr.Button(label="Run")
245
- con_strength = gr.Slider(label="Controling Strength (The guidance strength of the keypose to the result)", minimum=0, maximum=1, value=1, step=0.1)
246
- scale = gr.Slider(label="Guidance Scale (Classifier free guidance)", minimum=0.1, maximum=30.0, value=9, step=0.1)
247
- base_model = gr.inputs.Radio(['sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='sd-v1-4.ckpt', label='The base model you want to use')
248
- with gr.Column():
249
- result = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
250
- ips = [input_img, type_in, prompt, neg_prompt, fix_sample, scale, con_strength, base_model]
251
- run_button.click(fn=process, inputs=ips, outputs=[result])
252
-
253
- block.launch(server_name='0.0.0.0')
254
-
gradio_sketch.py DELETED
@@ -1,147 +0,0 @@
1
- import os
2
- import os.path as osp
3
-
4
- import cv2
5
- import numpy as np
6
- import torch
7
- from basicsr.utils import img2tensor, tensor2img
8
- from pytorch_lightning import seed_everything
9
- from ldm.models.diffusion.plms import PLMSSampler
10
- from ldm.modules.encoders.adapter import Adapter
11
- from ldm.util import instantiate_from_config
12
- from model_edge import pidinet
13
- import gradio as gr
14
- from omegaconf import OmegaConf
15
-
16
-
17
- def load_model_from_config(config, ckpt, verbose=False):
18
- print(f"Loading model from {ckpt}")
19
- pl_sd = torch.load(ckpt, map_location="cpu")
20
- if "global_step" in pl_sd:
21
- print(f"Global Step: {pl_sd['global_step']}")
22
- if "state_dict" in pl_sd:
23
- sd = pl_sd["state_dict"]
24
- else:
25
- sd = pl_sd
26
- model = instantiate_from_config(config.model)
27
- m, u = model.load_state_dict(sd, strict=False)
28
- # if len(m) > 0 and verbose:
29
- # print("missing keys:")
30
- # print(m)
31
- # if len(u) > 0 and verbose:
32
- # print("unexpected keys:")
33
- # print(u)
34
-
35
- model.cuda()
36
- model.eval()
37
- return model
38
-
39
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
40
- config = OmegaConf.load("configs/stable-diffusion/test_sketch.yaml")
41
- config.model.params.cond_stage_config.params.device = device
42
- model = load_model_from_config(config, "models/sd-v1-4.ckpt").to(device)
43
- current_base = 'sd-v1-4.ckpt'
44
- model_ad = Adapter(channels=[320, 640, 1280, 1280][:4], nums_rb=2, ksize=1, sk=True, use_conv=False).to(device)
45
- model_ad.load_state_dict(torch.load("models/t2iadapter_sketch_sd14v1.pth"))
46
- net_G = pidinet()
47
- ckp = torch.load('models/table5_pidinet.pth', map_location='cpu')['state_dict']
48
- net_G.load_state_dict({k.replace('module.',''):v for k, v in ckp.items()})
49
- net_G.to(device)
50
- sampler = PLMSSampler(model)
51
- save_memory=True
52
- W, H = 512, 512
53
-
54
-
55
- def process(input_img, type_in, color_back, prompt, neg_prompt, fix_sample, scale, con_strength, base_model):
56
- global current_base
57
- if current_base != base_model:
58
- ckpt = os.path.join("models", base_model)
59
- pl_sd = torch.load(ckpt, map_location="cpu")
60
- if "state_dict" in pl_sd:
61
- sd = pl_sd["state_dict"]
62
- else:
63
- sd = pl_sd
64
- model.load_state_dict(sd, strict=False) #load_model_from_config(config, os.path.join("models", base_model)).to(device)
65
- current_base = base_model
66
- con_strength = int((1-con_strength)*50)
67
- if fix_sample == 'True':
68
- seed_everything(42)
69
- im = cv2.resize(input_img,(W,H))
70
-
71
- if type_in == 'Sketch':
72
- if color_back == 'White':
73
- im = 255-im
74
- im_edge = im.copy()
75
- im = img2tensor(im)[0].unsqueeze(0).unsqueeze(0)/255.
76
- im = im>0.5
77
- im = im.float()
78
- elif type_in == 'Image':
79
- im = img2tensor(im).unsqueeze(0)/255.
80
- im = net_G(im.to(device))[-1]
81
- im = im>0.5
82
- im = im.float()
83
- im_edge = tensor2img(im)
84
-
85
- with torch.no_grad():
86
- c = model.get_learned_conditioning([prompt])
87
- nc = model.get_learned_conditioning([neg_prompt])
88
- # extract condition features
89
- features_adapter = model_ad(im.to(device))
90
- shape = [4, W//8, H//8]
91
-
92
- # sampling
93
- samples_ddim, _ = sampler.sample(S=50,
94
- conditioning=c,
95
- batch_size=1,
96
- shape=shape,
97
- verbose=False,
98
- unconditional_guidance_scale=scale,
99
- unconditional_conditioning=nc,
100
- eta=0.0,
101
- x_T=None,
102
- features_adapter1=features_adapter,
103
- mode = 'sketch',
104
- con_strength = con_strength)
105
-
106
- x_samples_ddim = model.decode_first_stage(samples_ddim)
107
- x_samples_ddim = torch.clamp((x_samples_ddim + 1.0) / 2.0, min=0.0, max=1.0)
108
- x_samples_ddim = x_samples_ddim.to('cpu')
109
- x_samples_ddim = x_samples_ddim.permute(0, 2, 3, 1).numpy()[0]
110
- x_samples_ddim = 255.*x_samples_ddim
111
- x_samples_ddim = x_samples_ddim.astype(np.uint8)
112
-
113
- return [im_edge, x_samples_ddim]
114
-
115
- DESCRIPTION = '''# T2I-Adapter (Sketch)
116
- [Paper](https://arxiv.org/abs/2302.08453) [GitHub](https://github.com/TencentARC/T2I-Adapter)
117
-
118
- This gradio demo is for sketch-guided generation. The current functions include:
119
- - Sketch to Image Generation
120
- - Image to Image Generation
121
- - Generation with **Anything** setting
122
- '''
123
- block = gr.Blocks().queue()
124
- with block:
125
- with gr.Row():
126
- gr.Markdown(DESCRIPTION)
127
- with gr.Row():
128
- with gr.Column():
129
- input_img = gr.Image(source='upload', type="numpy")
130
- prompt = gr.Textbox(label="Prompt")
131
- neg_prompt = gr.Textbox(label="Negative Prompt",
132
- value='ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face')
133
- with gr.Row():
134
- type_in = gr.inputs.Radio(['Sketch', 'Image'], type="value", default='Image', label='Input Types\n (You can input an image or a sketch)')
135
- color_back = gr.inputs.Radio(['White', 'Black'], type="value", default='Black', label='Color of the sketch background\n (Only work for sketch input)')
136
- run_button = gr.Button(label="Run")
137
- con_strength = gr.Slider(label="Controling Strength (The guidance strength of the sketch to the result)", minimum=0, maximum=1, value=0.4, step=0.1)
138
- scale = gr.Slider(label="Guidance Scale (Classifier free guidance)", minimum=0.1, maximum=30.0, value=9, step=0.1)
139
- fix_sample = gr.inputs.Radio(['True', 'False'], type="value", default='False', label='Fix Sampling\n (Fix the random seed)')
140
- base_model = gr.inputs.Radio(['sd-v1-4.ckpt', 'anything-v4.0-pruned.ckpt'], type="value", default='sd-v1-4.ckpt', label='The base model you want to use')
141
- with gr.Column():
142
- result = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
143
- ips = [input_img, type_in, color_back, prompt, neg_prompt, fix_sample, scale, con_strength, base_model]
144
- run_button.click(fn=process, inputs=ips, outputs=[result])
145
-
146
- block.launch(server_name='0.0.0.0')
147
-
ldm/data/__init__.py CHANGED
File without changes
ldm/data/base.py CHANGED
File without changes
ldm/data/imagenet.py CHANGED
File without changes
ldm/data/lsun.py CHANGED
File without changes
ldm/lr_scheduler.py CHANGED
File without changes
ldm/models/autoencoder.py CHANGED
File without changes
ldm/models/diffusion/__init__.py CHANGED
File without changes
ldm/models/diffusion/classifier.py CHANGED
File without changes
ldm/models/diffusion/ddim.py CHANGED
File without changes
ldm/models/diffusion/ddpm.py CHANGED
File without changes
ldm/models/diffusion/dpm_solver/__init__.py CHANGED
File without changes
ldm/models/diffusion/dpm_solver/dpm_solver.py CHANGED
File without changes
ldm/models/diffusion/dpm_solver/sampler.py CHANGED
File without changes
ldm/models/diffusion/plms.py CHANGED
File without changes
ldm/modules/attention.py CHANGED
File without changes