franciszzj committed on
Commit
bafa7b2
1 Parent(s): b213d84
leffa/conf/TARGETS DELETED
@@ -1,17 +0,0 @@
1
- load("//gen_ai/genie/components:macros.bzl", "genie_hydra_config_bundle")
2
-
3
- oncall("genads_infra")
4
-
5
- # All configs including dataloading, torchtnt, and profiling.
6
- # Note you need all dependencies for hydra instantiation here.
7
- genie_hydra_config_bundle(
8
- name = "idm_vton_hydra_configs",
9
- srcs = glob(["**/*.yaml"]),
10
- deps = [
11
- "//caffe2:torch",
12
- "//genads/common/data:transforms",
13
- "//genads/idm_vton:idm_vton_lib",
14
- "//media_dataloader/api:api",
15
- "//torchmultimodal/fb/genai/transforms:transforms",
16
- ],
17
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/constants/base.yaml DELETED
@@ -1,31 +0,0 @@
1
- # for virtual try-on
2
- # height: 512
3
- # width: 384
4
- # batch_size: 8
5
- height: 1024
6
- width: 768
7
- batch_size: 2
8
-
9
- # for pose transfer
10
- # height: 256
11
- # width: 176
12
- # batch_size: 8
13
- # height: 512
14
- # width: 352
15
- # batch_size: 4
16
- # height: 1024
17
- # width: 704
18
- # batch_size: 1
19
-
20
- precision: bf16
21
-
22
- max_steps: null
23
- max_epochs: 200
24
- max_train_steps_per_epoch: null
25
-
26
- evaluate_every_n_train_steps: null
27
- evaluate_every_n_train_epochs: null
28
- max_eval_steps_per_eval_epoch: null
29
-
30
- use_torchsnapshot: false
31
- checkpoint_every_n_steps: 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/deepfashion_test.yaml DELETED
@@ -1,47 +0,0 @@
1
- deepfashion_test:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: mgenai
7
- table: deepfashion_pose_transfer
8
- partition_filter_predicate_list: ["ds = '2024-08-15' AND set_name = 'val'"]
9
- enrichments:
10
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
11
- lookup_handle_to_media_columns:
12
- to_img_manifold_path: "image"
13
- from_img_manifold_path: "cloth"
14
- to_img_iuv_manifold_path: "image_densepose"
15
- from_img_iuv_manifold_path: "cloth_densepose"
16
- collate_fn:
17
- - _target_: media_dataloader.api.Collate
18
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
19
- image_field: image
20
- blob_field: image
21
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
22
- image_field: cloth
23
- blob_field: cloth
24
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
25
- image_field: image_densepose
26
- blob_field: image_densepose
27
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
28
- image_field: cloth_densepose
29
- blob_field: cloth_densepose
30
- - _target_: leffa.datasets.transform.VtonTransform
31
- height: ${constants.height}
32
- width: ${constants.width}
33
- is_train: false
34
- dataset: deepfashion
35
- aug_garment_ratio: 0.0
36
- get_garment_from_person_ratio: 0.0
37
- aug_mask_ratio: 0.0
38
-
39
- dataloader:
40
- _target_: media_dataloader.api.StatefulDataLoader
41
- dataset: ${datasets.deepfashion_test.dataset}
42
- batch_size: ${constants.batch_size}
43
- num_workers: 0
44
- prefetch_factor: null
45
- pin_memory: true
46
- persistent_workers: false
47
- multiprocessing_context: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/deepfashion_train.yaml DELETED
@@ -1,47 +0,0 @@
1
- deepfashion_train:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: mgenai
7
- table: deepfashion_pose_transfer
8
- partition_filter_predicate_list: ["ds = '2024-08-15' AND set_name = 'train'"]
9
- enrichments:
10
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
11
- lookup_handle_to_media_columns:
12
- to_img_manifold_path: "image"
13
- from_img_manifold_path: "cloth"
14
- to_img_iuv_manifold_path: "image_densepose"
15
- from_img_iuv_manifold_path: "cloth_densepose"
16
- collate_fn:
17
- - _target_: media_dataloader.api.Collate
18
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
19
- image_field: image
20
- blob_field: image
21
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
22
- image_field: cloth
23
- blob_field: cloth
24
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
25
- image_field: image_densepose
26
- blob_field: image_densepose
27
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
28
- image_field: cloth_densepose
29
- blob_field: cloth_densepose
30
- - _target_: leffa.datasets.transform.VtonTransform
31
- height: ${constants.height}
32
- width: ${constants.width}
33
- is_train: true
34
- dataset: deepfashion
35
- aug_garment_ratio: 0.0
36
- get_garment_from_person_ratio: 0.0
37
- aug_mask_ratio: 0.0
38
-
39
- dataloader:
40
- _target_: media_dataloader.api.StatefulDataLoader
41
- dataset: ${datasets.deepfashion_train.dataset}
42
- batch_size: ${constants.batch_size}
43
- num_workers: 4
44
- prefetch_factor: 2
45
- pin_memory: true
46
- persistent_workers: true
47
- multiprocessing_context: forkserver
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/dress_code_test.yaml DELETED
@@ -1,56 +0,0 @@
1
- dress_code_test:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: ad_metrics
7
- table: vton_public_dataset_dress_code_test_paired_v2
8
- # table: vton_public_dataset_dress_code_test_unpaired_v2
9
- # table: vton_public_dataset_dress_code_test_upper_body_paired_v2
10
- # table: vton_public_dataset_dress_code_test_upper_body_unpaired_v2
11
- partition_filter_predicate_list: ["ds = '2024-09-14'"]
12
- # table: vton_public_dataset_dress_code_test_lower_body_paired_v2
13
- # table: vton_public_dataset_dress_code_test_lower_body_unpaired_v2
14
- # table: vton_public_dataset_dress_code_test_dresses_paired_v2
15
- # table: vton_public_dataset_dress_code_test_dresses_unpaired_v2
16
- # partition_filter_predicate_list: ["ds = '2024-09-16'"]
17
- enrichments:
18
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
19
- lookup_handle_to_media_columns:
20
- image_manifold_path: "image"
21
- cloth_manifold_path: "cloth"
22
- agnostic_mask_manifold_path: "agnostic_mask"
23
- dense_manifold_path: "image_densepose"
24
- label_map_manifold_path: "image_parse"
25
- collate_fn:
26
- - _target_: media_dataloader.api.Collate
27
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
28
- image_field: image
29
- blob_field: image
30
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
31
- image_field: cloth
32
- blob_field: cloth
33
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
34
- image_field: agnostic_mask
35
- blob_field: agnostic_mask
36
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
37
- image_field: image_densepose
38
- blob_field: image_densepose
39
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
40
- image_field: image_parse
41
- blob_field: image_parse
42
- - _target_: leffa.datasets.transform.VtonTransform
43
- height: ${constants.height}
44
- width: ${constants.width}
45
- is_train: false
46
- dataset: dress_code
47
-
48
- dataloader:
49
- _target_: media_dataloader.api.StatefulDataLoader
50
- dataset: ${datasets.dress_code_test.dataset}
51
- batch_size: ${constants.batch_size}
52
- num_workers: 0
53
- prefetch_factor: null
54
- pin_memory: true
55
- persistent_workers: false
56
- multiprocessing_context: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/dress_code_train.yaml DELETED
@@ -1,55 +0,0 @@
1
- dress_code_train:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: ad_metrics
7
- table: vton_public_dataset_dress_code_train_v2
8
- # table: vton_public_dataset_dress_code_train_upper_body_v2
9
- partition_filter_predicate_list: ["ds = '2024-09-14'"]
10
- # table: vton_public_dataset_dress_code_train_lower_body_v2
11
- # table: vton_public_dataset_dress_code_train_dresses_v2
12
- # partition_filter_predicate_list: ["ds = '2024-09-15'"]
13
- enrichments:
14
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
15
- lookup_handle_to_media_columns:
16
- image_manifold_path: "image"
17
- cloth_manifold_path: "cloth"
18
- agnostic_mask_manifold_path: "agnostic_mask"
19
- dense_manifold_path: "image_densepose"
20
- label_map_manifold_path: "image_parse"
21
- collate_fn:
22
- - _target_: media_dataloader.api.Collate
23
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
24
- image_field: image
25
- blob_field: image
26
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
27
- image_field: cloth
28
- blob_field: cloth
29
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
30
- image_field: agnostic_mask
31
- blob_field: agnostic_mask
32
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
33
- image_field: image_densepose
34
- blob_field: image_densepose
35
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
36
- image_field: image_parse
37
- blob_field: image_parse
38
- - _target_: leffa.datasets.transform.VtonTransform
39
- height: ${constants.height}
40
- width: ${constants.width}
41
- is_train: true
42
- dataset: dress_code
43
- aug_garment_ratio: 0.0
44
- get_garment_from_person_ratio: 0.0
45
- aug_mask_ratio: 0.0
46
-
47
- dataloader:
48
- _target_: media_dataloader.api.StatefulDataLoader
49
- dataset: ${datasets.dress_code_train.dataset}
50
- batch_size: ${constants.batch_size}
51
- num_workers: 4
52
- prefetch_factor: 2
53
- pin_memory: true
54
- persistent_workers: true
55
- multiprocessing_context: forkserver
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/viton_hd_test.yaml DELETED
@@ -1,54 +0,0 @@
1
- viton_hd_test:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: ad_metrics
7
- table: vton_public_dataset_viton_hd_test_paired_v2
8
- partition_filter_predicate_list: ["ds = '2024-10-30'"]
9
- # table: vton_public_dataset_viton_hd_test_unpaired_v1
10
- # partition_filter_predicate_list: ["ds = '2024-09-12'"]
11
- enrichments:
12
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
13
- lookup_handle_to_media_columns:
14
- image_manifold_path: "image"
15
- cloth_manifold_path: "cloth"
16
- agnostic_mask_manifold_path: "agnostic_mask"
17
- image_densepose_manifold_path: "image_densepose"
18
- cloth_mask_manifold_path: "cloth_mask"
19
- image_parse_v3_manifold_path: "image_parse"
20
- collate_fn:
21
- - _target_: media_dataloader.api.Collate
22
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
23
- image_field: image
24
- blob_field: image
25
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
26
- image_field: cloth
27
- blob_field: cloth
28
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
29
- image_field: agnostic_mask
30
- blob_field: agnostic_mask
31
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
32
- image_field: image_densepose
33
- blob_field: image_densepose
34
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
35
- image_field: cloth_mask
36
- blob_field: cloth_mask
37
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
38
- image_field: image_parse
39
- blob_field: image_parse
40
- - _target_: leffa.datasets.transform.VtonTransform
41
- height: ${constants.height}
42
- width: ${constants.width}
43
- is_train: false
44
- dataset: viton_hd
45
-
46
- dataloader:
47
- _target_: media_dataloader.api.StatefulDataLoader
48
- dataset: ${datasets.viton_hd_test.dataset}
49
- batch_size: ${constants.batch_size}
50
- num_workers: 0
51
- prefetch_factor: null
52
- pin_memory: true
53
- persistent_workers: false
54
- multiprocessing_context: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/viton_hd_test_local.yaml DELETED
@@ -1,20 +0,0 @@
1
- viton_hd_test_local:
2
- dataset:
3
- _target_: leffa.datasets.viton_hd.VitonHDLocalDataset
4
- # _target_: leffa.datasets.random_dataset.RandomDataset
5
- dataroot_path: /home/zijianzhou/data/viton_hd
6
- phase: test
7
- order: unpaired
8
- size:
9
- - ${constants.height}
10
- - ${constants.width}
11
-
12
- dataloader:
13
- _target_: torch.utils.data.DataLoader
14
- dataset: ${datasets.viton_hd_test_local.dataset}
15
- batch_size: ${constants.batch_size}
16
- num_workers: 0
17
- prefetch_factor: null
18
- pin_memory: true
19
- persistent_workers: false
20
- multiprocessing_context: null
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/datasets/viton_hd_train.yaml DELETED
@@ -1,55 +0,0 @@
1
- viton_hd_train:
2
- dataset:
3
- _target_: media_dataloader.api.EnrichingDataset
4
- datasource:
5
- _target_: media_dataloader.api.LazyHiveDataSource
6
- namespace: ad_metrics
7
- table: vton_public_dataset_viton_hd_train_v2
8
- partition_filter_predicate_list: ["ds = '2024-10-30'"]
9
- enrichments:
10
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
11
- lookup_handle_to_media_columns:
12
- image_manifold_path: "image"
13
- cloth_manifold_path: "cloth"
14
- agnostic_mask_manifold_path: "agnostic_mask"
15
- image_densepose_manifold_path: "image_densepose"
16
- cloth_mask_manifold_path: "cloth_mask"
17
- image_parse_v3_manifold_path: "image_parse"
18
- collate_fn:
19
- - _target_: media_dataloader.api.Collate
20
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
21
- image_field: image
22
- blob_field: image
23
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
24
- image_field: cloth
25
- blob_field: cloth
26
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
27
- image_field: agnostic_mask
28
- blob_field: agnostic_mask
29
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
30
- image_field: image_densepose
31
- blob_field: image_densepose
32
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
33
- image_field: cloth_mask
34
- blob_field: cloth_mask
35
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
36
- image_field: image_parse
37
- blob_field: image_parse
38
- - _target_: leffa.datasets.transform.VtonTransform
39
- height: ${constants.height}
40
- width: ${constants.width}
41
- is_train: true
42
- dataset: viton_hd
43
- aug_garment_ratio: 0.0
44
- get_garment_from_person_ratio: 0.0
45
- aug_mask_ratio: 0.0
46
-
47
- dataloader:
48
- _target_: media_dataloader.api.StatefulDataLoader
49
- dataset: ${datasets.viton_hd_train.dataset}
50
- batch_size: ${constants.batch_size}
51
- num_workers: 4
52
- prefetch_factor: 2
53
- pin_memory: true
54
- persistent_workers: true
55
- multiprocessing_context: forkserver
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/eval/base.yaml DELETED
@@ -1,4 +0,0 @@
1
- warmup_iters: 0
2
- evaluate_every_n_train_steps: ${constants.evaluate_every_n_train_steps}
3
- evaluate_every_n_train_epochs: ${constants.evaluate_every_n_train_epochs}
4
- max_eval_steps_per_eval_epoch: ${constants.max_eval_steps_per_eval_epoch}
 
 
 
 
 
leffa/conf/model/cat_vton_flux.yaml DELETED
@@ -1,19 +0,0 @@
1
- _target_: leffa.models.cat_vton_model.CatVtonModel
2
- # FLUX.1-dev
3
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/FLUX.1-dev
4
- new_in_channels: 33 # 16+1+16
5
- height: ${constants.height}
6
- width: ${constants.width}
7
- garment_dropout_ratio: 0.1
8
- use_dream: false
9
- dream_detail_preservation: 10.0
10
- use_garment_mask: false
11
- only_optimize_unet_attn1: true
12
- use_learning_flow_in_attention: false
13
- learning_flow_in_attention_lambda: 0.001
14
- learning_flow_in_attention_stop_timestep: 500
15
- use_attention_flow_loss: false
16
- attention_flow_loss_lambda: 0.001
17
- use_pixel_space_supervision: false
18
- pixel_space_supervision_lambda: 10.0
19
- use_densepose: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/cat_vton_sd15.yaml DELETED
@@ -1,20 +0,0 @@
1
- _target_: leffa.models.cat_vton_model.CatVtonModel
2
- # SD1.5
3
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
4
- pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
5
- new_in_channels: 9
6
- height: ${constants.height}
7
- width: ${constants.width}
8
- garment_dropout_ratio: 0.1
9
- use_dream: true
10
- dream_detail_preservation: 10.0
11
- use_garment_mask: false
12
- only_optimize_unet_attn1: true
13
- use_learning_flow_in_attention: false
14
- learning_flow_in_attention_lambda: 0.001
15
- learning_flow_in_attention_stop_timestep: 500
16
- use_attention_flow_loss: false
17
- attention_flow_loss_lambda: 0.001
18
- use_pixel_space_supervision: true
19
- pixel_space_supervision_lambda: 10.0
20
- use_densepose: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/cat_vton_sdxl.yaml DELETED
@@ -1,11 +0,0 @@
1
- _target_: leffa.models.cat_vton_model.CatVtonModel
2
- # SDXL
3
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
4
- new_in_channels: 9
5
- height: ${constants.height}
6
- width: ${constants.width}
7
- garment_dropout_ratio: 0.1
8
- use_dream: true
9
- dream_detail_preservation: 10.0
10
- use_garment_mask: false
11
- only_optimize_unet_attn1: false
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/idm_vton_sd15.yaml DELETED
@@ -1,16 +0,0 @@
1
- _target_: leffa.models.idm_vton_model.IdmVtonModel
2
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
3
- pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
4
- pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
5
- new_in_channels: 13
6
- height: ${constants.height}
7
- width: ${constants.width}
8
- garment_dropout_ratio: 0.1
9
- use_dream: false
10
- dream_detail_preservation: 10.0
11
- skip_cross_attention: true
12
- skip_cross_attention_garmentnet: true
13
- copy_unet_to_unet_encoder: false
14
- only_optimize_unet_attn1: false
15
- optimize_unet: true
16
- optimize_unet_encoder: true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/idm_vton_sdxl.yaml DELETED
@@ -1,17 +0,0 @@
1
- _target_: leffa.models.idm_vton_model.IdmVtonModel
2
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
3
- pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-base-1.0
4
- pretrained_image_encoder_path: manifold://genads_models/tree/zijianzhou/model/IP-Adapter/models/image_encoder
5
- pretrained_ip_adapter_path: manifold://genads_models/tree/zijianzhou/model/IP-Adapter/sdxl_models/ip-adapter-plus_sdxl_vit-h.bin
6
- new_in_channels: 13
7
- height: ${constants.height}
8
- width: ${constants.width}
9
- garment_dropout_ratio: 0.1
10
- use_dream: false
11
- dream_detail_preservation: 10.0
12
- skip_cross_attention: false
13
- skip_cross_attention_garmentnet: false
14
- copy_unet_to_unet_encoder: false
15
- only_optimize_unet_attn1: false
16
- optimize_unet: true
17
- optimize_unet_encoder: false
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/simple_vton_sd15.yaml DELETED
@@ -1,23 +0,0 @@
1
- _target_: leffa.models.simple_vton_model.SimpleVtonModel
2
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
3
- pretrained_vae_name_or_path: manifold://genads_models/tree/zijianzhou/model/sd-vae-ft-mse
4
- pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-inpainting
5
- new_in_channels: 12
6
- height: ${constants.height}
7
- width: ${constants.width}
8
- garment_dropout_ratio: 0.1
9
- use_dream: false
10
- dream_detail_preservation: 10.0
11
- skip_cross_attention: true
12
- skip_cross_attention_garmentnet: true
13
- copy_unet_to_unet_encoder: false
14
- only_optimize_unet_attn1: false
15
- optimize_unet: true
16
- optimize_unet_encoder: true
17
- use_learning_flow_in_attention: false
18
- learning_flow_in_attention_lambda: 0.001
19
- learning_flow_in_attention_stop_timestep: 500
20
- use_attention_flow_loss: false
21
- attention_flow_loss_lambda: 0.001
22
- use_pixel_space_supervision: false
23
- pixel_space_supervision_lambda: 10.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/model/simple_vton_sdxl.yaml DELETED
@@ -1,22 +0,0 @@
1
- _target_: leffa.models.simple_vton_model.SimpleVtonModel
2
- pretrained_model_name_or_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-1.0-inpainting-0.1
3
- pretrained_garmentnet_path: manifold://genads_models/tree/zijianzhou/model/stable-diffusion-xl-base-1.0
4
- new_in_channels: 12
5
- height: ${constants.height}
6
- width: ${constants.width}
7
- garment_dropout_ratio: 0.1
8
- use_dream: false
9
- dream_detail_preservation: 10.0
10
- skip_cross_attention: true
11
- skip_cross_attention_garmentnet: true
12
- copy_unet_to_unet_encoder: false
13
- only_optimize_unet_attn1: false
14
- optimize_unet: true
15
- optimize_unet_encoder: true
16
- use_learning_flow_in_attention: false
17
- learning_flow_in_attention_lambda: 0.001
18
- learning_flow_in_attention_stop_timestep: 500
19
- use_attention_flow_loss: false
20
- attention_flow_loss_lambda: 0.001
21
- use_pixel_space_supervision: false
22
- pixel_space_supervision_lambda: 10.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/predict.yaml DELETED
@@ -1,66 +0,0 @@
1
- # File used for running pure prediction with torchtnt, including a custom score computation.
2
-
3
- seed: 42
4
-
5
- defaults:
6
- - constants: base
7
- # - model: idm_vton_sd15
8
- # - model: idm_vton_sdxl
9
- # - model: cat_vton_sd15
10
- # - model: cat_vton_sdxl
11
- - model: simple_vton_sd15
12
- # - model: simple_vton_sdxl
13
- # - model: simple_vton_sd15_fluxvae
14
- # - model: simple_vton_sdxl_fluxvae
15
- - datasets:
16
- - viton_hd_test
17
- - dress_code_test
18
- - deepfashion_test
19
- - _self_
20
-
21
- constants:
22
- # for virtual try-on
23
- # height: 512
24
- # width: 384
25
- height: 1024
26
- width: 768
27
- # for pose transfer
28
- # height: 256
29
- # width: 176
30
- # height: 512
31
- # width: 352
32
- # height: 1024
33
- # width: 704
34
- batch_size: 1
35
-
36
- # null to go through whole dataloader.
37
- max_steps_per_epoch: null
38
- dataloader: ${datasets.viton_hd_test.dataloader}
39
- # dataloader: ${datasets.dress_code_test.dataloader}
40
- # dataloader: ${datasets.deepfashion_test.dataloader}
41
- manifold_log_dir: manifold://genads_models/tree/zijianzhou/output/simple_vton/tmp
42
-
43
- inference:
44
- # _target_: leffa.inference.IdmVtonInference
45
- # _target_: leffa.inference.CatVtonInference
46
- _target_: leffa.inference.SimpleVtonInference
47
- _partial_: True
48
- model: ${model}
49
- model_entity_id: null
50
- checkpoint_version: null
51
-
52
- callbacks:
53
- - _target_: leffa.callbacks.save_image_callback.SaveImageCallback
54
- manifold_path: ${manifold_log_dir}/generated_paired
55
- # manifold_path: ${manifold_log_dir}/generated_unpaired
56
- task_type: vton
57
- # manifold_path: ${manifold_log_dir}
58
- # task_type: pose_transfer
59
-
60
- unit:
61
- _target_: leffa.vton_pred_unit.VtonPredUnit
62
- inference_fn: ${inference}
63
-
64
- hydra:
65
- run:
66
- dir: /tmp/
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/test.yaml DELETED
@@ -1,89 +0,0 @@
1
- ### Main entry for the training config in hydra.
2
- ### Only top level configurations can run, we decompose the full
3
- ### config to multiple subfolders for better reusability.
4
-
5
- seed: 42
6
-
7
- defaults:
8
- - constants: base
9
- - model: simple_vton_sd15
10
- - trainer: base
11
- - eval: base
12
- - datasets:
13
- - viton_hd_train
14
- - viton_hd_test
15
- - viton_hd_test_local
16
- - dress_code_train
17
- - dress_code_test
18
- - deepfashion_train
19
- - deepfashion_test
20
- - _self_
21
-
22
- constants:
23
- height: 1024
24
- width: 768
25
- batch_size: 1
26
-
27
- train_dataset: ${datasets.viton_hd_train}
28
- # train_dataset: ${datasets.dress_code_train}
29
- # train_dataset: ${datasets.deepfashion_train}
30
- eval_dataset: ${datasets.viton_hd_test}
31
-
32
- unit:
33
- _target_: leffa.vton_unit.VtonUnit
34
- _partial_: True
35
- model: ${model}
36
- # strategy: ddp
37
- strategy:
38
- _target_: leffa.utils.create_fsdp_strategy
39
- sharding_strategy: SHARD_GRAD_OP
40
- state_dict_type: SHARDED_STATE_DICT
41
- mixed_precision:
42
- param_dtype: ${constants.precision}
43
- reduce_dtype: ${constants.precision}
44
- cast_forward_inputs: True
45
- class_paths:
46
- # For VAE (first stage)
47
- - diffusers.models.unets.unet_2d_blocks.DownEncoderBlock2D
48
- - diffusers.models.unets.unet_2d_blocks.UNetMidBlock2D
49
- - diffusers.models.unets.unet_2d_blocks.UpDecoderBlock2D
50
- # For UNet (unet stage) IdmVton
51
- - leffa.models.diffusion_model.attentionhacked_tryon.BasicTransformerBlock
52
- - leffa.models.diffusion_model.attentionhacked_garment.BasicTransformerBlock
53
- # For UNet (unet stage) CatVton
54
- - diffusers.models.attention.BasicTransformerBlock
55
- # For CLIP (condition stage)
56
- - transformers.CLIPTextModel
57
- - transformers.CLIPTextModelWithProjection
58
- - transformers.CLIPVisionModelWithProjection
59
- optim_fn:
60
- _target_: torch.optim.AdamW
61
- _partial_: True
62
- lr: 1.0e-5
63
- betas: [0.9, 0.999]
64
- eps: 1.0e-8
65
- weight_decay: 1.0e-2
66
- amsgrad: false
67
- lr_scheduler_fn:
68
- _target_: torch.optim.lr_scheduler.ConstantLR
69
- _partial_: True
70
- factor: 1.0
71
- swa_params:
72
- _target_: torchtnt.framework.auto_unit.SWAParams
73
- warmup_steps_or_epochs: 0
74
- step_or_epoch_update_freq: 1
75
- averaging_method: ema
76
- ema_decay: 0.9999
77
- use_lit: True
78
- precision: ${constants.precision}
79
- clip_grad_norm: 1.0
80
-
81
- umm_metadata:
82
- model_type_name: ads_genads_ldm
83
- model_series_name: ads_genads_ldm
84
- oncall: ai_genads
85
-
86
- checkpoint:
87
- checkpoint_dir: null
88
- checkpoint_path: null
89
- checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/train.yaml DELETED
@@ -1,84 +0,0 @@
1
- ### Main entry for the training config in hydra.
2
- ### Only top level configurations can run, we decompose the full
3
- ### config to multiple subfolders for better reusability.
4
-
5
- seed: 42
6
-
7
- defaults:
8
- - constants: base
9
- - model: simple_vton_sd15
10
- - trainer: base
11
- - eval: base
12
- - datasets:
13
- - viton_hd_train
14
- - viton_hd_test
15
- - viton_hd_test_local
16
- - dress_code_train
17
- - dress_code_test
18
- - deepfashion_train
19
- - deepfashion_test
20
- - _self_
21
-
22
- train_dataset: ${datasets.viton_hd_train}
23
- # train_dataset: ${datasets.dress_code_train}
24
- # train_dataset: ${datasets.deepfashion_train}
25
- eval_dataset: null
26
-
27
- unit:
28
- _target_: leffa.vton_unit.VtonUnit
29
- _partial_: True
30
- model: ${model}
31
- # strategy: ddp
32
- strategy:
33
- _target_: leffa.utils.create_fsdp_strategy
34
- sharding_strategy: SHARD_GRAD_OP
35
- state_dict_type: SHARDED_STATE_DICT
36
- mixed_precision:
37
- param_dtype: ${constants.precision}
38
- reduce_dtype: ${constants.precision}
39
- cast_forward_inputs: True
40
- class_paths:
41
- # For VAE (first stage)
42
- - diffusers.models.unets.unet_2d_blocks.DownEncoderBlock2D
43
- - diffusers.models.unets.unet_2d_blocks.UNetMidBlock2D
44
- - diffusers.models.unets.unet_2d_blocks.UpDecoderBlock2D
45
- # For UNet (unet stage) IdmVton
46
- - leffa.models.diffusion_model.attentionhacked_tryon.BasicTransformerBlock
47
- - leffa.models.diffusion_model.attentionhacked_garment.BasicTransformerBlock
48
- # For UNet (unet stage) CatVton
49
- - diffusers.models.attention.BasicTransformerBlock
50
- # For CLIP (condition stage)
51
- - transformers.CLIPTextModel
52
- - transformers.CLIPTextModelWithProjection
53
- - transformers.CLIPVisionModelWithProjection
54
- optim_fn:
55
- _target_: torch.optim.AdamW
56
- _partial_: True
57
- lr: 1.0e-5
58
- betas: [0.9, 0.999]
59
- eps: 1.0e-8
60
- weight_decay: 1.0e-2
61
- amsgrad: false
62
- lr_scheduler_fn:
63
- _target_: torch.optim.lr_scheduler.ConstantLR
64
- _partial_: True
65
- factor: 1.0
66
- swa_params:
67
- _target_: torchtnt.framework.auto_unit.SWAParams
68
- warmup_steps_or_epochs: 0
69
- step_or_epoch_update_freq: 1
70
- averaging_method: ema
71
- ema_decay: 0.9999
72
- use_lit: True
73
- precision: ${constants.precision}
74
- clip_grad_norm: 1.0
75
-
76
- umm_metadata:
77
- model_type_name: ads_genads_ldm
78
- model_series_name: ads_genads_ldm
79
- oncall: ai_genads
80
-
81
- checkpoint:
82
- checkpoint_dir: null
83
- checkpoint_path: null
84
- checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/train_local.yaml DELETED
@@ -1,9 +0,0 @@
1
- ## Starter configuration to be ran locally for testing purpose.
2
-
3
- defaults:
4
- - train
5
- - _self_
6
-
7
- trainer:
8
- resume_from_last_ckpt: false
9
- model_entity_id: null
 
 
 
 
 
 
 
 
 
 
leffa/conf/train_mae.yaml DELETED
@@ -1,159 +0,0 @@
1
- constants:
2
- img_size: 512
3
- batch_size: 16
4
- precision: fp32
5
- max_epochs: 1000
6
- max_steps: null
7
- max_train_steps_per_epoch: null
8
- evaluate_every_n_train_steps: null
9
- evaluate_every_n_train_epochs: 10
10
- max_eval_steps_per_eval_epoch: null
11
- use_torchsnapshot: false
12
- checkpoint_every_n_steps: 500
13
- model:
14
- _target_: leffa.models.idm_vton_model.Mae4BgGen
15
- img_size: ${constants.img_size}
16
- patch_size: 16
17
- embed_dim: 1024
18
- depth: 24
19
- num_heads: 16
20
- # pretrained_path: manifold://genads_models/tree/zijianzhou/model/mae/mae_pretrain_vit_large.pth
21
- pretrained_path: null
22
- bg_masking_type: min
23
- trainer:
24
- max_epochs: ${constants.max_epochs}
25
- max_steps: ${constants.max_steps}
26
- max_train_steps_per_epoch: ${constants.max_train_steps_per_epoch}
27
- checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
28
- model_entity_id: null
29
- resume_from_last_ckpt: true
30
- model_store_checkpoint_version: null
31
- garbage_collector_interval: 5001
32
- pretrained_weights: null
33
- log_dir: manifold://fblearner_flow_run_metrics/tree/torchmultimodal/idm_vton/logs/
34
- use_pt2: false
35
- memory_snapshot: false
36
- eval:
37
- warmup_iters: 0
38
- evaluate_every_n_train_steps: ${constants.evaluate_every_n_train_steps}
39
- evaluate_every_n_train_epochs: ${constants.evaluate_every_n_train_epochs}
40
- max_eval_steps_per_eval_epoch: ${constants.max_eval_steps_per_eval_epoch}
41
- datasets:
42
- mae_train:
43
- dataset:
44
- _target_: media_dataloader.api.EnrichingDataset
45
- datasource:
46
- _target_: media_dataloader.api.LazyHiveDataSource
47
- namespace: ad_metrics
48
- table: hybrid_3_0_1st_shein_data
49
- partition_filter_predicate_list:
50
- - ds = '2024-07-20'
51
- enrichments:
52
- - _target_: media_dataloader.api.media_lookups.EverstoreLookups
53
- lookup_handle_to_media_columns:
54
- everstore_handle: "image"
55
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
56
- lookup_handle_to_media_columns:
57
- binary_mask_manifold_path: bg_mask
58
- collate_fn:
59
- - _target_: media_dataloader.api.Collate
60
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
61
- image_field: image
62
- blob_field: image
63
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
64
- image_field: bg_mask
65
- blob_field: bg_mask
66
- - _target_: leffa.datasets.transform.MaeTransform
67
- input_size: ${constants.img_size}
68
- is_train: true
69
- dataloader:
70
- _target_: media_dataloader.api.StatefulDataLoader
71
- dataset: ${datasets.mae_train.dataset}
72
- batch_size: ${constants.batch_size}
73
- num_workers: 8
74
- prefetch_factor: 2
75
- pin_memory: true
76
- persistent_workers: true
77
- multiprocessing_context: forkserver
78
- mae_test:
79
- dataset:
80
- _target_: media_dataloader.api.EnrichingDataset
81
- datasource:
82
- _target_: media_dataloader.api.LazyHiveDataSource
83
- namespace: ad_metrics
84
- table: hybrid_3_0_1st_shein_data
85
- partition_filter_predicate_list:
86
- - ds = '2024-07-20'
87
- enrichments:
88
- - _target_: media_dataloader.api.media_lookups.EverstoreLookups
89
- lookup_handle_to_media_columns:
90
- everstore_handle: "image"
91
- - _target_: media_dataloader.api.media_lookups.ManifoldLookups
92
- lookup_handle_to_media_columns:
93
- binary_mask_manifold_path: bg_mask
94
- collate_fn:
95
- - _target_: media_dataloader.api.Collate
96
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
97
- image_field: image
98
- blob_field: image
99
- - _target_: torchmultimodal.fb.genai.transforms.hive_transforms.EverstoreImageToPILTransform
100
- image_field: bg_mask
101
- blob_field: bg_mask
102
- - _target_: leffa.datasets.transform.MaeTransform
103
- input_size: ${constants.img_size}
104
- is_train: false
105
- dataloader:
106
- _target_: media_dataloader.api.StatefulDataLoader
107
- dataset: ${datasets.mae_train.dataset}
108
- batch_size: ${constants.batch_size}
109
- num_workers: 0
110
- prefetch_factor: null
111
- pin_memory: true
112
- persistent_workers: false
113
- multiprocessing_context: null
114
- seed: 42
115
- train_dataset: ${datasets.mae_train}
116
- eval_dataset: null
117
- # eval_dataset: ${datasets.mae_test}
118
- unit:
119
- _target_: leffa.vton_unit.VtonUnit
120
- _partial_: true
121
- model: ${model}
122
- strategy: ddp
123
- # strategy:
124
- # _target_: leffa.utils.create_fsdp_strategy
125
- # sharding_strategy: FULL_SHARD
126
- # state_dict_type: SHARDED_STATE_DICT
127
- # class_paths:
128
- # - leffa.models.idm_vton_model.MaskedAutoencoderViT
129
- optim_fn:
130
- _target_: torch.optim.AdamW
131
- _partial_: true
132
- lr: 1.0e-05
133
- betas:
134
- - 0.9
135
- - 0.999
136
- eps: 1.0e-08
137
- weight_decay: 0.01
138
- amsgrad: false
139
- lr_scheduler_fn:
140
- _target_: torch.optim.lr_scheduler.ConstantLR
141
- _partial_: true
142
- factor: 1.0
143
- swa_params:
144
- _target_: torchtnt.framework.auto_unit.SWAParams
145
- warmup_steps_or_epochs: 0
146
- step_or_epoch_update_freq: 1
147
- averaging_method: ema
148
- ema_decay: 0.9999
149
- use_lit: true
150
- precision: ${constants.precision}
151
- clip_grad_norm: 1.0
152
- umm_metadata:
153
- model_type_name: ads_genads_ldm
154
- model_series_name: ads_genads_ldm
155
- oncall: ai_genads
156
- checkpoint:
157
- checkpoint_dir: null
158
- checkpoint_path: null
159
- checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
leffa/conf/trainer/base.yaml DELETED
@@ -1,14 +0,0 @@
1
- max_epochs: ${constants.max_epochs}
2
- max_steps: ${constants.max_steps}
3
- max_train_steps_per_epoch: ${constants.max_train_steps_per_epoch}
4
-
5
- checkpoint_every_n_steps: ${constants.checkpoint_every_n_steps}
6
-
7
- resume_from_last_ckpt: True
8
- model_entity_id: null
9
- model_store_checkpoint_version: null
10
- garbage_collector_interval: 5001
11
- pretrained_weights: null
12
- log_dir: manifold://fblearner_flow_run_metrics/tree/torchmultimodal/idm_vton/logs
13
- use_pt2: False
14
- memory_snapshot: False