Mark000111888 committed on
Commit cf03cd8
1 Parent(s): 9657bd3
args.json ADDED
@@ -0,0 +1,60 @@
+ {
+   "pretrained_model_name_or_path": "/opt/ml/stable-diffusion-v1-5",
+   "pretrained_vae_name_or_path": "stabilityai/sd-vae-ft-mse",
+   "revision": "fp16",
+   "tokenizer_name": null,
+   "instance_data_dir": null,
+   "class_data_dir": null,
+   "instance_prompt": "photo of xyz person",
+   "class_prompt": "photo of a person",
+   "save_sample_prompt": null,
+   "save_sample_negative_prompt": null,
+   "n_save_sample": 4,
+   "save_guidance_scale": 7.5,
+   "save_infer_steps": 50,
+   "pad_tokens": false,
+   "with_prior_preservation": true,
+   "prior_loss_weight": 1.0,
+   "num_class_images": 50,
+   "output_dir": "/opt/ml/model",
+   "image_output_dir": "image-samples",
+   "seed": 15213,
+   "resolution": 512,
+   "center_crop": false,
+   "train_text_encoder": true,
+   "train_batch_size": 2,
+   "sample_batch_size": 4,
+   "num_train_epochs": 60,
+   "max_train_steps": 1500,
+   "gradient_accumulation_steps": 1,
+   "gradient_checkpointing": true,
+   "learning_rate": 1e-06,
+   "scale_lr": false,
+   "lr_scheduler": "constant",
+   "lr_warmup_steps": 0,
+   "use_8bit_adam": true,
+   "adam_beta1": 0.9,
+   "adam_beta2": 0.999,
+   "adam_weight_decay": 0.01,
+   "adam_epsilon": 1e-08,
+   "max_grad_norm": 1.0,
+   "push_to_hub": false,
+   "hub_token": null,
+   "hub_model_id": null,
+   "logging_dir": "logs",
+   "log_interval": 10,
+   "save_interval": 10000,
+   "save_min_steps": 0,
+   "mixed_precision": "fp16",
+   "not_cache_latents": false,
+   "hflip": false,
+   "local_rank": -1,
+   "concepts_list": [
+     {
+       "instance_prompt": "photo of zwx model",
+       "class_prompt": "photo of a model",
+       "instance_data_dir": "/opt/ml/input/data/zwx_jl",
+       "class_data_dir": "/opt/ml/input/data/female_jl"
+     }
+   ]
+ }
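
These are the recorded DreamBooth fine-tuning arguments for this model: Stable Diffusion v1.5 with the MSE-finetuned VAE, prior preservation against 50 class images, text-encoder training enabled, and a constant 1e-06 learning rate for 1500 steps. A minimal sketch of inspecting the file, assuming only the path args.json and the keys shown above:

    import json

    # Load the training arguments saved next to the model artifacts.
    with open("args.json") as f:
        args = json.load(f)

    print(args["learning_rate"], args["max_train_steps"], args["train_batch_size"])

    # Each concept pairs an instance prompt/directory with the class
    # prompt/directory used for the prior-preservation loss.
    for concept in args["concepts_list"]:
        print(concept["instance_prompt"], "->", concept["class_prompt"])
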
code/inference.py ADDED
@@ -0,0 +1,53 @@
+ """
+ Additional inference script for stable diffusion
+ Edited by Xiaoxiao
+ """
+
+ from diffusers import StableDiffusionPipeline, DDIMScheduler
+ import torch
+ import base64
+ import numpy as np
+
+ def process_data(data: dict) -> dict:
+     g_cuda = None
+     g_cuda = torch.Generator(device='cuda')
+
+     return {
+         "prompt": data.pop("prompt", data),
+         "negative_prompt": data.pop("negative_prompt", ""),
+         "num_images_per_prompt": min(data.pop("num_samples", 2), 5),
+         "guidance_scale": data.pop("guidance_scale", 7.5),
+         "num_inference_steps": min(data.pop("num_inference_steps", 50), 50),
+         "height": 512,
+         "width": 512,
+         "generator": g_cuda.manual_seed(data.pop("seed", 15213))
+     }
+
+
+ def model_fn(model_dir: str):
+     scheduler = DDIMScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", clip_sample=False, set_alpha_to_one=False)
+     t2i_pipe = StableDiffusionPipeline.from_pretrained(
+         model_dir,
+         scheduler=scheduler,
+         safety_checker=None,
+         torch_dtype=torch.float16
+     )
+     if torch.cuda.is_available():
+         t2i_pipe = t2i_pipe.to("cuda")
+
+     t2i_pipe.enable_attention_slicing()
+     return t2i_pipe
+
+
+ def predict_fn(data: dict, hgf_pipe) -> dict:
+
+     with torch.autocast("cuda"):
+         images = hgf_pipe(**process_data(data))["images"]
+
+     # return dictionary, which will be json serializable
+     return {
+         "images": [
+             base64.b64encode(np.array(image).astype(np.uint8)).decode("utf-8")
+             for image in images
+         ]
+     }
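
Note that predict_fn base64-encodes the raw uint8 RGB pixel buffer of each image, not an encoded PNG, so a client has to reshape the decoded bytes itself using the fixed 512x512 resolution set in process_data. A minimal client-side sketch, assuming Pillow is available; the function name decode_image is illustrative:

    import base64
    import numpy as np
    from PIL import Image

    def decode_image(b64_pixels: str, height: int = 512, width: int = 512) -> Image.Image:
        # The endpoint returns base64 of the flat uint8 buffer, so the client
        # must know the shape; 512x512 RGB matches process_data above.
        pixels = np.frombuffer(base64.b64decode(b64_pixels), dtype=np.uint8)
        return Image.fromarray(pixels.reshape(height, width, 3))

    # response = {"images": [...]} as produced by predict_fn
    # imgs = [decode_image(s) for s in response["images"]]
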
code/requirements.txt ADDED
@@ -0,0 +1,7 @@
+ numpy==1.23.4
+ torch==1.12.1
+ diffusers==0.9.0
+ transformers==4.23.1
+ spacy==3.4.2
+ ftfy==6.1.1
+ huggingface-hub==0.11.1
model_index.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_class_name": "StableDiffusionPipeline",
+   "_diffusers_version": "0.9.0",
+   "requires_safety_checker": false,
+   "scheduler": [
+     "diffusers",
+     "DDIMScheduler"
+   ],
+   "text_encoder": [
+     "transformers",
+     "CLIPTextModel"
+   ],
+   "tokenizer": [
+     "transformers",
+     "CLIPTokenizer"
+   ],
+   "unet": [
+     "diffusers",
+     "UNet2DConditionModel"
+   ],
+   "vae": [
+     "diffusers",
+     "AutoencoderKL"
+   ]
+ }
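
model_index.json is what diffusers reads to reassemble the pipeline: each entry names the library and class to load from the matching sub-folder (scheduler, text_encoder, tokenizer, unet, vae), and requires_safety_checker is false because no safety checker is shipped. A minimal sketch of loading the repository root as a pipeline, where "model_dir" is a placeholder for a local copy of these files:

    import torch
    from diffusers import StableDiffusionPipeline

    # from_pretrained reads model_index.json and instantiates each component
    # from its sub-folder; no safety checker is listed, so none is loaded.
    pipe = StableDiffusionPipeline.from_pretrained(
        "model_dir",  # placeholder: local path containing model_index.json
        torch_dtype=torch.float16,
    )
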
samples/0-0.png ADDED
samples/0-1.png ADDED
samples/0-2.png ADDED
samples/0-3.png ADDED
scheduler/scheduler_config.json ADDED
@@ -0,0 +1,13 @@
+ {
+   "_class_name": "DDIMScheduler",
+   "_diffusers_version": "0.9.0",
+   "beta_end": 0.012,
+   "beta_schedule": "scaled_linear",
+   "beta_start": 0.00085,
+   "clip_sample": false,
+   "num_train_timesteps": 1000,
+   "prediction_type": "epsilon",
+   "set_alpha_to_one": false,
+   "steps_offset": 1,
+   "trained_betas": null
+ }
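
These DDIM settings (beta_start 0.00085, beta_end 0.012, scaled_linear schedule, clip_sample false, set_alpha_to_one false) are the same values that model_fn in code/inference.py hard-codes when it constructs its own DDIMScheduler. A minimal sketch of loading the scheduler from this sub-folder instead, assuming a diffusers version whose schedulers expose from_pretrained with a subfolder argument (older releases used from_config); "model_dir" again stands in for a local copy:

    from diffusers import DDIMScheduler

    # Reads scheduler/scheduler_config.json and rebuilds the scheduler
    # with the beta schedule recorded above.
    scheduler = DDIMScheduler.from_pretrained("model_dir", subfolder="scheduler")
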
text_encoder/config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "_name_or_path": "/opt/ml/stable-diffusion-v1-5",
+   "architectures": [
+     "CLIPTextModel"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "dropout": 0.0,
+   "eos_token_id": 2,
+   "hidden_act": "quick_gelu",
+   "hidden_size": 768,
+   "initializer_factor": 1.0,
+   "initializer_range": 0.02,
+   "intermediate_size": 3072,
+   "layer_norm_eps": 1e-05,
+   "max_position_embeddings": 77,
+   "model_type": "clip_text_model",
+   "num_attention_heads": 12,
+   "num_hidden_layers": 12,
+   "pad_token_id": 1,
+   "projection_dim": 768,
+   "torch_dtype": "float32",
+   "transformers_version": "4.23.1",
+   "vocab_size": 49408
+ }
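
The text encoder keeps the stock CLIP ViT-L/14 text-model configuration (12 layers, 12 heads, hidden size 768, 77 max positions); because train_text_encoder was true in args.json, the weights in pytorch_model.bin differ from the base checkpoint even though the config does not. A minimal sketch of loading it on its own, with "model_dir" as a placeholder path:

    from transformers import CLIPTextModel

    # Loads text_encoder/config.json plus pytorch_model.bin from the sub-folder.
    text_encoder = CLIPTextModel.from_pretrained("model_dir/text_encoder")
    print(text_encoder.config.hidden_size)              # 768
    print(text_encoder.config.max_position_embeddings)  # 77
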
text_encoder/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a6edb965c48d852c06dffe144c820a54ebc69fbc6950d69265654648a22a42b
+ size 492308087
tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<|endoftext|>",
+   "unk_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": {
+     "__type": "AddedToken",
+     "content": "<|startoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "do_lower_case": true,
+   "eos_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "errors": "replace",
+   "model_max_length": 77,
+   "name_or_path": "/opt/ml/stable-diffusion-v1-5/tokenizer",
+   "pad_token": "<|endoftext|>",
+   "special_tokens_map_file": "./special_tokens_map.json",
+   "tokenizer_class": "CLIPTokenizer",
+   "unk_token": {
+     "__type": "AddedToken",
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
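
The tokenizer is the unmodified lower-casing CLIP BPE tokenizer with <|startoftext|>/<|endoftext|> specials and model_max_length 77, which is why prompts longer than 77 tokens are truncated at inference time. A minimal sketch of loading it and encoding the instance prompt from args.json, with "model_dir" as a placeholder path:

    from transformers import CLIPTokenizer

    # Reads tokenizer_config.json, special_tokens_map.json, vocab.json and merges.txt.
    tokenizer = CLIPTokenizer.from_pretrained("model_dir/tokenizer")
    ids = tokenizer(
        "photo of zwx model",
        padding="max_length",
        max_length=tokenizer.model_max_length,  # 77
        truncation=True,
    ).input_ids
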
tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
unet/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:edce6d1f2df31d7a12d0242225853097e7e0f6f69ce97b8cb26a2e27f3e4318c
+ size 1013039104