jkcarney committed on Dec 19, 2022

Commit

b946c74

•

1 Parent(s): e25a3fb

please commit this time:

Browse files

Files changed (19) hide show

README.md +22 -3
feature_extractor/preprocessor_config.json +28 -0
model_index.json +33 -0
prompts.txt +67 -0
romcom-diffusion-1.0.ckpt +3 -0
romcom-diffusion-1.0.safetensors +3 -0
safety_checker/config.json +181 -0
safety_checker/pytorch_model.bin +3 -0
scheduler/scheduler_config.json +14 -0
text_encoder/config.json +25 -0
text_encoder/pytorch_model.bin +3 -0
tokenizer/merges.txt +0 -0
tokenizer/special_tokens_map.json +24 -0
tokenizer/tokenizer_config.json +34 -0
tokenizer/vocab.json +0 -0
unet/config.json +42 -0
unet/diffusion_pytorch_model.bin +3 -0
vae/config.json +30 -0
vae/diffusion_pytorch_model.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,22 @@
----
-license: creativeml-openrail-m
----

+![samples1](./samples1.png)
+romcom-diffusion is a DreamBooth model fine-tuned on cropped 512x512 stills from various 1990s-era romantic comedy movies. These stills are primarily of human subjects, but there are also samples of wide shots, crowds, and cars. Currently the list of movies used is:
+1. 10 Things I Hate About You (64)
+2. Clueless (45)
+3. American Pie (37)
+In the future I may release a new version with more movie stills from additional movies. My hope is that more samples from more movies would increase facial fidelity and style.
+In the prompt, use activation token: `romcom style`
+Trained using Stable Diffusion 1.5 as a base.
+I also recommend including `movie still` in your prompt; it was the class prompt for regularization images and I found it produces the best results.
+I've also found that the `DPM++ 2S a Karras` sampler gives the best results, but obviously feel free to experiment.
+Sample image generation parameters can be found in `prompts.txt`
+![samples2](./samples2.png)

feature_extractor/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "crop_size": {
+    "height": 224,
+    "width": 224
+  },
+  "do_center_crop": true,
+  "do_convert_rgb": true,
+  "do_normalize": true,
+  "do_rescale": true,
+  "do_resize": true,
+  "feature_extractor_type": "CLIPFeatureExtractor",
+  "image_mean": [
+    0.48145466,
+    0.4578275,
+    0.40821073
+  ],
+  "image_processor_type": "CLIPImageProcessor",
+  "image_std": [
+    0.26862954,
+    0.26130258,
+    0.27577711
+  ],
+  "resample": 3,
+  "rescale_factor": 0.00392156862745098,
+  "size": {
+    "shortest_edge": 224
+  }
+}

model_index.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_class_name": "StableDiffusionPipeline",
+  "_diffusers_version": "0.11.0.dev0",
+  "feature_extractor": [
+    "transformers",
+    "CLIPImageProcessor"
+  ],
+  "requires_safety_checker": true,
+  "safety_checker": [
+    "stable_diffusion",
+    "StableDiffusionSafetyChecker"
+  ],
+  "scheduler": [
+    "diffusers",
+    "PNDMScheduler"
+  ],
+  "text_encoder": [
+    "transformers",
+    "CLIPTextModel"
+  ],
+  "tokenizer": [
+    "transformers",
+    "CLIPTokenizer"
+  ],
+  "unet": [
+    "diffusers",
+    "UNet2DConditionModel"
+  ],
+  "vae": [
+    "diffusers",
+    "AutoencoderKL"
+  ]
+}

prompts.txt ADDED Viewed

	@@ -0,0 +1,67 @@

+In order, left to right, top to bottom:
+-- 1st group of pictures --
+romcom style, girl, movie still, beautiful, wearing a red shirt, rim lit, cinematic lighting
+Negative prompt: doll, plastic, fake
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 6, Seed: 233807765, Size: 512x512, Model hash: 10a66fa5
+romcom style, group of teenagers, on oil rig, ocean background, movie still
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 6, Seed: 357362878, Size: 512x512, Model hash: 10a66fa5
+(romcom style), motorcycle, chrome, epic, riding down the street, 1990s
+Negative prompt: fake, render, 3d
+Steps: 15, Sampler: DPM++ 2S a Karras, CFG scale: 6, Seed: 2399310647, Size: 512x512, Model hash: 10a66fa5
+romcom style, Emma Watson, beautiful portrait, prom, wearing pink dress, multicolored bokeh, movie still, cinematic lighting, volumetric lighting
+Negative prompt: fake, render, 3d, far shot
+Steps: 30, Sampler: Euler a, CFG scale: 7, Seed: 2157213550, Size: 512x512, Model hash: 10a66fa5
+(romcom style), cute girl, bokeh, winter, Christmas lights, snow, festive
+Negative prompt: fake, render, 3d, far shot, ugly
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 7, Seed: 296356889, Size: 512x512, Model hash: 10a66fa5
+romcom style house in the spring, establishing shot, 1990s, architecture, green grass, flowers, gardening, flower petals, cherry blossom
+Steps: 15, Sampler: DPM++ 2S a Karras, CFG scale: 7, Seed: 3159225673, Size: 512x512, Model hash: 10a66fa5
+romcom style group of teenagers, Star Trek, starship enterprise, on the bridge, control panel, futuristic
+Negative prompt: fake, doll, render, plastic
+Steps: 20, Sampler: DPM++ 2S a Karras, CFG scale: 6, Seed: 36883219, Size: 512x512, Model hash: 10a66fa5
+romcom style portrait of Barack Obama, close up, highly detailed face, wearing sunglasses, cool, 1990s
+Negative prompt: fake, doll, render, plastic, medium shot
+Steps: 40, Sampler: DPM++ 2S a Karras, CFG scale: 6, Seed: 597586508, Size: 512x512, Model hash: 10a66fa5
+-- 2nd group of pictures --
+romcom style portrait of a handsome boy, school dance, prom, wearing suit, dancing, detailed face
+Negative prompt: fake, doll, render, plastic, 3d, outside, field, grass
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 2303673528, Size: 512x512, Model hash: 10a66fa5
+cyberpunk, romcom style, portrait of futuristic people
+Negative prompt: comic
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 742429872, Size: 512x512, Model hash: 10a66fa5
+romcom style
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 3105091988, Size: 512x512, Model hash: 10a66fa5
+romcom style city photo establishing shot, cityscape, sprawling, movie still
+Negative prompt: close up, portrait
+Steps: 30, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 3464931610, Size: 512x512, Model hash: 10a66fa5
+(romcom style) portrait, beautiful, black girl, movie still, outside, rim lit, wearing yellow shirt, happy, smiling, highly detailed face
+Negative prompt: ugly, plastic, doll, fake, render, 3d
+Steps: 45, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 4243226663, Size: 512x512, Model hash: 10a66fa5
+romcom style party, night time, drinking, (house party), group, pool
+Negative prompt: ugly, plastic, doll, fake, render, 3d
+Steps: 25, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 1582002945, Size: 512x512, Model hash: 10a66fa5
+romcom style polar bear in the Artic, happy, smiling, National Geographic
+Steps: 25, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 2482861593, Size: 512x512, Model hash: 10a66fa5
+romcom style picture of Channing Tatum, jock, 1990s, rim lit, movie still
+Negative prompt: fake, doll, plastic, render
+Steps: 25, Sampler: DPM++ 2S a Karras, CFG scale: 7.5, Seed: 2512714771, Size: 512x512, Model hash: 10a66fa5

romcom-diffusion-1.0.ckpt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:fb245708c0c154a2752143e0c34db93c8cc03df6dfc21f71ffba27ad29d12f45
+size 4265343031

romcom-diffusion-1.0.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3d6676c87d7c7d725209f5a3d2a4bb2d384730a3bd04eda5c5d146b837dc7494
+size 4265096997

safety_checker/config.json ADDED Viewed

	@@ -0,0 +1,181 @@

+{
+  "_commit_hash": "63534535d4730d5976c5c647a7f2adaea1102f5b",
+  "_name_or_path": "/home/carnejk1/.cache/huggingface/diffusers/models--runwayml--stable-diffusion-v1-5/snapshots/63534535d4730d5976c5c647a7f2adaea1102f5b/safety_checker",
+  "architectures": [
+    "StableDiffusionSafetyChecker"
+  ],
+  "initializer_factor": 1.0,
+  "logit_scale_init_value": 2.6592,
+  "model_type": "clip",
+  "projection_dim": 768,
+  "text_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 0,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 768,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 3072,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "max_position_embeddings": 77,
+    "min_length": 0,
+    "model_type": "clip_text_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 12,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_hidden_layers": 12,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": 1,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 512,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.25.1",
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "vocab_size": 49408
+  },
+  "text_config_dict": {
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12
+  },
+  "torch_dtype": "float32",
+  "transformers_version": null,
+  "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "dropout": 0.0,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
+    "hidden_act": "quick_gelu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "image_size": 224,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
+    "intermediate_size": 4096,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "layer_norm_eps": 1e-05,
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
+    "model_type": "clip_vision_model",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "patch_size": 14,
+    "prefix": null,
+    "problem_type": null,
+    "projection_dim": 512,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": true,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "transformers_version": "4.25.1",
+    "typical_p": 1.0,
+    "use_bfloat16": false
+  },
+  "vision_config_dict": {
+    "hidden_size": 1024,
+    "intermediate_size": 4096,
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "patch_size": 14
+  }
+}

safety_checker/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
+size 1216064769

scheduler/scheduler_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+  "_class_name": "PNDMScheduler",
+  "_diffusers_version": "0.11.0.dev0",
+  "beta_end": 0.012,
+  "beta_schedule": "scaled_linear",
+  "beta_start": 0.00085,
+  "clip_sample": false,
+  "num_train_timesteps": 1000,
+  "prediction_type": "epsilon",
+  "set_alpha_to_one": false,
+  "skip_prk_steps": true,
+  "steps_offset": 1,
+  "trained_betas": null
+}

text_encoder/config.json ADDED Viewed

	@@ -0,0 +1,25 @@

+{
+  "_name_or_path": "runwayml/stable-diffusion-v1-5",
+  "architectures": [
+    "CLIPTextModel"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "dropout": 0.0,
+  "eos_token_id": 2,
+  "hidden_act": "quick_gelu",
+  "hidden_size": 768,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 77,
+  "model_type": "clip_text_model",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pad_token_id": 1,
+  "projection_dim": 768,
+  "torch_dtype": "float32",
+  "transformers_version": "4.25.1",
+  "vocab_size": 49408
+}

text_encoder/pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:078a1896dfa687cf74a44224824190bc925074c91a242c1836105f7e3568ab22
+size 492309793

tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "bos_token": {
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "<|endoftext|>",
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "add_prefix_space": false,
+  "bos_token": {
+    "__type": "AddedToken",
+    "content": "<|startoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "do_lower_case": true,
+  "eos_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "errors": "replace",
+  "model_max_length": 77,
+  "name_or_path": "/home/carnejk1/.cache/huggingface/diffusers/models--runwayml--stable-diffusion-v1-5/snapshots/63534535d4730d5976c5c647a7f2adaea1102f5b/tokenizer",
+  "pad_token": "<|endoftext|>",
+  "special_tokens_map_file": "./special_tokens_map.json",
+  "tokenizer_class": "CLIPTokenizer",
+  "unk_token": {
+    "__type": "AddedToken",
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

unet/config.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "_class_name": "UNet2DConditionModel",
+  "_diffusers_version": "0.11.0.dev0",
+  "_name_or_path": "runwayml/stable-diffusion-v1-5",
+  "act_fn": "silu",
+  "attention_head_dim": 8,
+  "block_out_channels": [
+    320,
+    640,
+    1280,
+    1280
+  ],
+  "center_input_sample": false,
+  "cross_attention_dim": 768,
+  "down_block_types": [
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "CrossAttnDownBlock2D",
+    "DownBlock2D"
+  ],
+  "downsample_padding": 1,
+  "dual_cross_attention": false,
+  "flip_sin_to_cos": true,
+  "freq_shift": 0,
+  "in_channels": 4,
+  "layers_per_block": 2,
+  "mid_block_scale_factor": 1,
+  "norm_eps": 1e-05,
+  "norm_num_groups": 32,
+  "num_class_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "sample_size": 64,
+  "up_block_types": [
+    "UpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D",
+    "CrossAttnUpBlock2D"
+  ],
+  "upcast_attention": false,
+  "use_linear_projection": false
+}

unet/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:20a4cf228a89e71d000dd4a9f1af86457763b3606e36381694649c83b93d99b8
+size 3438375973

vae/config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_class_name": "AutoencoderKL",
+  "_diffusers_version": "0.11.0.dev0",
+  "_name_or_path": "/home/carnejk1/.cache/huggingface/diffusers/models--runwayml--stable-diffusion-v1-5/snapshots/63534535d4730d5976c5c647a7f2adaea1102f5b/vae",
+  "act_fn": "silu",
+  "block_out_channels": [
+    128,
+    256,
+    512,
+    512
+  ],
+  "down_block_types": [
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D",
+    "DownEncoderBlock2D"
+  ],
+  "in_channels": 3,
+  "latent_channels": 4,
+  "layers_per_block": 2,
+  "norm_num_groups": 32,
+  "out_channels": 3,
+  "sample_size": 512,
+  "up_block_types": [
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D",
+    "UpDecoderBlock2D"
+  ]
+}

vae/diffusion_pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af27ea858349760ebe3311953e0bfe8d6fd257dc9537ae0b2b938c262132a2c6
+size 334711857