khawir committed on
Commit
23549a1
‒
1 Parent(s): b89b9f5

sdxl -> sd1.5

Browse files
app/api/generate.py CHANGED
@@ -142,13 +142,15 @@ def image_variations(model: Request, request: ImageVariations, db: Annotated[Ses
142
 
143
  prompt = create_prompt(request.prompt, medium=request.medium, style=request.style, artist=request.artist, website=request.website, resolution=request.resolution, additional_details=request.additional_details, color=request.color, lightning=request.lightning)
144
  image = decode_image(request.image)
145
- image.resize((512, 512))
146
 
147
  if prompt:
148
  crud.create_prompt(db=db, user_id=current_user.user_id, prompt=prompt)
 
 
149
 
150
- images = model.state.iv_model.generate(pil_image=image, num_samples=request.num_samples, num_inference_steps=request.num_inference_steps,
151
- seed=request.seed, prompt=prompt, scale=request.scale, negative_prompt=request.negative_prompt)
152
 
153
  images = [encode_image(image) for image in images]
154
 
 
142
 
143
  prompt = create_prompt(request.prompt, medium=request.medium, style=request.style, artist=request.artist, website=request.website, resolution=request.resolution, additional_details=request.additional_details, color=request.color, lightning=request.lightning)
144
  image = decode_image(request.image)
145
+ image.resize((256, 256))
146
 
147
  if prompt:
148
  crud.create_prompt(db=db, user_id=current_user.user_id, prompt=prompt)
149
+ else:
150
+ request.scale = 1.0
151
 
152
+ images = model.state.iv_model.generate(pil_image=image, num_samples=request.num_samples, num_inference_steps=request.num_inference_steps, seed=request.seed,
153
+ prompt=prompt, negative_prompt=request.negative_prompt, scale=request.scale, guidance_scale=request.guidance_scale)
154
 
155
  images = [encode_image(image) for image in images]
156
 
app/core/schemas.py CHANGED
@@ -43,14 +43,14 @@ class User(UserBase):
43
  class Generate(BaseModel):
44
  seed: int | None = None
45
  negative_prompt : str = "ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face"
46
- medium: str | None = None
47
- style: str | None = None
48
- artist: str | None = None
49
- website: str | None = None
50
- resolution: str | None = None
51
- additional_details: str | None = None
52
- color: str | None = None
53
- lightning: str | None = None
54
 
55
 
56
  class TextImage(Generate):
@@ -62,7 +62,7 @@ class TextImage(Generate):
62
  class ImageImage(Generate):
63
  prompt: str
64
  image: str
65
- num_inference_steps: int = 10
66
  guidance_scale: float = 7.5
67
  image_guidance_scale: float = 1.5
68
 
@@ -73,7 +73,8 @@ class BackgroundRemoval(BaseModel):
73
 
74
  class ImageVariations(Generate):
75
  image: str
76
- num_samples: int = 2
77
  num_inference_steps: int = 30
78
- prompt: str | None = None
79
  scale: float = 0.5
 
 
43
  class Generate(BaseModel):
44
  seed: int | None = None
45
  negative_prompt : str = "ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face"
46
+ medium: str | None = ""
47
+ style: str | None = ""
48
+ artist: str | None = ""
49
+ website: str | None = ""
50
+ resolution: str | None = ""
51
+ additional_details: str | None = ""
52
+ color: str | None = ""
53
+ lightning: str | None = ""
54
 
55
 
56
  class TextImage(Generate):
 
62
  class ImageImage(Generate):
63
  prompt: str
64
  image: str
65
+ num_inference_steps: int = 30
66
  guidance_scale: float = 7.5
67
  image_guidance_scale: float = 1.5
68
 
 
73
 
74
  class ImageVariations(Generate):
75
  image: str
76
+ num_samples: int = 4
77
  num_inference_steps: int = 30
78
+ prompt: str | None = ""
79
  scale: float = 0.5
80
+ guidance_scale: float = 7.5
app/main.py CHANGED
@@ -6,9 +6,9 @@ from fastapi.security import OAuth2PasswordRequestForm
6
  from fastapi import APIRouter, FastAPI, HTTPException, Depends
7
 
8
  import torch
9
- from ip_adapter import IPAdapterXL
10
  from transformers import AutoModelForImageSegmentation
11
- from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler, StableDiffusionXLPipeline
12
 
13
  from app.api import user
14
  from app.api import prompt
@@ -20,9 +20,10 @@ from app.security import authenticate_user, create_access_token, timedelta
20
 
21
  @asynccontextmanager
22
  async def lifespan(app: FastAPI):
23
- base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
24
- image_encoder_path = "sdxl_models/image_encoder"
25
- ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
 
26
 
27
  ti_pipe = AutoPipelineForText2Image.from_pretrained(
28
  'lykon/dreamshaper-xl-v2-turbo', torch_dtype=torch.float16, variant="fp16")
@@ -40,19 +41,33 @@ async def lifespan(app: FastAPI):
40
  "briaai/RMBG-1.4", trust_remote_code=True)
41
  br_model.to(DEVICE)
42
 
43
- sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
 
 
 
 
 
 
 
 
 
 
44
  base_model_path,
45
  torch_dtype=torch.float16,
46
- add_watermarker=False,
 
 
 
47
  )
48
- iv_model = IPAdapterXL(sdxl_pipe, image_encoder_path, ip_ckpt, DEVICE)
49
 
50
  yield {'ti_pipe': ti_pipe, 'ii_pipe': ii_pipe, 'br_model': br_model, 'iv_model': iv_model}
51
 
52
  del ti_pipe
53
  del ii_pipe
54
  del br_model
55
- del sdxl_pipe
 
56
  del iv_model
57
 
58
 
 
6
  from fastapi import APIRouter, FastAPI, HTTPException, Depends
7
 
8
  import torch
9
+ from ip_adapter import IPAdapter
10
  from transformers import AutoModelForImageSegmentation
11
+ from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler, StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
12
 
13
  from app.api import user
14
  from app.api import prompt
 
20
 
21
  @asynccontextmanager
22
  async def lifespan(app: FastAPI):
23
+ base_model_path = "runwayml/stable-diffusion-v1-5"
24
+ vae_model_path = "stabilityai/sd-vae-ft-mse"
25
+ image_encoder_path = "models/image_encoder/"
26
+ ip_ckpt = "models/ip-adapter_sd15.bin"
27
 
28
  ti_pipe = AutoPipelineForText2Image.from_pretrained(
29
  'lykon/dreamshaper-xl-v2-turbo', torch_dtype=torch.float16, variant="fp16")
 
41
  "briaai/RMBG-1.4", trust_remote_code=True)
42
  br_model.to(DEVICE)
43
 
44
+ noise_scheduler = DDIMScheduler(
45
+ num_train_timesteps=1000,
46
+ beta_start=0.00085,
47
+ beta_end=0.012,
48
+ beta_schedule="scaled_linear",
49
+ clip_sample=False,
50
+ set_alpha_to_one=False,
51
+ steps_offset=1,
52
+ )
53
+ vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
54
+ pipe = StableDiffusionPipeline.from_pretrained(
55
  base_model_path,
56
  torch_dtype=torch.float16,
57
+ scheduler=noise_scheduler,
58
+ vae=vae,
59
+ feature_extractor=None,
60
+ safety_checker=None
61
  )
62
+ iv_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, DEVICE)
63
 
64
  yield {'ti_pipe': ti_pipe, 'ii_pipe': ii_pipe, 'br_model': br_model, 'iv_model': iv_model}
65
 
66
  del ti_pipe
67
  del ii_pipe
68
  del br_model
69
+ del vae
70
+ del pipe
71
  del iv_model
72
 
73
 
models/image_encoder/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./image_encoder",
3
+ "architectures": [
4
+ "CLIPVisionModelWithProjection"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "dropout": 0.0,
8
+ "hidden_act": "gelu",
9
+ "hidden_size": 1280,
10
+ "image_size": 224,
11
+ "initializer_factor": 1.0,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 5120,
14
+ "layer_norm_eps": 1e-05,
15
+ "model_type": "clip_vision_model",
16
+ "num_attention_heads": 16,
17
+ "num_channels": 3,
18
+ "num_hidden_layers": 32,
19
+ "patch_size": 14,
20
+ "projection_dim": 1024,
21
+ "torch_dtype": "float16",
22
+ "transformers_version": "4.28.0.dev0"
23
+ }
{sdxl_models → models}/image_encoder/model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:657723e09f46a7c3957df651601029f66b1748afb12b419816330f16ed45d64d
3
- size 3689912664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ca9667da1ca9e0b0f75e46bb030f7e011f44f86cbfb8d5a36590fcd7507b030
3
+ size 2528373448
{sdxl_models → models}/image_encoder/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2999562fbc02f9dc0d9c0acb7cf0970ec3a9b2a578d7d05afe82191d606d2d80
3
- size 3690112753
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d3ec1e66737f77a4f3bc2df3c52eacefc69ce7825e2784183b1d4e9877d9193
3
+ size 2528481905
sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:677ad8860204f7d0bfba12d29e6c31ded9beefdf3e4bbd102518357d31a292c1
3
- size 847517512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47ec4644114f3bfe25b2fc830af6b0dd8dcad9a0371a238b9cc919465c60d1dc
3
+ size 43592551
sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f5062b8400c94b7159665b21ba5c62acdcd7682262743d7f2aefedef00e6581
3
- size 847517512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4a17fb643bf876235a45a0e87a49da2855be6584b28ca04c62a97ab5ff1c6f3
3
+ size 43592352
sdxl_models/ip-adapter_sdxl.bin → models/ip-adapter-plus-face_sd15.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7525f2731e9e86d1368e0b68467615d55dda459691965bdd7d37fa3d7fd84c12
3
- size 702585097
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa09c22b49ef63474dcde12f26a35b8b8e9b755b716a553aa29e8dbe8d21e0c9
3
+ size 98183381
sdxl_models/ip-adapter_sdxl.safetensors → models/ip-adapter-plus-face_sd15.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba1002529e783604c5f326d49f0122025392d1d20ac8d573b3eeb3e6dea4ebb6
3
- size 702585376
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c9edc21af6f737dc1d6e0e734190e976cfacf802d6b024b77aa3be922f7569b
3
+ size 98183288
models/ip-adapter-plus_sd15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cb77fc0613369b66be1531cc452b823a4af7d87ee56956000a69fc39e3817ba
3
+ size 158033179
models/ip-adapter-plus_sd15.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1c250be40455cc61a43da1201ec3f1edaea71214865fb47f57927e06cbe4996
3
+ size 98183288
models/ip-adapter_sd15.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68e1df30d760f280e578c302f1e73b37ea08654eff16a31153588047affe0058
3
+ size 44642825
models/ip-adapter_sd15.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:289b45f16d043d0bf542e45831f971dcdaabe18b656f11e86d9dfba7e9ee3369
3
+ size 44642768
models/ip-adapter_sd15_light.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f71bfbdd937f2edad0c894ec72d12db02b3be0316f62988e5fc669ca4da6b7e1
3
+ size 44642819
models/ip-adapter_sd15_light.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0747d08db670535bfa286452a77d93cebad5c677b46d038543f9f2de8690bb26
3
+ size 44642768
models/ip-adapter_sd15_light_v11.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:350b63a57847c163e2e984b01090f85ffe60eaae20f32b2b2c9e1ccc7ddd972b
3
+ size 44642977
models/ip-adapter_sd15_vit-G.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1398e9ae37cb65553a8525871830a283914dafd9ec3039716344a826399ec474
3
+ size 46215689
models/ip-adapter_sd15_vit-G.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a26f736af07bb341a83dfea23713531d0575760e8ed947c68cb31a4c62d9c90b
3
+ size 46215640
sdxl_models/image_encoder/config.json DELETED
@@ -1,81 +0,0 @@
1
- {
2
- "architectures": [
3
- "CLIPVisionModelWithProjection"
4
- ],
5
- "_name_or_path": "",
6
- "add_cross_attention": false,
7
- "architectures": null,
8
- "attention_dropout": 0.0,
9
- "bad_words_ids": null,
10
- "begin_suppress_tokens": null,
11
- "bos_token_id": null,
12
- "chunk_size_feed_forward": 0,
13
- "cross_attention_hidden_size": null,
14
- "decoder_start_token_id": null,
15
- "diversity_penalty": 0.0,
16
- "do_sample": false,
17
- "dropout": 0.0,
18
- "early_stopping": false,
19
- "encoder_no_repeat_ngram_size": 0,
20
- "eos_token_id": null,
21
- "exponential_decay_length_penalty": null,
22
- "finetuning_task": null,
23
- "forced_bos_token_id": null,
24
- "forced_eos_token_id": null,
25
- "hidden_act": "gelu",
26
- "hidden_size": 1664,
27
- "id2label": {
28
- "0": "LABEL_0",
29
- "1": "LABEL_1"
30
- },
31
- "image_size": 224,
32
- "initializer_factor": 1.0,
33
- "initializer_range": 0.02,
34
- "intermediate_size": 8192,
35
- "is_decoder": false,
36
- "is_encoder_decoder": false,
37
- "label2id": {
38
- "LABEL_0": 0,
39
- "LABEL_1": 1
40
- },
41
- "layer_norm_eps": 1e-05,
42
- "length_penalty": 1.0,
43
- "max_length": 20,
44
- "min_length": 0,
45
- "model_type": "clip_vision_model",
46
- "no_repeat_ngram_size": 0,
47
- "num_attention_heads": 16,
48
- "num_beam_groups": 1,
49
- "num_beams": 1,
50
- "num_channels": 3,
51
- "num_hidden_layers": 48,
52
- "num_return_sequences": 1,
53
- "output_attentions": false,
54
- "output_hidden_states": false,
55
- "output_scores": false,
56
- "pad_token_id": null,
57
- "patch_size": 14,
58
- "prefix": null,
59
- "problem_type": null,
60
- "pruned_heads": {},
61
- "remove_invalid_values": false,
62
- "repetition_penalty": 1.0,
63
- "return_dict": true,
64
- "return_dict_in_generate": false,
65
- "sep_token_id": null,
66
- "suppress_tokens": null,
67
- "task_specific_params": null,
68
- "temperature": 1.0,
69
- "tf_legacy_loss": false,
70
- "tie_encoder_decoder": false,
71
- "tie_word_embeddings": true,
72
- "tokenizer_class": null,
73
- "top_k": 50,
74
- "top_p": 1.0,
75
- "torch_dtype": null,
76
- "torchscript": false,
77
- "transformers_version": "4.24.0",
78
- "typical_p": 1.0,
79
- "use_bfloat16": false,
80
- "projection_dim": 1280
81
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
sdxl_models/ip-adapter-plus-face_sdxl_vit-h.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:50e886d82940b3c5873d80c2b06d8a4b0d0fccec70bc44fd53f16ac3cfd7fc36
3
- size 1013454761
 
 
 
 
sdxl_models/ip-adapter-plus_sdxl_vit-h.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec70edb7cc8e769c9388d94eeaea3e4526352c9fae793a608782d1d8951fde90
3
- size 1013454427
 
 
 
 
sdxl_models/ip-adapter_sdxl_vit-h.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b382e2501d0ab3fe2e09312e561a59cd3f21262aff25373700e0cd62c635929
3
- size 698390793
 
 
 
 
sdxl_models/ip-adapter_sdxl_vit-h.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebf05d918348aec7abb02a5e9ecef77e0aaea6914a5c4ea13f50d45eb1681831
3
- size 698391064