Spaces:

khawir
/

cg-gen-ai

Paused

App Files Community

khawir committed on Apr 22

Commit

23549a1

•

1 Parent(s): b89b9f5

sdxl -> sd1.5

Browse files

Files changed (24) hide show

app/api/generate.py +5 -3
app/core/schemas.py +12 -11
app/main.py +24 -9
models/image_encoder/config.json +23 -0
{sdxl_models → models}/image_encoder/model.safetensors +2 -2
{sdxl_models → models}/image_encoder/pytorch_model.bin +2 -2
sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.bin +2 -2
sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.safetensors +2 -2
sdxl_models/ip-adapter_sdxl.bin → models/ip-adapter-plus-face_sd15.bin +2 -2
sdxl_models/ip-adapter_sdxl.safetensors → models/ip-adapter-plus-face_sd15.safetensors +2 -2
models/ip-adapter-plus_sd15.bin +3 -0
models/ip-adapter-plus_sd15.safetensors +3 -0
models/ip-adapter_sd15.bin +3 -0
models/ip-adapter_sd15.safetensors +3 -0
models/ip-adapter_sd15_light.bin +3 -0
models/ip-adapter_sd15_light.safetensors +3 -0
models/ip-adapter_sd15_light_v11.bin +3 -0
models/ip-adapter_sd15_vit-G.bin +3 -0
models/ip-adapter_sd15_vit-G.safetensors +3 -0
sdxl_models/image_encoder/config.json +0 -81
sdxl_models/ip-adapter-plus-face_sdxl_vit-h.bin +0 -3
sdxl_models/ip-adapter-plus_sdxl_vit-h.bin +0 -3
sdxl_models/ip-adapter_sdxl_vit-h.bin +0 -3
sdxl_models/ip-adapter_sdxl_vit-h.safetensors +0 -3

app/api/generate.py CHANGED Viewed

@@ -142,13 +142,15 @@ def image_variations(model: Request, request: ImageVariations, db: Annotated[Ses
     prompt = create_prompt(request.prompt, medium=request.medium, style=request.style, artist=request.artist, website=request.website, resolution=request.resolution, additional_details=request.additional_details, color=request.color, lightning=request.lightning)
     image = decode_image(request.image)
-    image.resize((512, 512))
     if prompt:
         crud.create_prompt(db=db, user_id=current_user.user_id, prompt=prompt)
-    images = model.state.iv_model.generate(pil_image=image, num_samples=request.num_samples, num_inference_steps=request.num_inference_steps,
-                                           seed=request.seed, prompt=prompt, scale=request.scale, negative_prompt=request.negative_prompt)
     images = [encode_image(image) for image in images]

     prompt = create_prompt(request.prompt, medium=request.medium, style=request.style, artist=request.artist, website=request.website, resolution=request.resolution, additional_details=request.additional_details, color=request.color, lightning=request.lightning)
     image = decode_image(request.image)
+    image.resize((256, 256))
     if prompt:
         crud.create_prompt(db=db, user_id=current_user.user_id, prompt=prompt)
+    else:
+        request.scale = 1.0
+    images = model.state.iv_model.generate(pil_image=image, num_samples=request.num_samples, num_inference_steps=request.num_inference_steps, seed=request.seed,
+                                           prompt=prompt, negative_prompt=request.negative_prompt, scale=request.scale, guidance_scale=request.guidance_scale)
     images = [encode_image(image) for image in images]

app/core/schemas.py CHANGED Viewed

@@ -43,14 +43,14 @@ class User(UserBase):
 class Generate(BaseModel):
     seed: int | None = None
     negative_prompt : str = "ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face"
-    medium: str | None = None
-    style: str | None = None
-    artist: str | None = None
-    website: str | None = None
-    resolution: str | None = None
-    additional_details: str | None = None
-    color: str | None = None
-    lightning: str | None = None
 class TextImage(Generate):
@@ -62,7 +62,7 @@ class TextImage(Generate):
 class ImageImage(Generate):
     prompt: str
     image: str
-    num_inference_steps: int = 10
     guidance_scale: float = 7.5
     image_guidance_scale: float = 1.5
@@ -73,7 +73,8 @@ class BackgroundRemoval(BaseModel):
 class ImageVariations(Generate):
     image: str
-    num_samples: int = 2
     num_inference_steps: int = 30
-    prompt: str | None = None
     scale: float = 0.5

 class Generate(BaseModel):
     seed: int | None = None
     negative_prompt : str = "ugly, tiling, poorly drawn hands, poorly drawn feet, poorly drawn face, out of frame, extra limbs, disfigured, deformed, body out of frame, bad anatomy, watermark, signature, cut off, low contrast, underexposed, overexposed, bad art, beginner, amateur, distorted face"
+    medium: str | None = ""
+    style: str | None = ""
+    artist: str | None = ""
+    website: str | None = ""
+    resolution: str | None = ""
+    additional_details: str | None = ""
+    color: str | None = ""
+    lightning: str | None = ""
 class TextImage(Generate):
 class ImageImage(Generate):
     prompt: str
     image: str
+    num_inference_steps: int = 30
     guidance_scale: float = 7.5
     image_guidance_scale: float = 1.5
 class ImageVariations(Generate):
     image: str
+    num_samples: int = 4
     num_inference_steps: int = 30
+    prompt: str | None = ""
     scale: float = 0.5
+    guidance_scale: float = 7.5

app/main.py CHANGED Viewed

@@ -6,9 +6,9 @@ from fastapi.security import OAuth2PasswordRequestForm
 from fastapi import APIRouter, FastAPI, HTTPException, Depends
 import torch
-from ip_adapter import IPAdapterXL
 from transformers import AutoModelForImageSegmentation
-from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler, StableDiffusionXLPipeline
 from app.api import user
 from app.api import prompt
@@ -20,9 +20,10 @@ from app.security import authenticate_user, create_access_token, timedelta
 @asynccontextmanager
 async def lifespan(app: FastAPI):
-    base_model_path = "stabilityai/stable-diffusion-xl-base-1.0"
-    image_encoder_path = "sdxl_models/image_encoder"
-    ip_ckpt = "sdxl_models/ip-adapter_sdxl.bin"
     ti_pipe = AutoPipelineForText2Image.from_pretrained(
         'lykon/dreamshaper-xl-v2-turbo', torch_dtype=torch.float16, variant="fp16")
@@ -40,19 +41,33 @@ async def lifespan(app: FastAPI):
         "briaai/RMBG-1.4", trust_remote_code=True)
     br_model.to(DEVICE)
-    sdxl_pipe = StableDiffusionXLPipeline.from_pretrained(
         base_model_path,
         torch_dtype=torch.float16,
-        add_watermarker=False,
     )
-    iv_model = IPAdapterXL(sdxl_pipe, image_encoder_path, ip_ckpt, DEVICE)
     yield {'ti_pipe': ti_pipe, 'ii_pipe': ii_pipe, 'br_model': br_model, 'iv_model': iv_model}
     del ti_pipe
     del ii_pipe
     del br_model
-    del sdxl_pipe
     del iv_model

 from fastapi import APIRouter, FastAPI, HTTPException, Depends
 import torch
+from ip_adapter import IPAdapter
 from transformers import AutoModelForImageSegmentation
+from diffusers import AutoPipelineForText2Image, DPMSolverMultistepScheduler, StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler, StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
 from app.api import user
 from app.api import prompt
 @asynccontextmanager
 async def lifespan(app: FastAPI):
+    base_model_path = "runwayml/stable-diffusion-v1-5"
+    vae_model_path = "stabilityai/sd-vae-ft-mse"
+    image_encoder_path = "models/image_encoder/"
+    ip_ckpt = "models/ip-adapter_sd15.bin"
     ti_pipe = AutoPipelineForText2Image.from_pretrained(
         'lykon/dreamshaper-xl-v2-turbo', torch_dtype=torch.float16, variant="fp16")
         "briaai/RMBG-1.4", trust_remote_code=True)
     br_model.to(DEVICE)
+    noise_scheduler = DDIMScheduler(
+        num_train_timesteps=1000,
+        beta_start=0.00085,
+        beta_end=0.012,
+        beta_schedule="scaled_linear",
+        clip_sample=False,
+        set_alpha_to_one=False,
+        steps_offset=1,
+    )
+    vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
+    pipe = StableDiffusionPipeline.from_pretrained(
         base_model_path,
         torch_dtype=torch.float16,
+        scheduler=noise_scheduler,
+        vae=vae,
+        feature_extractor=None,
+        safety_checker=None
     )
+    iv_model = IPAdapter(pipe, image_encoder_path, ip_ckpt, DEVICE)
     yield {'ti_pipe': ti_pipe, 'ii_pipe': ii_pipe, 'br_model': br_model, 'iv_model': iv_model}
     del ti_pipe
     del ii_pipe
     del br_model
+    del vae
+    del pipe
     del iv_model

models/image_encoder/config.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "_name_or_path": "./image_encoder",
+  "architectures": [
+    "CLIPVisionModelWithProjection"
+  ],
+  "attention_dropout": 0.0,
+  "dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_size": 1280,
+  "image_size": 224,
+  "initializer_factor": 1.0,
+  "initializer_range": 0.02,
+  "intermediate_size": 5120,
+  "layer_norm_eps": 1e-05,
+  "model_type": "clip_vision_model",
+  "num_attention_heads": 16,
+  "num_channels": 3,
+  "num_hidden_layers": 32,
+  "patch_size": 14,
+  "projection_dim": 1024,
+  "torch_dtype": "float16",
+  "transformers_version": "4.28.0.dev0"
+}

{sdxl_models → models}/image_encoder/model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:657723e09f46a7c3957df651601029f66b1748afb12b419816330f16ed45d64d
-size 3689912664

 version https://git-lfs.github.com/spec/v1
+oid sha256:6ca9667da1ca9e0b0f75e46bb030f7e011f44f86cbfb8d5a36590fcd7507b030
+size 2528373448

{sdxl_models → models}/image_encoder/pytorch_model.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2999562fbc02f9dc0d9c0acb7cf0970ec3a9b2a578d7d05afe82191d606d2d80
-size 3690112753

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d3ec1e66737f77a4f3bc2df3c52eacefc69ce7825e2784183b1d4e9877d9193
+size 2528481905

sdxl_models/ip-adapter-plus-face_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:677ad8860204f7d0bfba12d29e6c31ded9beefdf3e4bbd102518357d31a292c1
-size 847517512

 version https://git-lfs.github.com/spec/v1
+oid sha256:47ec4644114f3bfe25b2fc830af6b0dd8dcad9a0371a238b9cc919465c60d1dc
+size 43592551

sdxl_models/ip-adapter-plus_sdxl_vit-h.safetensors → models/ip-adapter-full-face_sd15.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f5062b8400c94b7159665b21ba5c62acdcd7682262743d7f2aefedef00e6581
-size 847517512

 version https://git-lfs.github.com/spec/v1
+oid sha256:f4a17fb643bf876235a45a0e87a49da2855be6584b28ca04c62a97ab5ff1c6f3
+size 43592352

sdxl_models/ip-adapter_sdxl.bin → models/ip-adapter-plus-face_sd15.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7525f2731e9e86d1368e0b68467615d55dda459691965bdd7d37fa3d7fd84c12
-size 702585097

 version https://git-lfs.github.com/spec/v1
+oid sha256:aa09c22b49ef63474dcde12f26a35b8b8e9b755b716a553aa29e8dbe8d21e0c9
+size 98183381

sdxl_models/ip-adapter_sdxl.safetensors → models/ip-adapter-plus-face_sd15.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba1002529e783604c5f326d49f0122025392d1d20ac8d573b3eeb3e6dea4ebb6
-size 702585376

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c9edc21af6f737dc1d6e0e734190e976cfacf802d6b024b77aa3be922f7569b
+size 98183288

models/ip-adapter-plus_sd15.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1cb77fc0613369b66be1531cc452b823a4af7d87ee56956000a69fc39e3817ba
+size 158033179

models/ip-adapter-plus_sd15.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a1c250be40455cc61a43da1201ec3f1edaea71214865fb47f57927e06cbe4996
+size 98183288

models/ip-adapter_sd15.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:68e1df30d760f280e578c302f1e73b37ea08654eff16a31153588047affe0058
+size 44642825

models/ip-adapter_sd15.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:289b45f16d043d0bf542e45831f971dcdaabe18b656f11e86d9dfba7e9ee3369
+size 44642768

models/ip-adapter_sd15_light.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f71bfbdd937f2edad0c894ec72d12db02b3be0316f62988e5fc669ca4da6b7e1
+size 44642819

models/ip-adapter_sd15_light.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0747d08db670535bfa286452a77d93cebad5c677b46d038543f9f2de8690bb26
+size 44642768

models/ip-adapter_sd15_light_v11.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:350b63a57847c163e2e984b01090f85ffe60eaae20f32b2b2c9e1ccc7ddd972b
+size 44642977

models/ip-adapter_sd15_vit-G.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1398e9ae37cb65553a8525871830a283914dafd9ec3039716344a826399ec474
+size 46215689

models/ip-adapter_sd15_vit-G.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a26f736af07bb341a83dfea23713531d0575760e8ed947c68cb31a4c62d9c90b
+size 46215640

sdxl_models/image_encoder/config.json DELETED Viewed

@@ -1,81 +0,0 @@
-{
-  "architectures": [
-    "CLIPVisionModelWithProjection"
-  ],
-  "_name_or_path": "",
-  "add_cross_attention": false,
-  "architectures": null,
-  "attention_dropout": 0.0,
-  "bad_words_ids": null,
-  "begin_suppress_tokens": null,
-  "bos_token_id": null,
-  "chunk_size_feed_forward": 0,
-  "cross_attention_hidden_size": null,
-  "decoder_start_token_id": null,
-  "diversity_penalty": 0.0,
-  "do_sample": false,
-  "dropout": 0.0,
-  "early_stopping": false,
-  "encoder_no_repeat_ngram_size": 0,
-  "eos_token_id": null,
-  "exponential_decay_length_penalty": null,
-  "finetuning_task": null,
-  "forced_bos_token_id": null,
-  "forced_eos_token_id": null,
-  "hidden_act": "gelu",
-  "hidden_size": 1664,
-  "id2label": {
-    "0": "LABEL_0",
-    "1": "LABEL_1"
-      },
-  "image_size": 224,
-  "initializer_factor": 1.0,
-  "initializer_range": 0.02,
-  "intermediate_size": 8192,
-  "is_decoder": false,
-  "is_encoder_decoder": false,
-  "label2id": {
-    "LABEL_0": 0,
-    "LABEL_1": 1
-      },
-  "layer_norm_eps": 1e-05,
-  "length_penalty": 1.0,
-  "max_length": 20,
-  "min_length": 0,
-  "model_type": "clip_vision_model",
-  "no_repeat_ngram_size": 0,
-  "num_attention_heads": 16,
-  "num_beam_groups": 1,
-  "num_beams": 1,
-  "num_channels": 3,
-  "num_hidden_layers": 48,
-  "num_return_sequences": 1,
-  "output_attentions": false,
-  "output_hidden_states": false,
-  "output_scores": false,
-  "pad_token_id": null,
-  "patch_size": 14,
-  "prefix": null,
-  "problem_type": null,
-  "pruned_heads": {},
-  "remove_invalid_values": false,
-  "repetition_penalty": 1.0,
-  "return_dict": true,
-  "return_dict_in_generate": false,
-  "sep_token_id": null,
-  "suppress_tokens": null,
-  "task_specific_params": null,
-  "temperature": 1.0,
-  "tf_legacy_loss": false,
-  "tie_encoder_decoder": false,
-  "tie_word_embeddings": true,
-  "tokenizer_class": null,
-  "top_k": 50,
-  "top_p": 1.0,
-  "torch_dtype": null,
-  "torchscript": false,
-  "transformers_version": "4.24.0",
-  "typical_p": 1.0,
-  "use_bfloat16": false,
-  "projection_dim": 1280
-}

sdxl_models/ip-adapter-plus-face_sdxl_vit-h.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:50e886d82940b3c5873d80c2b06d8a4b0d0fccec70bc44fd53f16ac3cfd7fc36
-size 1013454761

sdxl_models/ip-adapter-plus_sdxl_vit-h.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ec70edb7cc8e769c9388d94eeaea3e4526352c9fae793a608782d1d8951fde90
-size 1013454427

sdxl_models/ip-adapter_sdxl_vit-h.bin DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:6b382e2501d0ab3fe2e09312e561a59cd3f21262aff25373700e0cd62c635929
-size 698390793

sdxl_models/ip-adapter_sdxl_vit-h.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:ebf05d918348aec7abb02a5e9ecef77e0aaea6914a5c4ea13f50d45eb1681831
-size 698391064