add IP-Adapter-FaceID-Portrait
Browse files
README.md
CHANGED
@@ -56,9 +56,15 @@ IP-Adapter-FaceID-SDXL: An experimental SDXL version of IP-Adapter-FaceID
|
|
56 |
![results](./sdxl_faceid.jpg)
|
57 |
</div>
|
58 |
|
59 |
-
**Update 2024/01/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
IP-Adapter-FaceID-PlusV2-SDXL: An experimental SDXL version of IP-Adapter-FaceID-PlusV2
|
62 |
|
63 |
## Usage
|
64 |
|
@@ -316,6 +322,75 @@ images = ip_model.generate(
|
|
316 |
|
317 |
```
|
318 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
319 |
|
320 |
## Limitations and Bias
|
321 |
- The models do not achieve perfect photorealism and ID consistency.
|
|
|
56 |
![results](./sdxl_faceid.jpg)
|
57 |
</div>
|
58 |
|
59 |
+
**Update 2024/01/19**:
|
60 |
+
|
61 |
+
IP-Adapter-FaceID-Portrait: the same as IP-Adapter-FaceID, but for portrait generation (no LoRA! no ControlNet!). Specifically, it accepts multiple facial images to enhance similarity (the default is 5).
|
62 |
+
|
63 |
+
<div align="center">
|
64 |
+
|
65 |
+
![results](./faceid_portrait_sd15.jpg)
|
66 |
+
</div>
|
67 |
|
|
|
68 |
|
69 |
## Usage
|
70 |
|
|
|
322 |
|
323 |
```
|
324 |
|
325 |
+
### IP-Adapter-FaceID-Portrait
|
326 |
+
|
327 |
+
```python
|
328 |
+
|
329 |
+
import cv2
|
330 |
+
from insightface.app import FaceAnalysis
|
331 |
+
import torch
|
332 |
+
|
333 |
+
app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
|
334 |
+
app.prepare(ctx_id=0, det_size=(640, 640))
|
335 |
+
|
336 |
+
|
337 |
+
images = ["1.jpg", "2.jpg", "3.jpg", "4.jpg", "5.jpg"]
|
338 |
+
|
339 |
+
faceid_embeds = []
|
340 |
+
for image in images:
|
341 |
+
image = cv2.imread(image)
|
342 |
+
faces = app.get(image)
|
343 |
+
faceid_embeds.append(torch.from_numpy(faces[0].normed_embedding).unsqueeze(0).unsqueeze(0))
|
344 |
+
faceid_embeds = torch.cat(faceid_embeds, dim=1)
|
345 |
+
```
|
346 |
+
|
347 |
+
```python
|
348 |
+
import torch
|
349 |
+
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
|
350 |
+
from PIL import Image
|
351 |
+
|
352 |
+
from ip_adapter.ip_adapter_faceid_separate import IPAdapterFaceID
|
353 |
+
|
354 |
+
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
|
355 |
+
vae_model_path = "stabilityai/sd-vae-ft-mse"
|
356 |
+
ip_ckpt = "ip-adapter-faceid-portrait_sd15.bin"
|
357 |
+
device = "cuda"
|
358 |
+
|
359 |
+
noise_scheduler = DDIMScheduler(
|
360 |
+
num_train_timesteps=1000,
|
361 |
+
beta_start=0.00085,
|
362 |
+
beta_end=0.012,
|
363 |
+
beta_schedule="scaled_linear",
|
364 |
+
clip_sample=False,
|
365 |
+
set_alpha_to_one=False,
|
366 |
+
steps_offset=1,
|
367 |
+
)
|
368 |
+
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
|
369 |
+
pipe = StableDiffusionPipeline.from_pretrained(
|
370 |
+
base_model_path,
|
371 |
+
torch_dtype=torch.float16,
|
372 |
+
scheduler=noise_scheduler,
|
373 |
+
vae=vae,
|
374 |
+
feature_extractor=None,
|
375 |
+
safety_checker=None
|
376 |
+
)
|
377 |
+
|
378 |
+
|
379 |
+
# load ip-adapter
|
380 |
+
ip_model = IPAdapterFaceID(pipe, ip_ckpt, device, num_tokens=16, n_cond=5)
|
381 |
+
|
382 |
+
# generate image
|
383 |
+
prompt = "photo of a woman in red dress in a garden"
|
384 |
+
negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
|
385 |
+
|
386 |
+
images = ip_model.generate(
|
387 |
+
prompt=prompt, negative_prompt=negative_prompt, faceid_embeds=faceid_embeds, num_samples=4, width=512, height=512, num_inference_steps=30, seed=2023
|
388 |
+
)
|
389 |
+
|
390 |
+
|
391 |
+
```
|
392 |
+
|
393 |
+
|
394 |
|
395 |
## Limitations and Bias
|
396 |
- The models do not achieve perfect photorealism and ID consistency.
|