File size: 3,717 Bytes
3a1e48f
8340be4
3a1e48f
0e80ee6
68b51dd
8ddce9c
0e80ee6
399fa48
41238f8
8340be4
3a1e48f
 
0e80ee6
 
 
3a1e48f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0e80ee6
3a1e48f
debbd96
0e80ee6
ad7df92
42e179a
544fa37
0e80ee6
41238f8
0e80ee6
41238f8
 
 
 
 
1287e5e
0e80ee6
 
 
41238f8
ad7df92
0e80ee6
1287e5e
0e80ee6
 
 
 
 
1287e5e
0e80ee6
 
 
 
872fe49
a550ff1
21f9f22
 
 
0609de7
21f9f22
0609de7
 
 
 
 
 
 
 
 
0e80ee6
 
0609de7
 
 
 
 
 
3a1e48f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import torch
import spaces
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID, IPAdapterFaceIDPlus
from huggingface_hub import hf_hub_download
from insightface.app import FaceAnalysis
from insightface.utils import face_align
import gradio as gr
import cv2

# Model identifiers passed to the loaders below: the base Stable Diffusion
# checkpoint, a standalone VAE, and the image encoder handed to the "plus"
# adapter. NOTE(review): the base checkpoint name ends in "noVAE", which is
# presumably why a separate VAE is loaded — confirm against the model card.
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
vae_model_path = "stabilityai/sd-vae-ft-mse"
image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
# Resolve both IP-Adapter-FaceID checkpoint files from the h94/IP-Adapter-FaceID
# repo at import time; the results are handed to the adapter constructors below.
ip_ckpt = hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid_sd15.bin", repo_type="model")
ip_plus_ckpt = hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid-plusv2_sd15.bin", repo_type="model")

# Device string given to both adapters and used by generate_image for pipe.to().
device = "cuda"

# DDIM scheduler that replaces the pipeline's default scheduler.
noise_scheduler = DDIMScheduler(
    num_train_timesteps=1000,
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    clip_sample=False,
    set_alpha_to_one=False,
    steps_offset=1,
)
# The VAE is cast to float16 to match the torch_dtype requested for the pipeline.
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
pipe = StableDiffusionPipeline.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    scheduler=noise_scheduler,
    vae=vae,
)

# Both adapters are built around the same `pipe` object. generate_image uses
# `ip_model` when "Preserve Face Structure" is off and `ip_model_plus` (which
# additionally takes the image encoder path) when it is on.
ip_model = IPAdapterFaceID(pipe, ip_ckpt, device)
ip_model_plus = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_plus_ckpt, device)

@spaces.GPU(enable_queue=True)
def generate_image(images, prompt, negative_prompt, preserve_face_structure, progress=gr.Progress(track_tqdm=True)):
    """Generate images conditioned on the face found in the uploaded photos.

    A face embedding is extracted from every usable photo and the embeddings
    are averaged into a single identity embedding that drives generation.

    Args:
        images: Uploaded photos as delivered by the ``gr.Files`` input; each
            item is passed to ``cv2.imread``.
        prompt: Text prompt forwarded to the adapter.
        negative_prompt: Negative text prompt forwarded to the adapter.
        preserve_face_structure: When true, generation goes through
            ``ip_model_plus`` and also receives an aligned 224x224 crop of the
            first detected face; otherwise ``ip_model`` is used.
        progress: Gradio progress tracker. It is not read in the body; it is
            declared with ``track_tqdm=True`` in the signature.

    Returns:
        Whatever the adapter's ``generate`` call returns (shown in the
        ``gr.Gallery`` output; presumably a list of images).

    Raises:
        gr.Error: If no photo was uploaded, or if no face could be detected
            in any of the uploaded photos.
    """
    if not images:
        raise gr.Error("Please upload at least one photo of a face.")

    pipe.to(device)
    # The face analyser is created per call, inside the GPU-decorated function.
    app = FaceAnalysis(name="buffalo_l", providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
    app.prepare(ctx_id=0, det_size=(640, 640))

    faceid_all_embeds = []
    face_image = None
    for image in images:
        face = cv2.imread(image)
        if face is None:
            # cv2.imread signals an unreadable/unsupported file by returning None.
            print(f"Skipping unreadable image: {image}")
            continue
        faces = app.get(face)
        if not faces:
            # No detection: indexing faces[0] would raise IndexError.
            print(f"Skipping image with no detectable face: {image}")
            continue
        faceid_embed = torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
        faceid_all_embeds.append(faceid_embed)
        if preserve_face_structure and face_image is None:
            # Only the first usable photo supplies the structural reference crop.
            face_image = face_align.norm_crop(face, landmark=faces[0].kps, image_size=224) # you can also segment the face

    if not faceid_all_embeds:
        raise gr.Error("No face could be detected in the uploaded photos. Please try different images.")

    average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)

    if not preserve_face_structure:
        print("Generating normal")
        generated = ip_model.generate(
            prompt=prompt, negative_prompt=negative_prompt, faceid_embeds=average_embedding,
            width=512, height=512, num_inference_steps=30
        )
    else:
        print("Generating plus")
        generated = ip_model_plus.generate(
            prompt=prompt, negative_prompt=negative_prompt, faceid_embeds=average_embedding,
            face_image=face_image, shortcut=True, s_scale=1.5, width=512, height=512, num_inference_steps=30
        )
    print(generated)
    return generated
# Custom CSS injected into the page: removes the bottom margin under <h1>.
css = '''
h1{margin-bottom: 0 !important}
'''
# The inputs are listed in the same order as generate_image's positional
# parameters: images, prompt, negative_prompt, preserve_face_structure.
demo = gr.Interface(
        css=css,
        fn=generate_image,
        inputs=[
            gr.Files(
                label="Drag 1 or more photos of your face",
                file_types=["image"]
            ),
            gr.Textbox(label="Prompt",
                       info="Try something like 'a photo of a man/woman/person'",
                       placeholder="A photo of a [man/woman/person]..."),
            gr.Textbox(label="Negative Prompt", placeholder="low quality"),
            gr.Checkbox(label="Preserve Face Structure", value=False),
        ],
        outputs=[gr.Gallery(label="Generated Image")],
        title="IP-Adapter-FaceID demo",
        description="Demo for the [h94/IP-Adapter-FaceID model](https://huggingface.co/h94/IP-Adapter-FaceID)",
        # Flagging is disabled; Gradio documents string values here
        # ("never"/"auto"/"manual"), not booleans.
        allow_flagging="never",
        )
demo.launch()