File size: 5,040 Bytes
f0e13d9 4c9a6f0 d3e71eb 4c9a6f0 f0e13d9 2845ec4 f0e13d9 d3e71eb f0e13d9 d3e71eb 036dfc6 f0e13d9 d3e71eb f0e13d9 d3e71eb f0e13d9 f3afd25 f0e13d9 94f4e7c f0e13d9 4c9a6f0 036dfc6 d3e71eb |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 |
import torch
import spaces
from diffusers import StableDiffusionPipeline, DDIMScheduler, AutoencoderKL
from transformers import AutoFeatureExtractor
from ip_adapter.ip_adapter_faceid import IPAdapterFaceID, IPAdapterFaceIDPlus
from huggingface_hub import hf_hub_download
from insightface.app import FaceAnalysis
from insightface.utils import face_align
import gradio as gr
import cv2
import os
import uuid
from datetime import datetime
# Model identifiers and checkpoint locations.
# base_model_path / vae_model_path / image_encoder_path are Hugging Face Hub
# repo ids that are passed to the loaders further down in this file.
base_model_path = "SG161222/Realistic_Vision_V4.0_noVAE"
vae_model_path = "stabilityai/sd-vae-ft-mse"
image_encoder_path = "laion/CLIP-ViT-H-14-laion2B-s32B-b79K"
# hf_hub_download fetches each IP-Adapter FaceID checkpoint from the Hub at
# import time and returns the local file path of the downloaded file.
ip_ckpt = hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid_sd15.bin", repo_type="model")
ip_plus_ckpt = hf_hub_download(repo_id="h94/IP-Adapter-FaceID", filename="ip-adapter-faceid-plusv2_sd15.bin", repo_type="model")
# Device the diffusion pipeline and both IP-Adapter wrappers are placed on.
device = "cuda"
# Initialize the noise scheduler.
# A DDIM scheduler with a "scaled_linear" beta schedule running from 0.00085
# to 0.012 over 1000 training timesteps; it is handed to the Stable Diffusion
# pipeline below in place of the checkpoint's own scheduler.
noise_scheduler = DDIMScheduler(
num_train_timesteps=1000,
beta_start=0.00085,
beta_end=0.012,
beta_schedule="scaled_linear",
clip_sample=False,
set_alpha_to_one=False,
steps_offset=1,
)
# Load models.
# The VAE is loaded separately (the base checkpoint id ends in "noVAE") and
# cast to float16 to match the pipeline's torch_dtype.
vae = AutoencoderKL.from_pretrained(vae_model_path).to(dtype=torch.float16)
# Stable Diffusion pipeline using the custom DDIM scheduler and the VAE above,
# moved to `device` once loaded.
pipe = StableDiffusionPipeline.from_pretrained(
base_model_path,
torch_dtype=torch.float16,
scheduler=noise_scheduler,
vae=vae
).to(device)
# Both IP-Adapter wrappers are built around the same `pipe` object.
# NOTE(review): only `ip_model_plus` is used by generate_image in the visible
# part of this file; `ip_model` may be unused - confirm before removing.
ip_model = IPAdapterFaceID(pipe, ip_ckpt, device)
ip_model_plus = IPAdapterFaceIDPlus(pipe, image_encoder_path, ip_plus_ckpt, device)
# Initialize FaceAnalysis.
# insightface face detector/embedder using the "buffalo_l" model pack,
# restricted to the CPU execution provider.
app = FaceAnalysis(name="buffalo_l", providers=['CPUExecutionProvider'])
# Detection runs at a 640x640 input size.
# NOTE(review): ctx_id=0 is passed even though only the CPU provider is
# enabled - confirm it has no effect in this configuration.
app.prepare(ctx_id=0, det_size=(640, 640))
# Restrict OpenCV to a single worker thread.
# NOTE(review): presumably to avoid thread oversubscription in the hosted
# environment - confirm the reason.
cv2.setNumThreads(1)
@spaces.GPU(enable_queue=True)
def generate_image(images, gender, prompt, progress=gr.Progress(track_tqdm=True)):
    """Generate images conditioned on the faces found in the uploaded photos.

    A face-ID embedding is extracted from every usable photo and the
    embeddings are averaged. The aligned 224x224 crop of the first detected
    face is passed alongside the averaged embedding to the FaceID-Plus model.

    Args:
        images: Items from the ``gr.Files`` input. Each item may be a file
            path (``str`` / ``os.PathLike``) or an upload object exposing the
            path as ``.name``.
        gender: Selected radio value; only used to build the default prompt.
        prompt: Text prompt. When empty, a neutral clothed-portrait default
            is used.
        progress: Gradio progress tracker; declared so tqdm progress from the
            generation call is mirrored in the UI.

    Returns:
        The value returned by ``ip_model_plus.generate`` (displayed in the
        output gallery).

    Raises:
        gr.Error: If no photo was uploaded, or if no face could be detected
            in any of the uploaded photos.
    """
    if not images:
        raise gr.Error("Please upload at least one photo containing a face.")

    if not prompt:
        # This app conditions generation on a real person's face, so the
        # fallback prompt must never request sexual or nude content.
        subject = (gender or "person").lower()
        prompt = f"A full-length portrait photo of a {subject}, fully clothed, natural lighting"

    # Library default quality terms, plus an explicit nudity exclusion for
    # the same reason as above.
    negative_prompt = (
        "nsfw, nude, naked, nudity, "
        "monochrome, lowres, bad anatomy, worst quality, low quality"
    )

    face_strength = 2.1
    likeness_strength = 0.7

    faceid_all_embeds = []
    face_image = None
    for image in images:
        # Accept both plain paths and upload objects that carry `.name`.
        if isinstance(image, (str, os.PathLike)):
            path = os.fspath(image)
        else:
            path = image.name

        face = cv2.imread(path)
        if face is None:
            # cv2.imread returns None for unreadable / non-image files.
            continue

        faces = app.get(face)
        if not faces:
            # No face detected in this photo; other photos may still work.
            continue

        faceid_all_embeds.append(
            torch.from_numpy(faces[0].normed_embedding).unsqueeze(0)
        )
        if face_image is None:
            # Structure reference: aligned crop of the first usable face.
            face_image = face_align.norm_crop(
                face, landmark=faces[0].kps, image_size=224
            )

    if not faceid_all_embeds:
        raise gr.Error(
            "No face could be detected in the uploaded photos. "
            "Please upload a clear, front-facing photo."
        )

    average_embedding = torch.mean(torch.stack(faceid_all_embeds, dim=0), dim=0)

    return ip_model_plus.generate(
        prompt=prompt,
        negative_prompt=negative_prompt,
        faceid_embeds=average_embedding,
        scale=likeness_strength,
        face_image=face_image,
        shortcut=True,
        s_scale=face_strength,
        width=512,
        height=912,
        num_inference_steps=100,
    )
# Custom stylesheet passed to gr.Blocks below: gradient page background,
# hidden footer, a single centered column capped at 500px, rounded gallery
# images, and orange buttons with a darker hover state.
# NOTE(review): the .gradio-* selectors are assumed to match the class names
# emitted by the installed Gradio version - confirm they still apply.
css = '''
body {
font-family: 'Roboto', sans-serif;
margin: 0;
padding: 0;
background: linear-gradient(135deg, #1e3c72, #2a5298);
color: #fff;
display: flex;
justify-content: center;
align-items: center;
min-height: 100vh;
overflow-x: hidden;
}
footer {
display: none;
}
h1 {
font-size: 2rem;
margin-bottom: 0.5em;
text-align: center;
}
.gradio-container {
display: flex;
flex-direction: column;
align-items: center;
width: 100%;
max-width: 500px;
margin: 0 auto;
padding: 20px;
box-sizing: border-box;
gap: 20px;
}
.gradio-container > * {
width: 100%;
}
.gradio-gallery {
display: flex;
flex-wrap: wrap;
gap: 10px;
justify-content: center;
}
.gradio-gallery img {
border-radius: 10px;
box-shadow: 0px 5px 15px rgba(0, 0, 0, 0.3);
max-width: 100%;
height: auto;
}
.gradio-files input, .gradio-radio input, .gradio-textbox textarea, .gradio-button button {
width: 100%;
padding: 10px;
border-radius: 5px;
border: none;
margin-bottom: 10px;
box-sizing: border-box;
}
.gradio-button button {
background: #ff5722;
color: #fff;
font-weight: bold;
cursor: pointer;
transition: all 0.3s ease;
}
.gradio-button button:hover {
background: #e64a19;
}
'''
# Build the UI: photo upload, gender selector and prompt box feed
# generate_image, whose result is shown in the gallery.
with gr.Blocks(css=css) as demo:
    gr.Markdown("# Image Generation with Face ID")
    gr.Markdown("Upload your face images and enter a prompt to generate images.")
    images_input = gr.Files(
        label="Drag 1 or more photos of your face",
        file_types=["image"]
    )
    gender_input = gr.Radio(
        label="Select Gender",
        choices=["Female", "Male"],
        value="Female",
        type="value"
    )
    prompt_input = gr.Textbox(
        label="Enter your prompt",
        placeholder="Describe the image you want to generate..."
    )
    run_button = gr.Button("Generate Image")
    output_gallery = gr.Gallery(label="Generated Images")
    # Input order must match generate_image's (images, gender, prompt).
    run_button.click(
        fn=generate_image,
        inputs=[images_input, gender_input, prompt_input],
        outputs=output_gallery
    )

# Enable request queuing, then start the server.
demo.queue()
demo.launch()