#!/usr/bin/env python3
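# Gradio demo: project an uploaded image into the latent space of a pretrained
# generator (StyleGAN2-ADA-style G.synthesis API) and return the final
# reconstruction plus a video of the optimization progress.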
import gradio as gr
import numpy as np
import torch
import pickle
import PIL.Image
import types
from projector import project, imageio, _MODELS
from huggingface_hub import hf_hub_url, cached_download
# with open("../models/gamma500/network-snapshot-010000.pkl", "rb") as f:
# with open("../models/gamma400/network-snapshot-010600.pkl", "rb") as f:
# with open("../models/gamma400/network-snapshot-019600.pkl", "rb") as f:
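# Download the pretrained generator pickle from the Hugging Face Hub
# (cached locally after the first download).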
with open(cached_download(hf_hub_url('ykilcher/apes', 'gamma500/network-snapshot-010000.pkl')), 'rb') as f:
    G = pickle.load(f)["G_ema"]  # torch.nn.Module
device = torch.device("cpu")
if torch.cuda.is_available():
    device = torch.device("cuda")
    G = G.to(device)
else:
    # On CPU, force full-precision inference by monkey-patching
    # force_fp32=True into both the generator's and the synthesis
    # network's forward passes.
    _old_forward = G.forward

    def _new_forward(self, *args, **kwargs):
        kwargs["force_fp32"] = True
        return _old_forward(*args, **kwargs)

    G.forward = types.MethodType(_new_forward, G)

    _old_synthesis_forward = G.synthesis.forward

    def _new_synthesis_forward(self, *args, **kwargs):
        kwargs["force_fp32"] = True
        return _old_synthesis_forward(*args, **kwargs)

    G.synthesis.forward = types.MethodType(_new_synthesis_forward, G.synthesis)
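# Minimal sketch of the rebinding pattern used above (illustrative only, on a
# toy class): the original bound method is captured in a closure, and
# types.MethodType turns the wrapper back into a bound method on the instance.
#
#     class Toy:
#         def forward(self, x, **kw):
#             return x
#
#     toy = Toy()
#     _orig = toy.forward          # already bound, so no self is needed
#     def _patched(self, x, **kw):
#         kw["force_fp32"] = True  # inject the extra keyword argument
#         return _orig(x, **kw)
#     toy.forward = types.MethodType(_patched, toy)

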
def generate(
    target_image_upload,
    # target_image_webcam,
    num_steps,
    seed,
    learning_rate,
    model_name,
    normalize_for_clip,
    loss_type,
    regularize_noise_weight,
    initial_noise_factor,
):
    # Gradio Number inputs arrive as floats; coerce to ints where required.
    seed = round(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    target_image = target_image_upload
    # if target_image is None:
    #     target_image = target_image_webcam
    num_steps = round(num_steps)

    # Debug output describing the uploaded image array.
    print(type(target_image))
    print(target_image.dtype)
    print(target_image.max())
    print(target_image.min())
    print(target_image.shape)

    # Center-crop to a square, then resize to the generator's resolution.
    target_pil = PIL.Image.fromarray(target_image).convert("RGB")
    w, h = target_pil.size
    s = min(w, h)
    target_pil = target_pil.crop(
        ((w - s) // 2, (h - s) // 2, (w + s) // 2, (h + s) // 2)
    )
    target_pil = target_pil.resize(
        (G.img_resolution, G.img_resolution), PIL.Image.LANCZOS
    )
    target_uint8 = np.array(target_pil, dtype=np.uint8)
    # HWC uint8 -> CHW tensor, as expected by project().
    target_image = torch.from_numpy(target_uint8.transpose([2, 0, 1])).to(device)
    # Optimize a sequence of latent codes so that G reproduces the target.
    projected_w_steps = project(
        G,
        target=target_image,
        num_steps=num_steps,
        device=device,
        verbose=True,
        initial_learning_rate=learning_rate,
        model_name=model_name,
        normalize_for_clip=normalize_for_clip,
        loss_type=loss_type,
        regularize_noise_weight=regularize_noise_weight,
        initial_noise_factor=initial_noise_factor,
    )
    with torch.no_grad():
        video = imageio.get_writer("proj.mp4", mode="I", fps=10, codec="libx264", bitrate="16M")
        for w in projected_w_steps:
            synth_image = G.synthesis(w.to(device).unsqueeze(0), noise_mode="const")
            # Map [-1, 1] outputs to [0, 255] and convert NCHW float -> HWC uint8.
            synth_image = (synth_image + 1) * (255 / 2)
            synth_image = (
                synth_image.permute(0, 2, 3, 1)
                .clamp(0, 255)
                .to(torch.uint8)[0]
                .cpu()
                .numpy()
            )
            # Each video frame shows the target next to the current reconstruction.
            video.append_data(np.concatenate([target_uint8, synth_image], axis=1))
        video.close()
    return synth_image, "proj.mp4"
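
# Illustrative direct call bypassing the Gradio UI (hypothetical input file;
# the parameter values are just the interface defaults below):
#
#     img = np.array(PIL.Image.open("ape.jpg").convert("RGB"))
#     final_frame, video_path = generate(
#         img, num_steps=250, seed=69420, learning_rate=0.05,
#         model_name="RN50", normalize_for_clip=True, loss_type="l2",
#         regularize_noise_weight=1e5, initial_noise_factor=0.05,
#     )
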
iface = gr.Interface(
    fn=generate,
    inputs=[
        gr.inputs.Image(source="upload", optional=True),
        # gr.inputs.Image(source="webcam", optional=True),
        gr.inputs.Number(default=250, label="steps"),
        gr.inputs.Number(default=69420, label="seed"),
        gr.inputs.Number(default=0.05, label="learning_rate"),
        gr.inputs.Dropdown(default='RN50', label="model_name", choices=['vgg16', *_MODELS.keys()]),
        gr.inputs.Checkbox(default=True, label="normalize_for_clip"),
        gr.inputs.Dropdown(
            default="l2", label="loss_type", choices=["l2", "l1", "cosine"]
        ),
        gr.inputs.Number(default=1e5, label="regularize_noise_weight"),
        gr.inputs.Number(default=0.05, label="initial_noise_factor"),
    ],
    outputs=["image", "video"],
)
iface.launch(inbrowser=True)
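
# Run with `python interface_projector.py`; launch(inbrowser=True) opens the
# interface in the default browser. Note that projector.py (providing project,
# imageio, and _MODELS) must be importable from the working directory.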