File size: 3,935 Bytes
2252f3d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import datetime
import pytz
import traceback
from torchvision.utils import make_grid
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import torch
import json
import os
from tqdm import tqdm
import cv2
import imageio
def get_time_for_log():
    """Return the current time in the Asia/Shanghai zone for log lines.

    Returns:
        A string formatted as ``YYYYMMDD HH:MM:SS``.
    """
    shanghai_tz = pytz.timezone('Asia/Shanghai')
    now = datetime.datetime.now(shanghai_tz)
    return now.strftime("%Y%m%d %H:%M:%S")
def get_trace_for_log():
    """Return the formatted traceback of the exception being handled.

    Thin wrapper around ``traceback.format_exc()`` whose result is passed
    through ``str`` before being returned.
    """
    formatted = traceback.format_exc()
    return str(formatted)
def make_grid_(imgs, save_file, nrow=10, pad_value=1):
    """Tile a batch of images into a single grid and save it to disk.

    Args:
        imgs: The images to tile. Accepted forms, as handled below:
            * list of ``PIL.Image.Image`` - converted to arrays and divided
              by 255;
            * list of ``np.ndarray`` - divided by 255;
            * list of anything else ``torch.stack`` accepts - stacked as is;
            * a single ``np.ndarray`` or tensor - handed to ``make_grid``
              without scaling or permuting (presumably already
              (N, C, H, W) in [0, 1] - TODO confirm against callers).
            List inputs are stacked and permuted with ``(0, 3, 1, 2)``, i.e.
            each element is treated as channel-last (H, W, C).
        save_file: Destination passed to ``PIL.Image.Image.save``.
        nrow: Forwarded to ``make_grid`` as ``nrow``.
        pad_value: Forwarded to ``make_grid`` as ``pad_value``.

    Raises:
        IndexError: if ``imgs`` is an empty list.
    """
    if isinstance(imgs, list):
        first = imgs[0]
        if isinstance(first, Image.Image):
            imgs = [torch.from_numpy(np.array(img) / 255.) for img in imgs]
        elif isinstance(first, np.ndarray):
            imgs = [torch.from_numpy(img / 255.) for img in imgs]
        imgs = torch.stack(imgs, 0).permute(0, 3, 1, 2)
    if isinstance(imgs, np.ndarray):
        imgs = torch.from_numpy(imgs)

    img_grid = make_grid(imgs, nrow=nrow, padding=2, pad_value=pad_value)
    # detach()/cpu() so tensors that track gradients or live on an
    # accelerator can still be converted; both are no-ops otherwise.
    img_grid = img_grid.detach().cpu().permute(1, 2, 0).numpy()
    # Clip before the uint8 cast: values slightly outside [0, 1] would
    # otherwise wrap around instead of saturating.
    img_grid = (np.clip(img_grid, 0.0, 1.0) * 255).astype(np.uint8)
    Image.fromarray(img_grid).save(save_file)
def draw_caption(img, text, pos, size=100, color=(128, 128, 128)):
    """Draw ``text`` onto ``img`` and return that same image object.

    Args:
        img: Image handed to ``ImageDraw.Draw``; it is drawn on directly.
        text: The caption string.
        pos: Position passed to ``ImageDraw.text`` as its first argument.
        size: Size requested from the default font via ``font_variant``.
        color: Fill colour for the text.

    Returns:
        ``img``, after the caption has been drawn.
    """
    canvas = ImageDraw.Draw(img)
    # Pillow's built-in default font, re-sized to the requested size.
    caption_font = ImageFont.load_default().font_variant(size=size)
    canvas.text(pos, text, fill=color, font=caption_font)
    return img
def txt2json(txt_file, json_file):
    """Convert a text file into a JSON array holding one string per line.

    Every line of ``txt_file`` has its leading and trailing whitespace
    stripped; the resulting list of strings is written to ``json_file``.

    Args:
        txt_file: Path of the text file to read.
        json_file: Path of the JSON file to write (overwritten if present).
    """
    with open(txt_file, 'r') as f:
        items = [line.strip() for line in f]
    with open(json_file, 'w') as f:
        # ``items`` is already a plain list. The previous ``items.tolist()``
        # raised AttributeError because lists have no such method.
        json.dump(items, f)
def process_thuman_texture(path='/aifs4su/mmcode/lipeng/Thuman2.0'):
    """Rewrite each case's ``material0.mtl``, replacing ``png`` with ``jpeg``.

    Every entry of ``path`` is treated as a case directory. The file
    ``<path>/<case>/material0.mtl`` is read in full, every occurrence of the
    substring ``png`` is replaced by ``jpeg``, and the file is written back
    in place.

    Args:
        path: Dataset root whose entries are the case directories. Defaults
            to the location that used to be hard-coded here.
    """
    for case in tqdm(os.listdir(path)):
        mtl = os.path.join(path, case, 'material0.mtl')
        # Entries without a material file (stray files, incomplete cases)
        # are skipped so one of them cannot abort the run partway through.
        if not os.path.isfile(mtl):
            continue
        with open(mtl, 'r') as f:
            content = f.read()
        with open(mtl, 'w') as f:
            f.write(content.replace('png', 'jpeg'))
#### for debug
# Set the OPENCV_IO_ENABLE_OPENEXR environment flag for this process.
# Presumably this is what lets cv2.imread decode the .exr depth maps that
# read_data loads - TODO confirm.
# NOTE(review): cv2 is imported at the top of the file, i.e. before this
# assignment runs; confirm the installed OpenCV still honours the flag when
# it is set after import.
os.environ["OPENCV_IO_ENABLE_OPENEXR"] = "1"
def get_intrinsic_from_fov(fov, H, W, bs=-1):
    """Build a 3x3 pinhole intrinsic matrix from a field of view.

    The focal length is ``0.5 * H / tan(0.5 * fov)`` and is used on both
    axes; the principal point is placed at ``(W / 2, H / 2)``.

    Args:
        fov: Field of view, fed to ``np.tan`` after halving.
        H: Image height.
        W: Image width.
        bs: When positive, the matrix is repeated ``bs`` times along a new
            leading axis.

    Returns:
        A float32 tensor of shape (3, 3), or (bs, 3, 3) when ``bs > 0``.
    """
    half_fov = 0.5 * fov
    focal = 0.5 * H / np.tan(half_fov)

    K = np.eye(3, dtype=np.float32)
    K[0, 0], K[1, 1] = focal, focal
    K[0, 2], K[1, 2] = W / 2.0, H / 2.0

    if bs > 0:
        K = np.repeat(K[np.newaxis], bs, axis=0)
    return torch.from_numpy(K)
def read_data(data_dir, i):
    """
    Load colour, depth, mask and camera data for frame ``i`` of ``data_dir``.

    Reads ``render_%04d.webp`` and ``depth_%04d.exr`` for the frame, plus
    the ``meta.json`` stored in the same directory.

    Args:
        data_dir: Directory containing the renders and ``meta.json``.
        i: Frame index; used in the file names and to index
            ``meta['locations']``.

    Return:
        rgb: (H, W, 3) torch.float32, composited over a black background
        depth: (H, W, 1) tensor, set to 0 wherever mask <= 0.5
            (dtype is whatever OpenCV decodes; float32 is expected)
        mask: (H, W, 1) torch.float32, the alpha channel with pixels whose
            depth exceeds 100 zeroed
        c2w: (4, 4) torch.float32
        intrinsic: (3, 3) torch.float32

    Raises:
        OSError: if OpenCV cannot read the depth file.
    """
    background_color = torch.tensor([0.0, 0.0, 0.0])

    rgb_name = os.path.join(data_dir, 'render_%04d.webp' % i)
    depth_name = os.path.join(data_dir, 'depth_%04d.exr' % i)

    rgba = Image.fromarray(imageio.v2.imread(rgb_name)).convert("RGBA")
    img = torch.from_numpy(np.asarray(rgba) / 255.0).float()

    # ``mask`` is a view of the alpha channel. ``rgb`` must be composited
    # before the mask is edited by the depth test further down.
    mask = img[:, :, -1:]
    rgb = img[:, :, :3] * mask + background_color[None, None, :] * (1 - mask)

    depth_raw = cv2.imread(depth_name, cv2.IMREAD_UNCHANGED)
    if depth_raw is None:
        # cv2.imread reports failure by returning None; fail with a clear
        # message instead of "'NoneType' object is not subscriptable".
        raise OSError(f'cv2.imread failed to read depth file: {depth_name}')
    if depth_raw.ndim == 2:
        # Single-channel image: just add the trailing channel axis.
        depth_raw = depth_raw[..., None]
    else:
        # Multi-channel image: keep the first channel only.
        depth_raw = depth_raw[..., 0, None]
    depth = torch.from_numpy(depth_raw)

    mask[depth > 100.0] = 0.0
    depth[~(mask > 0.5)] = 0.0  # set invalid depth to 0

    meta_path = os.path.join(data_dir, 'meta.json')
    with open(meta_path, 'r') as f:
        meta = json.load(f)

    c2w = torch.as_tensor(
        meta['locations'][i]["transform_matrix"],
        dtype=torch.float32,
    )

    H, W = rgb.shape[:2]
    # NOTE(review): the value read is "camera_angle_x", yet the helper
    # derives the focal length from H. The two agree only for square
    # images - confirm against the renderer.
    fov = meta["camera_angle_x"]
    intrinsic = get_intrinsic_from_fov(fov, H=H, W=W)

    return rgb, depth, mask, c2w, intrinsic
|