File size: 1,956 Bytes

9a900e5
 
 
 
 
fd22feb
 
9a900e5
 
 
fd22feb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9a900e5
fd22feb
9a900e5

import numpy as np
import torch
from torchvision import transforms
import av
import logging
from ftplib import FTP


# Configure the root logger at import time: records at INFO level and above
# are written to the log file below.
logging.basicConfig(
    level=logging.INFO,
    filename='/mnt/data/uploads/logfile-video.log',
)

def get_video_file(file_path, ftp_password):
    """Download a remote file over FTP into the local file ``test.mp4``.

    Connects to the hard-coded FTP server in passive mode, logs in as the
    hard-coded user and retrieves ``file_path`` in binary mode. The local
    destination is always ``test.mp4`` in the current working directory and
    is overwritten if it already exists.

    Args:
        file_path: Path of the file on the FTP server (used in ``RETR``).
        ftp_password: Password for the FTP account.

    Returns:
        None. The downloaded bytes are written to ``test.mp4``.

    Raises:
        ftplib.all_errors: If connecting, logging in or retrieving fails.
        OSError: If the local file cannot be opened for writing.
    """
    # FTP server details
    ftp_server = "121.136.96.223"
    ftp_port = 21
    ftp_user = "donghuna_ftp"
    # NOTE(review): commented-out remote folder kept from the original code:
    # folder_path = "homes/donghuna/database/Diving48_rgb/rgb/"

    video_path = "test.mp4"

    # Set up the FTP connection. The context manager sends QUIT and closes
    # the socket on exit, so the connection is released even when the
    # login or the transfer raises.
    with FTP() as ftp:
        ftp.connect(ftp_server, ftp_port)
        ftp.login(user=ftp_user, passwd=ftp_password)
        ftp.set_pasv(True)

        with open(video_path, 'wb') as local_file:
            ftp.retrbinary(f'RETR {file_path}', local_file.write)


def read_video(file_path, num_frames=24, target_size=(224, 224)):
    """Decode a video and return a fixed-length, preprocessed frame array.

    Every frame of the first video stream is decoded to an RGB array, then
    ``sample_frames`` reduces or pads the sequence to ``num_frames`` and
    ``preprocess_frames`` resizes and converts the result.

    Args:
        file_path: Path (or other source accepted by ``av.open``) of the video.
        num_frames: Number of frames to keep after sampling/padding.
        target_size: Size passed to ``preprocess_frames`` for resizing.

    Returns:
        Whatever ``preprocess_frames`` returns for the sampled frames.
    """
    # Open the container in a ``with`` block so the underlying file and
    # decoder resources are released even if decoding raises part-way.
    with av.open(file_path) as container:
        frames = [
            frame.to_ndarray(format="rgb24").astype(np.uint8)
            for frame in container.decode(video=0)
        ]

    sampled_frames = sample_frames(frames, num_frames)
    return preprocess_frames(sampled_frames, target_size)

def sample_frames(frames, num_frames):
    """Return exactly ``num_frames`` frames taken from ``frames``.

    * More frames than requested: pick ``num_frames`` indices evenly spaced
      from the first to the last frame (``np.linspace`` truncated to int).
    * Fewer frames than requested: append all-zero frames shaped like the
      first frame until the length is ``num_frames``.
    * Exactly ``num_frames``: return the frames unchanged.

    The input sequence is never modified.

    Args:
        frames: Sequence of equally-shaped frame arrays.
        num_frames: Desired number of frames in the result.

    Returns:
        ``np.ndarray`` stacking the selected (and possibly padded) frames
        along a new leading axis.

    Raises:
        ValueError: If padding is required but ``frames`` is empty, because
            there is no frame to take the padding shape from.
    """
    total_frames = len(frames)

    if total_frames > num_frames:
        indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
        return np.array([frames[i] for i in indices])

    missing = num_frames - total_frames
    if missing == 0:
        return np.array(frames)

    if total_frames == 0:
        raise ValueError("cannot pad an empty frame sequence: no frames were provided")

    # Build a new list instead of extending ``frames`` in place so the
    # caller's sequence keeps its original length. Reusing one zero frame is
    # safe because ``np.array`` copies the data into the result.
    padding = [np.zeros_like(frames[0])] * missing
    return np.array(list(frames) + padding)

def preprocess_frames(frames, target_size):
    """Resize each frame and pack the clip into a channel-first array.

    Each frame goes through ``ToPILImage`` -> ``Resize(target_size)`` ->
    ``ToTensor``; the per-frame tensors are stacked along a new leading
    (time) axis and the first two axes are then swapped.

    Args:
        frames: Iterable of frames accepted by ``transforms.ToPILImage``
            (presumably H x W x 3 uint8 arrays -- confirm against caller).
        target_size: Size argument forwarded to ``transforms.Resize``.

    Returns:
        ``np.ndarray`` laid out as (C, T, H, W).
    """
    to_pil = transforms.ToPILImage()
    resize = transforms.Resize(target_size)
    to_tensor = transforms.ToTensor()

    tensors = []
    for frame in frames:
        image = to_pil(frame)
        image = resize(image)
        tensors.append(to_tensor(image))

    clip = torch.stack(tensors)  # (T, C, H, W)
    clip = clip.permute(1, 0, 2, 3)  # -> (C, T, H, W)
    return clip.numpy()