File size: 2,347 Bytes
9a900e5
 
 
 
 
7092ebc
a095c9c
fd22feb
9a900e5
 
168ae63
 
fd22feb
168ae63
6255548
 
 
3a6db82
6255548
69fee66
 
6255548
9a900e5
 
 
69fee66
9a900e5
69fee66
9a900e5
 
69fee66
9a900e5
 
69fee66
9a900e5
69fee66
 
 
 
 
 
 
 
 
 
 
 
 
 
9a900e5
 
 
 
 
 
69fee66
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import torch
from torchvision import transforms
import av
import logging
import base64
import io

# Configure the root logger at import time: records at INFO level and above
# are written to this fixed log file. basicConfig has no effect if the root
# logger already has handlers configured.
logging.basicConfig(filename='/mnt/data/uploads/logfile-video.log', level=logging.INFO)

def read_video(video_base64, num_frames=24, target_size=(224, 224)):
    """Decode a base64-encoded video into a fixed-length clip tensor.

    The video is decoded fully in memory, reduced (or zero-padded) to
    ``num_frames`` frames by ``sample_frames``, and each frame is resized by
    ``pad_and_resize``.

    Args:
        video_base64: Base64-encoded contents of a video file.
        num_frames: Number of frames the returned clip should contain.
        target_size: Size handed to the resize step for every frame.

    Returns:
        The tensor produced by ``pad_and_resize`` with its first two
        dimensions swapped: (T, C, H, W) -> (C, T, H, W).

    Raises:
        ValueError: If the first video stream yields no frames.
    """
    video_data = base64.b64decode(video_base64)

    # Use the container as a context manager so its demuxer/decoder resources
    # are released even when decoding raises part-way through.
    with av.open(io.BytesIO(video_data)) as container:
        frames = [
            frame.to_ndarray(format="rgb24").astype(np.uint8)
            for frame in container.decode(video=0)
        ]

    # Without any frame there is no shape to pad with; fail with a clear
    # message instead of an IndexError deep inside sample_frames.
    if not frames:
        raise ValueError("video contains no decodable frames")

    sampled_frames = sample_frames(frames, num_frames)
    processed_frames = pad_and_resize(sampled_frames, target_size)
    return processed_frames.permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W)

def sample_frames(frames, num_frames):
    """Return exactly ``num_frames`` frames as a single NumPy array.

    When more frames are available than requested, frames are picked at
    evenly spaced indices running from the first to the last frame.
    Otherwise all frames are kept, in order, and all-zero frames shaped like
    the first frame are appended until the requested count is reached.

    The ``frames`` sequence passed in is not modified. An empty ``frames``
    with a positive ``num_frames`` raises ``IndexError`` because there is no
    first frame to model the padding on.
    """
    available = len(frames)

    # More than enough frames: subsample uniformly and return early.
    if available > num_frames:
        picks = np.linspace(0, available - 1, num=num_frames, dtype=int)
        return np.array([frames[idx] for idx in picks])

    # Not more than requested: keep everything and zero-pad the tail.
    # Work on a copy so the caller's list stays untouched.
    clip = list(frames)
    missing = num_frames - available
    clip.extend(np.zeros_like(frames[0]) for _ in range(missing))
    return np.array(clip)

    
    # total_frames = len(frames)
    # if total_frames <= num_frames:
    #     if total_frames < num_frames:
    #         padding = [np.zeros_like(frames[0]) for _ in range(num_frames - total_frames)]
    #         frames.extend(padding)
    # else:
    #     indices = np.linspace(0, total_frames - 1, num=num_frames, dtype=int)
    #     frames = [frames[i] for i in indices]
    # return np.array(frames)

def pad_and_resize(frames, target_size):
    """Resize every frame to ``target_size`` and stack the results.

    Each frame goes through three torchvision steps in order: conversion to
    a PIL image, ``Resize(target_size)``, and conversion to a tensor. The
    per-frame tensors are then stacked along a new leading dimension.

    NOTE(review): despite the name, no padding is applied here; frames are
    only resized.

    Args:
        frames: Iterable of frames accepted by ``transforms.ToPILImage``.
        target_size: Size argument forwarded to ``transforms.Resize``.

    Returns:
        A single tensor holding all processed frames, one per entry along
        the first dimension.
    """
    to_pil = transforms.ToPILImage()
    resize = transforms.Resize(target_size)
    to_tensor = transforms.ToTensor()

    tensors = []
    for frame in frames:
        image = resize(to_pil(frame))
        tensors.append(to_tensor(image))

    return torch.stack(tensors)


# def pad_and_resize(frames, target_size):
#     transform = transforms.Compose([
#         transforms.ToPILImage(),
#         transforms.Resize(target_size),
#         transforms.ToTensor()
#     ])
#     processed_frames = [transform(frame) for frame in frames]
#     return torch.stack(processed_frames).permute(1, 0, 2, 3)  # (T, C, H, W) -> (C, T, H, W)