import random

import numpy as np
import torch
from torch.nn.utils.rnn import pad_sequence


def downsample(data_numpy, step, random_sample=True):
    # input: C,T,V,M
    begin = np.random.randint(step) if random_sample else 0
    return data_numpy[:, begin::step, :, :]


def temporal_slice(data_numpy, step):
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    return (
        data_numpy.reshape(C, T // step, step, V, M)
        .transpose((0, 1, 3, 2, 4))
        .reshape(C, T // step, V, step * M)
    )


def mean_subtractor(data_numpy, mean):
    # input: C,T,V,M
    # naive version
    if mean == 0:
        return data_numpy
    C, T, V, M = data_numpy.shape
    valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    begin = valid_frame.argmax()
    end = len(valid_frame) - valid_frame[::-1].argmax()
    data_numpy[:, :end, :, :] = data_numpy[:, :end, :, :] - mean
    return data_numpy


def auto_pading(data_numpy, size, random_pad=False):
    # Zero-pad the sequence along the temporal axis up to `size` frames.
    C, T, V, M = data_numpy.shape
    if T < size:
        begin = random.randint(0, size - T) if random_pad else 0
        data_numpy_paded = np.zeros((C, size, V, M))
        data_numpy_paded[:, begin : begin + T, :, :] = data_numpy
        return data_numpy_paded
    else:
        return data_numpy


def random_choose(data_numpy, size, auto_pad=True):
    # input: C,T,V,M
    # Randomly pick a temporal segment of length `size`. Not entirely
    # reasonable, because zero-padded frames may be selected.
    C, T, V, M = data_numpy.shape
    if T == size:
        return data_numpy
    elif T < size:
        if auto_pad:
            return auto_pading(data_numpy, size, random_pad=True)
        else:
            return data_numpy
    else:
        begin = random.randint(0, T - size)
        return data_numpy[:, begin : begin + size, :, :]


def random_move(
    data_numpy,
    angle_candidate=[-10.0, -5.0, 0.0, 5.0, 10.0],
    scale_candidate=[0.9, 1.0, 1.1],
    transform_candidate=[-0.2, -0.1, 0.0, 0.1, 0.2],
    move_time_candidate=[1],
):
    # input: C,T,V,M
    C, T, V, M = data_numpy.shape
    move_time = random.choice(move_time_candidate)
    node = np.arange(0, T, T * 1.0 / move_time).round().astype(int)
    node = np.append(node, T)
    num_node = len(node)

    A = np.random.choice(angle_candidate, num_node)
    S = np.random.choice(scale_candidate, num_node)
    T_x = np.random.choice(transform_candidate, num_node)
    T_y = np.random.choice(transform_candidate, num_node)

    a = np.zeros(T)
    s = np.zeros(T)
    t_x = np.zeros(T)
    t_y = np.zeros(T)

    # linearly interpolate angle, scale and translation between the key frames
    for i in range(num_node - 1):
        a[node[i] : node[i + 1]] = (
            np.linspace(A[i], A[i + 1], node[i + 1] - node[i]) * np.pi / 180
        )
        s[node[i] : node[i + 1]] = np.linspace(S[i], S[i + 1], node[i + 1] - node[i])
        t_x[node[i] : node[i + 1]] = np.linspace(
            T_x[i], T_x[i + 1], node[i + 1] - node[i]
        )
        t_y[node[i] : node[i + 1]] = np.linspace(
            T_y[i], T_y[i + 1], node[i + 1] - node[i]
        )

    theta = np.array(
        [[np.cos(a) * s, -np.sin(a) * s], [np.sin(a) * s, np.cos(a) * s]]
    )  # rotation matrix

    # perform transformation
    for i_frame in range(T):
        xy = data_numpy[0:2, i_frame, :, :]
        new_xy = np.dot(theta[:, :, i_frame], xy.reshape(2, -1))
        new_xy[0] += t_x[i_frame]
        new_xy[1] += t_y[i_frame]  # translation
        data_numpy[0:2, i_frame, :, :] = new_xy.reshape(2, V, M)

    return data_numpy


def random_shift(data_numpy):
    # input: C,T,V,M
    # Shift the valid (non-zero) segment to a random temporal position.
    C, T, V, M = data_numpy.shape
    data_shift = np.zeros(data_numpy.shape)
    valid_frame = (data_numpy != 0).sum(axis=3).sum(axis=2).sum(axis=0) > 0
    begin = valid_frame.argmax()
    end = len(valid_frame) - valid_frame[::-1].argmax()

    size = end - begin
    bias = random.randint(0, T - size)
    data_shift[:, bias : bias + size, :, :] = data_numpy[:, begin:end, :, :]

    return data_shift
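# Illustrative sketch (not part of the original pipeline): how the augmentation
# helpers above can be chained on a single skeleton clip. The clip shape
# (C=3, T=100, V=25, M=2) is an assumed example, not a requirement of this module.
def _demo_augmentations():
    data = np.random.randn(3, 100, 25, 2)  # C,T,V,M
    data = random_shift(data)              # move the valid segment in time
    data = random_choose(data, size=64)    # random temporal crop (or pad) to 64 frames
    data = random_move(data)               # random rotation / scale / translation
    data = downsample(data, step=2)        # temporal subsampling
    return data                            # shape: (3, 32, 25, 2)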
def openpose_match(data_numpy):
    C, T, V, M = data_numpy.shape
    assert C == 3
    score = data_numpy[2, :, :, :].sum(axis=1)
    # the rank of body confidence in each frame (shape: T-1, M)
    rank = (-score[0 : T - 1]).argsort(axis=1).reshape(T - 1, M)

    # data of frame 1
    xy1 = data_numpy[0:2, 0 : T - 1, :, :].reshape(2, T - 1, V, M, 1)
    # data of frame 2
    xy2 = data_numpy[0:2, 1:T, :, :].reshape(2, T - 1, V, 1, M)
    # square of distance between frame 1&2 (shape: T-1, M, M)
    distance = ((xy2 - xy1) ** 2).sum(axis=2).sum(axis=0)

    # match pose
    forward_map = np.zeros((T, M), dtype=int) - 1
    forward_map[0] = range(M)
    for m in range(M):
        choose = rank == m
        forward = distance[choose].argmin(axis=1)
        for t in range(T - 1):
            distance[t, :, forward[t]] = np.inf
        forward_map[1:][choose] = forward
    assert np.all(forward_map >= 0)

    # chain the per-frame matches so every index refers back to frame 0
    for t in range(T - 1):
        forward_map[t + 1] = forward_map[t + 1][forward_map[t]]

    # generate data
    new_data_numpy = np.zeros(data_numpy.shape)
    for t in range(T):
        new_data_numpy[:, t, :, :] = data_numpy[:, t, :, forward_map[t]].transpose(
            1, 2, 0
        )
    data_numpy = new_data_numpy

    # sort bodies by total confidence score
    trace_score = data_numpy[2, :, :, :].sum(axis=1).sum(axis=0)
    rank = (-trace_score).argsort()
    data_numpy = data_numpy[:, :, :, rank]

    return data_numpy


def pad(tensor, padding_value=0):
    # Pad a list of variable-length tensors into one batch-first tensor.
    return pad_sequence(tensor, batch_first=True, padding_value=padding_value)


def collate_with_padding(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[1]) for item in batch]
    gt = [torch.tensor(item[2]) for item in batch]
    mask = [torch.tensor(item[3]) for item in batch]

    data = pad(data).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, target, gt, mask]


def collate_with_padding_multi(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[1]) for item in batch]
    gt = [torch.tensor(item[2]) for item in batch]
    mask = [torch.tensor(item[3]) for item in batch]

    data = pad(data).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, target, gt, mask]


def collate_with_padding_multi_velo(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]
    velo = [torch.tensor(item[1].transpose(1, 0, 2, 3)) for item in batch]
    target = [torch.tensor(item[2]) for item in batch]
    gt = [torch.tensor(item[3]) for item in batch]
    mask = [torch.tensor(item[4]) for item in batch]

    data = pad(data).transpose(1, 2)
    velo = pad(velo).transpose(1, 2)
    target = torch.stack(target)
    gt = pad(gt)
    mask = pad(mask)
    return [data, velo, target, gt, mask]


def collate_with_padding_multi_joint(batch):
    data = [torch.tensor(item[0].transpose(1, 0, 2, 3)) for item in batch]  # C,T,V,M -> T,C,V,M
    target = [torch.tensor(item[1]) for item in batch]  # video-level label
    gt = [torch.tensor(item[2]) for item in batch]  # frame-level labels
    mask = [torch.tensor(item[3]) for item in batch]
    index = [torch.tensor(item[4]) for item in batch]
    soft_label = [torch.tensor(item[5]) for item in batch]

    data = pad(data).transpose(1, 2)  # pad the joint sequences with zeros, not with the last frame
    target = torch.stack(target)
    # Pad the frame-level labels with 4, so the padding value acts as the
    # 'background' class; with 4 action classes the labels become 0, 1, 2, 3, 4.
    gt = pad(gt, padding_value=4)
    mask = pad(mask)
    index = torch.stack(index)
    # -100 matches PyTorch's default ignore_index for cross-entropy losses.
    soft_label = pad(soft_label, padding_value=-100)
    return [data, target, gt, mask, index, soft_label]
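# Illustrative sketch (assumed data layout, not defined by this module): collate a
# hand-built batch of two variable-length samples, each with a skeleton of shape
# (C=3, T, V=25, M=2), a video-level label, frame-level labels and a frame mask.
if __name__ == "__main__":
    fake_batch = []
    for t in (40, 64):
        skel = np.random.randn(3, t, 25, 2).astype(np.float32)  # C,T,V,M
        label = np.int64(1)                                      # video-level label
        frame_gt = np.random.randint(0, 4, size=t)               # frame-level labels
        frame_mask = np.ones(t, dtype=np.float32)                # valid-frame mask
        fake_batch.append((skel, label, frame_gt, frame_mask))

    data, target, gt, mask = collate_with_padding_multi(fake_batch)
    print(data.shape, target.shape, gt.shape, mask.shape)
    # expected: (2, 3, 64, 25, 2), (2,), (2, 64), (2, 64)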