File size: 5,526 Bytes
41e3185
 
 
 
 
 
 
 
 
 
7effa0a
 
 
41e3185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7effa0a
 
 
 
41e3185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7effa0a
 
 
 
 
 
 
5ed2b48
 
 
 
 
 
 
 
 
 
7effa0a
 
 
 
 
 
 
41e3185
5ed2b48
 
 
 
 
 
 
 
 
 
41e3185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60aef3c
41e3185
 
60aef3c
41e3185
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
import os
from typing import List, Optional

import numpy as np
import pandas as pd
import torch

from libs.class_id_map import get_n_classes

import pickle
import re 



# Public API of this module. NOTE(review): get_class_nums and
# get_pos_weight_BABEL3 are omitted — confirm that is deliberate.
__all__ = ["get_pos_weight", "get_class_weight"]

# Valid values for the ``mode`` argument of the helpers below.
modes = ["training", "trainval"]

def get_class_nums(
    dataset: str,
    split: int = 1,
    dataset_dir: str = "./dataset/",
    csv_dir: str = "./csv",
    mode: str = "trainval",
) -> List[int]:
    """
    Count per-class frame occurrences over the chosen dataset split(s).

    Args:
        dataset: dataset name (sub-directory of ``csv_dir``).
        split: split index selecting ``train{split}.csv`` / ``val{split}.csv``.
        dataset_dir: root directory forwarded to ``get_n_classes``.
        csv_dir: directory containing the per-dataset csv index files.
        mode: "training" (train csv only) or "trainval" (train + val csv).

    Returns:
        List of length ``n_classes`` where entry ``i`` is the total number of
        frames labeled with class ``i`` across all label files of the split.
    """
    assert (
        mode in modes
    ), "You have to choose 'training' or 'trainval' as the dataset mode."

    # Apply .format to the filename, not the joined path, so braces in
    # csv_dir/dataset can never break the substitution (the original applied
    # it to the whole joined path in the "training" branch).
    if mode == "training":
        df = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
    else:  # mode == "trainval" (guaranteed by the assert above)
        df1 = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
        df2 = pd.read_csv(os.path.join(csv_dir, dataset, "val{}.csv".format(split)))
        df = pd.concat([df1, df2])

    n_classes = get_n_classes(dataset, dataset_dir)

    nums = [0] * n_classes
    # Each row's "label" column holds the path to a per-video .npy label array.
    for label_path in df["label"]:
        label = np.load(label_path).astype(np.int64)
        classes, counts = np.unique(label, return_counts=True)
        for cls, cnt in zip(classes, counts):
            nums[cls] += cnt

    return nums

def get_class_weight(
    dataset: str,
    split: int = 1,
    dataset_dir: str = "./dataset",
    csv_dir: str = "./csv",
    mode: str = "trainval",
) -> torch.Tensor:
    """
    Compute per-class weights for CrossEntropy loss.

    Uses median-frequency balancing as described in:
        D. Eigen and R. Fergus, "Predicting depth, surface normals and semantic
        labels with a common multi-scale convolutional architecture," ICCV 2015.
        https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Eigen_Predicting_Depth_Surface_ICCV_2015_paper.pdf

    Returns:
        1-D float tensor of length ``n_classes``: median(freq) / freq, so rare
        classes receive weight > 1 and frequent classes weight < 1.
    """
    counts = torch.tensor(
        get_class_nums(dataset, split, dataset_dir, csv_dir, mode)
    )
    # Relative frequency of each class over all labeled frames.
    frequency = counts.float() / counts.sum().item()
    return torch.median(frequency) / frequency


babel = r"BABEL.*"  


def get_pos_weight(
    dataset: str,
    split: int = 1,
    csv_dir: str = "./csv",
    mode: str = "trainval",
    norm: Optional[float] = None,
) -> torch.Tensor:
    """
    pos_weight for ``BCEWithLogitsLoss``.

    Defined as the reciprocal of the ratio of positive (boundary) frames in
    the dataset, optionally divided by ``norm``.

    Args:
        dataset: dataset name; names matching ``BABEL.*`` are loaded from
            pickle files, all others from csv index files + .npy boundaries.
        split: split index for the csv-based datasets.
        csv_dir: directory containing the per-dataset csv index files.
        mode: "training" or "trainval".
        norm: optional divisor applied to the final weight.

    Returns:
        0-dim tensor holding the scalar pos_weight.
    """
    assert (
        mode in modes
    ), "You have to choose 'training' or 'trainval' as the dataset mode"

    nums = [0, 0]  # [non-boundary frame count, boundary frame count]

    if not re.match(babel, dataset):
        # csv-based datasets: each row's "boundary" column points to a .npy
        # array of 0/1 boundary labels. Apply .format to the filename, not the
        # joined path (the original "training" branch formatted the full path).
        if mode == "training":
            df = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
        else:  # "trainval" (guaranteed by the assert above)
            df1 = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
            df2 = pd.read_csv(os.path.join(csv_dir, dataset, "val{}.csv".format(split)))
            df = pd.concat([df1, df2])

        for boundary_path in df["boundary"]:
            # NOTE(review): allow_pickle=True executes pickle on load — only
            # safe for trusted, locally generated files.
            label = np.load(boundary_path, allow_pickle=True).astype(np.int64)
            vals, cnts = np.unique(label, return_counts=True)
            for v, c in zip(vals, cnts):
                nums[v] += c

    else:
        # BABEL datasets: frame labels live in a pickle dict under key "L";
        # boundaries are derived on the fly from label changes.
        # NOTE(review): for mode == "trainval" only the val split is loaded
        # here — preserved from the original; confirm this is intended.
        prefix = "train_split" if mode == "training" else "val_split"
        pkl_path = "./dataset/{}/{}{}.pkl".format(dataset, prefix, str(dataset)[-1])
        with open(pkl_path, "rb") as f:
            data = pickle.load(f, encoding="latin1")

        for label in data["L"]:
            boundary = np.zeros_like(label)
            # A frame is a boundary when its label differs from the previous one.
            boundary[1:] = label[1:] != label[:-1]
            boundary[0] = 1  # the first frame always counts as a boundary
            vals, cnts = np.unique(boundary, return_counts=True)
            for v, c in zip(vals, cnts):
                nums[v] += c

    pos_ratio = nums[1] / sum(nums)
    pos_weight = 1 / pos_ratio

    if norm is not None:
        pos_weight /= norm

    return torch.tensor(pos_weight)

def get_pos_weight_BABEL3(
    dataset: str,
    split: int = 1,
    csv_dir: str = "./csv",
    mode: str = "trainval",
    norm: Optional[float] = None,
) -> torch.Tensor:
    """
    pos_weight for ``BCEWithLogitsLoss``, hard-coded to the BABEL3 split.

    Defined as the reciprocal of the ratio of positive (boundary) frames,
    optionally divided by ``norm``. ``dataset``, ``split`` and ``csv_dir``
    are accepted for signature parity with ``get_pos_weight`` but unused.

    Returns:
        0-dim tensor holding the scalar pos_weight.
    """
    assert (
        mode in modes
    ), "You have to choose 'training' or 'trainval' as the dataset mode"

    # BUG FIX: the original second branch tested ``mode == "val"`` which the
    # assert above never allows, so the default mode "trainval" left ``df``
    # unassigned and raised NameError. Treat every non-"training" mode as the
    # validation split, matching get_pos_weight's BABEL branch.
    if mode == "training":
        pkl_path = "./dataset/BABEL3/train_split3.pkl"
    else:
        # BUG FIX: the original val path './BABEL3/val_split3.pkl' was missing
        # the './dataset/' prefix used everywhere else in this module.
        pkl_path = "./dataset/BABEL3/val_split3.pkl"

    with open(pkl_path, "rb") as f:
        df = pickle.load(f, encoding="latin1")

    nums = [0, 0]  # [non-boundary frame count, boundary frame count]
    for label in df["L"]:
        boundary = np.zeros_like(label)
        # A frame is a boundary when its label differs from the previous one.
        boundary[1:] = label[1:] != label[:-1]
        boundary[0] = 1  # the first frame always counts as a boundary
        vals, cnts = np.unique(boundary, return_counts=True)
        for v, c in zip(vals, cnts):
            nums[v] += c

    pos_ratio = nums[1] / sum(nums)
    pos_weight = 1 / pos_ratio

    if norm is not None:
        pos_weight /= norm

    return torch.tensor(pos_weight)