workshop / LaSA /libs /class_weight.py
qiushuocheng's picture
updat
5ed2b48
import os
from typing import List, Optional
import numpy as np
import pandas as pd
import torch
from libs.class_id_map import get_n_classes
import pickle
import re
__all__ = ["get_pos_weight", "get_class_weight"]
modes = ["training", "trainval"]
def get_class_nums(
dataset: str,
split: int = 1,
dataset_dir: str = "./dataset/",
csv_dir: str = "./csv",
mode: str = "trainval",
) -> List[int]:
assert (
mode in modes
), "You have to choose 'training' or 'trainval' as the dataset mode."
if mode == "training":
df = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv").format(split))
elif mode == "trainval":
df1 = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
df2 = pd.read_csv(os.path.join(csv_dir, dataset, "val{}.csv".format(split)))
df = pd.concat([df1, df2])
n_classes = get_n_classes(dataset, dataset_dir)
nums = [0 for i in range(n_classes)]
for i in range(len(df)):
label_path = df.iloc[i]["label"]
label = np.load(label_path).astype(np.int64)
num, cnt = np.unique(label, return_counts=True)
for n, c in zip(num, cnt):
nums[n] += c
return nums
def get_class_weight(
dataset: str,
split: int = 1,
dataset_dir: str = "./dataset",
csv_dir: str = "./csv",
mode: str = "trainval",
) -> torch.Tensor:
"""
Class weight for CrossEntropy
Class weight is calculated in the way described in:
D. Eigen and R. Fergus, “Predicting depth, surface normals and semantic labels with a common multi-scale convolutional architecture,” in ICCV,
openaccess: https://www.cv-foundation.org/openaccess/content_iccv_2015/papers/Eigen_Predicting_Depth_Surface_ICCV_2015_paper.pdf
"""
#get_class_nums
nums = get_class_nums(dataset, split, dataset_dir, csv_dir, mode)
class_num = torch.tensor(nums)
total = class_num.sum().item()
frequency = class_num.float() / total
median = torch.median(frequency)
class_weight = median / frequency
return class_weight
babel = r"BABEL.*"
def get_pos_weight(
dataset: str,
split: int = 1,
csv_dir: str = "./csv",
mode: str = "trainval",
norm: Optional[float] = None,
) -> torch.Tensor:
"""
pos_weight for binary cross entropy with logits loss
pos_weight is defined as reciprocal of ratio of positive samples in the dataset
"""
assert (
mode in modes
), "You have to choose 'training' or 'trainval' as the dataset mode"
if not re.match(babel, dataset):
if mode == "training":
df = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv").format(split))
elif mode == "trainval":
df1 = pd.read_csv(os.path.join(csv_dir, dataset, "train{}.csv".format(split)))
df2 = pd.read_csv(os.path.join(csv_dir, dataset, "val{}.csv".format(split)))
df = pd.concat([df1, df2])
n_classes = 2 # boundary or not
nums = [0 for i in range(n_classes)]
for i in range(len(df)):
label_path = df.iloc[i]["boundary"]
label = np.load(label_path, allow_pickle=True).astype(np.int64)
num, cnt = np.unique(label, return_counts=True)
for n, c in zip(num, cnt):
nums[n] += c
else:
if mode == "training":
with open('./dataset/'+str(dataset) +'/train_split'+str(dataset)[-1] +'.pkl',"rb") as f:
df = pickle.load(f,encoding="latin1")
else:
with open('./dataset/'+str(dataset) +'/val_split'+str(dataset)[-1] +'.pkl',"rb") as f:
df = pickle.load(f,encoding="latin1")
n_classes = 2 # boundary or not
nums = [0 for i in range(n_classes)]
for i in range(len(df["L"])):
label = df["L"][i]
boundary = np.zeros_like(label)
boundary[1:] = label[1:] != label[:-1]
boundary[0]=1
num, cnt = np.unique(boundary, return_counts=True)
for n, c in zip(num, cnt):
nums[n] += c
pos_ratio = nums[1] / sum(nums)
pos_weight = 1 / pos_ratio
if norm is not None:
pos_weight /= norm
return torch.tensor(pos_weight)
def get_pos_weight_BABEL3(
dataset: str,
split: int = 1,
csv_dir: str = "./csv",
mode: str = "trainval",
norm: Optional[float] = None,
) -> torch.Tensor:
"""
pos_weight for binary cross entropy with logits loss
pos_weight is defined as reciprocal of ratio of positive samples in the dataset
"""
assert (
mode in modes
), "You have to choose 'training' or 'trainval' as the dataset mode"
if mode == "training":
with open('./dataset/BABEL3/train_split3.pkl',"rb") as f:
df = pickle.load(f,encoding="latin1")
elif mode == "val":
with open('./BABEL3/val_split3.pkl',"rb") as f:
df = pickle.load(f,encoding="latin1")
n_classes = 2 # boundary or not
nums = [0 for i in range(n_classes)]
for i in range(len(df["L"])):
label = df["L"][i]
boundary = np.zeros_like(label)
boundary[1:] = label[1:] != label[:-1]
boundary[0]=1
num, cnt = np.unique(boundary, return_counts=True)
for n, c in zip(num, cnt):
nums[n] += c
pos_ratio = nums[1] / sum(nums)
pos_weight = 1 / pos_ratio
if norm is not None:
pos_weight /= norm
return torch.tensor(pos_weight)