| |
| """ |
| stats.py |
| |
| Tools to calculate statistics |
| |
| """ |
| from __future__ import absolute_import |
|
|
| import os |
| import sys |
| import numpy as np |
| import core_scripts.other_tools.display as nii_display |
| import core_scripts.data_io.conf as nii_dconf |
|
|
| __author__ = "Xin Wang" |
| __email__ = "wangxin@nii.ac.jp" |
| __copyright__ = "Copyright 2020, Xin Wang" |
|
|
|
|
def f_var2std(var):
    """Convert a variance array into a floored standard-deviation array.

    std = f_var2std(var)

    Args:
      var: np.array, variance

    Return:
      std: np.array, standard deviation (a new array, var is not modified)

    std = sqrt(var), then
      std[var < 0] = 1.0
      std[std < nii_dconf.std_floor] = 1.0

    NOTE(review): 1.0 is presumably chosen so that normalizing by std
    leaves such dimensions unscaled -- confirm against the callers.
    """
    negative_idx = var < 0

    # Negative variances are overwritten just below, so the "invalid value
    # encountered in sqrt" RuntimeWarning they would trigger is only noise.
    with np.errstate(invalid="ignore"):
        std = np.sqrt(var)
    std[negative_idx] = 1.0

    # Floor tiny deviations (NaN entries compare False and are left as is).
    floored_idx = std < nii_dconf.std_floor
    std[floored_idx] = 1.0
    return std
| |
|
|
def f_online_mean_std(data, mean_old, var_old, cnt_old):
    """Merge one batch of data into running mean / variance statistics.

    mean, var, count = f_online_mean_std(data, mean_old, var_old, cnt_old)

    The statistics of the batch are combined with the accumulated ones
    using the parallel algorithm for variance. Variances are the ones
    returned by data.var(), i.e. normalized by the number of rows.

    Args:
      data: input data as numpy.array, in shape [length, dimension]
            or [length] (treated as dimension 1)
      mean_old: mean to be updated, np.array [dimension]
      var_old: var to be updated, np.array [dimension]
      cnt_old: how many data rows have been accumulated before this call

    Return:
      mean: updated mean, np.array [dimension]
      var: updated var, np.array [dimension]
      count: accumulated data number, = cnt_old + data.shape[0]

      If data has no rows, (mean_old, var_old, cnt_old) is returned as is.

    Raises:
      ValueError: when the statistics cannot be combined (for example
        because of incompatible shapes). If the first-axis sizes of
        mean_old / var_old do not match the data dimension, the error is
        first reported through nii_display.f_print / nii_display.f_die.

    Ref. parallel algorithm
    https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance
    """
    cnt_this = data.shape[0]

    # An empty batch carries no information: keep the old statistics.
    if cnt_this == 0:
        return mean_old, var_old, cnt_old

    try:
        if data.ndim == 1:
            # single-dimension data: scalar statistics
            mean_this = data.mean()
            var_this = data.var()
        else:
            # per-dimension statistics over the rows
            mean_this = data.mean(axis=0)
            var_this = data.var(axis=0)

        updated_count = cnt_old + cnt_this
        diff_mean = mean_this - mean_old

        # weighted shift of the old mean towards the batch mean
        new_mean = mean_old + diff_mean * (float(cnt_this) / updated_count)

        if cnt_old == 0:
            # first batch: its variance is the accumulated variance
            if data.ndim == 1:
                # wrap the scalar so that the result has shape [1]
                new_var = np.zeros([1], dtype=nii_dconf.h_dtype)
                new_var[0] = var_this
            else:
                new_var = var_this
        else:
            # weighted average of both variances plus the between-group
            # term diff^2 * cnt_old * cnt_this / (cnt_old + cnt_this)^2,
            # written in a form that avoids squaring the counts
            new_var = (var_old * (float(cnt_old) / updated_count)
                       + var_this * (float(cnt_this) / updated_count)
                       + (diff_mean * diff_mean
                          / (float(cnt_this) / cnt_old
                             + float(cnt_old) / cnt_this
                             + 2.0)))

        return new_mean, new_var, updated_count

    except ValueError:
        expected_dim = data.shape[1] if data.ndim > 1 else 1
        if mean_old.shape[0] != expected_dim or \
           var_old.shape[0] != expected_dim:
            nii_display.f_print("Dimension incompatible", "error")
            nii_display.f_die("Error in online mean var calculation")
        # Any other ValueError must not be swallowed: falling through here
        # would silently return None to a caller that unpacks three values.
        raise
| |
|
|
# Script entry point: nothing is executed when this module is run directly.
if __name__ == "__main__":
    pass
|
|