lmzjms's picture
Upload 1162 files
0b32ad6 verified
raw
history blame
3.28 kB
# -*- coding: utf-8 -*- #
"""*********************************************************************************************"""
# FileName [ observe_lnsr.py ]
# Synopsis [ a script for calculating the "layerwise noise to signal ratio" (LNSR) proposed in https://arxiv.org/abs/2006.03214]
# Author [ Andy T. Liu (Andi611) ]
# Copyright [ Copyleft(c), Speech Lab, NTU, Taiwan ]
"""*********************************************************************************************"""
###############
# IMPORTATION #
###############
import os
import torch
import numpy as np
from pathlib import Path
from transformer.nn_transformer import TRANSFORMER
def compute_lnsr(real, adve, norm_L2=True):
real = real.reshape(real.shape[0], -1)
adve = adve.reshape(adve.shape[0], -1)
l2 = np.linalg.norm(real - adve, ord=2)
if norm_L2:
l2 /= np.linalg.norm(real, ord=2)
return l2
def run_over_layer(layer, real_todo, adve_todo):
if layer != 'feature':
options = {
'ckpt_file' : 'result/result_transformer/mockingjay/LinearLarge-libri/model-500000.ckpt',
'load_pretrain' : 'True',
'no_grad' : 'True',
'dropout' : 'default',
'spec_aug' : 'False',
'spec_aug_prev' : 'True',
'weighted_sum' : 'False',
'select_layer' : layer,
'permute_input' : 'True',
}
mockingjay = TRANSFORMER(options=options, inp_dim=160)
mockingjay.eval()
episode = []
for i in range(len(real_todo)):
r = torch.FloatTensor(np.load(real_todo[i])).unsqueeze(1)
a = torch.FloatTensor(np.load(adve_todo[i])).unsqueeze(1)
min_len = min(r.shape[0], a.shape[0])
if layer == 'feature':
l2 = compute_lnsr(r[:min_len].data.cpu().numpy(),
a[:min_len].data.cpu().numpy(),
norm_L2=True)
else:
r_hidd = mockingjay(r[:min_len])
a_hidd = mockingjay(a[:min_len])
l2 = compute_lnsr(r_hidd.data.cpu().numpy(),
a_hidd.data.cpu().numpy(),
norm_L2=True)
episode.append(l2)
return np.mean(episode)
def main():
num_layers = 12
data_path = 'data/adversarial/' # this can be downloaded from the S3PRL google drive
data_type = ['fgsm_8.0', 'fgsm_16.0', 'pgd_8.0', 'pgd_16.0']
data_type = data_type[0] # select data type here
real_data = os.path.join(data_path, 'original')
adve_data = os.path.join(data_path, data_type)
real_todo = sorted(list(Path(real_data).rglob("*.npy")))
adve_todo = sorted(list(Path(adve_data).rglob("*.npy")))
assert len(real_todo) == len(adve_todo)
print('Number of data: ', len(real_todo))
dist = run_over_layer('feature', real_todo, adve_todo)
print('[Original v.s. ' + data_type + '] Acoustic distance: ', dist)
dist = []
for i in range(num_layers):
m = run_over_layer(i, real_todo, adve_todo)
dist.append(m)
print('Layer: ', i+1, 'Mse: ', m)
print('[Original v.s. ' + data_type + '] Hidden rep distance over all layers (1->12): ', dist)
if __name__ == '__main__':
main()