TAAS / utils.py
zy414775's picture
upload
c08e521
raw
history blame
4.98 kB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
# @File : utils.py
# @Author : 刘建林(霜旻)
# @Email : liujianlin.ljl@alibaba-inc.com
# @Time : 2022/10/27 下午8:52
"""
import operator
import pickle
import numpy as np
import pandas as pd
# Encoding table: each lowercase hexadecimal character ('0'-'9', 'a'-'f')
# maps to its integer value 0-15, in ascending order.
s2_label_dict = {symbol: value for value, symbol in enumerate('0123456789abcdef')}

# Decoding table: the inverse lookup, integer 0-15 back to its character.
s2_label_decode_dict = {value: symbol for symbol, value in s2_label_dict.items()}
# 33 float weights, written as runs of equal values. The entries nominally
# sum to 1.0 and the final three are zero.
# NOTE(review): presumably one weight per slot of the 33-long index vectors
# used elsewhere in this module; the consumer is not visible here - confirm.
s2_weights = (
    [0.025] * 9
    + [0.0325] * 3
    + [0.035] * 3
    + [0.0375] * 3
    + [0.04] * 3
    + [0.0425] * 3
    + [0.045, 0.045, 0.0475]  # last entry of this group is intentionally larger
    + [0.025] * 3
    + [0.0] * 3
)
def generate_s2_index(s2_label):
    """Encode a label string as a fixed-length list of 33 integer codes.

    Each character of ``s2_label`` is looked up in ``s2_label_dict`` and
    written to the matching position; positions past the end of the label
    stay 0.

    Args:
        s2_label: Iterable of characters that are keys of ``s2_label_dict``.

    Returns:
        A list of exactly 33 ints.

    Raises:
        KeyError: If a character is not a key of ``s2_label_dict``.
        IndexError: If the label has more than 33 characters.
    """
    codes = [0] * 33
    position = 0
    for symbol in s2_label:
        codes[position] = s2_label_dict[symbol]
        position += 1
    return codes
def decode_s2(x):
    """Turn a sequence of integer codes back into its label string.

    Args:
        x: Iterable of keys of ``s2_label_decode_dict``.

    Returns:
        The concatenation of the decoded characters, in input order.

    Raises:
        KeyError: If a code is not a key of ``s2_label_decode_dict``.
    """
    return ''.join(s2_label_decode_dict[code] for code in x)
def sample_csv2pkl(csv_path, pkl_path):
    """Convert a '^'-separated sample CSV into a pickled list of samples.

    Every CSV row becomes one sample made of six node entries. For node
    ``k`` (1-6) the columns ``node_t<k>``, ``poi_address_mask<k>`` and
    ``node<k>`` are read, and the entry is the list::

        [node_t<k>, poi_address_mask<k>, node<k>, index, label]

    where ``index`` is ``generate_s2_index(node<k>)`` and ``label`` is the
    concatenation of the six ``index`` lists. The same ``label`` list object
    is shared by all six entries of a sample.

    Args:
        csv_path: Path of the input CSV (read with ``encoding="utf_8_sig"``).
        pkl_path: Path of the pickle file to write (overwritten).

    NOTE(review): pandas infers column dtypes, so a ``node<k>`` column that
    holds only decimal digits may not come back as strings - confirm the
    input data before relying on this.
    """
    frame = pd.read_csv(csv_path, sep='^', encoding="utf_8_sig")

    samples = []
    for _, record in frame.iterrows():
        nodes = []
        for slot in range(1, 7):
            node_column = 'node{}'.format(slot)
            nodes.append([
                record['node_t{}'.format(slot)],
                record['poi_address_mask{}'.format(slot)],
                record[node_column],
                generate_s2_index(record[node_column]),
            ])

        # One label per sample: all six index vectors back to back.
        shared_label = []
        for node in nodes:
            shared_label.extend(node[3])

        # Every node of the sample references the very same label list.
        for node in nodes:
            node.append(shared_label)

        samples.append(nodes)

    with open(pkl_path, 'wb') as sink:
        pickle.dump(samples, sink)
def calculate_multi_s2_acc(predicted_s2, y):
    """Count exact prefix matches at seven nested granularities.

    Both inputs are reshaped to rows of 33 values. For each row, the slice
    starting at position 12 and ending at 15, 18, 21, 24, 27, 30 and 33 is
    compared between target and prediction; a counter is incremented for
    every slice that matches completely. Positions 0-11 are never compared.

    Args:
        predicted_s2: Predicted codes; must support ``.view(-1, 33)`` and
            ``.tolist()`` (e.g. a torch tensor).
        y: Target codes with the same requirements; it must not have more
            rows than ``predicted_s2``.

    Returns:
        A NumPy integer array of length 7; entry ``c`` is the number of rows
        whose positions ``12 .. 14 + 3 * c`` all match.
    """
    target_rows = y.view(-1, 33).tolist()
    guess_rows = predicted_s2.view(-1, 33).tolist()

    hits = np.array([0] * 7)
    # Exclusive end offsets of the compared slices: 15, 18, ..., 33.
    slice_ends = [start + 3 for start in range(12, 33, 3)]

    for row_no, target in enumerate(target_rows):
        guess = guess_rows[row_no]
        for level, end in enumerate(slice_ends):
            if target[12:end] == guess[12:end]:
                hits[level] += 1
    return hits
def calculate_multi_s2_acc_batch(predicted_s2, y, sequence_len = 6):
    """Count exact prefix matches at seven granularities over batched sequences.

    Both inputs are reshaped to ``(-1, sequence_len, 33)``. For every row of
    every sequence, the slice starting at position 12 and ending at 15, 18,
    21, 24, 27, 30 and 33 is compared between target and prediction, and a
    counter is incremented for each slice that matches completely.

    Args:
        predicted_s2: Predicted codes; must support
            ``.view(-1, sequence_len, 33)`` and ``.tolist()`` (e.g. a torch
            tensor).
        y: Target codes with the same requirements.
        sequence_len: Number of 33-value rows per batch element.

    Returns:
        A NumPy integer array of length 7; entry ``c`` is the number of rows
        (summed over the whole batch) whose positions ``12 .. 14 + 3 * c``
        all match.
    """
    target_batches = y.view(-1, sequence_len, 33).tolist()
    guess_batches = predicted_s2.view(-1, sequence_len, 33).tolist()

    hits = np.array([0] * 7)
    # Exclusive end offsets of the compared slices: 15, 18, ..., 33.
    slice_ends = [start + 3 for start in range(12, 33, 3)]

    for batch_no, target_rows in enumerate(target_batches):
        guess_rows = guess_batches[batch_no]
        for row_no, target in enumerate(target_rows):
            guess = guess_rows[row_no]
            for level, end in enumerate(slice_ends):
                if target[12:end] == guess[12:end]:
                    hits[level] += 1
    return hits
def calculate_alias_acc(predicted, y):
    """Count alias-classification hits, with class ``1`` as the positive class.

    Args:
        predicted: Indexable sequence of predicted class ids (anything
            ``int()`` accepts); must be at least as long as ``y``.
        y: Iterable of ground-truth class ids, aligned with ``predicted``.

    Returns:
        A ``(tp, fn, acc)`` tuple of ints:

        * ``tp``  - samples whose label is 1 and whose prediction is also 1;
        * ``fn``  - samples whose label is 1. The historical name is kept for
          callers; it is the total number of positive labels, so
          ``tp / fn`` is the recall on the positive class;
        * ``acc`` - samples whose prediction equals the label.

    Raises:
        IndexError: If ``predicted`` is shorter than ``y``.
    """
    tp = fn = acc = 0
    for index, label in enumerate(y):
        truth = int(label)
        guess = int(predicted[index])
        if truth == guess:
            acc += 1
        if truth == 1:
            fn += 1
            # A hit only counts as a true positive when the label itself is
            # positive; counting every positive prediction would let tp
            # exceed the number of positive labels.
            if guess == 1:
                tp += 1
    return tp, fn, acc
def calculate_aoi_acc(predicted, y):
    """Count AOI-classification hits, with class ``0`` as the positive class.

    Args:
        predicted: Indexable sequence of predicted class ids (anything
            ``int()`` accepts); must be at least as long as ``y``.
        y: Iterable of ground-truth class ids, aligned with ``predicted``.

    Returns:
        A ``(tp, fn, acc)`` tuple of ints:

        * ``tp``  - samples whose label is 0 and whose prediction is also 0;
        * ``fn``  - samples whose label is 0. The historical name is kept for
          callers; it is the total number of class-0 labels, so
          ``tp / fn`` is the recall on class 0;
        * ``acc`` - samples whose prediction equals the label.

    Raises:
        IndexError: If ``predicted`` is shorter than ``y``.
    """
    tp = fn = acc = 0
    for index, label in enumerate(y):
        truth = int(label)
        guess = int(predicted[index])
        if truth == guess:
            acc += 1
        if truth == 0:
            fn += 1
            # A hit only counts as a true positive when the label itself is
            # class 0; counting every class-0 prediction would let tp exceed
            # the number of class-0 labels.
            if guess == 0:
                tp += 1
    return tp, fn, acc