# passgan/all.py
import os
import pickle as pk

import numpy as np
from sklearn import svm
from sklearn.preprocessing import OneHotEncoder  # one-hot encoding

# Load the data
def load_batch(file):  # read one batch of data
    with open(file, 'rb') as f:
        data_dict = pk.load(f, encoding='bytes')
    images = data_dict[b'data']
    labels = data_dict[b'labels']
    images = images.reshape(10000, 3072)
    labels = np.array(labels)
    return (images / 255), labels
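
# Background note: each CIFAR-10 batch file holds 10000 images under b'data'
# as uint8 rows of length 3072 (32x32 pixels x 3 colour channels, stored
# channel-major), plus b'labels' with integer class IDs 0-9; dividing by 255
# rescales the pixels to [0, 1].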

def load_data(data_dir):
    images_train = []
    labels_train = []
    for i in range(5):
        file = os.path.join(data_dir, 'data_batch_%d' % (i + 1))
        print('Loading file:', file)
        # Read the training set batch by batch and append each batch to the
        # image and label lists until all batches have been read.
        images_batch, labels_batch = load_batch(file)
        images_train.append(images_batch)
        labels_train.append(labels_batch)
    # Merge the per-batch arrays into a single array.
    x_train = np.concatenate(images_train)
    t_train = np.concatenate(labels_train)
    del images_batch, labels_batch
    # Load the test-set images and labels.
    x_test, t_test = load_batch(os.path.join(data_dir, 'test_batch'))
    return x_train, t_train, x_test, t_test
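
# After loading, x_train is (50000, 3072) and x_test is (10000, 3072), with
# t_train / t_test still 1-D integer label vectors until the one-hot encoding
# step at the bottom of the file.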

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)
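
# Identity used by sigmoid_grad: for s = sigmoid(x), ds/dx = s * (1 - s).
# Note that it takes the *pre-activation* x, matching how
# neuralNetwork.gradient calls it on the stored pre-activations below.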

def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T
    x = x - np.max(x)  # guard against overflow in np.exp
    return np.exp(x) / np.sum(np.exp(x))
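
# Illustrative check of the overflow guard (hypothetical values, not part of
# the original script):
#   softmax(np.array([1000.0, 1001.0]))
# Without the max subtraction, np.exp(1000) overflows to inf; with it, the
# call reduces to softmax([-1, 0]) and returns roughly [0.269, 0.731].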

class neuralNetwork:
    def __init__(self, numNeuronLayers, numNeurons_perLayer, learningRate):
        self.numNeurons_perLayer = numNeurons_perLayer
        self.numNeuronLayers = numNeuronLayers
        self.learningRate = learningRate
        self.weight = []
        self.bias = []
        # Small random initialisation; the learningRate factor simply scales
        # the initial weights down (0.05 here).
        for i in range(numNeuronLayers):
            self.weight.append(
                learningRate * np.random.randn(self.numNeurons_perLayer[i], self.numNeurons_perLayer[i + 1]))
            self.bias.append(np.zeros(self.numNeurons_perLayer[i + 1]))

    def predict(self, x):
        z = x
        # One forward pass: sigmoid hidden layers, softmax output.
        for i in range(self.numNeuronLayers - 1):
            a = np.dot(z, self.weight[i]) + self.bias[i]
            z = sigmoid(a)
        an = np.dot(z, self.weight[self.numNeuronLayers - 1]) + self.bias[self.numNeuronLayers - 1]
        y = softmax(an)
        return y

    def gradient(self, x, t):
        z = []
        a = []
        z.append(x)
        # Forward pass, keeping every pre-activation and activation for backprop.
        for i in range(self.numNeuronLayers):
            a.append(np.dot(z[i], self.weight[i]) + self.bias[i])
            z.append(sigmoid(a[i]))
        y = softmax(a[self.numNeuronLayers - 1])
        num = x.shape[0]
        dy = (y - t) / num  # softmax + cross-entropy gradient at the output layer
        dz = []
        da = []
        dz.append(dy)
        # Backward pass: dz[j] holds the gradient at layer (numNeuronLayers - 1 - j).
        for i in range(self.numNeuronLayers - 1):
            da.append(np.dot(dz[i], self.weight[self.numNeuronLayers - i - 1].T))
            dz.append(sigmoid_grad(a[self.numNeuronLayers - i - 2]) * da[i])
        # SGD update for every layer's weights and biases.
        for i in range(self.numNeuronLayers):
            self.weight[i] -= self.learningRate * np.dot(z[i].T, dz[self.numNeuronLayers - i - 1])
            self.bias[i] -= self.learningRate * np.sum(dz[self.numNeuronLayers - i - 1], axis=0)

    def loss(self, x, t):
        y = self.predict(x)
        t = t.argmax(axis=1)
        num = y.shape[0]
        s = y[np.arange(num), t]
        return -np.sum(np.log(s)) / num

    def accuracy(self, x, t):
        y = self.predict(x)
        p = np.argmax(y, axis=1)
        q = np.argmax(t, axis=1)
        acc = np.sum(p == q) / len(y)
        return acc
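
# Minimal usage sketch (illustrative shapes and values only):
#   net = neuralNetwork(2, [3072, 50, 10], 0.05)
#   xb = np.random.rand(100, 3072)                   # one mini-batch of inputs
#   tb = np.eye(10)[np.random.randint(0, 10, 100)]   # matching one-hot targets
#   net.gradient(xb, tb)                             # one in-place SGD update
#   print(net.loss(xb, tb), net.accuracy(xb, tb))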

def kNN(x_train, x_test, t_train, k):
    # For each test sample, count the one-hot labels of its k nearest
    # training samples; px[i][j] is the number of neighbours with class j.
    px = list()
    for i in range(len(x_test)):
        px.append([])
        for j in range(10):
            px[i].append(0)
    for i in range(len(x_test)):
        dis = getODistance(x_test[i], x_train)
        index = np.argsort(dis)
        count = list()
        for j in range(len(t_train[0])):
            count.append(0)
        for j in range(k):
            for w in range(10):
                if t_train[index[j]][w] == 1:
                    count[w] = count[w] + 1
        for j in range(10):
            px[i][j] = count[j]
    return px
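
# kNN returns raw neighbour counts per class rather than labels; np.argmax
# over each row (done in runKnn below) gives the majority-vote prediction,
# and the counts double as unnormalised scores for the final ensemble vote.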

def getODistance(sample, train):
    # Euclidean distance from one test sample to each of the 1000 training
    # rows; the row count is hard-coded to match the x_train[0:1000] slice
    # used in runKnn.
    a = np.tile(sample, [1000, 1])
    a = a - train
    a = np.square(a)
    a = a.sum(axis=1)
    dis = np.sqrt(a)
    dis = dis.T
    dis = dis.tolist()
    return dis[0]
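
# Shape sketch (illustrative): with train a (1000, 3072) np.matrix and sample
# a (1, 3072) row, the row-wise sum is a (1000, 1) column; transposing and
# taking tolist()[0] flattens it into a plain list of 1000 distances.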

def runNetwork():
    numNeuronLayers = 3
    numNeurons_perLayer = [3072, 50, 20, 10]
    learningRate = 0.05
    epoch = 50000  # number of mini-batch updates, not full passes over the data
    batch_size = 100
    train_size = x_train.shape[0]  # 50000
    net = neuralNetwork(numNeuronLayers, numNeurons_perLayer, learningRate)
    for i in range(epoch):
        batch_mask = np.random.choice(train_size, batch_size)  # 100 random indices from 0..49999
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
        net.gradient(x_batch, t_batch)
    y = net.predict(x_test[0:1000, 0:3072])
    p = np.argmax(y, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / len(y)
    print("Neural-network accuracy:", acc)
    return p

def runKnn(x_train, x_test):
    # np.mat keeps the matrix-based distance code working (np.matrix is
    # discouraged by NumPy in favour of plain ndarrays).
    x_train = np.mat(x_train)
    x_test = np.mat(x_test)
    px = kNN(x_train[0:1000, 0:3072], x_test[0:1000, 0:3072], t_train[0:1000, 0:10], 7)
    p = np.argmax(px, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("kNN accuracy:", acc)
    return p

def runSvm():
    # Train a support-vector classifier on the same 1000-sample subset.
    clf = svm.SVC(probability=True)
    t = np.argmax(t_train[0:1000], axis=1)  # one-hot labels back to class indices
    clf.fit(x_train[0:1000, 0:3072], t)
    p = clf.predict(x_test[0:1000, 0:3072])
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("SVM accuracy:", acc)
    return p
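
# Note: probability=True makes SVC.fit run an extra cross-validation pass for
# Platt scaling, but only predict() is used here, so the flag mostly adds
# training time.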

data_dir = 'cifar-10-batches-py'
x_train, t_train, x_test, t_test = load_data(data_dir)
# One-hot encode the integer labels ('sparse' was renamed 'sparse_output' in
# scikit-learn 1.2; use sparse=False on older versions).
encoder = OneHotEncoder(sparse_output=False)
one_format = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
encoder.fit(one_format)
t_train = t_train.reshape(-1, 1)  # reshape the 1-D labels into an (n, 1) column; -1 infers the row count
t_train = encoder.transform(t_train)
t_test = t_test.reshape(-1, 1)
t_test = encoder.transform(t_test)
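
# Example of the encoding (illustrative): encoder.transform([[3]]) returns
# [[0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]], so each label becomes a length-10 row.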

p1 = runNetwork()
p2 = runSvm()
p3 = runKnn(x_train, x_test)
# One-hot encode each model's predictions so they can be summed as votes.
p1 = p1.reshape(-1, 1)
p1 = encoder.transform(p1)
p2 = p2.reshape(-1, 1)
p2 = encoder.transform(p2)
p3 = p3.reshape(-1, 1)
p3 = encoder.transform(p3)
vote = p1 + p2 + p3
p = np.argmax(vote, axis=1)
q = np.argmax(t_test[0:1000], axis=1)
acc = np.sum(p == q) / 1000
print("Ensemble accuracy:", acc)