Voxpal committed
Commit af48e90
1 Parent(s): 71fb7fa

Upload 9 files

.gitattributes CHANGED
@@ -32,3 +32,9 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/data_batch_1 filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/data_batch_2 filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/data_batch_3 filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/data_batch_4 filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/data_batch_5 filter=lfs diff=lfs merge=lfs -text
+cifar-10-batches-py/test_batch filter=lfs diff=lfs merge=lfs -text
all.py ADDED
import os
import pickle as pk

import numpy as np
from sklearn import svm
from sklearn.preprocessing import OneHotEncoder  # one-hot encoding


# Load the CIFAR-10 data.
def load_batch(file):  # read one batch file
    with open(file, 'rb') as f:
        data_dict = pk.load(f, encoding='bytes')
        images = data_dict[b'data']
        labels = data_dict[b'labels']
        images = images.reshape(10000, 3072)
        labels = np.array(labels)
        return (images / 255), labels


def load_data(data_dir):
    images_train = []
    labels_train = []
    for i in range(5):
        file = os.path.join(data_dir, 'data_batch_%d' % (i + 1))
        print('loading file:', file)
        # Read the training set batch by batch and append each batch's
        # images and labels until all batches have been loaded.
        images_batch, labels_batch = load_batch(file)
        images_train.append(images_batch)
        labels_train.append(labels_batch)
    # Merge the per-batch arrays into a single array.
    x_train = np.concatenate(images_train)
    t_train = np.concatenate(labels_train)
    del images_batch, labels_batch

    # Load the test-set images and labels.
    x_test, t_test = load_batch(os.path.join(data_dir, 'test_batch'))
    return x_train, t_train, x_test, t_test


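# Data-format note for the loaders above (per the CIFAR-10 "python version"
# layout): each data_batch_* / test_batch file unpickles to a dict whose
# b'data' entry is a 10000 x 3072 uint8 array (one row per 32x32 RGB image,
# channels stored consecutively) and whose b'labels' entry is a list of 10000
# integers in 0-9. load_batch() returns the images scaled to [0, 1] plus an
# integer label vector; load_data() stacks the five training batches into
# x_train of shape (50000, 3072) and t_train of shape (50000,).

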
def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    # Derivative of the sigmoid: sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)).
    return (1.0 - sigmoid(x)) * sigmoid(x)


def softmax(x):
    if x.ndim == 2:
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T

    x = x - np.max(x)  # guard against overflow
    return np.exp(x) / np.sum(np.exp(x))


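# Why softmax subtracts the max before exponentiating: softmax is
# shift-invariant (softmax(x) == softmax(x - c) for any constant c, because
# exp(c) cancels in the ratio), so subtracting np.max(x) changes nothing
# mathematically but keeps every exponent <= 0 and avoids float overflow.
# Rough example:
#   softmax(np.array([1000.0, 1000.0]))  ->  array([0.5, 0.5])
# whereas np.exp(1000.0) on its own overflows to inf and would yield NaNs.

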
class neuralNetwork:

    def __init__(self, numNeuronLayers, numNeurons_perLayer, learningRate):
        self.numNeurons_perLayer = numNeurons_perLayer
        self.numNeuronLayers = numNeuronLayers
        self.learningRate = learningRate
        self.weight = []
        self.bias = []
        for i in range(numNeuronLayers):
            # Note: learningRate also scales the random weight initialization here.
            self.weight.append(
                learningRate * np.random.randn(self.numNeurons_perLayer[i], self.numNeurons_perLayer[i + 1]))
            self.bias.append(np.zeros(self.numNeurons_perLayer[i + 1]))

    def predict(self, x):
        z = x
        # Run a forward pass to get the output.
        for i in range(self.numNeuronLayers - 1):
            a = np.dot(z, self.weight[i]) + self.bias[i]
            z = sigmoid(a)
        an = np.dot(z, self.weight[self.numNeuronLayers - 1]) + self.bias[self.numNeuronLayers - 1]
        y = softmax(an)
        return y

    def gradient(self, x, t):
        z = []
        a = []
        z.append(x)
        # Forward pass, keeping every pre-activation a[i] and activation z[i].
        for i in range(self.numNeuronLayers):
            a.append(np.dot(z[i], self.weight[i]) + self.bias[i])
            z.append(sigmoid(a[i]))
        y = softmax(a[self.numNeuronLayers - 1])
        num = x.shape[0]
        dy = (y - t) / num
        dz = []
        da = []
        dz.append(dy)
        # Backward pass: push the output gradient back through each layer.
        for i in range(self.numNeuronLayers - 1):
            da.append(np.dot(dz[i], self.weight[self.numNeuronLayers - i - 1].T))
            dz.append(sigmoid_grad(a[self.numNeuronLayers - i - 2]) * da[i])

        # Gradient-descent update of every layer's weights and biases.
        for i in range(self.numNeuronLayers):
            self.weight[i] -= self.learningRate * np.dot(z[i].T, dz[self.numNeuronLayers - i - 1])
            self.bias[i] -= self.learningRate * np.sum(dz[self.numNeuronLayers - i - 1], axis=0)

    def loss(self, x, t):
        # Cross-entropy loss of the softmax output against one-hot targets t.
        y = self.predict(x)
        t = t.argmax(axis=1)
        num = y.shape[0]
        s = y[np.arange(num), t]
        return -np.sum(np.log(s)) / num

    def accuracy(self, x, t):
        y = self.predict(x)
        p = np.argmax(y, axis=1)
        q = np.argmax(t, axis=1)
        acc = np.sum(p == q) / len(y)
        return acc


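# Shape sketch for the configuration used below (numNeurons_perLayer =
# [3072, 50, 20, 10]): weight[0] is 3072x50, weight[1] is 50x20 and weight[2]
# is 20x10, so a batch x of shape (N, 3072) maps to an output y of shape
# (N, 10). In gradient(), dz[j] holds the loss gradient with respect to
# a[L - 1 - j] (L = numNeuronLayers): dz[0] = (y - t) / N is the
# softmax/cross-entropy gradient at the output, and each later dz is the
# previous one pushed back through weight[...].T and the sigmoid derivative,
# which is why the update loop pairs weight[i] with dz[L - 1 - i].

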
def kNN(x_train, x_test, t_train, k):
    # For every test sample, count how many of its k nearest training
    # samples belong to each of the 10 classes.
    px = list()
    for i in range(len(x_test)):
        px.append([])
        for j in range(10):
            px[i].append(0)
    for i in range(len(x_test)):
        dis = getODistance(x_test[i], x_train)
        index = np.argsort(dis)
        count = list()
        for j in range(len(t_train[0])):
            count.append(0)
        for j in range(k):
            for w in range(10):
                if t_train[index[j]][w] == 1:
                    count[w] = count[w] + 1
        for j in range(10):
            px[i][j] = count[j]
    return px


def getODistance(sample, train):
    # Euclidean distance from one test sample to every training sample.
    a = np.tile(sample, [train.shape[0], 1])
    a = a - train
    a = np.square(a)
    a = a.sum(axis=1)
    dis = np.sqrt(a)
    dis = dis.T
    dis = dis.tolist()
    return dis[0]


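# kNN() returns, for each test sample, the raw vote counts of its k nearest
# training samples per class (not normalized probabilities); argmax over the
# counts gives the predicted class. getODistance() is plain Euclidean
# distance; with NumPy broadcasting the same quantity could be written more
# compactly as roughly
#   np.sqrt(np.square(train - sample).sum(axis=1))
# (a sketch, assuming `train` and `sample` are plain ndarrays rather than np.mat).

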
def runNetwork():
    numNeuronLayers = 3
    numNeurons_perLayer = [3072, 50, 20, 10]
    learningRate = 0.05
    epoch = 50000
    batch_size = 100
    train_size = x_train.shape[0]  # 50000

    net = neuralNetwork(numNeuronLayers, numNeurons_perLayer, learningRate)
    for i in range(epoch):
        batch_mask = np.random.choice(train_size, batch_size)  # pick 100 random indices in [0, 50000)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]
        net.gradient(x_batch, t_batch)
    y = net.predict(x_test[0:1000, 0:3072])
    p = np.argmax(y, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / len(y)
    print("neural network accuracy:", acc)
    return p


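# Training-schedule note: the loop above runs 50000 mini-batch updates of size
# 100, i.e. roughly 100 passes over the 50000 training images, sampling the
# batch indices with np.random.choice (with replacement). Accuracy is then
# measured on the first 1000 test images only, matching the other models below.

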
def runKnn(x_train, x_test):
    x_train = np.mat(x_train)
    x_test = np.mat(x_test)
    px = kNN(x_train[0:1000, 0:3072], x_test[0:1000, 0:3072], t_train[0:1000, 0:10], 7)
    p = np.argmax(px, axis=1)
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("kNN accuracy:", acc)
    return p


def runSvm():
    clf = svm.SVC(probability=True)
    t = np.argmax(t_train[0:1000], axis=1)
    clf.fit(x_train[0:1000, 0:3072], t)
    p = clf.predict(x_test[0:1000, 0:3072])
    q = np.argmax(t_test[0:1000], axis=1)
    acc = np.sum(p == q) / 1000
    print("SVM accuracy:", acc)
    return p


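# runSvm() uses scikit-learn's SVC with its default RBF kernel, fit on only the
# first 1000 training images to keep runtime manageable. probability=True turns
# on probability estimates (Platt scaling), although only the hard predict()
# labels are used here, so it could likely be dropped without changing the result.

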
data_dir = 'cifar-10-batches-py'
x_train, t_train, x_test, t_test = load_data(data_dir)
# One-hot encode the integer labels 0-9 (newer scikit-learn releases, >= 1.2,
# call this argument sparse_output instead of sparse).
encoder = OneHotEncoder(sparse=False)
one_format = [[0], [1], [2], [3], [4], [5], [6], [7], [8], [9]]
encoder.fit(one_format)
t_train = t_train.reshape(-1, 1)  # reshape to a column of single labels; -1 lets the row count be inferred
t_train = encoder.transform(t_train)
t_test = t_test.reshape(-1, 1)
t_test = encoder.transform(t_test)

p1 = runNetwork()
p2 = runSvm()
p3 = runKnn(x_train, x_test)

p1 = p1.reshape(-1, 1)  # reshape each model's predicted labels to a column for one-hot encoding
p1 = encoder.transform(p1)
p2 = p2.reshape(-1, 1)
p2 = encoder.transform(p2)
p3 = p3.reshape(-1, 1)
p3 = encoder.transform(p3)

vote = p1 + p2 + p3
p = np.argmax(vote, axis=1)
q = np.argmax(t_test[0:1000], axis=1)
acc = np.sum(p == q) / 1000
print("final accuracy:", acc)
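# Ensemble note: each model's predicted labels are one-hot encoded and summed,
# so np.argmax(vote, axis=1) picks the class predicted by the majority of the
# three models; when all three disagree (a 1-1-1 tie) argmax simply falls back
# to the lowest class index among the tied classes.
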
cifar-10-batches-py/batches.meta ADDED
Binary file (158 Bytes)
cifar-10-batches-py/data_batch_1 ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:54636561a3ce25bd3e19253c6b0d8538147b0ae398331ac4a2d86c6d987368cd
size 31035704
cifar-10-batches-py/data_batch_2 ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:766b2cef9fbc745cf056b3152224f7cf77163b330ea9a15f9392beb8b89bc5a8
size 31035320
cifar-10-batches-py/data_batch_3 ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:0f00d98ebfb30b3ec0ad19f9756dc2630b89003e10525f5e148445e82aa6a1f9
size 31035999
cifar-10-batches-py/data_batch_4 ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:3f7bb240661948b8f4d53e36ec720d8306f5668bd0071dcb4e6c947f78e9682b
size 31035696
cifar-10-batches-py/data_batch_5 ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:d91802434d8376bbaeeadf58a737e3a1b12ac839077e931237e0dcd43adcb154
size 31035623
cifar-10-batches-py/readme.html ADDED
<meta HTTP-EQUIV="REFRESH" content="0; url=http://www.cs.toronto.edu/~kriz/cifar.html">
cifar-10-batches-py/test_batch ADDED
version https://git-lfs.github.com/spec/v1
oid sha256:f53d8d457504f7cff4ea9e021afcf0e0ad8e24a91f3fc42091b8adef61157831
size 31035526