#!/Users/pranab/Tools/anaconda/bin/python

# avenir-python: Machine Learning
# Author: Pranab Ghosh
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.

# Package imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib
if len(sys.argv) != 7:
    print "usage: <num_hidden_units> <data_set_size> <noise_in_data> <iteration_count> <learning_rate> <training_mode>"
    sys.exit()
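# Example invocation (illustrative values only, not prescribed by the original script):
#   python <script_name> 3 2000 0.20 20000 0.01 batch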
# number of hidden units
nn_hdim = int(sys.argv[1])
# data set size
dsize = int(sys.argv[2])
# noise in training data
noise_level = float(sys.argv[3])
# iteration count
it_count = int(sys.argv[4])
# learning rate
epsilon = float(sys.argv[5])
# training mode
training_mode = sys.argv[6]
# validation
use_validation_data = True

# Generate a dataset
#noise_level = 0.20
#noise_level = 0.01
vlo = 100
vup = vlo + dsize / 5
vsize = vup - vlo
print "validation data size %d" %(vsize)
np.random.seed(0)
XC, yc = sklearn.datasets.make_moons(dsize, noise=noise_level)
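# make_moons returns XC as a (dsize, 2) array of 2D points on two interleaving
# half circles and yc as a (dsize,) array of class labels in {0, 1}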
print "complete data set generated" | |
def print_array(X,y): | |
print X | |
print y | |
# Generate a validation dataset | |
#np.random.seed(0) | |
#XV, yv = sklearn.datasets.make_moons(40, noise=0.20) | |
#print "validation data set generated" | |
XV = XC[vlo:vup:1] | |
yv = yc[vlo:vup:1] | |
print "validation data generated" | |
#print_array(XV, yv) | |
X = np.delete(XC, np.s_[vlo:vup:1], 0) | |
y = np.delete(yc, np.s_[vlo:vup:1], 0) | |
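# rows vlo..vup-1 are held out above as the validation set; np.delete returns a
# copy of the data with those rows removed, which becomes the training set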
print "training data generated" | |
#print_array(X, y) | |
print X | |
print y | |
# Parameters | |
num_examples = len(X) # training set size | |
nn_input_dim = 2 # input layer dimensionality | |
nn_output_dim = 2 # output layer dimensionality | |
#training data indices | |
tr_data_indices = np.arange(num_examples) | |
#print tr_data_indices | |
# Gradient descent parameters (I picked these by hand) | |
#epsilon = 0.01 # learning rate for gradient descent | |
reg_lambda = 0.01 # regularization strength | |
# Helper function to evaluate the total loss on the dataset
def calculate_loss(X, y, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    size = len(X)
    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the loss
    correct_logprobs = -np.log(probs[range(size), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (optional)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./size * data_loss
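# Note: the value returned above is (1/N) * [-sum_i log p(y_i | x_i) + (reg_lambda/2) * (||W1||^2 + ||W2||^2)],
# i.e. the mean cross-entropy loss with an L2 penalty on the weight matrices (the penalty is also scaled by 1/N here)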
# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
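# Illustrative usage (input values assumed; the output depends on the trained model):
#   predict(model, np.array([[0.5, -0.25]]))  # -> array([0]) or array([1])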
# This function learns parameters for the neural network in batch mode and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: Compute and print the loss every validation_interval passes
def build_model_batch(nn_hdim, num_passes=10000, validation_interval=50):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # This is what we return at the end
    model = {}
    # Gradient descent. For each batch...
    loss = -1.0
    for i in xrange(0, num_passes):
        #print "pass %d" %(i)
        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
        # Back propagation
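        # Gradient of the softmax cross-entropy loss w.r.t. z2 is probs - one_hot(y):
        # subtract 1 from the predicted probability of the correct class for each example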
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)
        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1
        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2
        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print "Loss after iteration %i: %.8f" %(i, cur_loss)
            loss = cur_loss
    return model
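# Note: build_model_batch above updates the weights once per pass using the full training
# set (batch gradient descent); build_model_incr below updates them once per training
# example, reshuffling the example order on every pass (incremental / stochastic updates).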
# This function learns parameters for the neural network in incremental mode (one example at a time) and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: Compute and print the loss every validation_interval passes
def build_model_incr(nn_hdim, num_passes=10000, validation_interval=50):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))
    # This is what we return at the end
    model = {}
    # Gradient descent. For each pass...
    loss = -1.0
    for i in xrange(0, num_passes):
        #print "pass %d" %(i)
        # shuffle training data indices
        np.random.shuffle(tr_data_indices)
        # all training data
        for j in tr_data_indices:
            Xi = X[j].reshape(1, 2)
            yi = y[j].reshape(1)
            # Forward propagation
            z1 = Xi.dot(W1) + b1
            a1 = np.tanh(z1)
            z2 = a1.dot(W2) + b2
            exp_scores = np.exp(z2)
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
            # Back propagation
            delta3 = probs
            delta3[0, yi] -= 1
            dW2 = (a1.T).dot(delta3)
            db2 = np.sum(delta3, axis=0, keepdims=True)
            delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
            dW1 = np.dot(Xi.T, delta2)
            db1 = np.sum(delta2, axis=0)
            # Add regularization terms (b1 and b2 don't have regularization terms)
            dW2 += reg_lambda * W2
            dW1 += reg_lambda * W1
            # Gradient descent parameter update
            W1 += -epsilon * dW1
            b1 += -epsilon * db1
            W2 += -epsilon * dW2
            b2 += -epsilon * db2
        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print "Loss after iteration %i: %.8f" %(i, cur_loss)
            loss = cur_loss
    return model
# Build a model with nn_hdim hidden units
if (training_mode == "batch"):
    model = build_model_batch(nn_hdim, num_passes=it_count, validation_interval=1)
elif (training_mode == "incr"):
    model = build_model_incr(nn_hdim, num_passes=it_count, validation_interval=1)
else:
    print "invalid training mode"
    sys.exit()

print "hidden layer"
for row in model['W1']:
    print(row)
print "hidden layer bias"
for row in model['b1']:
    print(row)
print "output layer"
for row in model['W2']:
    print(row)
print "output layer bias"
for row in model['b2']:
    print(row)
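# Optional sanity check (not part of the original script): accuracy of the trained
# model on the held-out validation set, using the predict() helper defined above.
#   val_pred = predict(model, XV)
#   print "validation accuracy %.3f" %(np.mean(val_pred == yv))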