#!/Users/pranab/Tools/anaconda/bin/python

# avenir-python: Machine Learning
# Author: Pranab Ghosh
#
# Licensed under the Apache License, Version 2.0 (the "License"); you
# may not use this file except in compliance with the License. You may
# obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied. See the License for the specific language governing
# permissions and limitations under the License.

# Package imports
import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import sklearn
import sklearn.datasets
import sklearn.linear_model
import matplotlib

if len(sys.argv) != 7:
    print "usage: <num hidden units> <data set size> <noise level> <num iterations> <learning rate> <training mode>"
    sys.exit()

# number of hidden units
nn_hdim = int(sys.argv[1])

# data set size
dsize = int(sys.argv[2])

# noise in training data
noise_level = float(sys.argv[3])

# iteration count
it_count = int(sys.argv[4])

# learning rate
epsilon = float(sys.argv[5])

# training mode
training_mode = sys.argv[6]

# validation
use_validation_data = True

# Generate a dataset
#noise_level = 0.20
#noise_level = 0.01
vlo = 100
vup = vlo + dsize / 5
vsize = vup - vlo
print "validation data size %d" %(vsize)
np.random.seed(0)
XC, yc = sklearn.datasets.make_moons(dsize, noise=noise_level)
print "complete data set generated"

def print_array(X, y):
    print X
    print y

# Generate a validation dataset by slicing it out of the complete dataset
#np.random.seed(0)
#XV, yv = sklearn.datasets.make_moons(40, noise=0.20)
#print "validation data set generated"
XV = XC[vlo:vup:1]
yv = yc[vlo:vup:1]
print "validation data generated"
#print_array(XV, yv)

# The remaining rows form the training dataset
X = np.delete(XC, np.s_[vlo:vup:1], 0)
y = np.delete(yc, np.s_[vlo:vup:1], 0)
print "training data generated"
#print_array(X, y)
print X
print y

# Parameters
num_examples = len(X)  # training set size
nn_input_dim = 2  # input layer dimensionality
nn_output_dim = 2  # output layer dimensionality

# training data indices
tr_data_indices = np.arange(num_examples)
#print tr_data_indices

# Gradient descent parameters (I picked these by hand)
#epsilon = 0.01 # learning rate for gradient descent
reg_lambda = 0.01  # regularization strength

# Helper function to evaluate the total loss on the dataset
def calculate_loss(X, y, model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    size = len(X)
    # Forward propagation to calculate our predictions
    z1 = X.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the loss
    correct_logprobs = -np.log(probs[range(size), y])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to loss (optional)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./size * data_loss

# Helper function to predict an output (0 or 1)
def predict(model, x):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    # Forward propagation
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)
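# Optional helper, not part of the original script: a minimal sketch showing how
# predict() can be used to compute classification accuracy on a labeled data set
# (for example the validation split XV, yv). The name 'accuracy' is illustrative.
def accuracy(model, Xd, yd):
    # fraction of samples whose predicted class matches the label
    return np.mean(predict(model, Xd) == yd)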
# This function learns parameters for the neural network in batch mode and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: Evaluate and print the loss every validation_interval passes
def build_model_batch(nn_hdim, num_passes=10000, validation_interval=50):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end
    model = {}

    # Gradient descent. For each batch...
    loss = -1.0
    for i in xrange(0, num_passes):
        #print "pass %d" %(i)

        # Forward propagation
        z1 = X.dot(W1) + b1
        a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Back propagation
        delta3 = probs
        delta3[range(num_examples), y] -= 1
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)
        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
        dW1 = np.dot(X.T, delta2)
        db1 = np.sum(delta2, axis=0)

        # Add regularization terms (b1 and b2 don't have regularization terms)
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print "Loss after iteration %i: %.8f" %(i, cur_loss)
            loss = cur_loss

    return model

# This function learns parameters for the neural network in incremental (per-sample) mode and returns the model.
# - nn_hdim: Number of nodes in the hidden layer
# - num_passes: Number of passes through the training data for gradient descent
# - validation_interval: Evaluate and print the loss every validation_interval passes
def build_model_incr(nn_hdim, num_passes=10000, validation_interval=50):
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end
    model = {}

    # Gradient descent. For each pass...
    loss = -1.0
    for i in xrange(0, num_passes):
        #print "pass %d" %(i)

        # shuffle training data indices
        np.random.shuffle(tr_data_indices)

        # all training data, one sample at a time
        for j in tr_data_indices:
            Xi = X[j].reshape(1, 2)
            yi = y[j].reshape(1)

            # Forward propagation
            z1 = Xi.dot(W1) + b1
            a1 = np.tanh(z1)
            z2 = a1.dot(W2) + b2
            exp_scores = np.exp(z2)
            probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

            # Back propagation
            delta3 = probs
            delta3[0, yi] -= 1
            dW2 = (a1.T).dot(delta3)
            db2 = np.sum(delta3, axis=0, keepdims=True)
            delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))
            dW1 = np.dot(Xi.T, delta2)
            db1 = np.sum(delta2, axis=0)

            # Add regularization terms (b1 and b2 don't have regularization terms)
            dW2 += reg_lambda * W2
            dW1 += reg_lambda * W1

            # Gradient descent parameter update
            W1 += -epsilon * dW1
            b1 += -epsilon * db1
            W2 += -epsilon * dW2
            b2 += -epsilon * db2

        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # This is expensive because it uses the whole dataset, so we don't want to do it too often.
        if i % validation_interval == 0:
            if use_validation_data:
                cur_loss = calculate_loss(XV, yv, model)
            else:
                cur_loss = calculate_loss(X, y, model)
            print "Loss after iteration %i: %.8f" %(i, cur_loss)
            loss = cur_loss

    return model

# Build a model with an nn_hdim-dimensional hidden layer
if (training_mode == "batch"):
    model = build_model_batch(nn_hdim, num_passes=it_count, validation_interval=1)
elif (training_mode == "incr"):
    model = build_model_incr(nn_hdim, num_passes=it_count, validation_interval=1)
else:
    print "invalid training mode"
    sys.exit()

print "hidden layer"
for row in model['W1']:
    print(row)

print "hidden layer bias"
for row in model['b1']:
    print(row)

print "output layer"
for row in model['W2']:
    print(row)

print "output layer bias"
for row in model['b2']:
    print(row)
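# Example invocation (illustrative values only, not prescribed by the original script;
# the script file name is a placeholder):
#   python nn_moons.py 3 400 0.20 5000 0.01 batch
# i.e. 3 hidden units, 400 samples, noise level 0.20, 5000 passes, learning rate 0.01, batch training mode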