package weka.classifiers.functions;

import java.util.Arrays;
import java.util.Collections;
import java.util.Enumeration;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.Vector;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import weka.classifiers.Classifier;
import weka.classifiers.RandomizableClassifier;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.ConjugateGradientOptimization;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Optimization;
import weka.core.Option;
import weka.core.RevisionUtils;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.NominalToBinary;
import weka.filters.unsupervised.attribute.RemoveUseless;
import weka.filters.unsupervised.attribute.ReplaceMissingValues;
import weka.filters.unsupervised.attribute.Standardize;
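
/**
 * Trains a multilayer perceptron with one hidden layer by minimizing the
 * squared error plus a quadratic penalty (ridge) on the weights, using WEKA's
 * Optimization class (BFGS updates) or, optionally, conjugate gradient
 * descent. All attributes are standardized, nominal attributes are converted
 * to binary ones, and missing values are replaced globally. An approximate
 * version of the logistic function is used as the activation function, and
 * the squared error and gradient can be computed in parallel across multiple
 * threads. See {@link #globalInfo()} for details.
 */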
public class MLPClassifier extends RandomizableClassifier implements WeightedInstancesHandler {

  /** For serialization. */
  private static final long serialVersionUID = -3297474276438394644L;
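
  /**
   * Returns default capabilities of the classifier.
   *
   * @return the capabilities of this classifier
   */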
  @Override
  public Capabilities getCapabilities() {

    Capabilities result = super.getCapabilities();
    result.disableAll();

    // attributes
    result.enable(Capability.NOMINAL_ATTRIBUTES);
    result.enable(Capability.NUMERIC_ATTRIBUTES);
    result.enable(Capability.DATE_ATTRIBUTES);
    result.enable(Capability.MISSING_VALUES);

    // class
    result.enable(Capability.NOMINAL_CLASS);
    result.enable(Capability.MISSING_CLASS_VALUES);

    return result;
  }
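
  /**
   * Simple wrapper class needed to use the BFGS method implemented in
   * weka.core.Optimization: maps the parameter vector to the squared error
   * and its gradient.
   */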
  protected class OptEng extends Optimization {

    @Override
    protected double objectiveFunction(double[] x) {

      m_MLPParameters = x;
      return calculateSE();
    }

    @Override
    protected double[] evaluateGradient(double[] x) {

      m_MLPParameters = x;
      return calculateGradient();
    }

    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 10949 $");
    }
  }
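
  /**
   * Simple wrapper class needed to use the conjugate gradient descent method
   * implemented in weka.core.ConjugateGradientOptimization.
   */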
  protected class OptEngCGD extends ConjugateGradientOptimization {

    @Override
    protected double objectiveFunction(double[] x) {

      m_MLPParameters = x;
      return calculateSE();
    }

    @Override
    protected double[] evaluateGradient(double[] x) {

      m_MLPParameters = x;
      return calculateGradient();
    }

    @Override
    public String getRevision() {
      return RevisionUtils.extract("$Revision: 10949 $");
    }
  }

  /** The number of hidden units. */
  protected int m_numUnits = 2;

  /** The index of the class attribute in the filtered data. */
  protected int m_classIndex = -1;

  /** The training data (header only once the model has been built). */
  protected Instances m_data = null;

  /** The number of classes. */
  protected int m_numClasses = -1;

  /** The number of attributes, including the class. */
  protected int m_numAttributes = -1;

  /** All network parameters, stored in one flat array. */
  protected double[] m_MLPParameters = null;

  /** Offset of the output-unit weights in the parameter array. */
  protected int OFFSET_WEIGHTS = -1;

  /** Offset of the hidden-unit weights in the parameter array. */
  protected int OFFSET_ATTRIBUTE_WEIGHTS = -1;

  /** The ridge penalty factor for the quadratic penalty on the weights. */
  protected double m_ridge = 0.01;

  /** Whether to use conjugate gradient descent rather than BFGS updates. */
  protected boolean m_useCGD = false;

  /** Tolerance parameter for the delta values. */
  protected double m_tolerance = 1.0e-6;

  /** The number of threads to use to calculate squared error and gradient. */
  protected int m_numThreads = 1;

  /** The size of the thread pool. */
  protected int m_poolSize = 1;

  /** The filter used to standardize the data. */
  protected Filter m_Filter = null;

  /** The filter used to remove useless attributes. */
  protected RemoveUseless m_AttFilter;

  /** The filter used to make nominal attributes binary. */
  protected NominalToBinary m_NominalToBinary;

  /** The filter used to replace missing values. */
  protected ReplaceMissingValues m_ReplaceMissingValues;

  /** A ZeroR model, used if no model can be built from the data. */
  protected Classifier m_ZeroR;

  /** The thread pool used to compute squared error and gradient. */
  protected transient ExecutorService m_Pool = null;
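
  /**
   * Validates and filters the training data, sets up the flat parameter
   * vector, and initializes all weights to small random values. Returns the
   * filtered data, or null if only the class attribute remains after
   * filtering and a ZeroR model has been built instead.
   *
   * @param data the training data
   * @return the filtered training data, or null if ZeroR is used
   * @throws Exception if something goes wrong
   */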
  protected Instances initializeClassifier(Instances data) throws Exception {

    // Can the classifier handle the data?
    getCapabilities().testWithFail(data);

    data = new Instances(data);
    data.deleteWithMissingClass();

    Random random = new Random(m_Seed);
    if (data.numInstances() > 1) {
      random = data.getRandomNumberGenerator(m_Seed);
    }
    data.randomize(random);

    m_ReplaceMissingValues = new ReplaceMissingValues();
    m_ReplaceMissingValues.setInputFormat(data);
    data = Filter.useFilter(data, m_ReplaceMissingValues);

    m_AttFilter = new RemoveUseless();
    m_AttFilter.setInputFormat(data);
    data = Filter.useFilter(data, m_AttFilter);

    // Only the class attribute left? -> build ZeroR model
    if (data.numAttributes() == 1) {
      System.err
        .println("Cannot build model (only class attribute present in data after removing useless attributes!), "
          + "using ZeroR model instead!");
      m_ZeroR = new weka.classifiers.rules.ZeroR();
      m_ZeroR.buildClassifier(data);
      return null;
    } else {
      m_ZeroR = null;
    }

    m_NominalToBinary = new NominalToBinary();
    m_NominalToBinary.setInputFormat(data);
    data = Filter.useFilter(data, m_NominalToBinary);

    m_Filter = new Standardize();
    m_Filter.setInputFormat(data);
    data = Filter.useFilter(data, m_Filter);

    m_classIndex = data.classIndex();
    m_numClasses = data.numClasses();
    m_numAttributes = data.numAttributes();

    // Set up the array of parameters
    OFFSET_WEIGHTS = 0;
    OFFSET_ATTRIBUTE_WEIGHTS = (m_numUnits + 1) * m_numClasses;
    m_MLPParameters = new double[OFFSET_ATTRIBUTE_WEIGHTS + m_numUnits
      * m_numAttributes];

    // Initialize all parameters to small random values
    for (int j = 0; j < m_numClasses; j++) {
      int offsetOW = OFFSET_WEIGHTS + (j * (m_numUnits + 1));
      for (int i = 0; i < m_numUnits; i++) {
        m_MLPParameters[offsetOW + i] = 0.1 * random.nextGaussian();
      }
      m_MLPParameters[offsetOW + m_numUnits] = 0.1 * random.nextGaussian();
    }
    for (int i = 0; i < m_numUnits; i++) {
      int offsetW = OFFSET_ATTRIBUTE_WEIGHTS + (i * m_numAttributes);
      for (int j = 0; j < m_numAttributes; j++) {
        m_MLPParameters[offsetW + j] = 0.1 * random.nextGaussian();
      }
    }

    return data;
  }
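
  /**
   * Builds the MLP by minimizing the penalized squared error, using BFGS
   * updates or conjugate gradient descent.
   *
   * @param data the training data
   * @throws Exception if something goes wrong
   */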
  @Override
  public void buildClassifier(Instances data) throws Exception {

    m_data = initializeClassifier(data);
    if (m_data == null) {
      return;
    }

    m_Pool = Executors.newFixedThreadPool(m_poolSize);

    Optimization opt = null;
    if (!m_useCGD) {
      opt = new OptEng();
    } else {
      opt = new OptEngCGD();
    }
    opt.setDebug(m_Debug);

    // No constraints on the parameters
    double[][] b = new double[2][m_MLPParameters.length];
    for (int i = 0; i < 2; i++) {
      for (int j = 0; j < m_MLPParameters.length; j++) {
        b[i][j] = Double.NaN;
      }
    }

    m_MLPParameters = opt.findArgmin(m_MLPParameters, b);
    while (m_MLPParameters == null) {
      m_MLPParameters = opt.getVarbValues();
      if (m_Debug) {
        System.out.println("First set of iterations finished, not enough!");
      }
      m_MLPParameters = opt.findArgmin(m_MLPParameters, b);
    }
    if (m_Debug) {
      System.out.println("SE (normalized space) after optimization: "
        + opt.getMinFunction());
    }

    // Keep just the header so the training data can be garbage collected
    m_data = new Instances(m_data, 0);

    m_Pool.shutdown();
  }
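
  /**
   * Calculates the (penalized) squared error based on the current parameter
   * vector, splitting the data into chunks that are processed in parallel.
   *
   * @return the penalized squared error, normalized by the sum of weights
   */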
  protected double calculateSE() {

    // Split the data into chunks, one per thread
    int chunksize = m_data.numInstances() / m_numThreads;
    Set<Future<Double>> results = new HashSet<Future<Double>>();

    for (int j = 0; j < m_numThreads; j++) {

      final int lo = j * chunksize;
      final int hi = (j < m_numThreads - 1) ? (lo + chunksize) : m_data
        .numInstances();

      Future<Double> futureSE = m_Pool.submit(new Callable<Double>() {
        @Override
        public Double call() {
          final double[] outputs = new double[m_numUnits];
          double SE = 0;
          for (int k = lo; k < hi; k++) {
            final Instance inst = m_data.instance(k);

            calculateOutputs(inst, outputs, null);

            for (int i = 0; i < m_numClasses; i++) {

              // Target is 0.99 for the instance's class, 0.01 otherwise
              final double target = ((int) inst.value(m_classIndex) == i) ? 0.99
                : 0.01;

              final double err = getOutput(i, outputs, null) - target;
              SE += inst.weight() * err * err;
            }
          }
          return SE;
        }
      });
      results.add(futureSE);
    }

    // Sum up the squared errors computed by the individual threads
    double SE = 0;
    try {
      for (Future<Double> futureSE : results) {
        SE += futureSE.get();
      }
    } catch (Exception e) {
      System.err.println("Squared error could not be calculated.");
    }

    // Add the ridge penalty on the weights (bias weights are not penalized)
    double squaredSumOfWeights = 0;
    for (int i = 0; i < m_numClasses; i++) {
      int offsetOW = OFFSET_WEIGHTS + (i * (m_numUnits + 1));
      for (int k = 0; k < m_numUnits; k++) {
        squaredSumOfWeights += m_MLPParameters[offsetOW + k]
          * m_MLPParameters[offsetOW + k];
      }
    }
    for (int k = 0; k < m_numUnits; k++) {
      int offsetW = OFFSET_ATTRIBUTE_WEIGHTS + k * m_numAttributes;
      for (int j = 0; j < m_classIndex; j++) {
        squaredSumOfWeights += m_MLPParameters[offsetW + j]
          * m_MLPParameters[offsetW + j];
      }
      for (int j = m_classIndex + 1; j < m_numAttributes; j++) {
        squaredSumOfWeights += m_MLPParameters[offsetW + j]
          * m_MLPParameters[offsetW + j];
      }
    }

    return ((m_ridge * squaredSumOfWeights) + (0.5 * SE))
      / m_data.sumOfWeights();
  }
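
  /**
   * Calculates the gradient of the (penalized) squared error with respect to
   * all parameters, based on the current parameter vector. The data is
   * processed in parallel chunks and the partial gradients are summed up.
   *
   * @return the gradient, normalized by the sum of weights
   */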
  protected double[] calculateGradient() {

    // Split the data into chunks, one per thread
    int chunksize = m_data.numInstances() / m_numThreads;
    Set<Future<double[]>> results = new HashSet<Future<double[]>>();

    for (int j = 0; j < m_numThreads; j++) {

      final int lo = j * chunksize;
      final int hi = (j < m_numThreads - 1) ? (lo + chunksize) : m_data
        .numInstances();

      Future<double[]> futureGrad = m_Pool.submit(new Callable<double[]>() {
        @Override
        public double[] call() {

          final double[] outputs = new double[m_numUnits];
          final double[] deltaHidden = new double[m_numUnits];
          final double[] sigmoidDerivativeOutput = new double[1];
          final double[] sigmoidDerivativesHidden = new double[m_numUnits];
          final double[] localGrad = new double[m_MLPParameters.length];
          for (int k = lo; k < hi; k++) {
            final Instance inst = m_data.instance(k);
            calculateOutputs(inst, outputs, sigmoidDerivativesHidden);
            updateGradient(localGrad, inst, outputs, sigmoidDerivativeOutput,
              deltaHidden);
            updateGradientForHiddenUnits(localGrad, inst,
              sigmoidDerivativesHidden, deltaHidden);
          }
          return localGrad;
        }
      });
      results.add(futureGrad);
    }

    // Sum up the partial gradients computed by the individual threads
    double[] grad = new double[m_MLPParameters.length];
    try {
      for (Future<double[]> futureGrad : results) {
        double[] lg = futureGrad.get();
        for (int i = 0; i < lg.length; i++) {
          grad[i] += lg[i];
        }
      }
    } catch (Exception e) {
      System.err.println("Gradient could not be calculated.");
    }

    // Add the gradient of the ridge penalty (bias weights are not penalized)
    for (int i = 0; i < m_numClasses; i++) {
      int offsetOW = OFFSET_WEIGHTS + (i * (m_numUnits + 1));
      for (int k = 0; k < m_numUnits; k++) {
        grad[offsetOW + k] += m_ridge * 2 * m_MLPParameters[offsetOW + k];
      }
    }
    for (int k = 0; k < m_numUnits; k++) {
      int offsetW = OFFSET_ATTRIBUTE_WEIGHTS + k * m_numAttributes;
      for (int j = 0; j < m_classIndex; j++) {
        grad[offsetW + j] += m_ridge * 2 * m_MLPParameters[offsetW + j];
      }
      for (int j = m_classIndex + 1; j < m_numAttributes; j++) {
        grad[offsetW + j] += m_ridge * 2 * m_MLPParameters[offsetW + j];
      }
    }

    // Normalize by the sum of instance weights
    double factor = 1.0 / m_data.sumOfWeights();
    for (int i = 0; i < grad.length; i++) {
      grad[i] *= factor;
    }

    return grad;
  }
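
  /**
   * Updates the gradient for the weights of the output units, for one
   * instance, and accumulates the delta values for the hidden units.
   */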
  protected void updateGradient(double[] grad, Instance inst, double[] outputs,
    double[] sigmoidDerivativeOutput, double[] deltaHidden) {

    // Initialise deltaHidden
    Arrays.fill(deltaHidden, 0.0);

    // Process all output units
    for (int j = 0; j < m_numClasses; j++) {

      double pred = getOutput(j, outputs, sigmoidDerivativeOutput);

      // Target is 0.99 for the instance's class, 0.01 otherwise
      double target = ((int) inst.value(m_classIndex) == j) ? 0.99 : 0.01;

      double deltaOut = inst.weight() * (pred - target) * sigmoidDerivativeOutput[0];

      // Skip this output unit if the delta is within the tolerance
      if (deltaOut <= m_tolerance && deltaOut >= -m_tolerance) {
        continue;
      }

      int offsetOW = OFFSET_WEIGHTS + (j * (m_numUnits + 1));

      // Accumulate the delta values for the hidden units
      for (int i = 0; i < m_numUnits; i++) {
        deltaHidden[i] += deltaOut * m_MLPParameters[offsetOW + i];
      }

      // Update the gradient for the weights of this output unit
      for (int i = 0; i < m_numUnits; i++) {
        grad[offsetOW + i] += deltaOut * outputs[i];
      }

      // Update the gradient for the bias of this output unit
      grad[offsetOW + m_numUnits] += deltaOut;
    }
  }
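
  /**
   * Updates the gradient for the weights of the hidden units, for one
   * instance, based on the accumulated delta values.
   */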
  protected void updateGradientForHiddenUnits(double[] grad, Instance inst,
    double[] sigmoidDerivativesHidden, double[] deltaHidden) {

    // Finalize the delta values for the hidden units
    for (int i = 0; i < m_numUnits; i++) {
      deltaHidden[i] *= sigmoidDerivativesHidden[i];
    }

    // Update the gradient for the weights of each hidden unit
    for (int i = 0; i < m_numUnits; i++) {

      // Skip this hidden unit if the delta is within the tolerance
      if (deltaHidden[i] <= m_tolerance && deltaHidden[i] >= -m_tolerance) {
        continue;
      }

      // The slot at the class index holds the bias weight
      int offsetW = OFFSET_ATTRIBUTE_WEIGHTS + i * m_numAttributes;
      for (int l = 0; l < m_classIndex; l++) {
        grad[offsetW + l] += deltaHidden[i] * inst.value(l);
      }
      grad[offsetW + m_classIndex] += deltaHidden[i];
      for (int l = m_classIndex + 1; l < m_numAttributes; l++) {
        grad[offsetW + l] += deltaHidden[i] * inst.value(l);
      }
    }
  }
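
  /**
   * Calculates the outputs of the hidden units for the given instance,
   * storing them in o. If d is non-null, the derivatives of the activation
   * function are stored there as well.
   */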
  protected void calculateOutputs(Instance inst, double[] o, double[] d) {

    for (int i = 0; i < m_numUnits; i++) {
      int offsetW = OFFSET_ATTRIBUTE_WEIGHTS + i * m_numAttributes;
      double sum = 0;

      // The parameter slot at the class index holds the bias weight
      for (int j = 0; j < m_classIndex; j++) {
        sum += inst.value(j) * m_MLPParameters[offsetW + j];
      }
      sum += m_MLPParameters[offsetW + m_classIndex];
      for (int j = m_classIndex + 1; j < m_numAttributes; j++) {
        sum += inst.value(j) * m_MLPParameters[offsetW + j];
      }
      o[i] = sigmoid(-sum, d, i);
    }
  }
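
  /**
   * Calculates the output of one output unit based on the given hidden-unit
   * outputs. If d is non-null, the derivative of the activation function is
   * stored in d[0].
   */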
  protected double getOutput(int unit, double[] outputs, double[] d) {

    int offsetOW = OFFSET_WEIGHTS + (unit * (m_numUnits + 1));
    double result = 0;
    for (int i = 0; i < m_numUnits; i++) {
      result += m_MLPParameters[offsetOW + i] * outputs[i];
    }
    result += m_MLPParameters[offsetOW + m_numUnits];
    return sigmoid(-result, d, 0);
  }
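
  /**
   * Computes an approximate version of the logistic function 1 / (1 + e^x)
   * (note the sign convention: callers pass the negated activation). If d is
   * non-null, the derivative is stored at the given index.
   */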
  protected double sigmoid(double x, double[] d, int index) {

    // Approximate e^x by (1 + x/4096)^4096, evaluated with twelve squarings
    double y = 1.0 + x / 4096.0;
    x = y * y;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    x *= x;
    double output = 1.0 / (1.0 + x);

    // Compute the derivative of the approximate sigmoid if required
    if (d != null) {
      d[index] = output * (1.0 - output) / y;
    }

    return output;
  }
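
  /**
   * Computes the class probability estimates for the given instance, after
   * passing it through the same filters that were applied at training time.
   *
   * @param inst the instance to classify
   * @return the class membership probabilities
   * @throws Exception if the distribution can't be computed
   */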
  @Override
  public double[] distributionForInstance(Instance inst) throws Exception {

    // Apply the same filters that were used at training time
    m_ReplaceMissingValues.input(inst);
    inst = m_ReplaceMissingValues.output();
    m_AttFilter.input(inst);
    inst = m_AttFilter.output();

    // Fall back on ZeroR if no model could be built
    if (m_ZeroR != null) {
      return m_ZeroR.distributionForInstance(inst);
    }

    m_NominalToBinary.input(inst);
    inst = m_NominalToBinary.output();
    m_Filter.input(inst);
    inst = m_Filter.output();

    // Clip the network outputs to [0, 1] and normalize them
    double[] dist = new double[m_numClasses];
    double[] outputs = new double[m_numUnits];
    calculateOutputs(inst, outputs, null);
    for (int i = 0; i < m_numClasses; i++) {
      dist[i] = getOutput(i, outputs, null);
      if (dist[i] < 0) {
        dist[i] = 0;
      } else if (dist[i] > 1) {
        dist[i] = 1;
      }
    }
    Utils.normalize(dist);

    return dist;
  }
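
  /**
   * Returns a string describing this classifier, suitable for displaying in
   * the explorer/experimenter gui.
   *
   * @return a description of the classifier
   */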
  public String globalInfo() {

    return "Trains a multilayer perceptron with one hidden layer using WEKA's Optimization class"
      + " by minimizing the squared error plus a quadratic penalty with the BFGS method."
      + " Note that all attributes are standardized. There are several parameters. The"
      + " ridge parameter is used to determine the penalty on the size of the weights. The"
      + " number of hidden units can also be specified. Note that large"
      + " numbers produce long training times. Finally, it is possible to use conjugate gradient"
      + " descent rather than BFGS updates, which may be faster for cases with many parameters."
      + " To improve speed, an approximate version of the logistic function is used as the"
      + " activation function. Also, if delta values in the backpropagation step are"
      + " within the user-specified tolerance, the gradient is not updated for that"
      + " particular instance, which saves some additional time. Parallel calculation"
      + " of squared error and gradient is possible when multiple CPU cores are present."
      + " Data is split into batches and processed in separate threads in this case."
      + " Note that this only improves runtime for larger datasets."
      + " Nominal attributes are processed using the unsupervised"
      + " NominalToBinary filter and missing values are replaced globally"
      + " using ReplaceMissingValues.";
  }

  /** Returns the tip text for the tolerance option. */
  public String toleranceTipText() {
    return "The tolerance parameter for the delta values.";
  }

  /** Gets the tolerance parameter for the delta values. */
  public double getTolerance() {
    return m_tolerance;
  }

  /** Sets the tolerance parameter for the delta values. */
  public void setTolerance(double newTolerance) {
    m_tolerance = newTolerance;
  }

  /** Returns the tip text for the number of hidden units. */
  public String numFunctionsTipText() {
    return "The number of hidden units to use.";
  }

  /** Gets the number of hidden units. */
  public int getNumFunctions() {
    return m_numUnits;
  }

  /** Sets the number of hidden units. */
  public void setNumFunctions(int newNumFunctions) {
    m_numUnits = newNumFunctions;
  }

  /** Returns the tip text for the ridge option. */
  public String ridgeTipText() {
    return "The ridge penalty factor for the quadratic penalty on the weights.";
  }

  /** Gets the ridge penalty factor. */
  public double getRidge() {
    return m_ridge;
  }

  /** Sets the ridge penalty factor. */
  public void setRidge(double newRidge) {
    m_ridge = newRidge;
  }

  /** Returns the tip text for the useCGD option. */
  public String useCGDTipText() {
    return "Whether to use conjugate gradient descent (potentially useful for many parameters).";
  }

  /** Gets whether conjugate gradient descent is used. */
  public boolean getUseCGD() {
    return m_useCGD;
  }

  /** Sets whether to use conjugate gradient descent. */
  public void setUseCGD(boolean newUseCGD) {
    m_useCGD = newUseCGD;
  }

  /** Returns the tip text for the number of threads. */
  public String numThreadsTipText() {
    return "The number of threads to use, which should be >= size of thread pool.";
  }

  /** Gets the number of threads. */
  public int getNumThreads() {
    return m_numThreads;
  }

  /** Sets the number of threads. */
  public void setNumThreads(int nT) {
    m_numThreads = nT;
  }

  /** Returns the tip text for the pool size option. */
  public String poolSizeTipText() {
    return "The size of the thread pool, for example, the number of cores in the CPU.";
  }

  /** Gets the size of the thread pool. */
  public int getPoolSize() {
    return m_poolSize;
  }

  /** Sets the size of the thread pool. */
  public void setPoolSize(int nT) {
    m_poolSize = nT;
  }
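
  /**
   * Returns an enumeration describing the available options.
   *
   * @return an enumeration of all the available options
   */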
  @Override
  public Enumeration<Option> listOptions() {

    Vector<Option> newVector = new Vector<Option>(6);

    newVector.addElement(new Option(
      "\tNumber of hidden units (default is 2).\n", "N", 1, "-N <int>"));
    newVector.addElement(new Option(
      "\tRidge factor for quadratic penalty on weights (default is 0.01).\n",
      "R", 1, "-R <double>"));
    newVector.addElement(new Option(
      "\tTolerance parameter for delta values (default is 1.0e-6).\n", "O", 1,
      "-O <double>"));
    newVector.addElement(new Option(
      "\tUse conjugate gradient descent (recommended for many attributes).\n",
      "G", 0, "-G"));
    newVector.addElement(new Option(
      "\t" + poolSizeTipText() + " (default 1)\n", "P", 1, "-P <int>"));
    newVector.addElement(new Option("\t" + numThreadsTipText()
      + " (default 1)\n", "E", 1, "-E <int>"));

    newVector.addAll(Collections.list(super.listOptions()));

    return newVector.elements();
  }
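
  /**
   * Parses a given list of options.
   * <p>
   * Valid options are:
   * <pre>
   * -N &lt;int&gt;     Number of hidden units (default is 2).
   * -R &lt;double&gt;  Ridge factor for quadratic penalty on weights (default is 0.01).
   * -O &lt;double&gt;  Tolerance parameter for delta values (default is 1.0e-6).
   * -G           Use conjugate gradient descent (recommended for many attributes).
   * -P &lt;int&gt;     The size of the thread pool (default 1).
   * -E &lt;int&gt;     The number of threads to use (default 1).
   * </pre>
   *
   * @param options the list of options as an array of strings
   * @throws Exception if an option is not supported
   */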
  @Override
  public void setOptions(String[] options) throws Exception {

    String numFunctions = Utils.getOption('N', options);
    if (numFunctions.length() != 0) {
      setNumFunctions(Integer.parseInt(numFunctions));
    } else {
      setNumFunctions(2);
    }
    String ridge = Utils.getOption('R', options);
    if (ridge.length() != 0) {
      setRidge(Double.parseDouble(ridge));
    } else {
      setRidge(0.01);
    }
    String tolerance = Utils.getOption('O', options);
    if (tolerance.length() != 0) {
      setTolerance(Double.parseDouble(tolerance));
    } else {
      setTolerance(1.0e-6);
    }
    m_useCGD = Utils.getFlag('G', options);
    String poolSize = Utils.getOption('P', options);
    if (poolSize.length() != 0) {
      setPoolSize(Integer.parseInt(poolSize));
    } else {
      setPoolSize(1);
    }
    String numThreads = Utils.getOption('E', options);
    if (numThreads.length() != 0) {
      setNumThreads(Integer.parseInt(numThreads));
    } else {
      setNumThreads(1);
    }

    super.setOptions(options);

    Utils.checkForRemainingOptions(options);
  }
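
  /**
   * Gets the current settings of the classifier.
   *
   * @return an array of strings suitable for passing to setOptions
   */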
  @Override
  public String[] getOptions() {

    Vector<String> options = new Vector<String>();

    options.add("-N");
    options.add("" + getNumFunctions());

    options.add("-R");
    options.add("" + getRidge());

    options.add("-O");
    options.add("" + getTolerance());

    if (m_useCGD) {
      options.add("-G");
    }

    options.add("-P");
    options.add("" + getPoolSize());

    options.add("-E");
    options.add("" + getNumThreads());

    Collections.addAll(options, super.getOptions());

    return options.toArray(new String[0]);
  }
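
  /**
   * Outputs the network's weights as a string.
   *
   * @return a textual description of the model
   */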
  @Override
  public String toString() {

    if (m_ZeroR != null) {
      return m_ZeroR.toString();
    }

    if (m_MLPParameters == null) {
      return "Classifier not built yet.";
    }

    String s = "MLPClassifier with ridge value " + getRidge() + " and "
      + getNumFunctions() + " hidden units (useCGD=" + getUseCGD() + ")\n\n";

    for (int i = 0; i < m_numUnits; i++) {
      for (int j = 0; j < m_numClasses; j++) {
        s += "Output unit " + j + " weight for hidden unit " + i + ": "
          + m_MLPParameters[OFFSET_WEIGHTS + j * (m_numUnits + 1) + i] + "\n";
      }
      s += "\nHidden unit " + i + " weights:\n\n";
      for (int j = 0; j < m_numAttributes; j++) {
        if (j != m_classIndex) {
          s += m_MLPParameters[OFFSET_ATTRIBUTE_WEIGHTS + (i * m_numAttributes)
            + j] + " " + m_data.attribute(j).name() + "\n";
        }
      }
      s += "\nHidden unit " + i + " bias: "
        + m_MLPParameters[OFFSET_ATTRIBUTE_WEIGHTS
          + (i * m_numAttributes + m_classIndex)] + "\n\n";
    }
    for (int j = 0; j < m_numClasses; j++) {
      s += "Output unit " + j + " bias: "
        + m_MLPParameters[OFFSET_WEIGHTS + j * (m_numUnits + 1) + m_numUnits]
        + "\n";
    }

    return s;
  }
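
  /**
   * Main method to run this classifier from the command line.
   *
   * @param argv the command-line options
   */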
  public static void main(String[] argv) {
    runClassifier(new MLPClassifier(), argv);
  }
}