| package org.maltparser.ml.libsvm;
|
|
|
| import java.io.BufferedReader;
|
| import java.io.BufferedWriter;
|
| import java.io.File;
|
| import java.io.FileNotFoundException;
|
| import java.io.IOException;
|
| import java.io.InputStream;
|
| import java.io.InputStreamReader;
|
| import java.io.OutputStreamWriter;
|
| import java.io.UnsupportedEncodingException;
|
| import java.text.DecimalFormat;
|
| import java.text.DecimalFormatSymbols;
|
| import java.util.ArrayList;
|
| import java.util.HashMap;
|
| import java.util.Map;
|
| import java.util.Set;
|
| import java.util.jar.JarEntry;
|
| import java.util.regex.Pattern;
|
| import java.util.regex.PatternSyntaxException;
|
|
|
| import libsvm.svm;
|
| import libsvm.svm_model;
|
| import libsvm.svm_node;
|
| import libsvm.svm_parameter;
|
| import libsvm.svm_problem;
|
|
|
| import org.maltparser.core.config.Configuration;
|
| import org.maltparser.core.config.ConfigurationException;
|
| import org.maltparser.core.exception.MaltChainedException;
|
| import org.maltparser.core.feature.FeatureVector;
|
| import org.maltparser.core.feature.function.FeatureFunction;
|
| import org.maltparser.core.feature.value.FeatureValue;
|
| import org.maltparser.core.feature.value.MultipleFeatureValue;
|
| import org.maltparser.core.feature.value.SingleFeatureValue;
|
| import org.maltparser.core.syntaxgraph.DependencyStructure;
|
| import org.maltparser.ml.LearningMethod;
|
| import org.maltparser.ml.liblinear.LiblinearException;
|
| import org.maltparser.ml.libsvm.LibsvmException;
|
| import org.maltparser.parser.DependencyParserConfig;
|
| import org.maltparser.parser.guide.instance.InstanceModel;
|
| import org.maltparser.parser.history.action.SingleDecision;
|
| import org.maltparser.parser.history.kbest.KBestList;
|
| import org.maltparser.parser.history.kbest.ScoredKBestList;
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| public class Libsvm implements LearningMethod {
|
| public final static String LIBSVM_VERSION = "2.91";
|
| public enum Verbostity {
|
| SILENT, ERROR, ALL
|
| }
|
| protected InstanceModel owner;
|
| protected int learnerMode;
|
| protected String name;
|
| protected int numberOfInstances;
|
| protected boolean saveInstanceFiles;
|
| protected boolean excludeNullValues;
|
| protected String pathExternalSVMTrain = null;
|
|
|
|
|
| |
| |
|
|
| private BufferedWriter instanceOutput = null;
|
| |
| |
|
|
| private svm_model model = null;
|
|
|
| |
| |
|
|
| private svm_parameter svmParam;
|
| |
| |
|
|
| private String paramString;
|
| |
| |
|
|
| private ArrayList<svm_node> xlist = null;
|
|
|
| private Verbostity verbosity;
|
| |
| |
| |
| |
| |
|
|
| public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
|
| setOwner(owner);
|
| setLearningMethodName("libsvm");
|
| setLearnerMode(learnerMode.intValue());
|
| setNumberOfInstances(0);
|
| verbosity = Verbostity.SILENT;
|
| initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString());
|
| initSpecialParameters();
|
| if (learnerMode == BATCH) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
|
| }
|
|
|
|
|
|
|
|
|
|
|
| }
|
|
|
|
|
| public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
|
| if (featureVector == null) {
|
| throw new LibsvmException("The feature vector cannot be found");
|
| } else if (decision == null) {
|
| throw new LibsvmException("The decision cannot be found");
|
| }
|
| try {
|
| instanceOutput.write(decision.getDecisionCode()+"\t");
|
| for (int i = 0; i < featureVector.size(); i++) {
|
| FeatureValue featureValue = featureVector.get(i).getFeatureValue();
|
| if (excludeNullValues == true && featureValue.isNullValue()) {
|
| instanceOutput.write("-1");
|
| } else {
|
| if (featureValue instanceof SingleFeatureValue) {
|
| instanceOutput.write(((SingleFeatureValue)featureValue).getIndexCode()+"");
|
| } else if (featureValue instanceof MultipleFeatureValue) {
|
| Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
|
| int j=0;
|
| for (Integer value : values) {
|
| instanceOutput.write(value.toString());
|
| if (j != values.size()-1) {
|
| instanceOutput.write("|");
|
| }
|
| j++;
|
| }
|
| }
|
| }
|
| if (i != featureVector.size()) {
|
| instanceOutput.write('\t');
|
| }
|
| }
|
|
|
| instanceOutput.write('\n');
|
| instanceOutput.flush();
|
| increaseNumberOfInstances();
|
| } catch (IOException e) {
|
| throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e);
|
| }
|
| }
|
|
|
| public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
|
|
|
| |
| |
|
|
| public void noMoreInstances() throws MaltChainedException {
|
| closeInstanceWriter();
|
| }
|
|
|
|
|
| |
| |
|
|
| public void train() throws MaltChainedException {
|
| if (owner == null) {
|
| throw new LibsvmException("The parent guide model cannot be found. ");
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| }
|
|
|
|
|
|
|
| private void trainExternal() throws MaltChainedException {
|
| try {
|
|
|
| Configuration config = owner.getGuide().getConfiguration();
|
| if (config.isLoggerInfoEnabled()) {
|
| config.logInfoMessage("Creating LIBSVM model (svm-train) "+getFile(".mod").getName());
|
| }
|
|
|
|
|
| final ArrayList<String> commands = new ArrayList<String>();
|
| commands.add(pathExternalSVMTrain);
|
| final String[] params = getSVMParamStringArray(svmParam);
|
| for (int i=0; i < params.length; i++) {
|
| commands.add(params[i]);
|
| }
|
| commands.add(getFile(".ins.tmp").getAbsolutePath());
|
| commands.add(getFile(".mod").getAbsolutePath());
|
| String[] arrayCommands = commands.toArray(new String[commands.size()]);
|
|
|
| if (verbosity == Verbostity.ALL) {
|
| config.logInfoMessage('\n');
|
| }
|
| final Process child = Runtime.getRuntime().exec(arrayCommands);
|
| final InputStream in = child.getInputStream();
|
| final InputStream err = child.getErrorStream();
|
| int c;
|
| while ((c = in.read()) != -1){
|
| if (verbosity == Verbostity.ALL) {
|
| config.logInfoMessage((char)c);
|
| }
|
| }
|
| while ((c = err.read()) != -1){
|
| if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
|
| config.logInfoMessage((char)c);
|
| }
|
| }
|
| if (child.waitFor() != 0) {
|
| config.logErrorMessage(" FAILED ("+child.exitValue()+")");
|
| }
|
| in.close();
|
| err.close();
|
| if (!saveInstanceFiles) {
|
| getFile(".ins").delete();
|
| getFile(".ins.tmp").delete();
|
| }
|
| if (config.isLoggerInfoEnabled()) {
|
| config.logInfoMessage('\n');
|
| }
|
| } catch (InterruptedException e) {
|
| throw new LibsvmException("SVM-trainer is interrupted. ", e);
|
| } catch (IllegalArgumentException e) {
|
| throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
|
| } catch (SecurityException e) {
|
| throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
|
| } catch (IOException e) {
|
| throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
|
| } catch (OutOfMemoryError e) {
|
| throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
|
| }
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
| |
|
|
| public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
|
| if (method == null) {
|
| throw new LibsvmException("The learning method cannot be found. ");
|
| } else if (divideFeature == null) {
|
| throw new LibsvmException("The divide feature cannot be found. ");
|
| }
|
| try {
|
| final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
|
| final BufferedWriter out = method.getInstanceWriter();
|
| final StringBuilder sb = new StringBuilder(6);
|
| int l = in.read();
|
| char c;
|
| int j = 0;
|
| while(true) {
|
| if (l == -1) {
|
| sb.setLength(0);
|
| break;
|
| }
|
|
|
| c = (char)l;
|
| l = in.read();
|
| if (c == '\t') {
|
| if (divideFeatureIndexVector.contains(j-1)) {
|
| out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
|
| out.write('\t');
|
| }
|
| out.write(sb.toString());
|
| j++;
|
| out.write('\t');
|
| sb.setLength(0);
|
| } else if (c == '\n') {
|
| if (sb.length() > 0) {
|
| out.write(sb.toString());
|
| }
|
| if (divideFeatureIndexVector.contains(j-1)) {
|
| if (sb.length() > 0) {
|
| out.write('\t');
|
| }
|
| out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
|
| }
|
| out.write('\n');
|
| sb.setLength(0);
|
| method.increaseNumberOfInstances();
|
| this.decreaseNumberOfInstances();
|
| j = 0;
|
| } else {
|
| sb.append(c);
|
| }
|
| }
|
| in.close();
|
| getFile(".ins").delete();
|
| } catch (SecurityException e) {
|
| throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
|
| } catch (NullPointerException e) {
|
| throw new LibsvmException("The instance file cannot be found. ", e);
|
| } catch (FileNotFoundException e) {
|
| throw new LibsvmException("The instance file cannot be found. ", e);
|
| } catch (IOException e) {
|
| throw new LibsvmException("The LIBSVM learner read from the instance file. ", e);
|
| }
|
| }
|
|
|
| |
| |
|
|
| public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
|
| if (model == null) {
|
| try {
|
| model = svm.svm_load_model(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod")));
|
| } catch (IOException e) {
|
| throw new LibsvmException("The model cannot be loaded. ", e);
|
| }
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| if (xlist == null) {
|
| xlist = new ArrayList<svm_node>(featureVector.size());
|
| }
|
| if (model == null) {
|
| throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. ");
|
| } else if (featureVector == null) {
|
| throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. ");
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| return true;
|
| }
|
|
|
|
|
| public void terminate() throws MaltChainedException {
|
| closeInstanceWriter();
|
| model = null;
|
| svmParam = null;
|
| xlist = null;
|
| owner = null;
|
| }
|
|
|
| public BufferedWriter getInstanceWriter() {
|
| return instanceOutput;
|
| }
|
|
|
| protected void closeInstanceWriter() throws MaltChainedException {
|
| try {
|
| if (instanceOutput != null) {
|
| instanceOutput.flush();
|
| instanceOutput.close();
|
| instanceOutput = null;
|
| }
|
| } catch (IOException e) {
|
| throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e);
|
| }
|
| }
|
|
|
| |
| |
| |
| |
| |
|
|
| protected void initSvmParam(String paramString) throws MaltChainedException {
|
| this.paramString = paramString;
|
| svmParam = new svm_parameter();
|
| initParameters(svmParam);
|
| parseParameters(paramString, svmParam);
|
| }
|
|
|
| |
| |
| |
| |
|
|
| public String getParamString() {
|
| return paramString;
|
| }
|
|
|
| public InstanceModel getOwner() {
|
| return owner;
|
| }
|
|
|
| protected void setOwner(InstanceModel owner) {
|
| this.owner = owner;
|
| }
|
|
|
| public int getLearnerMode() {
|
| return learnerMode;
|
| }
|
|
|
| public void setLearnerMode(int learnerMode) throws MaltChainedException {
|
| this.learnerMode = learnerMode;
|
| }
|
|
|
| public String getLearningMethodName() {
|
| return name;
|
| }
|
|
|
| |
| |
| |
| |
| |
|
|
| public DependencyParserConfig getConfiguration() throws MaltChainedException {
|
| return owner.getGuide().getConfiguration();
|
| }
|
|
|
| public int getNumberOfInstances() throws MaltChainedException {
|
| if(numberOfInstances!=0)
|
| return numberOfInstances;
|
| else{
|
|
|
|
|
| BufferedReader reader = new BufferedReader( getInstanceInputStreamReader(".ins"));
|
| try {
|
| while(reader.readLine()!=null){
|
| numberOfInstances++;
|
| owner.increaseFrequency();
|
| }
|
|
|
| reader.close();
|
| } catch (IOException e) {
|
| throw new MaltChainedException("No instances found in file",e);
|
| }
|
|
|
|
|
|
|
| return numberOfInstances;
|
|
|
| }
|
| }
|
|
|
| public void increaseNumberOfInstances() {
|
| numberOfInstances++;
|
| owner.increaseFrequency();
|
| }
|
|
|
| public void decreaseNumberOfInstances() {
|
| numberOfInstances--;
|
| owner.decreaseFrequency();
|
| }
|
|
|
| protected void setNumberOfInstances(int numberOfInstances) {
|
| this.numberOfInstances = 0;
|
| }
|
|
|
| protected void setLearningMethodName(String name) {
|
| this.name = name;
|
| }
|
|
|
| protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
|
| return getConfiguration().getAppendOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
|
| return getConfiguration().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException {
|
| try {
|
| return new InputStreamReader(getInputStreamFromConfigFileEntry(suffix), "UTF-8");
|
| } catch (UnsupportedEncodingException e) {
|
| throw new ConfigurationException("The char set UTF-8 is not supported. ", e);
|
| }
|
|
|
| }
|
|
|
| protected InputStream getInputStreamFromConfigFileEntry(String suffix) throws MaltChainedException {
|
| return getConfiguration().getInputStreamFromConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected File getFile(String suffix) throws MaltChainedException {
|
| return getConfiguration().getFile(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
|
|
|
|
|
|
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| public final svm_problem readProblemMaltSVMFormat(InputStreamReader isr, int[] cardinalities, svm_parameter param) throws MaltChainedException {
|
| final svm_problem prob = new svm_problem();
|
| try {
|
| final BufferedReader fp = new BufferedReader(isr);
|
| int max_index = 0;
|
| if (xlist == null) {
|
| xlist = new ArrayList<svm_node>();
|
| }
|
| prob.l = getNumberOfInstances();
|
| prob.x = new svm_node[prob.l][];
|
| prob.y = new double[prob.l];
|
| int i = 0;
|
| final Pattern tabPattern = Pattern.compile("\t");
|
| final Pattern pipePattern = Pattern.compile("\\|");
|
| while(true) {
|
| String line = fp.readLine();
|
| if(line == null) break;
|
| String[] columns = tabPattern.split(line);
|
|
|
| if (columns.length == 0) {
|
| continue;
|
| }
|
|
|
| int offset = 0;
|
| int j = 0;
|
| try {
|
| prob.y[i] = (double)Integer.parseInt(columns[j]);
|
| int p = 0;
|
| for(j = 1; j < columns.length; j++) {
|
| final String[] items = pipePattern.split(columns[j]);
|
| for (int k = 0; k < items.length; k++) {
|
| try {
|
| if (Integer.parseInt(items[k]) != -1) {
|
| xlist.add(p, new svm_node());
|
| xlist.get(p).value = 1;
|
| xlist.get(p).index = Integer.parseInt(items[k])+offset;
|
| p++;
|
| }
|
| } catch (NumberFormatException e) {
|
| throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e);
|
| }
|
| }
|
| offset += cardinalities[j-1];
|
| }
|
| prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]);
|
| if(columns.length > 1) {
|
| max_index = Math.max(max_index, xlist.get(p-1).index);
|
| }
|
| i++;
|
| xlist.clear();
|
| } catch (ArrayIndexOutOfBoundsException e) {
|
| throw new LibsvmException("Cannot read from the instance file. ", e);
|
| }
|
| }
|
| fp.close();
|
| if (param.gamma == 0) {
|
| param.gamma = 1.0/max_index;
|
| }
|
| xlist = null;
|
| } catch (IOException e) {
|
| throw new LibsvmException("Cannot read from the instance file. ", e);
|
| }
|
| return prob;
|
| }
|
|
|
| protected void initSpecialParameters() throws MaltChainedException {
|
| if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
|
| excludeNullValues = true;
|
| } else {
|
| excludeNullValues = false;
|
| }
|
| saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue();
|
|
|
| if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) {
|
| try {
|
| if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) {
|
| throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong.");
|
| }
|
| if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) {
|
| throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file");
|
| }
|
| if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) {
|
| throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. ");
|
| }
|
| pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString();
|
| } catch (SecurityException e) {
|
| throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e);
|
| }
|
| }
|
| if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) {
|
| verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase());
|
| }
|
| }
|
|
|
| |
| |
| |
| |
|
|
| protected void initParameters(svm_parameter param) throws MaltChainedException {
|
| if (param == null) {
|
| throw new LibsvmException("Svm-parameters cannot be found. ");
|
| }
|
| param.svm_type = svm_parameter.C_SVC;
|
| param.kernel_type = svm_parameter.POLY;
|
| param.degree = 2;
|
| param.gamma = 0.2;
|
| param.coef0 = 0;
|
| param.nu = 0.5;
|
| param.cache_size = 100;
|
| param.C = 1;
|
| param.eps = 1.0;
|
| param.p = 0.1;
|
| param.shrinking = 1;
|
| param.probability = 0;
|
| param.nr_weight = 0;
|
| param.weight_label = new int[0];
|
| param.weight = new double[0];
|
| }
|
|
|
| |
| |
| |
| |
| |
|
|
| public String toStringParameters(svm_parameter param) {
|
| if (param == null) {
|
| throw new IllegalArgumentException("Svm-parameters cannot be found. ");
|
| }
|
| final StringBuffer sb = new StringBuffer();
|
|
|
| final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"};
|
| final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"};
|
| final DecimalFormat dform = new DecimalFormat("#0.0#");
|
| final DecimalFormatSymbols sym = new DecimalFormatSymbols();
|
| sym.setDecimalSeparator('.');
|
| dform.setDecimalFormatSymbols(sym);
|
| sb.append("LIBSVM SETTINGS\n");
|
| sb.append(" SVM type : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n");
|
| sb.append(" Kernel : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n");
|
| if (param.kernel_type == svm_parameter.POLY) {
|
| sb.append(" Degree : " + param.degree + "\n");
|
| }
|
| if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) {
|
| sb.append(" Gamma : " + dform.format(param.gamma) + "\n");
|
| if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) {
|
| sb.append(" Coef0 : " + dform.format(param.coef0) + "\n");
|
| }
|
| }
|
| if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) {
|
| sb.append(" Nu : " + dform.format(param.nu) + "\n");
|
| }
|
| sb.append(" Cache Size : " + dform.format(param.cache_size) + " MB\n");
|
| if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) {
|
| sb.append(" C : " + dform.format(param.C) + "\n");
|
| }
|
| sb.append(" Eps : " + dform.format(param.eps) + "\n");
|
| if (param.svm_type == svm_parameter.EPSILON_SVR) {
|
| sb.append(" P : " + dform.format(param.p) + "\n");
|
| }
|
| sb.append(" Shrinking : " + param.shrinking + "\n");
|
| sb.append(" Probability : " + param.probability + "\n");
|
| if (param.svm_type == svm_parameter.C_SVC) {
|
| sb.append(" #Weight : " + param.nr_weight + "\n");
|
| if (param.nr_weight > 0) {
|
| sb.append(" Weight labels : ");
|
| for (int i = 0; i < param.nr_weight; i++) {
|
| sb.append(param.weight_label[i]);
|
| if (i != param.nr_weight-1) {
|
| sb.append(", ");
|
| }
|
| }
|
| sb.append("\n");
|
| for (int i = 0; i < param.nr_weight; i++) {
|
| sb.append(dform.format(param.weight));
|
| if (i != param.nr_weight-1) {
|
| sb.append(", ");
|
| }
|
| }
|
| sb.append("\n");
|
| }
|
| }
|
| return sb.toString();
|
| }
|
|
|
| public String[] getSVMParamStringArray(svm_parameter param) {
|
| final ArrayList<String> params = new ArrayList<String>();
|
|
|
| if (param.svm_type != 0) {
|
| params.add("-s"); params.add(new Integer(param.svm_type).toString());
|
| }
|
| if (param.kernel_type != 2) {
|
| params.add("-t"); params.add(new Integer(param.kernel_type).toString());
|
| }
|
| if (param.degree != 3) {
|
| params.add("-d"); params.add(new Integer(param.degree).toString());
|
| }
|
| params.add("-g"); params.add(new Double(param.gamma).toString());
|
| if (param.coef0 != 0) {
|
| params.add("-r"); params.add(new Double(param.coef0).toString());
|
| }
|
| if (param.nu != 0.5) {
|
| params.add("-n"); params.add(new Double(param.nu).toString());
|
| }
|
| if (param.cache_size != 100) {
|
| params.add("-m"); params.add(new Double(param.cache_size).toString());
|
| }
|
| if (param.C != 1) {
|
| params.add("-c"); params.add(new Double(param.C).toString());
|
| }
|
| if (param.eps != 0.001) {
|
| params.add("-e"); params.add(new Double(param.eps).toString());
|
| }
|
| if (param.p != 0.1) {
|
| params.add("-p"); params.add(new Double(param.p).toString());
|
| }
|
| if (param.shrinking != 1) {
|
| params.add("-h"); params.add(new Integer(param.shrinking).toString());
|
| }
|
| if (param.probability != 0) {
|
| params.add("-b"); params.add(new Integer(param.probability).toString());
|
| }
|
|
|
| return params.toArray(new String[params.size()]);
|
| }
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException {
|
| if (param == null) {
|
| throw new LibsvmException("Svm-parameters cannot be found. ");
|
| }
|
| if (paramstring == null) {
|
| return;
|
| }
|
| final String[] argv;
|
| try {
|
| argv = paramstring.split("[_\\p{Blank}]");
|
| } catch (PatternSyntaxException e) {
|
| throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e);
|
| }
|
| for (int i=0; i < argv.length-1; i++) {
|
| if(argv[i].charAt(0) != '-') {
|
| throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
|
| }
|
| if(++i>=argv.length) {
|
| throw new LibsvmException("The last argument does not have any value. ");
|
| }
|
| try {
|
| switch(argv[i-1].charAt(1)) {
|
| case 's':
|
| param.svm_type = Integer.parseInt(argv[i]);
|
| break;
|
| case 't':
|
| param.kernel_type = Integer.parseInt(argv[i]);
|
| break;
|
| case 'd':
|
| param.degree = Integer.parseInt(argv[i]);
|
| break;
|
| case 'g':
|
| param.gamma = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'r':
|
| param.coef0 = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'n':
|
| param.nu = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'm':
|
| param.cache_size = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'c':
|
| param.C = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'e':
|
| param.eps = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'p':
|
| param.p = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'h':
|
| param.shrinking = Integer.parseInt(argv[i]);
|
| break;
|
| case 'b':
|
| param.probability = Integer.parseInt(argv[i]);
|
| break;
|
| case 'w':
|
| ++param.nr_weight;
|
| {
|
| int[] old = param.weight_label;
|
| param.weight_label = new int[param.nr_weight];
|
| System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
|
| }
|
|
|
| {
|
| double[] old = param.weight;
|
| param.weight = new double[param.nr_weight];
|
| System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
|
| }
|
|
|
| param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2));
|
| param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue();
|
| break;
|
| case 'Y':
|
| case 'V':
|
| case 'S':
|
| case 'F':
|
| case 'T':
|
| case 'M':
|
| case 'N':
|
| break;
|
| default:
|
| throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");
|
| }
|
| } catch (ArrayIndexOutOfBoundsException e) {
|
| throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| } catch (NumberFormatException e) {
|
| throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| } catch (NullPointerException e) {
|
| throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| }
|
| }
|
| }
|
|
|
| public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException {
|
| int i;
|
| final int nr_class = svm.svm_get_nr_class(model);
|
| final double[] dec_values = new double[nr_class*(nr_class-1)/2];
|
| svm.svm_predict_values(model, x, dec_values);
|
|
|
| final int[] vote = new int[nr_class];
|
| final double[] score = new double[nr_class];
|
| final int[] voteindex = new int[nr_class];
|
| for(i=0;i<nr_class;i++) {
|
| vote[i] = 0;
|
| score[i] = 0.0;
|
| voteindex[i] = i;
|
| }
|
| int pos=0;
|
| for(i=0;i<nr_class;i++) {
|
| for(int j=i+1;j<nr_class;j++) {
|
| if(dec_values[pos] > 0) {
|
| vote[i]++;
|
| } else {
|
| vote[j]++;
|
| }
|
| score[i] += dec_values[pos];
|
| score[j] += dec_values[pos];
|
| pos++;
|
| }
|
| }
|
| for(i=0;i<nr_class;i++) {
|
| score[i] = score[i]/nr_class;
|
| }
|
| int lagest, tmpint;
|
| double tmpdouble;
|
| for (i=0;i<nr_class-1;i++) {
|
| lagest = i;
|
| for (int j=i;j<nr_class;j++) {
|
| if (vote[j] > vote[lagest]) {
|
| lagest = j;
|
| }
|
| }
|
| tmpint = vote[lagest];
|
| vote[lagest] = vote[i];
|
| vote[i] = tmpint;
|
| tmpdouble = score[lagest];
|
| score[lagest] = score[i];
|
| score[i] = tmpdouble;
|
| tmpint = voteindex[lagest];
|
| voteindex[lagest] = voteindex[i];
|
| voteindex[i] = tmpint;
|
| }
|
| final int[] labels = new int[nr_class];
|
| svm.svm_get_labels(model, labels);
|
| int k = nr_class-1;
|
| if (kBestList.getK() != -1) {
|
| k = kBestList.getK() - 1;
|
| }
|
|
|
| for (i=0; i<nr_class && k >= 0; i++, k--) {
|
| if (vote[i] > 0 || i == 0) {
|
| if (kBestList instanceof ScoredKBestList) {
|
| ((ScoredKBestList)kBestList).add(labels[voteindex[i]], (float)vote[i]/(float)(nr_class*(nr_class-1)/2));
|
| } else {
|
| kBestList.add(labels[voteindex[i]]);
|
| }
|
| }
|
| }
|
| }
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
|
| try {
|
| final BufferedReader in = new BufferedReader(isr);
|
| final BufferedWriter out = new BufferedWriter(osw);
|
|
|
| int c;
|
| int j = 0;
|
| int offset = 0;
|
| int code = 0;
|
| while(true) {
|
| c = in.read();
|
| if (c == -1) {
|
| break;
|
| }
|
|
|
| if (c == '\t' || c == '|') {
|
| if (j == 0) {
|
| out.write(Integer.toString(code));
|
| j++;
|
| } else {
|
| if (code != -1) {
|
| out.write(' ');
|
| out.write(Integer.toString(code+offset));
|
| out.write(":1");
|
| }
|
| if (c == '\t') {
|
| offset += cardinalities[j-1];
|
| j++;
|
| }
|
| }
|
| code = 0;
|
| } else if (c == '\n') {
|
| j = 0;
|
| offset = 0;
|
| out.write('\n');
|
| code = 0;
|
| } else if (c == '-') {
|
| code = -1;
|
| } else if (code != -1) {
|
| if (c > 47 && c < 58) {
|
| code = code * 10 + (c-48);
|
| } else {
|
| throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format.");
|
| }
|
| }
|
| }
|
| in.close();
|
| out.close();
|
| } catch (IOException e) {
|
| throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e);
|
| }
|
| }
|
|
|
| protected void finalize() throws Throwable {
|
| try {
|
| closeInstanceWriter();
|
| } finally {
|
| super.finalize();
|
| }
|
| }
|
|
|
| |
| |
|
|
| public String toString() {
|
| final StringBuffer sb = new StringBuffer();
|
| sb.append("\nLIBSVM INTERFACE\n");
|
| sb.append(" LIBSVM version: "+LIBSVM_VERSION+"\n");
|
| sb.append(" SVM-param string: "+paramString+"\n");
|
|
|
| sb.append(toStringParameters(svmParam));
|
| return sb.toString();
|
| }
|
| }
|
|
|