| package org.maltparser.ml.liblinear;
|
|
|
| import java.io.BufferedReader;
|
| import java.io.BufferedWriter;
|
| import java.io.File;
|
| import java.io.FileNotFoundException;
|
| import java.io.FileOutputStream;
|
| import java.io.IOException;
|
| import java.io.InputStream;
|
| import java.io.InputStreamReader;
|
| import java.io.ObjectInputStream;
|
| import java.io.ObjectOutputStream;
|
| import java.io.OutputStream;
|
| import java.io.OutputStreamWriter;
|
| import java.io.PrintStream;
|
| import java.io.UnsupportedEncodingException;
|
| import java.util.ArrayList;
|
| import java.util.HashMap;
|
| import java.util.LinkedHashMap;
|
| import java.util.Set;
|
| import java.util.TreeSet;
|
| import java.util.regex.Pattern;
|
| import java.util.regex.PatternSyntaxException;
|
|
|
| import de.bwaldvogel.liblinear.FeatureNode;
|
| import de.bwaldvogel.liblinear.Linear;
|
| import de.bwaldvogel.liblinear.Model;
|
| import de.bwaldvogel.liblinear.Parameter;
|
| import de.bwaldvogel.liblinear.Problem;
|
| import de.bwaldvogel.liblinear.SolverType;
|
|
|
|
|
|
|
| import org.maltparser.core.config.Configuration;
|
| import org.maltparser.core.config.ConfigurationException;
|
| import org.maltparser.core.exception.MaltChainedException;
|
| import org.maltparser.core.feature.FeatureVector;
|
| import org.maltparser.core.feature.function.FeatureFunction;
|
| import org.maltparser.core.feature.value.FeatureValue;
|
| import org.maltparser.core.feature.value.MultipleFeatureValue;
|
| import org.maltparser.core.feature.value.SingleFeatureValue;
|
| import org.maltparser.core.helper.NoPrintStream;
|
| import org.maltparser.core.syntaxgraph.DependencyStructure;
|
| import org.maltparser.ml.LearningMethod;
|
| import org.maltparser.parser.DependencyParserConfig;
|
| import org.maltparser.parser.guide.instance.InstanceModel;
|
| import org.maltparser.parser.history.action.SingleDecision;
|
| import org.maltparser.parser.history.kbest.KBestList;
|
| import org.maltparser.parser.history.kbest.ScoredKBestList;
|
|
|
|
|
|
|
| public class Liblinear implements LearningMethod {
|
| public final static String LIBLINEAR_VERSION = "1.51";
|
| public enum Verbostity {
|
| SILENT, ERROR, ALL
|
| }
|
| private LinkedHashMap<String, String> liblinearOptions;
|
|
|
| protected InstanceModel owner;
|
| protected int learnerMode;
|
| protected String name;
|
| protected int numberOfInstances;
|
| protected boolean saveInstanceFiles;
|
| protected boolean excludeNullValues;
|
| protected String pathExternalLiblinearTrain = null;
|
|
|
| |
| |
|
|
| private BufferedWriter instanceOutput = null;
|
| |
| |
|
|
| private Model model = null;
|
|
|
| |
| |
|
|
| private String paramString;
|
|
|
| private ArrayList<FeatureNode> xlist = null;
|
|
|
| private Verbostity verbosity;
|
|
|
| private HashMap<Long,Integer> featureMap;
|
| private int featureCounter = 1;
|
| private boolean featurePruning = false;
|
| private TreeSet<XNode> featureSet;
|
|
|
| |
| |
| |
| |
| |
|
|
| public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
|
| setOwner(owner);
|
| setLearningMethodName("liblinear");
|
| setLearnerMode(learnerMode.intValue());
|
| setNumberOfInstances(0);
|
| verbosity = Verbostity.SILENT;
|
|
|
| liblinearOptions = new LinkedHashMap<String, String>();
|
| initLiblinearOptions();
|
| parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
|
| initSpecialParameters();
|
|
|
| if (learnerMode == BATCH) {
|
| if (featurePruning) {
|
| featureMap = new HashMap<Long, Integer>();
|
| }
|
| instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
|
| }
|
| if (featurePruning) {
|
| featureSet = new TreeSet<XNode>();
|
| }
|
| }
|
|
|
| private int addFeatureMapValue(int featurePosition, int code) {
|
| long key = ((((long)featurePosition) << 48) | (long)code);
|
| if (featureMap.containsKey(key)) {
|
| return featureMap.get(key);
|
| }
|
| int value = featureCounter++;
|
| featureMap.put(key, value);
|
| return value;
|
| }
|
|
|
| private int getFeatureMapValue(int featurePosition, int code) {
|
| long key = ((((long)featurePosition) << 48) | (long)code);
|
| if (featureMap.containsKey(key)) {
|
| return featureMap.get(key);
|
| }
|
| return -1;
|
| }
|
|
|
| private void saveFeatureMap(OutputStream os, HashMap<Long,Integer> map) throws MaltChainedException {
|
| try {
|
| ObjectOutputStream obj_out_stream = new ObjectOutputStream (os);
|
| obj_out_stream.writeObject(map);
|
| obj_out_stream.close();
|
| } catch (IOException e) {
|
| throw new LiblinearException("Save feature map error", e);
|
| }
|
| }
|
|
|
| private HashMap<Long,Integer> loadFeatureMap(InputStream is) throws MaltChainedException {
|
| HashMap<Long,Integer> map = new HashMap<Long,Integer>();
|
| try {
|
| ObjectInputStream obj_in_stream = new ObjectInputStream(is);
|
| map = (HashMap<Long,Integer>)obj_in_stream.readObject();
|
| obj_in_stream.close();
|
| } catch (ClassNotFoundException e) {
|
| throw new LiblinearException("Load feature map error", e);
|
| } catch (IOException e) {
|
| throw new LiblinearException("Load feature map error", e);
|
| }
|
| return map;
|
| }
|
|
|
| public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
|
| if (featureVector == null) {
|
| throw new LiblinearException("The feature vector cannot be found");
|
| } else if (decision == null) {
|
| throw new LiblinearException("The decision cannot be found");
|
| }
|
|
|
| StringBuilder sb = new StringBuilder();
|
| try {
|
| sb.append(decision.getDecisionCode()+"\t");
|
| int n = featureVector.size();
|
| for (int i = 0; i < n; i++) {
|
| FeatureValue featureValue = featureVector.get(i).getFeatureValue();
|
| if (excludeNullValues == true && featureValue.isNullValue()) {
|
| sb.append("-1");
|
| } else {
|
| if (featureValue instanceof SingleFeatureValue) {
|
| sb.append(((SingleFeatureValue)featureValue).getIndexCode()+"");
|
| } else if (featureValue instanceof MultipleFeatureValue) {
|
| Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
|
| int j=0;
|
| for (Integer value : values) {
|
| sb.append(value.toString());
|
| if (j != values.size()-1) {
|
| sb.append("|");
|
| }
|
| j++;
|
| }
|
| }
|
| }
|
| sb.append('\t');
|
| }
|
| sb.append('\n');
|
| instanceOutput.write(sb.toString());
|
| instanceOutput.flush();
|
| increaseNumberOfInstances();
|
| } catch (IOException e) {
|
| throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
|
| }
|
| }
|
|
|
| public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
|
|
|
| |
| |
|
|
| public void noMoreInstances() throws MaltChainedException {
|
| closeInstanceWriter();
|
| }
|
|
|
|
|
| |
| |
|
|
| public void train() throws MaltChainedException {
|
| if (owner == null) {
|
| throw new LiblinearException("The parent guide model cannot be found. ");
|
| }
|
|
|
| if (pathExternalLiblinearTrain == null) {
|
| try {
|
| Problem problem = null;
|
| if (featurePruning) {
|
| problem = readLibLinearProblemWithFeaturePruning(getInstanceInputStreamReader(".ins"));
|
| } else {
|
|
|
| }
|
| Configuration config = owner.getGuide().getConfiguration();
|
| if (config.isLoggerInfoEnabled()) {
|
| config.logInfoMessage("Creating Liblinear model "+getFile(".mod").getName()+"\n");
|
| }
|
| final PrintStream out = System.out;
|
| final PrintStream err = System.err;
|
| System.setOut(NoPrintStream.NO_PRINTSTREAM);
|
| System.setErr(NoPrintStream.NO_PRINTSTREAM);
|
| Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
|
| System.setOut(err);
|
| System.setOut(out);
|
| if (!saveInstanceFiles) {
|
| getFile(".ins").delete();
|
| }
|
| } catch (OutOfMemoryError e) {
|
| throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
|
| } catch (IllegalArgumentException e) {
|
| throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
|
| } catch (SecurityException e) {
|
| throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
|
| } catch (IOException e) {
|
| throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
|
| }
|
| } else {
|
| trainExternal();
|
| }
|
|
|
| if (featurePruning) {
|
| try {
|
| saveFeatureMap(new FileOutputStream(getFile(".map").getAbsolutePath()), featureMap);
|
| } catch (FileNotFoundException e) {
|
| throw new LiblinearException("The Liblinear learner cannot save the feature map file '"+getFile(".map").getAbsolutePath()+"'. ", e);
|
| }
|
| } else {
|
|
|
| }
|
| }
|
|
|
| private void trainExternal() throws MaltChainedException {
|
| try {
|
|
|
| Configuration config = owner.getGuide().getConfiguration();
|
| if (config.isLoggerInfoEnabled()) {
|
| config.logInfoMessage("Creating Liblinear model (external) "+getFile(".mod").getName());
|
| }
|
|
|
| final String[] params = getLibLinearParamStringArray();
|
| String[] arrayCommands = new String[params.length+3];
|
| int i = 0;
|
| arrayCommands[i++] = pathExternalLiblinearTrain;
|
| for (; i <= params.length; i++) {
|
| arrayCommands[i] = params[i-1];
|
| }
|
| arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
|
| arrayCommands[i++] = getFile(".mod").getAbsolutePath();
|
|
|
| if (verbosity == Verbostity.ALL) {
|
| config.logInfoMessage('\n');
|
| }
|
| final Process child = Runtime.getRuntime().exec(arrayCommands);
|
| final InputStream in = child.getInputStream();
|
| final InputStream err = child.getErrorStream();
|
| int c;
|
| while ((c = in.read()) != -1){
|
| if (verbosity == Verbostity.ALL) {
|
| config.logInfoMessage((char)c);
|
| }
|
| }
|
| while ((c = err.read()) != -1){
|
| if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
|
| config.logInfoMessage((char)c);
|
| }
|
| }
|
| if (child.waitFor() != 0) {
|
| config.logErrorMessage(" FAILED ("+child.exitValue()+")");
|
| }
|
| in.close();
|
| err.close();
|
| if (!saveInstanceFiles) {
|
| getFile(".ins").delete();
|
| getFile(".ins.tmp").delete();
|
| }
|
| if (config.isLoggerInfoEnabled()) {
|
| config.logInfoMessage('\n');
|
| }
|
| } catch (InterruptedException e) {
|
| throw new LiblinearException("Liblinear is interrupted. ", e);
|
| } catch (IllegalArgumentException e) {
|
| throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
|
| } catch (SecurityException e) {
|
| throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
|
| } catch (IOException e) {
|
| throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
|
| } catch (OutOfMemoryError e) {
|
| throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
|
| }
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| |
| |
|
|
| public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
|
| if (method == null) {
|
| throw new LiblinearException("The learning method cannot be found. ");
|
| } else if (divideFeature == null) {
|
| throw new LiblinearException("The divide feature cannot be found. ");
|
| }
|
|
|
| try {
|
| final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
|
| final BufferedWriter out = method.getInstanceWriter();
|
| final StringBuilder sb = new StringBuilder(6);
|
| int l = in.read();
|
| char c;
|
| int j = 0;
|
|
|
| while(true) {
|
| if (l == -1) {
|
| sb.setLength(0);
|
| break;
|
| }
|
| c = (char)l;
|
| l = in.read();
|
| if (c == '\t') {
|
| if (divideFeatureIndexVector.contains(j-1)) {
|
| out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
|
| out.write('\t');
|
| }
|
| out.write(sb.toString());
|
| j++;
|
| out.write('\t');
|
| sb.setLength(0);
|
| } else if (c == '\n') {
|
| out.write(sb.toString());
|
| if (divideFeatureIndexVector.contains(j-1)) {
|
| out.write('\t');
|
| out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getIndexCode()));
|
| }
|
| out.write('\n');
|
| sb.setLength(0);
|
| method.increaseNumberOfInstances();
|
| this.decreaseNumberOfInstances();
|
| j = 0;
|
| } else {
|
| sb.append(c);
|
| }
|
| }
|
| in.close();
|
| getFile(".ins").delete();
|
| out.flush();
|
| } catch (SecurityException e) {
|
| throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
|
| } catch (NullPointerException e) {
|
| throw new LiblinearException("The instance file cannot be found. ", e);
|
| } catch (FileNotFoundException e) {
|
| throw new LiblinearException("The instance file cannot be found. ", e);
|
| } catch (IOException e) {
|
| throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
|
| }
|
|
|
| }
|
|
|
| |
| |
|
|
| public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
|
| if (model == null) {
|
| try {
|
| model = Linear.loadModel(new BufferedReader(getInstanceInputStreamReaderFromConfigFile(".mod")));
|
| } catch (IOException e) {
|
| throw new LiblinearException("The model cannot be loaded. ", e);
|
| }
|
| }
|
| if (model == null) {
|
| throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
|
| } else if (featureVector == null) {
|
| throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
|
| }
|
| if (featurePruning) {
|
| return predictWithFeaturePruning(featureVector, decision);
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| if (xlist == null) {
|
| xlist = new ArrayList<FeatureNode>(featureVector.size());
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| FeatureNode[] xarray = new FeatureNode[xlist.size()];
|
| for (int k = 0; k < xlist.size(); k++) {
|
| xarray[k] = xlist.get(k);
|
| }
|
|
|
| if (decision.getKBestList().getK() == 1) {
|
| decision.getKBestList().add(Linear.predict(model, xarray));
|
| } else {
|
| liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
|
| }
|
|
|
| xlist.clear();
|
|
|
| return true;
|
| }
|
|
|
| public boolean predictWithFeaturePruning(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
|
| if (featureMap == null) {
|
| featureMap = loadFeatureMap(getInputStreamFromConfigFileEntry(".map"));
|
| }
|
|
|
| for (int i = 0; i < featureVector.size(); i++) {
|
| final FeatureValue featureValue = featureVector.getFeatureValue(i-1);
|
| if (!(excludeNullValues == true && featureValue.isNullValue())) {
|
| if (featureValue instanceof SingleFeatureValue) {
|
| int v = getFeatureMapValue(i, ((SingleFeatureValue)featureValue).getIndexCode());
|
| if (v != -1) {
|
| featureSet.add(new XNode(v,1));
|
| }
|
| } else if (featureValue instanceof MultipleFeatureValue) {
|
| for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
|
| int v = getFeatureMapValue(i, value);
|
| if (v != -1) {
|
| featureSet.add(new XNode(v,1));
|
| }
|
| }
|
| }
|
| }
|
| }
|
| FeatureNode[] xarray = new FeatureNode[featureSet.size()];
|
| int k = 0;
|
| for (XNode x : featureSet) {
|
| xarray[k++] = new FeatureNode(x.getIndex(), x.getValue());
|
| }
|
|
|
|
|
| if (decision.getKBestList().getK() == 1) {
|
| decision.getKBestList().add(Linear.predict(model, xarray));
|
| } else {
|
| liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
|
| }
|
| featureSet.clear();
|
|
|
| return true;
|
| }
|
|
|
| public void terminate() throws MaltChainedException {
|
| closeInstanceWriter();
|
| model = null;
|
| xlist = null;
|
| owner = null;
|
| }
|
|
|
| public BufferedWriter getInstanceWriter() {
|
| return instanceOutput;
|
| }
|
|
|
| protected void closeInstanceWriter() throws MaltChainedException {
|
| try {
|
| if (instanceOutput != null) {
|
| instanceOutput.flush();
|
| instanceOutput.close();
|
| instanceOutput = null;
|
| }
|
| } catch (IOException e) {
|
| throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
|
| }
|
| }
|
|
|
|
|
| |
| |
| |
| |
|
|
| public String getParamString() {
|
| return paramString;
|
| }
|
|
|
| public InstanceModel getOwner() {
|
| return owner;
|
| }
|
|
|
| protected void setOwner(InstanceModel owner) {
|
| this.owner = owner;
|
| }
|
|
|
| public int getLearnerMode() {
|
| return learnerMode;
|
| }
|
|
|
| public void setLearnerMode(int learnerMode) throws MaltChainedException {
|
| this.learnerMode = learnerMode;
|
| }
|
|
|
| public String getLearningMethodName() {
|
| return name;
|
| }
|
|
|
| |
| |
| |
| |
| |
|
|
| public DependencyParserConfig getConfiguration() throws MaltChainedException {
|
| return owner.getGuide().getConfiguration();
|
| }
|
|
|
| public int getNumberOfInstances() throws MaltChainedException {
|
| if(numberOfInstances!=0)
|
| return numberOfInstances;
|
| else{
|
|
|
|
|
| BufferedReader reader = new BufferedReader( getInstanceInputStreamReader(".ins"));
|
| try {
|
| while(reader.readLine()!=null){
|
| numberOfInstances++;
|
| owner.increaseFrequency();
|
| }
|
| reader.close();
|
| } catch (IOException e) {
|
| throw new MaltChainedException("No instances found in file",e);
|
| }
|
| return numberOfInstances;
|
| }
|
| }
|
|
|
| public void increaseNumberOfInstances() {
|
| numberOfInstances++;
|
| owner.increaseFrequency();
|
| }
|
|
|
| public void decreaseNumberOfInstances() {
|
| numberOfInstances--;
|
| owner.decreaseFrequency();
|
| }
|
|
|
| protected void setNumberOfInstances(int numberOfInstances) {
|
| this.numberOfInstances = 0;
|
| }
|
|
|
| protected void setLearningMethodName(String name) {
|
| this.name = name;
|
| }
|
|
|
| protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
|
| return getConfiguration().getAppendOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
|
| return getConfiguration().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected InputStreamReader getInstanceInputStreamReaderFromConfigFile(String suffix) throws MaltChainedException {
|
| try {
|
| return new InputStreamReader(getInputStreamFromConfigFileEntry(suffix), "UTF-8");
|
| } catch (UnsupportedEncodingException e) {
|
| throw new ConfigurationException("The char set UTF-8 is not supported. ", e);
|
| }
|
| }
|
|
|
| protected InputStream getInputStreamFromConfigFileEntry(String suffix) throws MaltChainedException {
|
| return getConfiguration().getInputStreamFromConfigFileEntry(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
| protected File getFile(String suffix) throws MaltChainedException {
|
| return getConfiguration().getFile(owner.getModelName()+getLearningMethodName()+suffix);
|
| }
|
|
|
|
|
|
|
|
|
|
|
|
|
| public Problem readLibLinearProblemWithFeaturePruning(InputStreamReader isr) throws MaltChainedException {
|
| Problem problem = new Problem();
|
|
|
| try {
|
| final BufferedReader fp = new BufferedReader(isr);
|
|
|
| problem.bias = -1;
|
| problem.l = getNumberOfInstances();
|
| problem.x = new FeatureNode[problem.l][];
|
| problem.y = new int[problem.l];
|
| int i = 0;
|
| final Pattern tabPattern = Pattern.compile("\t");
|
| final Pattern pipePattern = Pattern.compile("\\|");
|
| while(true) {
|
| String line = fp.readLine();
|
|
|
| if(line == null) break;
|
| String[] columns = tabPattern.split(line);
|
|
|
| if (columns.length == 0) {
|
| continue;
|
| }
|
| int j = 0;
|
| try {
|
| problem.y[i] = Integer.parseInt(columns[j]);
|
| for(j = 1; j < columns.length; j++) {
|
| final String[] items = pipePattern.split(columns[j]);
|
| for (int k = 0; k < items.length; k++) {
|
| try {
|
| int colon = items[k].indexOf(':');
|
| if (colon == -1) {
|
| if (Integer.parseInt(items[k]) != -1) {
|
| int v = addFeatureMapValue(j, Integer.parseInt(items[k]));
|
| if (v != -1) {
|
| featureSet.add(new XNode(v,1));
|
| }
|
| }
|
| } else {
|
| int index = addFeatureMapValue(j, Integer.parseInt(items[k].substring(0,colon)));
|
| double value;
|
| if (items[k].substring(colon+1).indexOf('.') != -1) {
|
| value = Double.parseDouble(items[k].substring(colon+1));
|
| } else {
|
| value = Integer.parseInt(items[k].substring(colon+1));
|
| }
|
| featureSet.add(new XNode(index,value));
|
| }
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
|
| }
|
| }
|
| }
|
| problem.x[i] = new FeatureNode[featureSet.size()];
|
| int p = 0;
|
| for (XNode x : featureSet) {
|
| problem.x[i][p++] = new FeatureNode(x.getIndex(), x.getValue());
|
| }
|
| featureSet.clear();
|
| i++;
|
| } catch (ArrayIndexOutOfBoundsException e) {
|
| throw new LiblinearException("Cannot read from the instance file. ", e);
|
| }
|
| }
|
| fp.close();
|
| featureSet = null;
|
| problem.n = featureMap.size();
|
| System.out.println("Number of features: "+problem.n);
|
| } catch (IOException e) {
|
| throw new LiblinearException("Cannot read from the instance file. ", e);
|
| }
|
| return problem;
|
| }
|
| |
| |
| |
| |
| |
| |
|
|
| public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
|
| Problem problem = new Problem();
|
|
|
| try {
|
| final BufferedReader fp = new BufferedReader(isr);
|
| int max_index = 0;
|
| if (xlist == null) {
|
| xlist = new ArrayList<FeatureNode>();
|
| }
|
| problem.bias = -1;
|
| problem.l = getNumberOfInstances();
|
| problem.x = new FeatureNode[problem.l][];
|
| problem.y = new int[problem.l];
|
| int i = 0;
|
| final Pattern tabPattern = Pattern.compile("\t");
|
| final Pattern pipePattern = Pattern.compile("\\|");
|
| while(true) {
|
| String line = fp.readLine();
|
|
|
| if(line == null) break;
|
| String[] columns = tabPattern.split(line);
|
|
|
| if (columns.length == 0) {
|
| continue;
|
| }
|
|
|
| int offset = 1;
|
| int j = 0;
|
| try {
|
| problem.y[i] =
|
| Integer.parseInt(columns[j]);
|
| int p = 0;
|
| for(j = 1; j < columns.length; j++) {
|
| final String[] items = pipePattern.split(columns[j]);
|
| for (int k = 0; k < items.length; k++) {
|
| try {
|
| if (Integer.parseInt(items[k]) != -1) {
|
| xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1));
|
| p++;
|
| }
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
|
| }
|
| }
|
| offset += cardinalities[j-1];
|
| }
|
| problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]);
|
| if(columns.length > 1) {
|
| max_index = Math.max(max_index, problem.x[i][p-1].index);
|
| }
|
| i++;
|
| xlist.clear();
|
| } catch (ArrayIndexOutOfBoundsException e) {
|
| throw new LiblinearException("Cannot read from the instance file. ", e);
|
| }
|
| }
|
| fp.close();
|
| problem.n = max_index;
|
| System.out.println("Number of features: "+problem.n);
|
|
|
|
|
|
|
| xlist = null;
|
| } catch (IOException e) {
|
| throw new LiblinearException("Cannot read from the instance file. ", e);
|
| }
|
| return problem;
|
| }
|
|
|
| protected void initSpecialParameters() throws MaltChainedException {
|
| if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
|
| excludeNullValues = true;
|
| } else {
|
| excludeNullValues = false;
|
| }
|
| saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();
|
|
|
| featurePruning = true;
|
| if (!getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().equals("")) {
|
| try {
|
| if (!new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).exists()) {
|
| throw new LiblinearException("The path to the external Liblinear trainer 'svm-train' is wrong.");
|
| }
|
| if (new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).isDirectory()) {
|
| throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
|
| }
|
| if (!(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train") || getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train.exe"))) {
|
| throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
|
| }
|
| pathExternalLiblinearTrain = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
|
| } catch (SecurityException e) {
|
| throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
|
| }
|
| }
|
| if (getConfiguration().getOptionValue("liblinear", "verbosity") != null) {
|
| verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("liblinear", "verbosity").toString().toUpperCase());
|
| }
|
| }
|
|
|
| public String getLibLinearOptions() {
|
| StringBuilder sb = new StringBuilder();
|
| for (String key : liblinearOptions.keySet()) {
|
| sb.append('-');
|
| sb.append(key);
|
| sb.append(' ');
|
| sb.append(liblinearOptions.get(key));
|
| sb.append(' ');
|
| }
|
| return sb.toString();
|
| }
|
|
|
| public void parseParameters(String paramstring) throws MaltChainedException {
|
| if (paramstring == null) {
|
| return;
|
| }
|
| final String[] argv;
|
| String allowedFlags = "sceB";
|
| try {
|
| argv = paramstring.split("[_\\p{Blank}]");
|
| } catch (PatternSyntaxException e) {
|
| throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
|
| }
|
| for (int i=0; i < argv.length-1; i++) {
|
| if(argv[i].charAt(0) != '-') {
|
| throw new LiblinearException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
|
| }
|
| if(++i>=argv.length) {
|
| throw new LiblinearException("The last argument does not have any value. ");
|
| }
|
| try {
|
| int index = allowedFlags.indexOf(argv[i-1].charAt(1));
|
| if (index != -1) {
|
| liblinearOptions.put(Character.toString(argv[i-1].charAt(1)), argv[i]);
|
| } else {
|
| throw new LiblinearException("Unknown liblinear parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");
|
| }
|
| } catch (ArrayIndexOutOfBoundsException e) {
|
| throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| } catch (NullPointerException e) {
|
| throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
|
| }
|
| }
|
| }
|
|
|
| public double getBias() throws MaltChainedException {
|
| try {
|
| return Double.valueOf(liblinearOptions.get("B")).doubleValue();
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
|
| }
|
| }
|
|
|
| public Parameter getLiblinearParameters() throws MaltChainedException {
|
| Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
|
| String type = liblinearOptions.get("s");
|
|
|
| if (type.equals("0")) {
|
| param.setSolverType(SolverType.L2R_LR);
|
| } else if (type.equals("1")) {
|
| param.setSolverType(SolverType.L2R_L2LOSS_SVC_DUAL);
|
| } else if (type.equals("2")) {
|
| param.setSolverType(SolverType.L2R_L2LOSS_SVC);
|
| } else if (type.equals("3")) {
|
| param.setSolverType(SolverType.L2R_L1LOSS_SVC_DUAL);
|
| } else if (type.equals("4")) {
|
| param.setSolverType(SolverType.MCSVM_CS);
|
| } else if (type.equals("5")) {
|
| param.setSolverType(SolverType.L1R_L2LOSS_SVC);
|
| } else if (type.equals("6")) {
|
| param.setSolverType(SolverType.L1R_LR);
|
| } else {
|
| throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. ");
|
| }
|
| try {
|
| param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
|
| }
|
| try {
|
| param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
|
| } catch (NumberFormatException e) {
|
| throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
|
| }
|
| return param;
|
| }
|
|
|
| public void initLiblinearOptions() {
|
| liblinearOptions.put("s", "4");
|
| liblinearOptions.put("c", "0.1");
|
| liblinearOptions.put("e", "0.1");
|
| liblinearOptions.put("B", "-1");
|
| }
|
|
|
| public String[] getLibLinearParamStringArray() {
|
| final ArrayList<String> params = new ArrayList<String>();
|
|
|
| for (String key : liblinearOptions.keySet()) {
|
| params.add("-"+key); params.add(liblinearOptions.get(key));
|
| }
|
| return params.toArray(new String[params.size()]);
|
| }
|
|
|
|
|
| public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
|
| int i;
|
| final int nr_class = model.getNrClass();
|
| final double[] dec_values = new double[nr_class];
|
|
|
| Linear.predictValues(model, x, dec_values);
|
| final int[] labels = model.getLabels();
|
| int[] predictionList = new int[nr_class];
|
| for(i=0;i<nr_class;i++) {
|
| predictionList[i] = labels[i];
|
| }
|
|
|
| double tmpDec;
|
| int tmpObj;
|
| int lagest;
|
| for (i=0;i<nr_class-1;i++) {
|
| lagest = i;
|
| for (int j=i;j<nr_class;j++) {
|
| if (dec_values[j] > dec_values[lagest]) {
|
| lagest = j;
|
| }
|
| }
|
| tmpDec = dec_values[lagest];
|
| dec_values[lagest] = dec_values[i];
|
| dec_values[i] = tmpDec;
|
| tmpObj = predictionList[lagest];
|
| predictionList[lagest] = predictionList[i];
|
| predictionList[i] = tmpObj;
|
| }
|
|
|
| int k = nr_class-1;
|
| if (kBestList.getK() != -1) {
|
| k = kBestList.getK() - 1;
|
| }
|
|
|
| for (i=0; i<nr_class && k >= 0; i++, k--) {
|
| if (kBestList instanceof ScoredKBestList) {
|
| ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
|
| } else {
|
| kBestList.add(predictionList[i]);
|
| }
|
|
|
| }
|
| }
|
|
|
| |
| |
| |
| |
| |
| |
| |
| |
|
|
| public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
|
| try {
|
| final BufferedReader in = new BufferedReader(isr);
|
| final BufferedWriter out = new BufferedWriter(osw);
|
|
|
| int c;
|
| int j = 0;
|
| int offset = 1;
|
| int code = 0;
|
| while(true) {
|
| c = in.read();
|
| if (c == -1) {
|
| break;
|
| }
|
|
|
| if (c == '\t' || c == '|') {
|
| if (j == 0) {
|
| out.write(Integer.toString(code));
|
| j++;
|
| } else {
|
| if (code != -1) {
|
| out.write(' ');
|
| out.write(Integer.toString(code+offset));
|
| out.write(":1");
|
| }
|
| if (c == '\t') {
|
| offset += cardinalities[j-1];
|
| j++;
|
| }
|
| }
|
| code = 0;
|
| } else if (c == '\n') {
|
| j = 0;
|
| offset = 1;
|
| out.write('\n');
|
| code = 0;
|
| } else if (c == '-') {
|
| code = -1;
|
| } else if (code != -1) {
|
| if (c > 47 && c < 58) {
|
| code = code * 10 + (c-48);
|
| } else {
|
| throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
|
| }
|
| }
|
| }
|
| in.close();
|
| out.close();
|
| } catch (IOException e) {
|
| throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
|
| }
|
| }
|
|
|
| protected void finalize() throws Throwable {
|
| try {
|
| closeInstanceWriter();
|
| } finally {
|
| super.finalize();
|
| }
|
| }
|
|
|
| |
| |
|
|
| public String toString() {
|
| final StringBuffer sb = new StringBuffer();
|
| sb.append("\nLiblinear INTERFACE\n");
|
| sb.append(" Liblinear version: "+LIBLINEAR_VERSION+"\n");
|
| sb.append(" Liblinear string: "+paramString+"\n");
|
|
|
| sb.append(getLibLinearOptions());
|
| return sb.toString();
|
| }
|
| }
|
|
|