Spaces:

UjjwalVIT
/

Text_analysis_and_metadata_app

Running

App Files Files Community

Text_analysis_and_metadata_app / stanfordmodel /NERDemo.java

UjjwalVIT

Upload 17 files

5a1f678 about 1 year ago

raw history blame contribute delete

No virus

6.79 kB

	import edu.stanford.nlp.ie.AbstractSequenceClassifier;
	import edu.stanford.nlp.ie.crf.*;
	import edu.stanford.nlp.io.IOUtils;
	import edu.stanford.nlp.ling.CoreLabel;
	import edu.stanford.nlp.ling.CoreAnnotations;
	import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
	import edu.stanford.nlp.util.Triple;

	import java.util.List;


	/** This is a demo of calling CRFClassifier programmatically.
	* <p>
	* Usage: {@code java -mx400m -cp "*" NERDemo [serializedClassifier [fileName]] }
	* <p>
	* If arguments aren't specified, they default to
	* classifiers/english.all.3class.distsim.crf.ser.gz and some hardcoded sample text.
	* If run with a fileName argument, it shows some of the ways to get k-best labelings
	* and probabilities out with CRFClassifier. If run without a fileName (even when a
	* classifier is given), it shows some of the alternative output formats that you can get.
	* <p>
	* To use CRFClassifier from the command line:
	* </p><blockquote>
	* {@code java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier [classifier] -textFile [file] }
	* </blockquote><p>
	* Or if the file is already tokenized and one word per line, perhaps in
	* a tab-separated value format with extra columns for part-of-speech tag,
	* etc., use the version below (note the 's' instead of the 'x'):
	* </p><blockquote>
	* {@code java -mx400m edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier [classifier] -testFile [file] }
	* </blockquote>
	*
	* @author Jenny Finkel
	* @author Christopher Manning
	*/

	public class NERDemo {

	  /** Serialized classifier loaded when no path is given on the command line. */
	  private static final String DEFAULT_CLASSIFIER = "classifiers/english.all.3class.distsim.crf.ser.gz";

	  /** Number of labelings requested in the k-best part of the file demo. */
	  private static final int K_BEST = 10;

	  /**
	   * Loads a serialized classifier and runs one of two demos with it.
	   * <p>
	   * With a second argument, the named file is annotated in several ways
	   * (see {@code demoOnFile}); otherwise two hard-coded sentences are
	   * printed in several output formats (see {@code demoOnExamples}).
	   *
	   * @param args optional; {@code args[0]} is the serialized classifier to load
	   *     and {@code args[1]} is the file to annotate
	   * @throws Exception if loading the classifier or processing the input fails
	   */
	  public static void main(String[] args) throws Exception {
	    String serializedClassifier = args.length > 0 ? args[0] : DEFAULT_CLASSIFIER;

	    AbstractSequenceClassifier<CoreLabel> classifier = CRFClassifier.getClassifier(serializedClassifier);

	    // For either a file to annotate or for the hardcoded text example, this
	    // demo shows several ways to process the input, for teaching purposes.
	    if (args.length > 1) {
	      demoOnFile(classifier, args[1]);
	    } else {
	      demoOnExamples(classifier);
	    }
	  }

	  /**
	   * Runs the file demo: (1) NER on the file contents held in a String,
	   * (2) NER on the file itself (without loading it into a String),
	   * (3) entities in the String with character offsets, (4) the ten best
	   * entity labelings, and (5) per-token marginalized probabilities.
	   *
	   * @param classifier the loaded sequence classifier
	   * @param fileName path of the file to annotate
	   * @throws Exception if the file cannot be read or classified
	   */
	  private static void demoOnFile(AbstractSequenceClassifier<CoreLabel> classifier, String fileName)
	      throws Exception {
	    String fileContents = IOUtils.slurpFile(fileName);
	    printSlashTagged(classifier.classify(fileContents));

	    System.out.println("---");
	    printSlashTagged(classifier.classifyFile(fileName));

	    System.out.println("---");
	    List<Triple<String, Integer, Integer>> entities = classifier.classifyToCharacterOffsets(fileContents);
	    for (Triple<String, Integer, Integer> entity : entities) {
	      // second()/third() are used as the begin/end indices into the classified String.
	      System.out.println(entity.first() + ": " + fileContents.substring(entity.second(), entity.third()));
	    }

	    System.out.println("---");
	    System.out.println("Ten best entity labelings");
	    DocumentReaderAndWriter<CoreLabel> readerAndWriter = classifier.makePlainTextReaderAndWriter();
	    classifier.classifyAndWriteAnswersKBest(fileName, K_BEST, readerAndWriter);

	    System.out.println("---");
	    System.out.println("Per-token marginalized probabilities");
	    classifier.printProbs(fileName, readerAndWriter);

	    // -- This code prints out the first order (token pair) clique probabilities.
	    // -- But that output is a bit overwhelming, so we leave it commented out by default.
	    // System.out.println("---");
	    // System.out.println("First Order Clique Probabilities");
	    // ((CRFClassifier) classifier).printFirstOrderProbs(fileName, readerAndWriter);
	  }

	  /**
	   * Runs the hard-coded text demo: classifies two sample sentences and prints
	   * them in several formats (default string output, slash tags, tabbed
	   * entities, inline XML, XML and TSV), then prints the entities with their
	   * character offsets and finally the full contents of the {@code CoreLabel}s
	   * constructed by the classifier.
	   * <p>
	   * K-best labelings and probabilities are shown only by the file demo, not here.
	   *
	   * @param classifier the loaded sequence classifier
	   */
	  private static void demoOnExamples(AbstractSequenceClassifier<CoreLabel> classifier) {
	    String[] example = {"Good afternoon Rajat Raina, how are you today?",
	        "I go to school at Stanford University, which is located in California." };

	    for (String str : example) {
	      System.out.println(classifier.classifyToString(str));
	    }
	    System.out.println("---");

	    for (String str : example) {
	      // This one puts in spaces and newlines between tokens, so just print not println.
	      System.out.print(classifier.classifyToString(str, "slashTags", false));
	    }
	    System.out.println("---");

	    for (String str : example) {
	      // This one is best for dealing with the output as a TSV (tab-separated column) file.
	      // The first column gives entities, the second their classes, and the third the remaining text in a document
	      System.out.print(classifier.classifyToString(str, "tabbedEntities", false));
	    }
	    System.out.println("---");

	    for (String str : example) {
	      System.out.println(classifier.classifyWithInlineXML(str));
	    }
	    System.out.println("---");

	    for (String str : example) {
	      System.out.println(classifier.classifyToString(str, "xml", true));
	    }
	    System.out.println("---");

	    for (String str : example) {
	      System.out.print(classifier.classifyToString(str, "tsv", false));
	    }
	    System.out.println("---");

	    // This gets out entities with character offsets
	    int sentenceNumber = 0;
	    for (String str : example) {
	      sentenceNumber++;
	      List<Triple<String, Integer, Integer>> triples = classifier.classifyToCharacterOffsets(str);
	      for (Triple<String, Integer, Integer> trip : triples) {
	        // Use the accessor for all three components rather than mixing in a raw field read.
	        System.out.printf("%s over character offsets [%d, %d) in sentence %d.%n",
	            trip.first(), trip.second(), trip.third(), sentenceNumber);
	      }
	    }
	    System.out.println("---");

	    // This prints out all the details of what is stored for each token.
	    // The counter runs across both sentences; it is not reset per sentence.
	    int tokenIndex = 0;
	    for (String str : example) {
	      for (List<CoreLabel> sentence : classifier.classify(str)) {
	        for (CoreLabel token : sentence) {
	          System.out.print(tokenIndex++ + ": ");
	          System.out.println(token.toShorterString());
	        }
	      }
	    }

	    System.out.println("---");
	  }

	  /**
	   * Prints each sentence on its own line as space-separated {@code word/LABEL}
	   * pairs, where the label is the token's {@code AnswerAnnotation}.
	   *
	   * @param sentences classified sentences, each a list of tokens
	   */
	  private static void printSlashTagged(List<List<CoreLabel>> sentences) {
	    for (List<CoreLabel> sentence : sentences) {
	      for (CoreLabel word : sentence) {
	        System.out.print(word.word() + '/' + word.get(CoreAnnotations.AnswerAnnotation.class) + ' ');
	      }
	      System.out.println();
	    }
	  }

	}