"""Induce a PCFG from the NLTK Penn Treebank sample and score a Viterbi parser.

The script trains a probabilistic grammar on every parsed sentence of the
treebank sample, parses the first ``NUM_EVAL_SENTENCES`` sentences again and
compares the productions of each predicted tree with those of its gold tree.

Known limitations (kept deliberately simple):
* The evaluated sentences are also part of the training data, so the scores
  are optimistic.
* Matching is done on grammar productions, not on labelled spans.
"""

from collections import Counter

# Number of treebank sentences scored against their own gold-standard trees.
NUM_EVAL_SENTENCES = 1


def count_matches(gold_items, predicted_items):
    """Return ``(true_positives, false_positives, false_negatives)``.

    Both arguments are iterables of hashable items (grammar productions in
    this script). They are compared as multisets, so an item that occurs
    twice in the gold tree but once in the prediction yields one true
    positive and one false negative.
    """
    gold_counts = Counter(gold_items)
    predicted_counts = Counter(predicted_items)
    # Multiset intersection: each item matches at most min(gold, predicted) times.
    true_positives = sum((gold_counts & predicted_counts).values())
    false_positives = sum(predicted_counts.values()) - true_positives
    false_negatives = sum(gold_counts.values()) - true_positives
    return true_positives, false_positives, false_negatives


def _safe_ratio(numerator, denominator):
    """Return ``numerator / denominator``, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def compute_metrics(true_positives, false_positives, false_negatives):
    """Return the four reported scores as a dict keyed by their print label.

    Every ratio falls back to 0.0 instead of raising ``ZeroDivisionError``
    when its denominator is zero (for example when no sentence was parsed).
    """
    precision = _safe_ratio(true_positives, true_positives + false_positives)
    recall = _safe_ratio(true_positives, true_positives + false_negatives)
    f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
    # NOTE: TP / (TP + FP + FN) is a Jaccard-style overlap; the "Accuracy"
    # label is kept so the script's output stays the same.
    accuracy = _safe_ratio(
        true_positives,
        true_positives + false_positives + false_negatives,
    )
    return {
        "Precision": precision,
        "Recall": recall,
        "F1 Score": f1_score,
        "Accuracy": accuracy,
    }


def evaluate(parser, gold_trees):
    """Parse the leaves of each gold tree and accumulate production counts.

    ``parser`` must provide ``parse(tokens)`` returning an iterable of trees;
    each gold tree must provide ``leaves()`` and ``productions()``.

    Returns ``(true_positives, false_positives, false_negatives, unparsed)``.
    A sentence for which the parser yields no tree, or raises ``ValueError``
    (NLTK's ViterbiParser does so when the grammar does not cover an input
    word), counts all of its gold productions as false negatives.
    """
    true_positives = false_positives = false_negatives = unparsed = 0
    for gold_tree in gold_trees:
        try:
            # Only the first tree is used; next() also triggers the lazy parse.
            best_parse = next(iter(parser.parse(gold_tree.leaves())), None)
        except ValueError:
            best_parse = None

        if best_parse is None:
            unparsed += 1
            predicted_productions = []
        else:
            predicted_productions = best_parse.productions()

        matched, spurious, missed = count_matches(
            gold_tree.productions(), predicted_productions
        )
        true_positives += matched
        false_positives += spurious
        false_negatives += missed
    return true_positives, false_positives, false_negatives, unparsed


def main():
    """Train the grammar, evaluate the parser and print the scores."""
    # Imported here so the pure helpers above stay importable without NLTK.
    import nltk
    from nltk import ViterbiParser
    from nltk.corpus import treebank

    # Only the treebank sample is required; downloading 'all' is unnecessary.
    nltk.download('treebank')
    corpus = treebank.parsed_sents()

    # Train a PCFG on every production observed in the corpus.
    productions = [
        production for tree in corpus for production in tree.productions()
    ]
    grammar = nltk.induce_pcfg(nltk.Nonterminal('S'), productions)
    parser = ViterbiParser(grammar)

    # Each evaluated sentence is the leaf sequence of its own gold tree, so
    # prediction and reference always describe the same sentence.
    gold_standard_trees = list(corpus[:NUM_EVAL_SENTENCES])
    true_positives, false_positives, false_negatives, unparsed = evaluate(
        parser, gold_standard_trees
    )
    if unparsed:
        print("Sentences without a parse:", unparsed)

    metrics = compute_metrics(true_positives, false_positives, false_negatives)
    for label, value in metrics.items():
        print(label + ":", value)


if __name__ == "__main__":
    main()