# Spaces:
# Runtime error
# Runtime error
# Train a PCFG on the NLTK treebank sample, parse a few sentences with a
# Viterbi parser and report production-level precision / recall / F1 /
# accuracy against gold-standard trees.
#
# nltk is imported lazily inside the functions that use it, so the pure
# metric helpers below can be imported (and tested) without nltk installed
# and without triggering any corpus download.
from typing import Dict, Hashable, Iterable, Tuple

# Sentences to tokenize and parse.
SENTENCES = ["this is a beautiful"]

# Only the resources this script actually uses.  'punkt_tab' is the
# tokenizer model name used by newer NLTK releases; on older releases the
# download call just reports that the package is unknown and carries on.
REQUIRED_NLTK_RESOURCES = ("punkt", "punkt_tab", "treebank")


def _safe_ratio(numerator: float, denominator: float) -> float:
    """Return numerator / denominator, or 0.0 when the denominator is zero."""
    return numerator / denominator if denominator else 0.0


def count_production_matches(
    gold_productions: Iterable[Hashable],
    parsed_productions: Iterable[Hashable],
) -> Tuple[int, int, int]:
    """Count matching and non-matching productions between two parses.

    Every occurrence of a gold production counts as a true positive when the
    production appears anywhere in the parsed tree, otherwise as a false
    negative.  Every occurrence of a parsed production that does not appear
    in the gold tree counts as a false positive.

    Args:
        gold_productions: Productions of the gold-standard tree.
        parsed_productions: Productions of the parser's tree.

    Returns:
        A ``(true_positives, false_positives, false_negatives)`` tuple.
    """
    gold = list(gold_productions)
    parsed = list(parsed_productions)
    # Sets give O(1) membership tests; the lists keep duplicate occurrences
    # so the counts stay per-occurrence.
    gold_set = set(gold)
    parsed_set = set(parsed)

    true_positives = sum(1 for production in gold if production in parsed_set)
    false_negatives = len(gold) - true_positives
    false_positives = sum(
        1 for production in parsed if production not in gold_set
    )
    return true_positives, false_positives, false_negatives


def compute_metrics(
    true_positives: int, false_positives: int, false_negatives: int
) -> Dict[str, float]:
    """Compute precision, recall, F1 score and accuracy from raw counts.

    Any ratio whose denominator is zero is reported as 0.0 instead of
    raising ``ZeroDivisionError`` (e.g. when no sentence could be parsed).

    Returns:
        A dict with the keys ``precision``, ``recall``, ``f1_score`` and
        ``accuracy``.
    """
    precision = _safe_ratio(true_positives, true_positives + false_positives)
    recall = _safe_ratio(true_positives, true_positives + false_negatives)
    f1_score = _safe_ratio(2 * (precision * recall), precision + recall)
    accuracy = _safe_ratio(
        true_positives, true_positives + false_positives + false_negatives
    )
    return {
        "precision": precision,
        "recall": recall,
        "f1_score": f1_score,
        "accuracy": accuracy,
    }


def build_parser(corpus):
    """Induce a PCFG from the parsed sentences in *corpus* and wrap it in a
    ViterbiParser."""
    import nltk
    from nltk import ViterbiParser

    productions = []
    for tree in corpus:
        productions.extend(tree.productions())
    grammar = nltk.induce_pcfg(nltk.Nonterminal("S"), productions)
    return ViterbiParser(grammar)


def evaluate(parser, sentences, gold_trees, tokenize) -> Tuple[int, int, int]:
    """Parse each sentence and accumulate production counts over all pairs.

    Sentences for which the parser yields no tree, or whose tokens the
    grammar does not cover (``parser.parse`` raises ``ValueError``), are
    skipped and contribute nothing to the counts.

    Returns:
        A ``(true_positives, false_positives, false_negatives)`` tuple.
    """
    true_positives = false_positives = false_negatives = 0
    for sentence, gold_tree in zip(sentences, gold_trees):
        tokens = tokenize(sentence)
        try:
            # Only the first parse tree is considered.
            parsed_tree = next(iter(parser.parse(tokens)), None)
        except ValueError as err:
            print(f"Skipping {sentence!r}: {err}")
            continue
        if parsed_tree is None:
            continue
        tp, fp, fn = count_production_matches(
            gold_tree.productions(), parsed_tree.productions()
        )
        true_positives += tp
        false_positives += fp
        false_negatives += fn
    return true_positives, false_positives, false_negatives


def main() -> None:
    """Download the required data, train the parser, evaluate and print."""
    import nltk
    from nltk.corpus import treebank

    for resource in REQUIRED_NLTK_RESOURCES:
        nltk.download(resource)

    # Load the treebank dataset and train a PCFG parser on it.
    corpus = treebank.parsed_sents()
    parser = build_parser(corpus)

    # NOTE(review): the gold trees are simply the first len(SENTENCES)
    # treebank trees, not parses of SENTENCES themselves, so the scores
    # compare unrelated sentences -- confirm this pairing is intended.
    gold_standard_trees = list(corpus[: len(SENTENCES)])

    counts = evaluate(
        parser, SENTENCES, gold_standard_trees, nltk.word_tokenize
    )
    metrics = compute_metrics(*counts)

    print("Precision:", metrics["precision"])
    print("Recall:", metrics["recall"])
    print("F1 Score:", metrics["f1_score"])
    print("Accuracy:", metrics["accuracy"])


if __name__ == "__main__":
    main()