Spaces:
Runtime error
Runtime error
create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""Train a PCFG on the NLTK treebank sample and score Viterbi parses.

The script induces a probabilistic context-free grammar from the treebank
sample, parses a small list of sentences with ``ViterbiParser`` and reports
production-level precision, recall, F1 and accuracy against gold trees.
"""
from collections import Counter


def count_matches(gold_items, predicted_items):
    """Compare two collections of hashable items as multisets.

    Args:
        gold_items: Iterable of reference items (e.g. gold productions).
        predicted_items: Iterable of predicted items (e.g. parsed productions).

    Returns:
        A ``(true_positives, false_positives, false_negatives)`` tuple.
        Duplicates are matched one-to-one, so an item occurring twice in the
        gold data and once in the prediction yields one true positive and
        one false negative.
    """
    gold = Counter(gold_items)
    predicted = Counter(predicted_items)
    matched = sum((gold & predicted).values())
    return (
        matched,
        sum(predicted.values()) - matched,
        sum(gold.values()) - matched,
    )


def safe_divide(numerator, denominator):
    """Return ``numerator / denominator``, or ``0.0`` for a zero denominator."""
    return numerator / denominator if denominator else 0.0


def compute_scores(true_positives, false_positives, false_negatives):
    """Derive the evaluation metrics from raw counts.

    Returns:
        A ``(precision, recall, f1_score, accuracy)`` tuple. Any metric whose
        denominator is zero is reported as ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    precision = safe_divide(true_positives, true_positives + false_positives)
    recall = safe_divide(true_positives, true_positives + false_negatives)
    f1_score = safe_divide(2 * (precision * recall), precision + recall)
    accuracy = safe_divide(
        true_positives,
        true_positives + false_positives + false_negatives,
    )
    return precision, recall, f1_score, accuracy


def main():
    """Train the grammar, parse the sample sentences and print the metrics."""
    # Imported here so the pure-Python helpers above stay importable (and
    # testable) on machines where NLTK is not installed.
    import nltk
    from nltk import ViterbiParser
    from nltk.corpus import treebank

    # Fetch only the resources the script uses; downloading 'all' pulls the
    # entire NLTK data collection. 'punkt_tab' is what newer NLTK releases
    # load for word_tokenize, 'punkt' what older ones load.
    for resource in ("punkt", "punkt_tab", "treebank"):
        nltk.download(resource)

    # Load the treebank dataset
    corpus = treebank.parsed_sents()

    # Train a PCFG parser
    productions = []
    for tree in corpus:
        productions.extend(tree.productions())
    grammar = nltk.induce_pcfg(nltk.Nonterminal('S'), productions)

    # Initialize the parser with the trained grammar
    parser = ViterbiParser(grammar)

    # Tokenize and parse a sentence
    sentences = ["this is a beautiful"]

    # Prepare gold standard parse trees for the sentences
    # NOTE(review): these are simply the first treebank trees, not annotations
    # of `sentences`, so the scores are only meaningful once real gold trees
    # for the evaluated sentences are supplied - confirm intent.
    gold_standard_trees = list(treebank.parsed_sents()[:len(sentences)])

    # Initialize counters
    true_positives = 0
    false_positives = 0
    false_negatives = 0

    # Evaluate each sentence
    for sentence, gold_tree in zip(sentences, gold_standard_trees):
        tokens = nltk.word_tokenize(sentence)
        try:
            # ViterbiParser yields at most the single most probable tree.
            parsed_tree = next(iter(parser.parse(tokens)), None)
        except ValueError:
            # Raised when the grammar does not cover one of the input words.
            parsed_tree = None

        # With no parse, every gold production counts as a false negative.
        predicted = parsed_tree.productions() if parsed_tree is not None else []
        matched, spurious, missed = count_matches(
            gold_tree.productions(), predicted
        )
        true_positives += matched
        false_positives += spurious
        false_negatives += missed

    # Compute precision, recall, F1-score and accuracy
    precision, recall, f1_score, accuracy = compute_scores(
        true_positives, false_positives, false_negatives
    )

    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1_score)
    print("Accuracy:", accuracy)


if __name__ == "__main__":
    main()
|