Pravein26 committed on
Commit
2ee4a3c
1 Parent(s): 4324731

create app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -0
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train a PCFG on the NLTK treebank sample and score a Viterbi parser.

The script induces a probabilistic grammar from every tree in the NLTK
``treebank`` corpus, parses each sentence in ``SENTENCES`` with a
``ViterbiParser`` and compares the productions of the resulting tree with
the productions of a gold tree, printing precision, recall, F1 and a
Jaccard-style "accuracy".
"""

from collections import Counter
from typing import Dict, Hashable, Iterable, Tuple

# Only the resources this script actually reads. Downloading 'all' pulls the
# entire NLTK data collection, which is unnecessary here. 'punkt_tab' is the
# tokenizer resource that newer NLTK releases look up for word_tokenize;
# 'punkt' is kept for older releases.
REQUIRED_NLTK_PACKAGES = ("punkt", "punkt_tab", "treebank")

# Sentences to tokenize and parse.
SENTENCES = ["this is a beautiful"]


def safe_divide(numerator: float, denominator: float) -> float:
    """Return ``numerator / denominator``, or 0.0 when the denominator is 0."""
    return numerator / denominator if denominator else 0.0


def count_production_matches(
    gold_productions: Iterable[Hashable],
    parsed_productions: Iterable[Hashable],
) -> Tuple[int, int, int]:
    """Compare two production sequences as multisets.

    Returns:
        ``(true_positives, false_positives, false_negatives)`` where a
        production that occurs k times on one side can be matched at most
        k times, so the true-positive count is the same whether it is seen
        from the gold side or from the parsed side.
    """
    gold = Counter(gold_productions)
    parsed = Counter(parsed_productions)
    matched = sum((gold & parsed).values())
    return matched, sum(parsed.values()) - matched, sum(gold.values()) - matched


def compute_metrics(
    true_positives: int, false_positives: int, false_negatives: int
) -> Dict[str, float]:
    """Turn raw counts into the four reported scores.

    Every ratio falls back to 0.0 instead of raising ``ZeroDivisionError``
    when its denominator is zero (e.g. no sentence could be parsed).

    Returns:
        A dict keyed by the printed labels, in print order. "Accuracy" is
        TP / (TP + FP + FN), i.e. the Jaccard index of the two multisets.
    """
    precision = safe_divide(true_positives, true_positives + false_positives)
    recall = safe_divide(true_positives, true_positives + false_negatives)
    return {
        "Precision": precision,
        "Recall": recall,
        "F1 Score": safe_divide(2 * (precision * recall), precision + recall),
        "Accuracy": safe_divide(
            true_positives,
            true_positives + false_positives + false_negatives,
        ),
    }


def best_parse(parser, tokens):
    """Return the first tree ``parser`` yields for ``tokens``, or ``None``.

    NLTK's ViterbiParser raises ``ValueError`` when the grammar does not
    cover one of the input words; that case is reported as "no parse"
    rather than aborting the whole evaluation.
    """
    try:
        return next(iter(parser.parse(tokens)), None)
    except ValueError:
        return None


def train_parser():
    """Induce a PCFG from all treebank trees and wrap it in a ViterbiParser."""
    # Imported lazily so the pure metric helpers above stay importable
    # without NLTK installed.
    import nltk
    from nltk import ViterbiParser
    from nltk.corpus import treebank

    productions = [
        production
        for tree in treebank.parsed_sents()
        for production in tree.productions()
    ]
    grammar = nltk.induce_pcfg(nltk.Nonterminal("S"), productions)
    return ViterbiParser(grammar)


def main() -> None:
    """Download resources, train the parser, evaluate and print the scores."""
    import nltk
    from nltk.corpus import treebank

    for package in REQUIRED_NLTK_PACKAGES:
        nltk.download(package)

    parser = train_parser()

    # NOTE(review): the gold trees are simply the first treebank trees, not
    # parses of SENTENCES, so the scores only become a real evaluation once
    # SENTENCES holds the corresponding treebank sentences.
    gold_standard_trees = list(treebank.parsed_sents()[: len(SENTENCES)])

    true_positives = false_positives = false_negatives = 0
    for sentence, gold_tree in zip(SENTENCES, gold_standard_trees):
        tokens = nltk.word_tokenize(sentence)
        parsed_tree = best_parse(parser, tokens)
        # A sentence without a parse recovers none of the gold productions,
        # so all of them count as false negatives.
        parsed_productions = (
            parsed_tree.productions() if parsed_tree is not None else []
        )
        matched, spurious, missed = count_production_matches(
            gold_tree.productions(), parsed_productions
        )
        true_positives += matched
        false_positives += spurious
        false_negatives += missed

    scores = compute_metrics(true_positives, false_positives, false_negatives)
    for label, value in scores.items():
        print(f"{label}:", value)


if __name__ == "__main__":
    main()