File size: 1,067 Bytes
6b29ff7
2ee4a3c
 
 
6b29ff7
2ee4a3c
 
6b29ff7
2ee4a3c
 
 
 
 
 
 
 
 
 
 
 
6b29ff7
2ee4a3c
 
6b29ff7
2ee4a3c
 
6b29ff7
 
 
2ee4a3c
6b29ff7
 
2ee4a3c
6b29ff7
 
2ee4a3c
6b29ff7
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import streamlit as st
import nltk
from nltk.corpus import treebank
from nltk import PCFG, ViterbiParser
# Fetch only the NLTK resources this script actually uses instead of the whole
# 'all' collection: the Punkt tokenizer models (used by nltk.word_tokenize;
# newer NLTK releases look for 'punkt_tab') and the treebank corpus sample.
for _resource in ('punkt', 'punkt_tab', 'treebank'):
    nltk.download(_resource)

# Parsed sentences of the treebank corpus serve as the training data.
corpus = treebank.parsed_sents()

# Collect every production of every tree; duplicates are kept on purpose
# because the PCFG probabilities are estimated from production counts.
productions = [
    production
    for tree in corpus
    for production in tree.productions()
]

# Induce a PCFG rooted at the sentence symbol 'S'.
S = nltk.Nonterminal('S')
grammar = nltk.induce_pcfg(S, productions)

# Build the parser on the induced grammar.
# NOTE(review): this whole setup is module-level code, so it presumably runs
# again on every Streamlit script rerun — consider caching it; confirm against
# the Streamlit version in use.
parser = ViterbiParser(grammar)

def evaluate_parser(sentence):
    """Parse *sentence* with the module-level ``parser``.

    Args:
        sentence: Raw sentence text; it is split into tokens with
            ``nltk.word_tokenize`` before parsing.

    Returns:
        The first tree produced by the parser, or the string
        ``"Failed to parse the sentence."`` when the parser yields no tree
        or the grammar does not cover one of the input tokens.
    """
    failure_message = "Failed to parse the sentence."
    tokens = nltk.word_tokenize(sentence)

    try:
        # The parser validates grammar coverage before parsing and raises
        # ValueError for tokens that are not in the grammar's lexicon; treat
        # that as an ordinary parse failure rather than crashing the app.
        parsed_trees = list(parser.parse(tokens))
    except ValueError:
        return failure_message

    if parsed_trees:
        return parsed_trees[0]
    return failure_message

# ---- Streamlit front end ----
st.title("PCFG Parser Evaluation")

# Free-text field for the sentence, pre-filled with a sample value
sentence = st.text_input("Enter a sentence:", "this is a beautiful")

# Parse only when the button is pressed
parse_requested = st.button("Parse")
if parse_requested:
    # Parse first, then emit the heading followed by the result
    parsed_tree = evaluate_parser(sentence)
    for output in ("Parsed Tree:", parsed_tree):
        st.write(output)