Spaces:
Runtime error
Runtime error
from io import StringIO | |
import streamlit as st | |
from nltk import Tree | |
import stanza | |
from annotated_text import annotated_text | |
# Page chrome and the upload widget. The calls keep their original order,
# with set_page_config issued before any other Streamlit command.
st.set_page_config(layout='wide')
st.title("Clause segmentation")
st.info("This bert stanza parser is very slow, please checkout the berkley parser")
uploaded_file = st.file_uploader("Upload your text file", type="txt")


def _build_pipeline():
    """Build the English stanza pipeline using the wsj_bert constituency model."""
    return stanza.Pipeline(lang="en", processors='tokenize,pos,constituency',
                           package={'constituency': 'wsj_bert'})


# Streamlit re-executes this script on every interaction, so constructing the
# pipeline inline would reload the model each time. When the installed
# Streamlit offers st.cache_resource, build the pipeline through it so it is
# created once and reused; otherwise fall back to building it directly.
_cache_resource = getattr(st, "cache_resource", None)
nlp = _cache_resource(_build_pipeline)() if _cache_resource else _build_pipeline()
def tree_to_text(tree):
    """Yield the text of each "S" clause found in a constituency parse.

    The parse is re-read with ``nltk.Tree.fromstring(str(tree))`` and every
    subtree labelled exactly "S" contributes its leaves joined by single
    spaces, in the order ``subtrees()`` visits them. Each clause except the
    last is then cut off at the point where the following clause's text first
    appears inside it, so a clause does not repeat the text of the clause
    nested in it.

    Args:
        tree: Any object whose ``str()`` is a bracketed parse accepted by
            ``nltk.Tree.fromstring``.

    Yields:
        str: One (possibly empty) string per "S" subtree.
    """
    parsed = Tree.fromstring(str(tree))
    clauses = [
        ' '.join(subtree.leaves())
        for subtree in parsed.subtrees()
        if subtree.label() == "S"
    ]
    for position, clause in enumerate(clauses[:-1]):
        # The following clause is not always contained in this one (for
        # example when two "S" nodes are siblings). str.index would raise
        # ValueError in that case, so look it up with find() and leave the
        # clause whole when there is nothing to cut.
        cut = clause.find(clauses[position + 1])
        if cut != -1:
            clauses[position] = clause[:cut]
    yield from clauses
def constituency_tree():
    """Yield the constituency parse of every sentence in the uploaded file.

    The upload's bytes are decoded as UTF-8 and passed to the module-level
    ``nlp`` pipeline in a single call; the ``constituency`` attribute of each
    entry in the resulting ``sentences`` is yielded in order.
    """
    raw_text = uploaded_file.getvalue().decode("utf-8")
    parsed = nlp(raw_text)
    yield from (sentence.constituency for sentence in parsed.sentences)
def sentence_reader():
    """Yield the uploaded file line by line, line endings included.

    The upload's bytes are decoded as UTF-8 and split into lines by an
    in-memory text stream.
    """
    decoded = uploaded_file.getvalue().decode("utf-8")
    # Iterating a StringIO produces the same lines as calling readlines() on it.
    yield from StringIO(decoded)
def main():
    """Render one expander per sentence with its clauses highlighted.

    Parses from ``constituency_tree()`` are paired positionally with lines
    from ``sentence_reader()``. For each pair an expander titled
    ``Sentence<N>`` (numbered from 1) shows the raw line followed by the
    clauses produced by ``tree_to_text``, each on its own background colour.

    NOTE(review): the positional pairing presumably expects the upload to
    hold exactly one sentence per line with no blank lines; otherwise the
    displayed line may not belong to the parse shown beneath it. Confirm
    against the expected input format.
    """
    # Alternative palette: ["#9CFAFF", "#B1FBDF", "#C5FDBE", "#DAFE9E", "#EEFF7D"]
    colors = ["#eae4e9", "#fff1e6", "#fde2e4", "#fad2e1", "#e2ece9", "#bee1e6", "#f0efeb", "#dfe7fd", "#cddafd"]
    pairs = zip(constituency_tree(), sentence_reader())
    for key, (tree, text) in enumerate(pairs, start=1):
        with st.expander(f"Sentence{key}", expanded=True):
            # Index into the palette modulo its length so that a sentence with
            # more clauses than colours still shows every clause; zipping the
            # clauses with the palette would silently drop the extras.
            annotated_list = [
                (sub_phrase, "", colors[position % len(colors)])
                for position, sub_phrase in enumerate(tree_to_text(tree))
            ]
            st.write(f'{text}')
            annotated_text(*annotated_list)
# Run the app only when executed as a script and a file has been uploaded;
# the short-circuit keeps the upload check from running on import.
if __name__ == "__main__" and uploaded_file is not None:
    main()