Spaces:
Runtime error
Runtime error
| from io import StringIO | |
| import streamlit as st | |
| from nltk import Tree | |
| import stanza | |
| from annotated_text import annotated_text | |
# Page setup. set_page_config stays the first Streamlit call in the script.
st.set_page_config(layout='wide')
st.title("Clause segmentation")
st.info("This bert stanza parser is very slow, please checkout the berkley parser")
uploaded_file = st.file_uploader("Upload your text file", type="txt")


def _cache_resource(func):
    """Wrap *func* in Streamlit's resource cache when this version offers one.

    Newer releases expose ``st.cache_resource``; older ones expose
    ``st.experimental_singleton``. If neither attribute exists, *func* is
    returned unchanged, which matches the previous uncached behaviour.
    """
    decorator = (getattr(st, "cache_resource", None)
                 or getattr(st, "experimental_singleton", None))
    return decorator(func) if decorator is not None else func


@_cache_resource
def _load_pipeline():
    """Build the English stanza pipeline with the wsj_bert constituency model."""
    return stanza.Pipeline(lang="en", processors='tokenize,pos,constituency',
                           package={'constituency': 'wsj_bert'})


# Streamlit re-executes this script on each rerun; going through the cached
# loader avoids rebuilding the pipeline every time.
nlp = _load_pipeline()
def tree_to_text(tree):
    """Yield the text of every "S" clause found in *tree*.

    The tree is re-parsed from ``str(tree)`` with ``nltk.Tree.fromstring``.
    Each subtree whose label is exactly "S" is rendered as its leaves joined
    by single spaces, in the order ``subtrees()`` visits them. Every clause
    except the last is then cut off at the point where the next collected
    clause starts inside it, so a nested clause is not repeated in the text
    of the clause that contains it.

    Args:
        tree: Any object whose ``str()`` is a bracketed constituency tree.

    Yields:
        str: One (possibly truncated, possibly empty) string per "S" subtree.
    """
    parsed = Tree.fromstring(str(tree))
    clauses = [
        ' '.join(subtree.leaves())
        for subtree in parsed.subtrees()
        if subtree.label() == "S"
    ]
    for position in range(len(clauses) - 1):
        # The next clause is not always contained in the current one (e.g.
        # sibling clauses). str.index() would raise ValueError there, so use
        # find() and leave the current clause untouched when nothing matches.
        cut = clauses[position].find(clauses[position + 1])
        if cut != -1:
            clauses[position] = clauses[position][:cut]
    yield from clauses
def constituency_tree():
    """Yield the constituency parse of each sentence in the uploaded file.

    The upload is decoded as UTF-8, passed through the module-level ``nlp``
    pipeline in one call, and the ``constituency`` attribute of every
    resulting sentence is yielded in document order.
    """
    raw_text = uploaded_file.getvalue().decode("utf-8")
    document = nlp(raw_text)
    yield from (sentence.constituency for sentence in document.sentences)
def sentence_reader():
    """Yield the uploaded file's lines one at a time, line endings included.

    The upload is decoded as UTF-8 and split into lines by ``StringIO``.
    """
    buffer = StringIO(uploaded_file.getvalue().decode("utf-8"))
    yield from buffer.readlines()
def main():
    """Render one expander per sentence with its clauses highlighted.

    Parse trees from ``constituency_tree()`` are paired positionally with
    lines from ``sentence_reader()``. For each pair an expanded expander
    titled ``Sentence<N>`` (numbered from 1) shows the raw line, followed by
    the clause strings from ``tree_to_text`` as colour-annotated text.
    """
    colors = ["#eae4e9", "#fff1e6", "#fde2e4", "#fad2e1", "#e2ece9",
              "#bee1e6", "#f0efeb", "#dfe7fd", "#cddafd"]
    pairs = zip(constituency_tree(), sentence_reader())
    for key, (tree, text) in enumerate(pairs, start=1):
        with st.expander(f"Sentence{key}", expanded=True):
            # Wrap around the palette with a modulo index. Zipping clauses
            # against the colour list would silently drop every clause
            # beyond the number of colours.
            annotated_list = [
                (sub_phrase, "", colors[position % len(colors)])
                for position, sub_phrase in enumerate(tree_to_text(tree))
            ]
            st.write(f'{text}')
            annotated_text(*annotated_list)
# Run the app only when executed as a script and once a file has been uploaded.
if __name__ == "__main__" and uploaded_file is not None:
    main()