Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import scattertext as stx | |
| import spacy | |
| import pandas as pd | |
| import en_core_web_md | |
# Scattertext Streamlit app.
#
# Flow: upload a CSV / tab-separated TXT file, choose how the file should be
# interpreted, choose the text and category columns, choose which category
# value to contrast against the rest, then render the interactive Scattertext
# explorer.

# load language model:
# The model is loaded exactly once here (it used to be loaded twice in a row).
# NOTE(review): Streamlit re-runs the script on every interaction; if the
# installed Streamlit version offers resource caching, wrapping this load in
# a cached function would avoid reloading the model on each rerun.
nlp = en_core_web_md.load()

# config
st.title("Scattertext Analysis")
# TODO:update other web settings

# Options of the "file source" selector. The first entry is a placeholder
# meaning "nothing chosen yet".
MODE_PLACEHOLDER = 'Choose...'
MODE_CUSTOM = 'Customized'
MODE_DATABASE = 'Download from Online Databases'

# Name of the abstract column used as the text in database mode, keyed by
# file extension: CSV exports use "Abstract", tab-separated TXT exports use
# "AB".
DATABASE_TEXT_COLUMNS = {".csv": "Abstract", ".txt": "AB"}


def _read_uploaded_table(uploaded):
    """Parse the uploaded file into a DataFrame.

    Returns a ``(dataframe, extension)`` tuple where ``extension`` is
    ``".csv"`` or ``".txt"``; returns ``(None, None)`` when the file name has
    neither extension. TXT files are assumed to be tab-separated.
    """
    # Rewind first: the script is re-executed on every widget interaction and
    # the upload buffer may already have been consumed by a previous read.
    uploaded.seek(0)
    filename = uploaded.name.lower()
    if filename.endswith(".csv"):
        return pd.read_csv(uploaded), ".csv"
    if filename.endswith(".txt"):
        # TODO : doc: assume contents are separated by Tabs.
        return pd.read_table(uploaded, sep='\t'), ".txt"
    return None, None


def _render_explorer(df, text_col, category_col, key_prefix):
    """Collect the plot parameters and render the Scattertext explorer.

    ``text_col`` and ``category_col`` are column *names* of ``df``.
    ``key_prefix`` keeps the widget keys of the two modes distinct.
    """
    # Rows without text or without a category cannot be parsed or plotted.
    docs = df.dropna(subset=[text_col, category_col]).copy()
    docs[text_col] = docs[text_col].astype(str)
    docs[category_col] = docs[category_col].astype(str)
    docs = docs.reset_index(drop=True)

    categories = sorted(docs[category_col].unique())
    if len(categories) < 2:
        st.warning("The category column needs at least two distinct values "
                   "to build a comparison.")
        return

    # Scattertext contrasts ONE category value against all other documents,
    # so the user has to pick a value, not a column.
    target_category = st.selectbox(
        "Choose the category to compare against all others",
        categories,
        key=key_prefix + "_target",
    )

    # customize parameters (empty input falls back to a sensible label)
    customize_category_name = st.text_input(
        'Enter the category name', key=key_prefix + "_category_name"
    ) or target_category
    customize_non_category_name = st.text_input(
        'Enter the non-category name', key=key_prefix + "_non_category_name"
    ) or "Not " + target_category

    # The button is the LAST widget: a button is only True for the single
    # rerun triggered by the click, so nothing interactive may depend on it.
    if not st.button("Generate plot", key=key_prefix + "_generate"):
        return

    # convert to scattertext corpus
    corpus = stx.CorpusFromPandas(
        docs,
        category_col=category_col,
        text_col=text_col,
        nlp=nlp,
    ).build()

    # create visualization
    html = stx.produce_scattertext_explorer(
        corpus,
        category=target_category,
        category_name=customize_category_name,
        not_category_name=customize_non_category_name,
        width_in_pixels=1000,
        minimum_term_frequency=0,
        # One label per document, shown next to its excerpts in the explorer.
        metadata=docs[category_col].tolist(),
    )
    # Explicit size + scrolling: the default iframe is far too small for a
    # 1000px-wide plot.
    st.components.v1.html(html, width=1100, height=800, scrolling=True)


# upload file
uploaded_file = st.file_uploader("Upload your text document", type=["csv", "txt"])

# read data
if uploaded_file is None:
    st.write("Please upload a CSV or TXT file to begin.")
else:
    # choose function
    function_choice = st.selectbox(
        'Choose file source',
        [MODE_PLACEHOLDER, MODE_CUSTOM, MODE_DATABASE],
    )

    if function_choice != MODE_PLACEHOLDER:
        df, extension = _read_uploaded_table(uploaded_file)

        if df is None:
            st.error("Unsupported file format.")

        # function1: generate plot from customized file
        elif function_choice == MODE_CUSTOM:
            chosen_column = st.selectbox(
                "Choose text column for analysis", list(df.columns)
            )
            category_options = [c for c in df.columns if c != chosen_column]
            if not category_options:
                st.error("The file needs a second column holding the "
                         "category of each document.")
            else:
                category_column = st.selectbox(
                    "Choose category column", category_options
                )
                _render_explorer(df, chosen_column, category_column, "custom")

        # function2: generate plot from databases
        # TODO doc: Explain: analyze abstract.
        else:
            abstract_column = DATABASE_TEXT_COLUMNS[extension]
            if abstract_column not in df.columns:
                st.error("Expected an abstract column named '"
                         + abstract_column + "' in this file.")
            else:
                # The abstract column is the text, so it is not offered as a
                # category; this replaces the old "please select again" path.
                category_options = [
                    c for c in df.columns if c != abstract_column
                ]
                if not category_options:
                    st.error("The file needs a column besides the abstract "
                             "to use as the category.")
                else:
                    chosen_column = st.selectbox(
                        "Choose category column for analysis BESIDES ABSTRACT",
                        category_options,
                    )
                    _render_explorer(
                        df, abstract_column, chosen_column, "database"
                    )