diyclassics's picture
Update app
3aa7322
import streamlit as st
import spacy
import pandas as pd
import datetime
st.set_page_config(layout="wide")
default_text = """Ita fac, mi Lucili; vindica te tibi, et tempus, quod adhuc aut auferebatur aut subripiebatur aut excidebat, collige et serva."""
def format_morph(morph):
morph = morph.to_dict()
if morph:
return ", ".join([f"{k}={v}" for k, v in morph.items()])
else:
return ""
def analyze_text(text):
doc = nlp(text)
rows = []
for token in doc[:100]:
rows.append(
(
token.text,
token.norm_,
token.lower_,
token.lemma_,
token.pos_,
token.tag_,
token.dep_,
format_morph(token.morph),
token.ent_type_,
)
)
df = pd.DataFrame(
rows,
columns=[
"text",
"norm",
"lower",
"lemma",
"pos",
"tag",
"dep",
"morph",
"ent_type",
],
)
return df
st.title("LatinCy Text Analyzer")
# Using object notation
model_selectbox = st.sidebar.selectbox(
"Choose model:",
("la_core_web_lg", "la_core_web_md", "la_core_web_sm")
)
nlp = spacy.load(model_selectbox)
df = None
text = st.text_area(
"Enter some text to analyze (max 100 tokens)", value=default_text, height=200
)
if st.button("Analyze"):
df = analyze_text(text)
st.text(f"Analyzed {len(df)} tokens with {model_selectbox} model.")
st.dataframe(df, width=1000)
@st.cache_data
def convert_df(df):
return df.to_csv(index=False, sep="\t").encode("utf-8")
csv = convert_df(df)
def create_timestamp():
return datetime.datetime.now().strftime("%Y%m%d%H%M%S")
# nb: clicking this button resets app! Open streamlit issue, as of 4.15.2023; cf. https://github.com/streamlit/streamlit/issues/4382
st.markdown("*NB: Clicking the download button will reset the app after download!*")
st.download_button(
"Press to Download",
csv,
f"latincy-analysis-{create_timestamp()}.tsv",
"text/csv",
key="download-csv",
)