diyclassics committed on
Commit
3aa7322
1 Parent(s): 100b317

Update app

Browse files
Files changed (2) hide show
  1. app.py +90 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import spacy
3
+ import pandas as pd
4
+ import datetime
5
+
6
# Use the wide page layout for the app.
st.set_page_config(layout="wide")

# Latin sample sentence used as the initial value of the input text area.
default_text = """Ita fac, mi Lucili; vindica te tibi, et tempus, quod adhuc aut auferebatur aut subripiebatur aut excidebat, collige et serva."""
9
+
10
+
11
def format_morph(morph):
    """Render morphological features as a human-readable string.

    Args:
        morph: Object exposing ``to_dict()`` that returns a mapping of
            feature name to feature value (called with ``token.morph``).

    Returns:
        The features as ``"name=value"`` pairs joined by ``", "``, or an
        empty string when ``to_dict()`` yields nothing.
    """
    features = morph.to_dict()
    if not features:
        # No features to show: keep the table cell blank.
        return ""
    return ", ".join(f"{name}={value}" for name, value in features.items())
17
+
18
+
19
def analyze_text(text, max_tokens=100):
    """Run the loaded pipeline on *text* and tabulate per-token annotations.

    Args:
        text: Raw text to analyze.
        max_tokens: Number of leading tokens to include in the table.
            Defaults to 100, the limit advertised in the input box;
            ``None`` keeps every token.

    Returns:
        A pandas DataFrame with one row per token and the columns
        ``text``, ``norm``, ``lower``, ``lemma``, ``pos``, ``tag``,
        ``dep``, ``morph`` and ``ent_type``.
    """
    columns = [
        "text",
        "norm",
        "lower",
        "lemma",
        "pos",
        "tag",
        "dep",
        "morph",
        "ent_type",
    ]
    # ``nlp`` is the module-level pipeline loaded for the model chosen in the sidebar.
    doc = nlp(text)
    rows = [
        (
            token.text,
            token.norm_,
            token.lower_,
            token.lemma_,
            token.pos_,
            token.tag_,
            token.dep_,
            format_morph(token.morph),
            token.ent_type_,
        )
        for token in doc[:max_tokens]
    ]
    return pd.DataFrame(rows, columns=columns)
51
+
52
+
53
st.title("LatinCy Text Analyzer")


@st.cache_resource
def load_model(name):
    """Load the spaCy pipeline *name*, reusing it across script reruns.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded pipeline avoids calling ``spacy.load`` again each time.
    """
    return spacy.load(name)


# Using object notation
model_selectbox = st.sidebar.selectbox(
    "Choose model:",
    ("la_core_web_lg", "la_core_web_md", "la_core_web_sm"),
)

nlp = load_model(model_selectbox)

# Stays None until the user presses "Analyze".
df = None

text = st.text_area(
    "Enter some text to analyze (max 100 tokens)", value=default_text, height=200
)
68
@st.cache_data
def convert_df(df):
    """Serialize *df* to UTF-8 encoded, tab-separated bytes without the index."""
    return df.to_csv(index=False, sep="\t").encode("utf-8")


def create_timestamp():
    """Return the current local time as a ``YYYYmmddHHMMSS`` string."""
    return datetime.datetime.now().strftime("%Y%m%d%H%M%S")


if st.button("Analyze"):
    df = analyze_text(text)
    st.text(f"Analyzed {len(df)} tokens with {model_selectbox} model.")
    st.dataframe(df, width=1000)

    # Export only happens inside this branch: before the first click ``df``
    # is still None and has nothing to serialize.
    csv = convert_df(df)

    # nb: clicking this button resets app! Open streamlit issue, as of 4.15.2023; cf. https://github.com/streamlit/streamlit/issues/4382
    st.markdown("*NB: Clicking the download button will reset the app after download!*")
    st.download_button(
        "Press to Download",
        csv,
        file_name=f"latincy-analysis-{create_timestamp()}.tsv",
        # The payload is tab-separated, so advertise the TSV media type.
        mime="text/tab-separated-values",
        key="download-csv",
    )
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ la-core-web-lg @ https://huggingface.co/latincy/la_core_web_lg/resolve/main/la_core_web_lg-any-py3-none-any.whl#sha256=03e29fbaae0bf583610f6c042b874441aa213aee238f2a63f413bb608fe6f100
2
+ la-core-web-md @ https://huggingface.co/latincy/la_core_web_md/resolve/main/la_core_web_md-any-py3-none-any.whl#sha256=6c48e1494a8e892878a5381846fc8b3d7dc1c160b3ae2090098b856aa679bfd4
3
+ la-core-web-sm @ https://huggingface.co/latincy/la_core_web_sm/resolve/main/la_core_web_sm-any-py3-none-any.whl#sha256=0aecb1b9c9974b48b180092ab4e25b3bdba7c4b7b6cd47942e667cb054f07e04
4
+ pandas==1.5.3
5
+ spacy==3.5.2