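"""Streamlit app for the Ancient Greek Syntax Analyzer.

The app loads a user-selected spaCy model, parses the Greek text entered in the
text box, and renders the dependency parse, named entities, and a token table.

Run locally with (assuming this file is saved as app.py):
    streamlit run app.py
"""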

from typing import Optional
import base64

import spacy
from spacy import displacy
import streamlit as st
# visualize_parser is not imported from spacy_streamlit because a customized
# version is defined further below.
from spacy_streamlit import visualize_ner, visualize_tokens
# import pandas as pd  # only needed if the commented-out table option below is enabled


st.set_page_config(layout="wide")

st.image("logo.png", use_column_width=False, width=150)

st.title("Ancient Greek Syntax Analyzer")

st.markdown("Welcome to our analyzer. Here you can parse the parts of speech (POS) and the syntactic relationships of any ancient Greek sentence. This analysis is done by our language models trained with transformers and the NLP library spaCy.  Below, you can choose which model do you want to use (each model may produce a different analysis).  Documentation about the linguistic terms used by our models to annotate your sentences can be found here.  If you have any questions, please contact us at diogenet@ucsd.edu")

st.header("Select a model:")
spacy_model = st.selectbox(
    "Model",
    ["grc_proiel_lg", "grc_proiel_trf", "grc_proiel_sm",
     "grc_perseus_lg", "grc_perseus_trf", "grc_perseus_sm"],
)
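# Note: each model listed above must already be installed in the running
# environment (e.g. as a pip-installable spaCy model package); otherwise
# spacy.load() below will raise an error.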

st.header("Enter text:")
text = st.text_area("Greek text","ἐπὶ τοῦτον δὴ τὸν Ἄμασιν Καμβύσης ὁ Κύρου ἐστρατεύετο, ἄγων καί ἄλλους τῶν ἦρχε καὶ Ἑλλήνων Ἴωνάς τε καὶ Αἰολέας.")


# Optional sentence segmentation: a rule-based sentencizer could be inserted
# before the parser, using punctuation characters suited to Greek text.
# config = {"punct_chars": [".", ";", "·"]}

nlp = spacy.load(spacy_model)
# nlp.add_pipe("sentencizer", config=config, before="parser")
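
# A possible optimization (sketch only, assuming a Streamlit version that
# provides st.cache_resource): cache the loaded pipeline so the model is not
# reloaded on every rerun of the script.
#
#   @st.cache_resource
#   def load_model(name: str):
#       return spacy.load(name)
#
#   nlp = load_model(spacy_model)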


# Run the selected pipeline on the input text
doc = nlp(text)

def get_html(html: str):
    """Convert HTML so it can be rendered."""
    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    return WRAPPER.format(html)

def get_svg(svg: str, style: str = "", wrap: bool = True):
    """Convert an SVG to a base64-encoded image."""
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = f'<img src="data:image/svg+xml;base64,{b64}" style="{style}"/>'
    return get_html(html) if wrap else html
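
# For example, get_svg(displacy.render(doc, style="dep")) returns an <img> tag
# embedding the rendered dependency parse, which can then be passed to
# st.write(..., unsafe_allow_html=True), as done in visualize_parser() below.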

def visualize_parser(
    doc: spacy.tokens.Doc,
    *,
    title: Optional[str] = "Dependency parse & part of speech:",
    key: Optional[str] = None,
) -> None:
    """Visualizer for dependency parses."""
    if title:
        st.header(title)
    # Display options laid out in four columns (the third column is left empty).
    cols = st.columns(4)
    split_sents = cols[0].checkbox(
        "Split sentences", value=True, key=f"{key}_parser_split_sents"
    )
    options = {
        "collapse_punct": cols[1].checkbox(
            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"
        ),
        "compact": cols[3].checkbox(
            "Compact mode", value=True, key=f"{key}_parser_compact"
        ),
    }
    docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
    for sent in docs:
        html = displacy.render(sent, options=options, style="dep")
        # Double newlines seem to mess with the rendering
        html = html.replace("\n\n", "\n")
        if split_sents and len(docs) > 1:
            st.markdown(f"> {sent.text}")
        st.write(get_svg(html), unsafe_allow_html=True)


#displacy.render(doc, style="ent")


visualize_parser(doc)

visualize_ner(
    doc,
    labels=["PERSON","LOC","NORP","GOD","LANGUAGE"],
    show_table=False,
    title="Persons, locations, groups, gods, and languages",
)

#pd.set_option('display.max_colwidth', None)


visualize_tokens(
    doc,
    attrs=["text", "lemma_", "pos_", "dep_", "ent_type_"],
    title="Table view:",
    key="tokens",
)