from typing import Optional

import base64

import spacy
from spacy import displacy
import streamlit as st
import deplacy
import graphviz  # only needed if the commented-out graphviz.Source alternative below is enabled


st.set_page_config(layout="wide")

st.title("Ancient Greek Analyzer")

st.markdown("Here you'll find four spaCy models for processing ancient Greek. They have been trained with the Universal Dependencies datasets *Perseus* and *Proiel*. We provide two types of models for each dataset. The '_lg' models were built with tok2vec pretrained embeddings and fasttext vectors, while the '_tfr' models have a transfomers layer. You can choose among models to compare their performance. More information about the models can be found in the [Huggingface Models Hub] (https://huggingface.co/Jacobo).")

st.sidebar.image("logo.png", use_column_width=False, width=150, caption="\n provided by Diogenet")

st.sidebar.title("Choose model:")
spacy_model = st.sidebar.selectbox("", ["grc_ud_perseus_lg", "grc_ud_proiel_lg"])

st.header("Text to analyze:")
text = st.text_area("", "Πλάτων ὁ Περικτιόνης τὸ γένος ἀνέφερεν εἰς Σόλωνα.")


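# Load the selected spaCy pipeline and run it over the input text.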
nlp = spacy.load(spacy_model)
doc = nlp(text)

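# Helpers for embedding displaCy's SVG output as inline HTML in the page.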
def get_html(html: str):
    """Convert HTML so it can be rendered."""
    WRAPPER = """<div style="overflow-x: auto; border: 1px solid #e6e9ef; border-radius: 0.25rem; padding: 1rem; margin-bottom: 2.5rem">{}</div>"""
    # Newlines seem to mess with the rendering
    html = html.replace("\n", " ")
    return WRAPPER.format(html)

def get_svg(svg: str, style: str = "", wrap: bool = True):
    """Convert an SVG to a base64-encoded image."""
    b64 = base64.b64encode(svg.encode("utf-8")).decode("utf-8")
    html = f'<img src="data:image/svg+xml;base64,{b64}" style="{style}"/>'
    return get_html(html) if wrap else html

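# Simplified dependency-parse visualizer based on spacy_streamlit's visualize_parser.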
def visualize_parser(
    doc: spacy.tokens.Doc,
    *,
    title: Optional[str] = "Dependency parse & part of speech",
    key: Optional[str] = None,
) -> None:
    """Visualizer for dependency parses."""
    if title:
        st.header(title)
    cols = st.columns(4)
    split_sents = cols[0].checkbox(
        "Split sentences", value=True, key=f"{key}_parser_split_sents"
    )
    options = {
        "collapse_punct": cols[1].checkbox(
            "Collapse punct", value=True, key=f"{key}_parser_collapse_punct"    
        ),
        "compact": cols[3].checkbox("Compact mode", value=True, key=f"{key}_parser_compact"),
    }
    docs = [span.as_doc() for span in doc.sents] if split_sents else [doc]
    for sent in docs:
        html = displacy.render(sent, options=options, style="dep")
        # Double newlines seem to mess with the rendering
        html = html.replace("\n\n", "\n")
        if split_sents and len(docs) > 1:
            st.markdown(f"> {sent.text}")
        st.write(get_svg(html), unsafe_allow_html=True)


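# Render the dependency parse of the analyzed text.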
visualize_parser(doc)

#graph_r = deplacy.render(doc)

#st.graphviz_chart(graph_r)


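# deplacy emits Graphviz DOT source for the parse, which st.graphviz_chart renders directly.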
graph_dot = deplacy.dot(doc)

#graphviz.Source(deplacy.dot(doc))

st.graphviz_chart(graph_dot)





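# Optional second model selector for side-by-side comparison (currently disabled).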
#st.sidebar.title("Model 2")
#spacy_model2 = st.sidebar.selectbox("Model 2", ["grc_ud_perseus_lg", "grc_ud_proiel_lg"])

#st.header("Text to analyze:")
#text = st.text_area("", "Πλάτων ὁ Περικτιόνης τὸ γένος ἀνέφερεν εἰς Σόλωνα.")


#nlp = spacy.load(spacy_model2)
#doc2 = nlp(text)

#visualize_parser(doc2)

#visualizers = ["pos", "dep"]
#spacy_streamlit.visualize(models, default_text,visualizers)