File size: 5,474 Bytes
05b9456
f3fd096
 
 
 
05de9a6
f3fd096
3d26c4a
 
f3fd096
 
3d26c4a
e10ccfa
3d26c4a
 
f3fd096
e10ccfa
05de9a6
 
 
 
 
 
 
f3fd096
05de9a6
 
 
 
 
 
 
f3fd096
05b9456
05de9a6
 
 
05b9456
05de9a6
05b9456
05de9a6
 
05b9456
 
05de9a6
05b9456
f3fd096
05de9a6
 
f3fd096
05de9a6
05b9456
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
05de9a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f3fd096
 
 
 
0e6dbbe
55fbc57
 
 
 
 
ded6735
 
0e6dbbe
 
 
 
 
e10ccfa
0e6dbbe
55fbc57
 
05b9456
f3fd096
0e6dbbe
f3fd096
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import base64
from collections import Counter

import graphviz
import penman
from multi_amr.data.postprocessing_graph import ParsedStatus

from utils import get_resources, LANGUAGES, translate

import streamlit as st

# Browser-tab title and icon of the demo page.
st.set_page_config(
    page_title="Multilingual text-to-AMR demo by Bram Vanroy",
    page_icon="👩‍💻",
)

st.title("👩‍💻 Multilingual text-to-AMR")

# Seed the session state for the keys that the widgets below are bound to, without overwriting values that
# are already present.
for state_key, initial_value in (("text", ""), ("language", "English"), ("use_multilingual", False)):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value

# Input row: a wide text field next to a narrow language selector (4:1 column ratio).
text_col, lang_col = st.columns((4, 1))
text = text_col.text_input(label="Input text", key="text")
src_lang = lang_col.selectbox(label="Language", options=list(LANGUAGES), index=0, key="language")
multilingual = st.checkbox(
    "Use multilingual model",
    label_visibility="visible",
    key="use_multilingual",
    help="Whether to use a single multilingual model that was trained on English, Spanish and"
         " Dutch together, or (if not checked) language-specific models. Enabling this will"
         " result in worse performance but can be of interest for research purposes.",
)

def create_download_link(img_bytes: bytes) -> str:
    """Return an HTML anchor that embeds ``img_bytes`` as a base64 PNG data URI.

    The anchor carries a ``download`` attribute with the file name ``amr-graph.png``.
    """
    encoded = base64.b64encode(img_bytes).decode("utf-8")
    return f'<a href="data:image/png;charset=utf-8;base64,{encoded}" download="amr-graph.png">Download graph</a>'


def _unique_node_labels(triples) -> dict:
    """Map every variable of the graph to a display label that is unique within the graph.

    A concept that is used by a single variable keeps its plain name. A concept that is shared by several
    variables gets a running number, e.g. ``{"t": "talk-01 (1)", "t2": "talk-01 (2)"}``.
    """
    # Initial label for each variable, e.g. {"t": "talk-01", "t2": "talk-01"}
    concepts = {source: target for source, role, target in triples if role == ":instance"}
    # How often each concept occurs, e.g. t/talk-01 and t2/talk-01 -> {"talk-01": 2}
    concept_totals = Counter(target for _, role, target in triples if role == ":instance")

    labels = {}
    numbered_so_far = Counter()
    for variable, concept in concepts.items():
        if concept_totals[concept] > 1:
            numbered_so_far[concept] += 1
            labels[variable] = f"{concept} ({numbered_so_far[concept]})"
        else:
            labels[variable] = concept
    return labels


def _build_chart(triples) -> graphviz.Digraph:
    """Turn the triples of a graph into a ``graphviz.Digraph``.

    Every non-``:instance`` triple becomes a labelled edge. Variables are shown with their (unique) concept
    label; anything that is not a variable (a constant) is shown as-is. Variables that take part in no edge
    at all, such as the only node of a single-concept graph, are added explicitly so that they are still drawn.
    """
    chart = graphviz.Digraph(node_attr={"color": "#3aafa9", "style": "rounded,filled", "shape": "box",
                                        "fontcolor": "white"})
    labels = _unique_node_labels(triples)

    drawn = set()
    for source, role, target in triples:
        if role == ":instance":
            continue
        tail = labels.get(source, source)
        head = labels.get(target, target)
        chart.edge(tail, head, label=role)
        drawn.update((tail, head))

    # Nodes are otherwise only created implicitly through edges, so isolated variables would be missing.
    for label in labels.values():
        if label is not None and label not in drawn:
            chart.node(label)

    return chart


error_ct = st.empty()
# Strip before checking so that whitespace-only input is treated as empty rather than sent to the model.
input_text = st.session_state["text"].strip()
if input_text:
    if st.button("Submit"):
        error_ct.info("Generating abstract meaning representation (AMR)...", icon="💻")
        model, tokenizer = get_resources(multilingual, src_lang)
        gen_kwargs = {
            "max_new_tokens": 512,
            "num_beams": 5,
        }

        outputs = translate(input_text, src_lang, model, tokenizer, **gen_kwargs)
        # Generation is done: remove the status message again.
        error_ct.empty()

        if outputs["status"][0] == ParsedStatus.BACKOFF:
            st.write("The system could not generate a valid graph no matter how hard it tried.")
        else:
            graph = outputs["graph"][0]
            visualized = _build_chart(graph.triples)

            st.subheader("Graph visualization")
            st.graphviz_chart(visualized, use_container_width=True)

            # Download link
            img = visualized.pipe(format="png")
            st.markdown(create_download_link(img), unsafe_allow_html=True)

            # Additional info
            st.subheader("PENMAN representation")
            st.code(penman.encode(graph))
else:
    error_ct.warning("Text cannot be empty!", icon="⚠️")

########################
# Information, socials #
########################
st.header("SignON 🤟")

# Project blurb with the SignON logo next to it; written as raw HTML, hence unsafe_allow_html.
st.markdown("""
<div style="display: flex">
    <img style="margin-right: 1em" alt="SignON logo" src="https://signon-project.eu/wp-content/uploads/2021/05/SignOn_Favicon_500x500px.png" width=64 height=64>
    <p><a href="https://signon-project.eu/" target="_blank" title="SignON homepage">SignON</a> aims to bridge the
     communication gap between deaf, hard-of-hearing and hearing people through an accessible translation service. 
     This service will translate between languages and modalities with particular attention for sign languages.</p>
</div>""", unsafe_allow_html=True)

# Short explanation of AMR and of how it is used in the project.
st.markdown("""[Abstract meaning representation](https://aclanthology.org/W13-2322/) (AMR) 
is a semantic framework to describe meaning relations of sentences as graphs. In the SignON project, AMR is used as
 an interlingua to translate between modalities and languages. To this end, I built MBART models for the task of 
 generating AMR representations from an input sentence, which is show-cased in this demo.
""")


st.header("Contact ✒️")

# Links to the author's social profiles.
st.markdown("Would you like  additional functionality in the demo, do you have questions, or just want to get in touch?"
            " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
            " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")