text-to-amr / app.py
Bram Vanroy
update typos
e10ccfa
import base64
from collections import Counter
import graphviz
import penman
from multi_amr.data.postprocessing_graph import ParsedStatus
from utils import get_resources, LANGUAGES, translate
import streamlit as st
# Page metadata and title.
st.set_page_config(
    page_title="Multilingual text-to-AMR demo by Bram Vanroy",
    page_icon="👩‍💻",
)
st.title("👩‍💻 Multilingual text-to-AMR")

# Seed the session-state entries that back the input widgets below (they are
# bound through ``key=``); values that already exist are left untouched.
for state_key, default_value in (
    ("text", ""),
    ("language", "English"),
    ("use_multilingual", False),
):
    if state_key not in st.session_state:
        st.session_state[state_key] = default_value

# Input row: a wide text field next to a narrow language selector (4:1).
text_col, lang_col = st.columns((4, 1))
text = text_col.text_input(label="Input text", key="text")
src_lang = lang_col.selectbox(label="Language", options=list(LANGUAGES.keys()), index=0, key="language")
multilingual = st.checkbox(
    "Use multilingual model",
    label_visibility="visible",
    key="use_multilingual",
    help="Whether to use a single multilingual model that was trained on English, Spanish and"
    " Dutch together, or (if not checked) language-specific models. Enabling this will"
    " result in worse performance but can be of interest for research purposes.",
)

# Placeholder slot that is reused for the progress notice and the
# empty-input warning.
error_ct = st.empty()
def _unique_node_labels(triples):
    """Return a ``{variable: label}`` mapping with graph-unique labels.

    Labels are taken from the ``:instance`` triples. A concept that occurs in
    more than one instance triple gets a running index so that separate
    variables are not merged into a single drawn node, e.g.
    ``{"t": "talk-01 (1)", "t2": "talk-01 (2)"}``. Concepts that occur only
    once keep their plain name.

    :param triples: sequence of ``(source, role, target)`` triples
    :return: dict mapping each variable to its display label
    """
    instances = [(source, target) for source, role, target in triples if role == ":instance"]
    # Count which concepts occur multiple times, e.g. t/talk-01 t2/talk-01
    concept_counts = Counter(concept for _, concept in instances)

    labels = {}
    seen = Counter()
    for variable, concept in dict(instances).items():
        # Only number the label if the concept occurs more than once
        if concept_counts[concept] > 1:
            seen[concept] += 1
            labels[variable] = f"{concept} ({seen[concept]})"
        else:
            labels[variable] = concept
    return labels


def _build_digraph(triples):
    """Build the graphviz digraph that visualizes the given triples.

    ``:instance`` triples only provide node labels; every other triple
    becomes a labelled edge. Targets that are not variables (constants) are
    drawn under their own value.

    :param triples: sequence of ``(source, role, target)`` triples
    :return: the populated ``graphviz.Digraph``
    """
    digraph = graphviz.Digraph(
        node_attr={
            "color": "#3aafa9",
            "style": "rounded,filled",
            "shape": "box",
            "fontcolor": "white",
        }
    )
    labels = _unique_node_labels(triples)
    for source, role, target in triples:
        if role == ":instance":
            continue
        # NOTE(review): graphviz documents edge endpoints as
        # ``node[:port[:compass]]``, so a constant containing ":" may be
        # split into node and port -- confirm whether that can occur here.
        digraph.edge(labels.get(source, source), labels.get(target, target), label=role)
    return digraph


def create_download_link(img_bytes: bytes):
    """Return an HTML anchor that downloads ``img_bytes`` as ``amr-graph.png``.

    The image is embedded in the link itself as a base64 ``data:`` URI.
    """
    encoded = base64.b64encode(img_bytes).decode("utf-8")
    return f'<a href="data:image/png;charset=utf-8;base64,{encoded}" download="amr-graph.png">Download graph</a>'


# Strip before the emptiness check so that whitespace-only input triggers the
# warning below instead of being passed to the model as an empty string.
text = text.strip()
if text:
    if st.button("Submit"):
        error_ct.info("Generating abstract meaning representation (AMR)...", icon="💻")
        model, tokenizer = get_resources(multilingual, src_lang)
        gen_kwargs = {
            "max_new_tokens": 512,
            "num_beams": 5,
        }
        outputs = translate(text, src_lang, model, tokenizer, **gen_kwargs)
        # Generation is done: remove the progress notice.
        error_ct.empty()

        # Only the first (and only requested) result is inspected.
        if outputs["status"][0] == ParsedStatus.BACKOFF:
            st.write("The system could not generate a valid graph no matter how hard it tried.")
        else:
            graph = outputs["graph"][0]
            visualized = _build_digraph(graph.triples)

            st.subheader("Graph visualization")
            st.graphviz_chart(visualized, use_container_width=True)

            # Download link
            img = visualized.pipe(format="png")
            st.markdown(create_download_link(img), unsafe_allow_html=True)

            # Additional info
            st.subheader("PENMAN representation")
            st.code(penman.encode(graph))
else:
    error_ct.warning("Text cannot be empty!", icon="⚠️")
########################
# Information, socials #
########################
# Static footer: project background followed by contact links. The first
# block is raw HTML (logo next to the description), hence
# ``unsafe_allow_html=True``.
st.header("SignON 🤟")
st.markdown("""
<div style="display: flex">
<img style="margin-right: 1em" alt="SignON logo" src="https://signon-project.eu/wp-content/uploads/2021/05/SignOn_Favicon_500x500px.png" width=64 height=64>
<p><a href="https://signon-project.eu/" target="_blank" title="SignON homepage">SignON</a> aims to bridge the
communication gap between deaf, hard-of-hearing and hearing people through an accessible translation service.
This service will translate between languages and modalities with particular attention for sign languages.</p>
</div>""", unsafe_allow_html=True)
st.markdown("""[Abstract meaning representation](https://aclanthology.org/W13-2322/) (AMR)
is a semantic framework to describe meaning relations of sentences as graphs. In the SignON project, AMR is used as
an interlingua to translate between modalities and languages. To this end, I built MBART models for the task of
generating AMR representations from an input sentence, which is show-cased in this demo.
""")

st.header("Contact ✒️")
st.markdown("Would you like additional functionality in the demo, do you have questions, or just want to get in touch?"
            " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
            " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")