Spaces:

BramVanroy
/

text-to-amr

Running

App Files Files Community

Bram Vanroy committed on Feb 12, 2023

Commit

f3fd096

1 Parent(s): 50faa6a

init space

Browse files

Files changed (5) hide show

.gitignore +237 -0
README.md +10 -7
app.py +113 -0
packages.txt +1 -0
requirements.txt +6 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,237 @@

+Pipfile*
+data/*
+*config.json
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+.idea/
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+# AWS User-specific
+.idea/**/aws.xml
+# Generated files
+.idea/**/contentModel.xml
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+# CMake
+cmake-build-*/
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+# File-based project format
+*.iws
+# IntelliJ
+out/
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+# JIRA plugin
+atlassian-ide-plugin.xml
+# Cursive Clojure plugin
+.idea/replstate.xml
+# SonarLint plugin
+.idea/sonarlint/
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+# Editor-based Rest Client
+.idea/httpRequests
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md CHANGED Viewed

@@ -1,13 +1,16 @@
 ---
-title: Text To Amr
-emoji: 🚀
-colorFrom: purple
-colorTo: blue
 sdk: streamlit
 sdk_version: 1.17.0
 app_file: app.py
-pinned: false
 license: gpl-3.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: Text To AMR
+emoji: 🦀
+colorFrom: green
+colorTo: gray
 sdk: streamlit
 sdk_version: 1.17.0
 app_file: app.py
+pinned: true
 license: gpl-3.0
+tags:
+    - natural language processing
+    - semantic parsing
+    - abstract meaning representation
+    - amr
 ---

app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from collections import Counter
+import graphviz
+from optimum.bettertransformer import BetterTransformer
+import penman
+from penman.models.noop import NoOpModel
+from mbart_amr.constraints.constraints import AMRLogitsProcessor
+from mbart_amr.data.linearization import linearized2penmanstr
+from mbart_amr.data.tokenization import AMRMBartTokenizer
+from transformers import MBartForConditionalGeneration, LogitsProcessorList
+import streamlit as st
+if "logits_processor" not in st.session_state:
+    st.session_state["logits_processor"] = None
+if "tokenizer" not in st.session_state:
+    st.session_state["tokenizer"] = None
+if "model" not in st.session_state:
+    st.session_state["tokenizer"] = AMRMBartTokenizer.from_pretrained("BramVanroy/mbart-en-to-amr", src_lang="en_XX")
+    st.session_state["model"] = MBartForConditionalGeneration.from_pretrained("BramVanroy/mbart-en-to-amr")
+    st.session_state["model"] = BetterTransformer.transform(st.session_state["model"], keep_original_model=False)
+    st.session_state["model"].resize_token_embeddings(len(st.session_state["tokenizer"]))
+    st.session_state["logits_processor"] = AMRLogitsProcessor(st.session_state["tokenizer"],
+                                                              st.session_state["model"].config.max_length)
+st.title("📝 Parse text into AMR")
+text = st.text_input(label="Text to transform (en)")
+if text and "model" in st.session_state:
+    gen_kwargs = {
+        "max_length": st.session_state["model"].config.max_length,
+        "num_beams": st.session_state["model"].config.num_beams,
+        "logits_processor": LogitsProcessorList([st.session_state["logits_processor"]]) if st.session_state[
+            "logits_processor"] else None
+    }
+    encoded = st.session_state["tokenizer"](text, return_tensors="pt")
+    generated = st.session_state["model"].generate(**encoded, **gen_kwargs)
+    linearized = st.session_state["tokenizer"].decode_and_fix(generated)[0]
+    penman_str = linearized2penmanstr(linearized)
+    try:
+        graph = penman.decode(penman_str, model=NoOpModel())
+    except Exception as exc:
+        st.write(f"The generated graph is not valid so it cannot be visualized correctly. Below is the closest attempt"
+                 f" to a valid graph but note that this is invalid Penman.")
+        st.code(penman_str)
+        with st.expander("Error trace"):
+            st.write(exc)
+    else:
+        visualized = graphviz.Digraph(node_attr={"color": "#3aafa9", "style": "rounded,filled", "shape": "box",
+                                                 "fontcolor": "white"})
+        # Count which names occur multiple times, e.g. t/talk-01 t2/talk-01
+        nodename_c = Counter([item[2] for item in graph.triples if item[1] == ":instance"])
+        # Generated initial nodenames for each variable, e.g. {"t": "talk-01",  "t2": "talk-01"}
+        nodenames = {item[0]: item[2] for item in graph.triples if item[1] == ":instance"}
+        # Modify nodenames, so that the values are unique, e.g. {"t": "talk-01 (1)",  "t2": "talk-01 (2)"}
+        # but only the value occurs more than once
+        nodename_str_c = Counter()
+        for varname in nodenames:
+            nodename = nodenames[varname]
+            if nodename_c[nodename] > 1:
+                nodename_str_c[nodename] += 1
+                nodenames[varname] = f"{nodename} ({nodename_str_c[nodename]})"
+        def get_node_name(item: str):
+            return nodenames[item] if item in nodenames else item
+        try:
+            for triple in graph.triples:
+                if triple[1] == ":instance":
+                    continue
+                else:
+                    visualized.edge(get_node_name(triple[0]), get_node_name(triple[2]), label=triple[1])
+        except Exception as exc:
+            st.write("The generated graph is not valid so it cannot be visualized correctly. Below is the closest attempt"
+                     " to a valid graph but note that this is probably invalid Penman.")
+            st.code(penman_str)
+            st.write("The initial linearized output of the model was:")
+            st.code(linearized)
+            with st.expander("Error trace"):
+                st.write(exc)
+        else:
+            st.subheader("Graph visualization")
+            st.graphviz_chart(visualized, use_container_width=True)
+            # Download
+            img = visualized.pipe(format="png")
+            st.download_button("Download graph", img, mime="image/png")
+            # Additional info
+            st.subheader("Model output and Penman graph")
+            st.write("The linearized output of the model (after some post-processing) is:")
+            st.code(linearized)
+            st.write("When converted into Penman, it looks like this:")
+            st.code(penman.encode(graph))
+########################
+# Information, socials #
+########################
+st.markdown("## Contact ✒️")
+st.markdown("Would you like  additional functionality in the demo? Or just want to get in touch?"
+            " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
+            " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")

packages.txt ADDED Viewed

	@@ -0,0 +1 @@


+graphviz

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+graphviz
+optimum>=1.6.0
+penman
+streamlit==1.17.0
+torch>=1.13
+git+https://github.com/BramVanroy/multilingual-text-to-amr@5859af0d870acd2f76d71e5a7d12fa35a7a2059b#egg=mbart-amr