Bram Vanroy committed on
Commit
f3fd096
·
1 Parent(s): 50faa6a

init space

Browse files
Files changed (5) hide show
  1. .gitignore +237 -0
  2. README.md +10 -7
  3. app.py +113 -0
  4. packages.txt +1 -0
  5. requirements.txt +6 -0
.gitignore ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Pipfile*
2
+ data/*
3
+ *config.json
4
+
5
+
6
+ # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
7
+ # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
8
+
9
+ .idea/
10
+ # User-specific stuff
11
+ .idea/**/workspace.xml
12
+ .idea/**/tasks.xml
13
+ .idea/**/usage.statistics.xml
14
+ .idea/**/dictionaries
15
+ .idea/**/shelf
16
+
17
+ # AWS User-specific
18
+ .idea/**/aws.xml
19
+
20
+ # Generated files
21
+ .idea/**/contentModel.xml
22
+
23
+ # Sensitive or high-churn files
24
+ .idea/**/dataSources/
25
+ .idea/**/dataSources.ids
26
+ .idea/**/dataSources.local.xml
27
+ .idea/**/sqlDataSources.xml
28
+ .idea/**/dynamic.xml
29
+ .idea/**/uiDesigner.xml
30
+ .idea/**/dbnavigator.xml
31
+
32
+ # Gradle
33
+ .idea/**/gradle.xml
34
+ .idea/**/libraries
35
+
36
+ # Gradle and Maven with auto-import
37
+ # When using Gradle or Maven with auto-import, you should exclude module files,
38
+ # since they will be recreated, and may cause churn. Uncomment if using
39
+ # auto-import.
40
+ # .idea/artifacts
41
+ # .idea/compiler.xml
42
+ # .idea/jarRepositories.xml
43
+ # .idea/modules.xml
44
+ # .idea/*.iml
45
+ # .idea/modules
46
+ # *.iml
47
+ # *.ipr
48
+
49
+ # CMake
50
+ cmake-build-*/
51
+
52
+ # Mongo Explorer plugin
53
+ .idea/**/mongoSettings.xml
54
+
55
+ # File-based project format
56
+ *.iws
57
+
58
+ # IntelliJ
59
+ out/
60
+
61
+ # mpeltonen/sbt-idea plugin
62
+ .idea_modules/
63
+
64
+ # JIRA plugin
65
+ atlassian-ide-plugin.xml
66
+
67
+ # Cursive Clojure plugin
68
+ .idea/replstate.xml
69
+
70
+ # SonarLint plugin
71
+ .idea/sonarlint/
72
+
73
+ # Crashlytics plugin (for Android Studio and IntelliJ)
74
+ com_crashlytics_export_strings.xml
75
+ crashlytics.properties
76
+ crashlytics-build.properties
77
+ fabric.properties
78
+
79
+ # Editor-based Rest Client
80
+ .idea/httpRequests
81
+
82
+ # Android studio 3.1+ serialized cache file
83
+ .idea/caches/build_file_checksums.ser
84
+
85
+
86
+ # Byte-compiled / optimized / DLL files
87
+ __pycache__/
88
+ *.py[cod]
89
+ *$py.class
90
+
91
+ # C extensions
92
+ *.so
93
+
94
+ # Distribution / packaging
95
+ .Python
96
+ build/
97
+ develop-eggs/
98
+ dist/
99
+ downloads/
100
+ eggs/
101
+ .eggs/
102
+ lib/
103
+ lib64/
104
+ parts/
105
+ sdist/
106
+ var/
107
+ wheels/
108
+ share/python-wheels/
109
+ *.egg-info/
110
+ .installed.cfg
111
+ *.egg
112
+ MANIFEST
113
+
114
+ # PyInstaller
115
+ # Usually these files are written by a python script from a template
116
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
117
+ *.manifest
118
+ *.spec
119
+
120
+ # Installer logs
121
+ pip-log.txt
122
+ pip-delete-this-directory.txt
123
+
124
+ # Unit test / coverage reports
125
+ htmlcov/
126
+ .tox/
127
+ .nox/
128
+ .coverage
129
+ .coverage.*
130
+ .cache
131
+ nosetests.xml
132
+ coverage.xml
133
+ *.cover
134
+ *.py,cover
135
+ .hypothesis/
136
+ .pytest_cache/
137
+ cover/
138
+
139
+ # Translations
140
+ *.mo
141
+ *.pot
142
+
143
+ # Django stuff:
144
+ *.log
145
+ local_settings.py
146
+ db.sqlite3
147
+ db.sqlite3-journal
148
+
149
+ # Flask stuff:
150
+ instance/
151
+ .webassets-cache
152
+
153
+ # Scrapy stuff:
154
+ .scrapy
155
+
156
+ # Sphinx documentation
157
+ docs/_build/
158
+
159
+ # PyBuilder
160
+ .pybuilder/
161
+ target/
162
+
163
+ # Jupyter Notebook
164
+ .ipynb_checkpoints
165
+
166
+ # IPython
167
+ profile_default/
168
+ ipython_config.py
169
+
170
+ # pyenv
171
+ # For a library or package, you might want to ignore these files since the code is
172
+ # intended to run in multiple environments; otherwise, check them in:
173
+ # .python-version
174
+
175
+ # pipenv
176
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
177
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
178
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
179
+ # install all needed dependencies.
180
+ #Pipfile.lock
181
+
182
+ # poetry
183
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
184
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
185
+ # commonly ignored for libraries.
186
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
187
+ #poetry.lock
188
+
189
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
190
+ __pypackages__/
191
+
192
+ # Celery stuff
193
+ celerybeat-schedule
194
+ celerybeat.pid
195
+
196
+ # SageMath parsed files
197
+ *.sage.py
198
+
199
+ # Environments
200
+ .env
201
+ .venv
202
+ env/
203
+ venv/
204
+ ENV/
205
+ env.bak/
206
+ venv.bak/
207
+
208
+ # Spyder project settings
209
+ .spyderproject
210
+ .spyproject
211
+
212
+ # Rope project settings
213
+ .ropeproject
214
+
215
+ # mkdocs documentation
216
+ /site
217
+
218
+ # mypy
219
+ .mypy_cache/
220
+ .dmypy.json
221
+ dmypy.json
222
+
223
+ # Pyre type checker
224
+ .pyre/
225
+
226
+ # pytype static type analyzer
227
+ .pytype/
228
+
229
+ # Cython debug symbols
230
+ cython_debug/
231
+
232
+ # PyCharm
233
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
234
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
235
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
236
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
237
+ #.idea/
README.md CHANGED
@@ -1,13 +1,16 @@
1
  ---
2
- title: Text To Amr
3
- emoji: 🚀
4
- colorFrom: purple
5
- colorTo: blue
6
  sdk: streamlit
7
  sdk_version: 1.17.0
8
  app_file: app.py
9
- pinned: false
10
  license: gpl-3.0
 
 
 
 
 
11
  ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Text To AMR
3
+ emoji: 🦀
4
+ colorFrom: green
5
+ colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.17.0
8
  app_file: app.py
9
+ pinned: true
10
  license: gpl-3.0
11
+ tags:
12
+ - natural language processing
13
+ - semantic parsing
14
+ - abstract meaning representation
15
+ - amr
16
  ---
 
 
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import Counter
2
+
3
+ import graphviz
4
+ from optimum.bettertransformer import BetterTransformer
5
+ import penman
6
+ from penman.models.noop import NoOpModel
7
+ from mbart_amr.constraints.constraints import AMRLogitsProcessor
8
+ from mbart_amr.data.linearization import linearized2penmanstr
9
+ from mbart_amr.data.tokenization import AMRMBartTokenizer
10
+ from transformers import MBartForConditionalGeneration, LogitsProcessorList
11
+
12
+ import streamlit as st
13
+
14
+ if "logits_processor" not in st.session_state:
15
+ st.session_state["logits_processor"] = None
16
+
17
+ if "tokenizer" not in st.session_state:
18
+ st.session_state["tokenizer"] = None
19
+
20
+ if "model" not in st.session_state:
21
+ st.session_state["tokenizer"] = AMRMBartTokenizer.from_pretrained("BramVanroy/mbart-en-to-amr", src_lang="en_XX")
22
+ st.session_state["model"] = MBartForConditionalGeneration.from_pretrained("BramVanroy/mbart-en-to-amr")
23
+ st.session_state["model"] = BetterTransformer.transform(st.session_state["model"], keep_original_model=False)
24
+ st.session_state["model"].resize_token_embeddings(len(st.session_state["tokenizer"]))
25
+ st.session_state["logits_processor"] = AMRLogitsProcessor(st.session_state["tokenizer"],
26
+ st.session_state["model"].config.max_length)
27
+
28
+ st.title("📝 Parse text into AMR")
29
+
30
+ text = st.text_input(label="Text to transform (en)")
31
+
32
+ if text and "model" in st.session_state:
33
+ gen_kwargs = {
34
+ "max_length": st.session_state["model"].config.max_length,
35
+ "num_beams": st.session_state["model"].config.num_beams,
36
+ "logits_processor": LogitsProcessorList([st.session_state["logits_processor"]]) if st.session_state[
37
+ "logits_processor"] else None
38
+ }
39
+
40
+ encoded = st.session_state["tokenizer"](text, return_tensors="pt")
41
+ generated = st.session_state["model"].generate(**encoded, **gen_kwargs)
42
+ linearized = st.session_state["tokenizer"].decode_and_fix(generated)[0]
43
+ penman_str = linearized2penmanstr(linearized)
44
+
45
+ try:
46
+ graph = penman.decode(penman_str, model=NoOpModel())
47
+ except Exception as exc:
48
+ st.write(f"The generated graph is not valid so it cannot be visualized correctly. Below is the closest attempt"
49
+ f" to a valid graph but note that this is invalid Penman.")
50
+ st.code(penman_str)
51
+
52
+ with st.expander("Error trace"):
53
+ st.write(exc)
54
+ else:
55
+ visualized = graphviz.Digraph(node_attr={"color": "#3aafa9", "style": "rounded,filled", "shape": "box",
56
+ "fontcolor": "white"})
57
+
58
+ # Count which names occur multiple times, e.g. t/talk-01 t2/talk-01
59
+ nodename_c = Counter([item[2] for item in graph.triples if item[1] == ":instance"])
60
+ # Generate initial nodenames for each variable, e.g. {"t": "talk-01", "t2": "talk-01"}
61
+ nodenames = {item[0]: item[2] for item in graph.triples if item[1] == ":instance"}
62
+
63
+ # Modify nodenames, so that the values are unique, e.g. {"t": "talk-01 (1)", "t2": "talk-01 (2)"}
64
+ # but only if the value occurs more than once
65
+ nodename_str_c = Counter()
66
+ for varname in nodenames:
67
+ nodename = nodenames[varname]
68
+ if nodename_c[nodename] > 1:
69
+ nodename_str_c[nodename] += 1
70
+ nodenames[varname] = f"{nodename} ({nodename_str_c[nodename]})"
71
+
72
+ def get_node_name(item: str):
73
+ return nodenames[item] if item in nodenames else item
74
+
75
+ try:
76
+ for triple in graph.triples:
77
+ if triple[1] == ":instance":
78
+ continue
79
+ else:
80
+ visualized.edge(get_node_name(triple[0]), get_node_name(triple[2]), label=triple[1])
81
+ except Exception as exc:
82
+ st.write("The generated graph is not valid so it cannot be visualized correctly. Below is the closest attempt"
83
+ " to a valid graph but note that this is probably invalid Penman.")
84
+ st.code(penman_str)
85
+ st.write("The initial linearized output of the model was:")
86
+ st.code(linearized)
87
+
88
+ with st.expander("Error trace"):
89
+ st.write(exc)
90
+ else:
91
+ st.subheader("Graph visualization")
92
+ st.graphviz_chart(visualized, use_container_width=True)
93
+
94
+ # Download
95
+ img = visualized.pipe(format="png")
96
+ st.download_button("Download graph", img, mime="image/png")
97
+
98
+ # Additional info
99
+ st.subheader("Model output and Penman graph")
100
+ st.write("The linearized output of the model (after some post-processing) is:")
101
+ st.code(linearized)
102
+ st.write("When converted into Penman, it looks like this:")
103
+ st.code(penman.encode(graph))
104
+
105
+
106
+ ########################
107
+ # Information, socials #
108
+ ########################
109
+ st.markdown("## Contact ✒️")
110
+
111
+ st.markdown("Would you like additional functionality in the demo? Or just want to get in touch?"
112
+ " Give me a shout on [Twitter](https://twitter.com/BramVanroy)"
113
+ " or add me on [LinkedIn](https://www.linkedin.com/in/bramvanroy/)!")
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ graphviz
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ graphviz
2
+ optimum>=1.6.0
3
+ penman
4
+ streamlit==1.17.0
5
+ torch>=1.13
6
+ git+https://github.com/BramVanroy/multilingual-text-to-amr@5859af0d870acd2f76d71e5a7d12fa35a7a2059b#egg=mbart-amr