Spaces:

Spico
/

Mirror

Runtime error

App Files Files Community

Spico committed on Nov 10, 2023

Commit

5953ef9

1 Parent(s): 2c6adeb

update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
.gitignore +187 -0
.pre-commit-config.yaml +19 -0
LICENSE +201 -0
Makefile +26 -0
README.md +136 -6
conf/Pretrain_excluded.yaml +51 -0
conf/Pretrain_v1.5.yaml +51 -0
conf/Pretrain_v1.5_woInstruction.yaml +51 -0
conf/Pretrain_woOverlapV2.yaml +51 -0
conf/ac/g1_dpspd.yaml +18 -0
conf/ac/g1_dpspd_fp16.yaml +18 -0
conf/cadec.yaml +3 -0
conf/hyperred.yaml +3 -0
conf/merge_all_data.yaml +6 -0
conf/merge_analysis_data.yaml +18 -0
conf/merge_analysis_data_woInstruction.yaml +18 -0
conf/merge_uie_data.yaml +18 -0
conf/mirror-ace05en.yaml +70 -0
conf/mirror-multi-task-pretrain.yaml +51 -0
conf/mrc.yaml +43 -0
conf/ner.yaml +45 -0
conf/nlu/cola.yaml +6 -0
conf/nlu/mnli.yaml +6 -0
conf/nlu/mrpc.yaml +3 -0
conf/nlu/plm.yaml +19 -0
conf/nlu/qnli.yaml +6 -0
conf/nlu/qqp.yaml +6 -0
conf/nlu/rte.yaml +6 -0
conf/nlu/squad_v2.yaml +4 -0
conf/nlu/sst-2.yaml +6 -0
conf/t-rex_pretrain.yaml +9 -0
conf/uie_data/absa_14lap.yaml +3 -0
conf/uie_data/absa_14res.yaml +3 -0
conf/uie_data/absa_15res.yaml +3 -0
conf/uie_data/absa_16res.yaml +3 -0
conf/uie_data/ent_ace04.yaml +3 -0
conf/uie_data/ent_ace05.yaml +3 -0
conf/uie_data/ent_conll03.yaml +3 -0
conf/uie_data/event_ace05.yaml +3 -0
conf/uie_data/event_casie.yaml +3 -0
conf/uie_data/fewshot.yaml +5 -0
conf/uie_data/merged.yaml +3 -0
conf/uie_data/rel_ace05.yaml +3 -0
conf/uie_data/rel_conll04.yaml +3 -0
conf/uie_data/rel_nyt.yaml +3 -0
conf/uie_data/rel_scierc.yaml +3 -0
conf/uie_data/wPretrain.yaml +19 -0
eval.py +0 -0
index.html +288 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+figs/mirror-frontpage.png filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,187 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+.DS_Store
+._.DS_Store
+debug.py
+outputs/
+resources/NER/msra/cache/
+resources/NER/msra/mrc/
+resources/NER/msra/formatted/
+resources/MRC/cmrc2018/cache/
+resources/MRC/cmrc2018/formatted/
+cache/*.cache
+resources/MRC/DuReader-*/
+resources/**/*.json
+resources/**/*.jsonl
+resources/**/*.zip
+resources/**/*.tsv
+resources/**/*.xml
+resources/**/raw/
+resources.tar.gz
+debug/
+debug.json
+mirror_outputs/
+sampled_stats.xlsx
+mirror_fewshot_outputs/
+conll03-100.jsonl
+tmp*/
+resources/

.pre-commit-config.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+repos:
+- repo: https://github.com/pycqa/isort
+  rev: 5.12.0
+  hooks:
+    - id: isort
+      name: isort (python)
+      args: ["--profile", "black", "--filter-files"]
+- repo: https://github.com/psf/black
+  rev: 22.12.0
+  hooks:
+    - id: black
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.4.0
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-yaml
+  - id: check-added-large-files
+    args: [--maxkb=900]

LICENSE ADDED Viewed

	@@ -0,0 +1,201 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+   1. Definitions.
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+   END OF TERMS AND CONDITIONS
+   APPENDIX: How to apply the Apache License to your work.
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+   Copyright 2023
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

Makefile ADDED Viewed

	@@ -0,0 +1,26 @@

+all: format clean test pre
+	echo 'finished'
+.PHONY: format
+format:
+	isort --profile black --filter-files .
+	black .
+.PHONY: test
+test:
+	coverage run --source src -m pytest -vv .
+	coverage report -m
+	flake8
+.PHONY: pre
+pre:
+	pre-commit run --all-files
+.PHONY: clean
+clean:
+	rm -rf build/
+	rm -rf dist/
+	rm -rf *.egg-info/
+	rm -f .coverage
+	rm -f coverage.xml
+	find . | grep -E '(__pycache__|\.pyc|\.pyo$$)' | xargs rm -rf

README.md CHANGED Viewed

@@ -1,13 +1,143 @@
 ---
 title: Mirror
-emoji: 👀
-colorFrom: green
-colorTo: red
 sdk: gradio
 sdk_version: 4.1.2
-app_file: app.py
-pinned: false
 license: apache-2.0
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
 title: Mirror
+emoji: 🪞
+colorFrom: blue
+colorTo: yellow
 sdk: gradio
 sdk_version: 4.1.2
+app_file: src/app/gradio_app.py
+pinned: true
 license: apache-2.0
 ---
+<div align="center">
+  <h1>🪞 Mirror: A Universal Framework for Various Information Extraction Tasks</h1>
+  <img src="figs/mirror-frontpage.png" width="300" alt="Magic mirror"><br>
+  <i>Image generated by DALLE 3</i><br>
+  <!-- <img src="figs/mirror-framework.png" alt="Mirror Framework"> -->
+  <a href="https://arxiv.org/abs/2311.05419" target="_blank">[Paper]</a> | <a href="https://huggingface.co/spaces/Spico/Mirror" target="_blank">[Demo]</a><br>
+  📃 Our paper has been accepted to the EMNLP23 main conference, <a href="http://arxiv.org/abs/2311.05419" target="_blank">check it out</a>!<br>
+</div>
+<hr>
+😎: This is the official implementation of [🪞Mirror](https://arxiv.org/abs/2311.05419) which supports *almost* all the Information Extraction tasks.
+The name, Mirror, comes from the classical story *Snow White and the Seven Dwarfs*, where a magic mirror knows everything in the world.
+We aim to build such a powerful tool for the IE community.
+## 🔥 Supported Tasks
+1. Named Entity Recognition
+2. Entity Relationship Extraction (Triplet Extraction)
+3. Event Extraction
+4. Aspect-based Sentiment Analysis
+5. Multi-span Extraction (e.g. Discontinuous NER)
+6. N-ary Extraction (e.g. Hyper Relation Extraction)
+7. Extractive Machine Reading Comprehension (MRC) and Question Answering
+8. Classification & Multi-choice MRC
+![System Comparison](figs/sys-comparison.png)
+## 🌴 Dependencies
+Python>=3.10
+```bash
+pip install -r requirements.txt
+```
+## 🚀 QuickStart
+### Pretrained Model Weights & Datasets
+Download the pretrained model weights & datasets from [[OSF]](https://osf.io/kwsm4/?view_only=5b66734d88cf456b93f17b6bac8a44fb) .
+No worries, it's an anonymous link used only for double-blind peer review.
+### Pretraining
+1. Download and unzip the pretraining corpus into `resources/Mirror/v1.4_sampled_v3/merged/all_excluded`
+2. Start the pretraining run:
+```bash
+CUDA_VISIBLE_DEVICES=0 rex train -m src.task -dc conf/Pretrain_excluded.yaml
+```
+### Fine-tuning
+⚠️ Due to data license constraints, some datasets (e.g. ACE04, ACE05) cannot be provided directly.
+1. Download and unzip the pretraining corpus into `resources/Mirror/v1.4_sampled_v3/merged/all_excluded`
+2. Download and unzip the fine-tuning datasets into `resources/Mirror/uie/`
+3. Start fine-tuning:
+```bash
+# UIE tasks
+CUDA_VISIBLE_DEVICES=0 bash scripts/single_task_wPTAllExcluded_wInstruction/run1.sh
+CUDA_VISIBLE_DEVICES=1 bash scripts/single_task_wPTAllExcluded_wInstruction/run2.sh
+CUDA_VISIBLE_DEVICES=2 bash scripts/single_task_wPTAllExcluded_wInstruction/run3.sh
+CUDA_VISIBLE_DEVICES=3 bash scripts/single_task_wPTAllExcluded_wInstruction/run4.sh
+# Multi-span and N-ary extraction
+CUDA_VISIBLE_DEVICES=4 bash scripts/single_task_wPTAllExcluded_wInstruction/run_new_tasks.sh
+# GLUE datasets
+CUDA_VISIBLE_DEVICES=5 bash scripts/single_task_wPTAllExcluded_wInstruction/glue.sh
+```
+### Analysis Experiments
+- Few-shot experiments : `scripts/run_fewshot.sh`. Collecting results: `python mirror_fewshot_outputs/get_avg_results.py`
+- Mirror w/ PT w/o Inst. : `scripts/single_task_wPTAllExcluded_woInstruction`
+- Mirror w/o PT w/ Inst. : `scripts/single_task_wo_pretrain`
+- Mirror w/o PT w/o Inst. : `scripts/single_task_wo_pretrain_wo_instruction`
+### Evaluation
+1. Set `task_dir` and `data_pairs` to the ones you want to evaluate. The default setting is to get results of Mirror<sub>direct</sub> on all downstream tasks.
+2. `CUDA_VISIBLE_DEVICES=0 python -m src.eval`
+### Demo
+1. Download and unzip the pretrained task dump into `mirror_outputs/Mirror_Pretrain_AllExcluded_2`
+2. Try our demo:
+```bash
+CUDA_VISIBLE_DEVICES=0 python -m src.app.api_backend
+```
+![Demo](figs/mirror-demo.gif)
+## 📋 Citation
+```bibtex
+@misc{zhu_mirror_2023,
+  shorttitle = {Mirror},
+  title = {Mirror: A Universal Framework for Various Information Extraction Tasks},
+  author = {Zhu, Tong and Ren, Junfei and Yu, Zijian and Wu, Mengsong and Zhang, Guoliang and Qu, Xiaoye and Chen, Wenliang and Wang, Zhefeng and Huai, Baoxing and Zhang, Min},
+  url = {http://arxiv.org/abs/2311.05419},
+  doi = {10.48550/arXiv.2311.05419},
+  urldate = {2023-11-10},
+  publisher = {arXiv},
+  month = nov,
+  year = {2023},
+  note = {arXiv:2311.05419 [cs]},
+  keywords = {Computer Science - Artificial Intelligence, Computer Science - Computation and Language},
+}
+```
+## 🛣️ Roadmap
+- [ ] Convert the current model into a Hugging Face version that supports loading from `transformers`, like other newly released LLMs.
+- [ ] Remove `Background` area, merge `TL`, `TP` into a single `T` token
+- [ ] Add more task data: keyword extraction, coreference resolution, FrameNet, WikiNER, T-Rex relation extraction dataset, etc.
+- [ ] Pre-train on all the data (including benchmarks) to build a nice out-of-the-box toolkit for universal IE.
+## 💌 Yours sincerely
+This project is licensed under Apache-2.0.
+We hope you enjoy it ~
+<hr>
+<div align="center">
+  <p>Mirror Team w/ 💖</p>
+</div>

conf/Pretrain_excluded.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: Mirror_Pretrain_AllExcluded_2
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+stream_mode: false
+# filepaths
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/v1.4_sampled_v3/merged/all_excluded
+output_dir: mirror_outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+base_model_path: null
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false
+save_every_ckpt: true
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 3
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: 10000
+train_batch_size: 8
+eval_batch_size: 8
+grad_accum_steps: 1
+learning_rate: !!float 2e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/Pretrain_v1.5.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: Mirror_Pretrain_DataV1.5_2
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+stream_mode: false
+# filepaths
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/v1.5/merged/t-rex-200k
+output_dir: mirror_outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+base_model_path: null
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false
+save_every_ckpt: true
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 3
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: 10000
+train_batch_size: 8
+eval_batch_size: 8
+grad_accum_steps: 1
+learning_rate: !!float 2e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/Pretrain_v1.5_woInstruction.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: Mirror_Pretrain_DataV1.5_woInstruction
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+stream_mode: false
+# filepaths
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/v1.5/merged/t-rex-200k-woInstruction/remove_instruction
+output_dir: mirror_outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+base_model_path: null
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false
+save_every_ckpt: true
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 3
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: 10000
+train_batch_size: 8
+eval_batch_size: 8
+grad_accum_steps: 1
+learning_rate: !!float 2e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/Pretrain_woOverlapV2.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: Mirror_Pretrain_woOverlapV2
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+stream_mode: false
+# filepaths
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/v1.4_sampled_v3/merged/all
+output_dir: mirror_outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train_wo_overlap_v2.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+base_model_path: null
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false
+save_every_ckpt: true
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 3
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: 10000
+train_batch_size: 8
+eval_batch_size: 8
+grad_accum_steps: 1
+learning_rate: !!float 2e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/ac/g1_dpspd.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+compute_environment: LOCAL_MACHINE
+deepspeed_config:
+  gradient_accumulation_steps: 1
+  zero3_init_flag: false
+  zero_stage: 1
+distributed_type: DEEPSPEED
+downcast_bf16: 'no'
+machine_rank: 0
+main_training_function: main
+mixed_precision: 'no'
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

conf/ac/g1_dpspd_fp16.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+compute_environment: LOCAL_MACHINE
+deepspeed_config:
+  gradient_accumulation_steps: 4
+  zero3_init_flag: false
+  zero_stage: 1
+distributed_type: DEEPSPEED
+downcast_bf16: 'no'
+machine_rank: 0
+main_training_function: main
+mixed_precision: fp16
+num_machines: 1
+num_processes: 1
+rdzv_backend: static
+same_network: true
+tpu_env: []
+tpu_use_cluster: false
+tpu_use_sudo: false
+use_cpu: false

conf/cadec.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_DiscontinuousNER_CADEC
+data_dir: resources/Mirror/new_abilities_v2/cadec/new
+best_metric_field: discontinuous_ent.micro.f1

conf/hyperred.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_HyperRel_HyperRED
+data_dir: resources/Mirror/new_abilities_v2/HyperRED/new
+best_metric_field: hyper_rel.micro.f1

conf/merge_all_data.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: InstructBert_MergedAllData
+data_dir: resources/Mirror/v1.3/merged_pretrained_data
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: resources/Mirror/v1.3/uie_data/dev.jsonl
+test_filepath: resources/Mirror/v1.3/uie_data/test.jsonl
+num_epochs: 1

conf/merge_analysis_data.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+task_name: Mirror_MultiTask_Analysis
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/uie/merged_analysis
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+num_epochs: 20
+epoch_patience: 3
+regenerate_cache: true
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true
+base_model_path: null

conf/merge_analysis_data_woInstruction.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+task_name: Mirror_MultiTask_Analysis_woInstruction
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/uie/merged_analysis/remove_instruction
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+num_epochs: 20
+epoch_patience: 3
+regenerate_cache: true
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true
+base_model_path: null

conf/merge_uie_data.yaml ADDED Viewed

	@@ -0,0 +1,18 @@

+task_name: Mirror_woPT_NewMergedUIEData_woOverlap
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/uie/merged
+train_filepath: ${data_dir}/train_wo_overlap.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+num_epochs: 20
+epoch_patience: 3
+regenerate_cache: true
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true
+base_model_path: null

conf/mirror-ace05en.yaml ADDED Viewed

	@@ -0,0 +1,70 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: InstructBert_TagSpan_DebertaV3Base_ACE05ENPlus
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+# filepaths
+plm_dir: microsoft/deberta-v3-base
+# plm_dir: bert-base-cased
+# data_dir: resources/Mirror/Tasks/EE/ACE05-EN
+# data_dir: resources/Mirror/Tasks/RE/merged-20230502-2340-v1
+# data_dir: resources/Mirror/Tasks/RE/merged-20230502-2358-v2-woADE
+# data_dir: resources/Mirror/Tasks/EE/ACE05-EN-labelmap
+data_dir: resources/Mirror/v1.3/event/en/ACE05-EN-plus/fixed_instructed
+output_dir: outputs
+task_dir: ${output_dir}/${task_name}
+# train_filepath: ${data_dir}/ACE2005_plus_train.jsonl
+# dev_filepath: ${data_dir}/ACE2005_plus_dev.jsonl
+# test_filepath: ${data_dir}/ACE2005_plus_test.jsonl
+# train_filepath: ${data_dir}/ACE2005_oneie_NER_train.jsonl
+# dev_filepath: ${data_dir}/ACE2005_oneie_NER_dev.jsonl
+# test_filepath: ${data_dir}/ACE2005_oneie_NER_test.jsonl
+# train_filepath: ${data_dir}/ACE2005_oneie_RE_train.jsonl
+# dev_filepath: ${data_dir}/ACE2005_oneie_RE_dev.jsonl
+# test_filepath: ${data_dir}/ACE2005_oneie_RE_test.jsonl
+# train_filepath: ${data_dir}/ACE2005_oneie_EE_train.jsonl
+# dev_filepath: ${data_dir}/ACE2005_oneie_EE_dev.jsonl
+# test_filepath: ${data_dir}/ACE2005_oneie_EE_test.jsonl
+# train_filepath: ${data_dir}/ACE2005_oneie_train.jsonl
+# dev_filepath: ${data_dir}/ACE2005_oneie_dev.jsonl
+# test_filepath: ${data_dir}/ACE2005_oneie_test.jsonl
+# train_filepath: ${data_dir}/train.jsonl
+# dev_filepath: ${data_dir}/dev.jsonl
+# test_filepath: ${data_dir}/test.jsonl
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+eval_on_data: [dev, test]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true
+save_every_ckpt: false
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 50
+epoch_patience: 5
+train_batch_size: 32
+eval_batch_size: 32
+learning_rate: !!float 3e-5
+other_learning_rate: !!float 3e-5
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/mirror-multi-task-pretrain.yaml ADDED Viewed

	@@ -0,0 +1,51 @@

+# task
+task_type: SchemaGuidedInstructBertTask
+task_name: MirrorLarge_SamplingPretrain_woLowResource_woOverlap
+comment: '~~content as label, (start, end + 1) span'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+label_span: tag  # tag `[LM]` or content `person`
+mode: span  # w2 (1,2,3) or span (1,3)
+stream_mode: false
+# filepaths
+plm_dir: microsoft/deberta-v3-large
+data_dir: resources/Mirror/v1.4_sampled_v3/merged/woLowResource
+output_dir: mirror_outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train_wo_overlap.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: false
+# training
+random_seed: 1227
+base_model_path: null
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false
+save_every_ckpt: true
+save_best_ckpt: true
+warmup_proportion: 0.1
+num_epochs: 1
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: 3000
+train_batch_size: 8
+eval_batch_size: 8
+grad_accum_steps: 1
+learning_rate: !!float 2e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+use_rope: true
+biaffine_size: 512

conf/mrc.yaml ADDED Viewed

	@@ -0,0 +1,43 @@

+# task
+task_type: MrcQaTask
+task_name: Mirror_RobertaBaseWwm_Cons_MsraMrc
+comment: 'GlobalPointer with RoPE'
+# data preprocessing
+max_seq_len: 512
+debug_mode: false
+mode: cons
+# filepaths
+plm_dir: hfl/chinese-roberta-wwm-ext
+data_dir: resources/NER/msra/mrc
+output_dir: outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/test.jsonl
+test_filepath: ${data_dir}/test.jsonl
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: true
+# training
+random_seed: 1227
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: micro.f1
+final_eval_on_test: true
+warmup_proportion: 0.1
+step_eval_interval: 20000
+step_patience: -1
+num_epochs: 5
+epoch_patience: 5
+train_batch_size: 32
+eval_batch_size: 64
+learning_rate: !!float 5e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+# model
+dropout: 0.3
+biaffine_size: 512

conf/ner.yaml ADDED Viewed

	@@ -0,0 +1,45 @@

+# task
+task_type: MrcTaggingTask
+task_name: debug-Mirror_W2_MSRAv2_NER_FreezeBertEmbAnd0-3_bs64
+comment: 'bert mrc w/ w2ner for NER'
+# data preprocessing
+max_seq_len: 300
+negative_sample_prob: 1.0
+debug_mode: false
+mode: w2
+# filepaths
+base_model_path: outputs/RobertaBase_data20230314v2/ckpt/MrcGlobalPointerModel.best.pth
+plm_dir: hfl/chinese-roberta-wwm-ext
+data_dir: resources/NER/MSRA_v2/formatted
+output_dir: outputs
+task_dir: ${output_dir}/${task_name}
+train_filepath: ${data_dir}/train.char.bmes.jsonl
+dev_filepath: ${data_dir}/dev.char.bmes.jsonl
+test_filepath: ${data_dir}/test.char.bmes.jsonl
+ent_type2query_filepath: ${data_dir}/query.json
+dump_cache_dir: ${task_dir}/cache
+regenerate_cache: true
+# training
+random_seed: 1227
+eval_on_data: [dev, test]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: micro.f1
+final_eval_on_test: true
+warmup_proportion: 0.1
+num_epochs: 5
+epoch_patience: 5
+train_batch_size: 64
+eval_batch_size: 128
+learning_rate: !!float 5e-5
+other_learning_rate: !!float 1e-4
+max_grad_norm: 1.0
+weight_decay: 0.1
+# model
+dropout: 0.3
+biaffine_size: 512

conf/nlu/cola.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_CoLA
+data_dir: resources/Mirror/v1.3/cls/en/CoLA/formated
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/dev.jsonl
+best_metric_field: cls.mcc

conf/nlu/mnli.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_MNLI
+data_dir: resources/Mirror/v1.3/cls/en/MNLI/formated
+train_filepath: ${data_dir}/MNLI_train.jsonl
+dev_filepath: ${data_dir}/MNLI_dev.jsonl
+test_filepath: ${data_dir}/MNLI_dev.jsonl
+best_metric_field: cls.acc

conf/nlu/mrpc.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Cls_MRPC
+data_dir: resources/Mirror/v1.3/cls/en/MRPC/formated
+best_metric_field: cls.acc

conf/nlu/plm.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+plm_dir: microsoft/deberta-v3-large
+base_model_path: mirror_outputs/Mirror_Pretrain_AllExcluded_2/ckpt/SchemaGuidedInstructBertModel.best.pth
+stream_mode: false
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+num_epochs: 5
+epoch_patience: -1
+num_steps: -1
+step_patience: -1
+step_eval_interval: -1
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true

conf/nlu/qnli.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_QNLI
+data_dir: resources/Mirror/v1.3/cls/en/QNLI/processed
+train_filepath: ${data_dir}/QNLI_train.jsonl
+dev_filepath: ${data_dir}/QNLI_dev.jsonl
+test_filepath: ${data_dir}/QNLI_dev.jsonl
+best_metric_field: cls.acc

conf/nlu/qqp.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_QQP
+data_dir: resources/Mirror/v1.3/cls/en/QQP/new
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/dev.jsonl
+best_metric_field: cls.acc

conf/nlu/rte.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_RTE
+data_dir: resources/Mirror/v1.3/cls/en/RTE/formated
+train_filepath: ${data_dir}/RTE_train.jsonl
+dev_filepath: ${data_dir}/RTE_dev.jsonl
+test_filepath: ${data_dir}/RTE_dev.jsonl
+best_metric_field: cls.acc

conf/nlu/squad_v2.yaml ADDED Viewed

	@@ -0,0 +1,4 @@

+task_name: Mirror_SingleTask_MRC_SQuADv2
+data_dir: resources/Mirror/v1.3/span/en/squad_v2
+test_filepath: ${data_dir}/dev.jsonl
+best_metric_field: span.f1.f1

conf/nlu/sst-2.yaml ADDED Viewed

	@@ -0,0 +1,6 @@

+task_name: Mirror_SingleTask_Cls_SST2
+data_dir: resources/Mirror/v1.3/cls/en/SST-2/instructed
+train_filepath: ${data_dir}/SST-2_train.jsonl
+dev_filepath: ${data_dir}/SST-2_dev.jsonl
+test_filepath: ${data_dir}/SST-2_dev.jsonl
+best_metric_field: cls.acc

conf/t-rex_pretrain.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+task_name: InstructBert_TagSpan_DebertaV3Base_TRExPretrain
+data_dir: resources/Mirror/v1.3/rel/en/T-REx/instructed
+train_filepath: ${data_dir}/t-rex.udi.fix.jsonl
+num_epochs: 3
+eval_on_data: [train]
+select_best_on_data: train
+select_best_by_key: loss
+final_eval_on_test: false

conf/uie_data/absa_14lap.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_ABSA_14lap
+data_dir: resources/Mirror/uie/absa/14lap
+best_metric_field: rel.rel.micro.f1

conf/uie_data/absa_14res.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_ABSA_14res
+data_dir: resources/Mirror/uie/absa/14res
+best_metric_field: rel.rel.micro.f1

conf/uie_data/absa_15res.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_ABSA_15res
+data_dir: resources/Mirror/uie/absa/15res
+best_metric_field: rel.rel.micro.f1

conf/uie_data/absa_16res.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_ABSA_16res
+data_dir: resources/Mirror/uie/absa/16res
+best_metric_field: rel.rel.micro.f1

conf/uie_data/ent_ace04.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Ent_ACE04
+data_dir: resources/Mirror/uie/ent/ace04
+best_metric_field: ent.micro.f1

conf/uie_data/ent_ace05.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Ent_ACE05
+data_dir: resources/Mirror/uie/ent/ace05
+best_metric_field: ent.micro.f1

conf/uie_data/ent_conll03.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Ent_CoNLL03
+data_dir: resources/Mirror/uie/ent/conll03
+best_metric_field: ent.micro.f1

conf/uie_data/event_ace05.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Event_ACE05
+data_dir: resources/Mirror/uie/event/ace05-evt
+best_metric_field: event.arg_cls.f1

conf/uie_data/event_casie.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Event_CASIE
+data_dir: resources/Mirror/uie/event/casie
+best_metric_field: event.arg_cls.f1

conf/uie_data/fewshot.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+num_epochs: 200
+epoch_patience: 10
+output_dir: mirror_fewshot_outputs
+base_model_path: mirror_outputs/Mirror_Pretrain_AllExcluded_2/ckpt/SchemaGuidedInstructBertModel.best.pth
+save_every_ckpt: false

conf/uie_data/merged.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_MultiTask_UIE
+data_dir: resources/Mirror/uie/merged
+best_metric_field: general_spans.micro.f1

conf/uie_data/rel_ace05.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Rel_ACE05
+data_dir: resources/Mirror/uie/rel/ace05-rel
+best_metric_field: rel.rel.micro.f1

conf/uie_data/rel_conll04.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Rel_CoNLL04
+data_dir: resources/Mirror/uie/rel/conll04
+best_metric_field: rel.rel.micro.f1

conf/uie_data/rel_nyt.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Rel_NYT
+data_dir: resources/Mirror/uie/rel/nyt
+best_metric_field: rel.rel.micro.f1

conf/uie_data/rel_scierc.yaml ADDED Viewed

	@@ -0,0 +1,3 @@

+task_name: Mirror_SingleTask_Rel_SciERC
+data_dir: resources/Mirror/uie/rel/scierc
+best_metric_field: rel.rel.micro.f1

conf/uie_data/wPretrain.yaml ADDED Viewed

	@@ -0,0 +1,19 @@

+plm_dir: microsoft/deberta-v3-large
+base_model_path: mirror_outputs/Mirror_Pretrain_AllExcluded_2/ckpt/SchemaGuidedInstructBertModel.best.pth
+stream_mode: false
+train_filepath: ${data_dir}/train.jsonl
+dev_filepath: ${data_dir}/dev.jsonl
+test_filepath: ${data_dir}/test.jsonl
+num_epochs: 20
+epoch_patience: 3
+num_steps: -1
+step_patience: -1
+step_eval_interval: -1
+eval_on_data: [dev]
+select_best_on_data: dev
+select_best_by_key: metric
+best_metric_field: general_spans.micro.f1
+final_eval_on_test: true

eval.py ADDED Viewed

File without changes

index.html ADDED Viewed

	@@ -0,0 +1,288 @@

+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <meta http-equiv="X-UA-Compatible" content="IE=edge">
+  <meta name="viewport" content="width=device-width, initial-scale=1.0">
+  <title>🪞Mirror</title>
+  <link rel="stylesheet" href="https://unpkg.com/boltcss/bolt.min.css">
+  <script type="importmap">
+    {
+      "imports": {
+        "vue": "https://unpkg.com/vue@3/dist/vue.esm-browser.js"
+      }
+    }
+  </script>
+  <style>
+    body {
+      max-width: 800px;
+      margin: 40px auto;
+      padding: 0 20px;
+    }
+    .form-group {
+      display: flex;
+      flex-direction: row;
+      justify-content: flex-start;
+      align-items: center;
+    }
+    label {
+      margin-right: 1rem;
+    }
+    button {
+      margin: 0.2rem 0.2rem;
+    }
+    button:hover {
+      background-color: #dbdbdb;
+    }
+    footer {
+      text-align: center;
+      margin-top: 2rem;
+    }
+    input {
+      width: 100%;
+    }
+    .button-group {
+      margin-top: 1rem;
+      margin-bottom: 1rem;
+    }
+    .submit-button {
+      background-color: #ffc83d;
+      color: #d67d00;
+      font-weight: bold;
+    }
+    .lc-button {
+      background-color: #c4e5be;
+    }
+    .lm-button {
+      background-color: #dae7fb;
+    }
+    .lr-button {
+      background-color: #fff3ce;
+    }
+    .submit-button:hover {
+      background-color: #ffc83dc0;
+    }
+    .download-button {
+      background-color: #98ca56;
+      color: white;
+      font-weight: bold;
+    }
+    .download-button:hover {
+      background-color: #98ca56d1;
+    }
+    .output-title {
+      margin-top: 2rem;
+      margin-bottom: 0;
+      display: block;
+      background-color: #98ca56;
+      color: white;
+      font-weight: bold;
+      font-size: large;
+      padding: 6px 15px;
+      border-top-left-radius: 6px;
+      border-top-right-radius: 6px;
+    }
+    .output-box {
+      margin-top: 0;
+      padding: 6px 15px;
+      background-color: white;
+      border: 2px solid #98ca56;
+      border-bottom-left-radius: 6px;
+      border-bottom-right-radius: 6px;
+    }
+  </style>
+</head>
+<body>
+  <header>
+    <h1>🪞Mirror</h1>
+    <p>
+      🪞Mirror can help you deal with a wide range of Natural Language Understanding and Information Extraction tasks.
+    </p>
+  </header>
+  <main>
+    <div id="app">
+      <div>
+        <label for="instruction"><strong>Instruction</strong></label>
+        <input id="instruction" type="text" v-model="instruction" placeholder="Mirror mirror tell me ..." size="200">
+      </div>
+      <div>
+        <label for="schema"><strong>Schema Labels</strong></label>
+        <p>Split with <code>#</code> for multiple inputs</p>
+        <p>For entities, relations or classification, input <code>{"ent|rel|cls": ["cls1", "type2"]}</code> .</p>
+        <p>For events and hyper relations, input <code>{"type": ["role1", "role2"]}</code> .</p>
+        <input id="schema" type="text" v-model="schema" size="200">
+        <!-- <div>
+          <button @click.prevent="addCls">Class</button>
+          <button @click.prevent="addEnt">Entity</button>
+          <button @click.prevent="addDisconEnt">Discontinuous Entity</button>
+          <button @click.prevent="addRel">Relation</button>
+          <button @click.prevent="addEvent">Event Type</button>
+          <button @click.prevent="addHyperRel">Hyper Relation</button>
+        </div> -->
+      </div>
+      <div>
+        <label for="text"><strong>Text</strong></label>
+        <input id="text" type="text" v-model="text" size="200">
+      </div>
+      <!-- <div>
+        <label for="background"><strong>Background</strong></label>
+        <input id="background" type="text" v-model="background" size="200">
+      </div> -->
+      <div class="button-group">
+        <button @click.prevent="reset">Reset</button>
+        <button @click.prevent="clearOutput">Clear Output</button>
+        <button class="submit-button" @click.prevent="getResults">Ask Mirror</button>
+      </div>
+      <div v-if="timerHandler">
+        <p>⏱️ {{ searchSecondsString }}</p>
+      </div>
+      <div>
+        <div v-if="isNotEmptyObj(results)">
+          <label for="output"><strong>Output</strong></label>
+          <table>
+            <thead>
+              <th>Item</th>
+              <th>Predicted</th>
+            </thead>
+            <tbody>
+              <tr v-for="(value, key, index) in results" :key="index">
+                <template v-if="value.length">
+                  <td>{{ key }}</td>
+                  <td>{{ value }}</td>
+                </template>
+              </tr>
+            </tbody>
+          </table>
+        </div>
+      </div>
+    </div>
+  </main>
+  <footer>
+    <hr>
+    Made by Mirror Team w/ 💖
+  </footer>
+  <script type="module">
+    import { createApp, ref, computed, toRaw, watch } from 'vue'
+    createApp(
+      {
+        setup() {
+          const instruction = ref("")
+          const text = ref("")
+          const background = ref("")
+          const schema = ref("{}")
+          const results = ref({})
+          const timerHandler = ref(0)
+          const searchSeconds = ref(0.0)
+          const searchSecondsString = computed(() => {
+            return `${searchSeconds.value.toFixed(1)}s`
+          })
+          function isNotEmptyObj(obj) {
+            return Object.keys(obj).length > 0
+          }
+          function clearOutput() {
+            timerHandler.value = 0
+            results.value = {}
+          }
+          function reset() {
+            schema.value = "{}"
+            clearOutput()
+          }
+          function startTimer() {
+            searchSeconds.value = 0.0
+            timerHandler.value = setInterval(() => {
+              searchSeconds.value += 0.1
+            }, 100)
+          }
+          function endTimer() {
+            if (timerHandler.value > 0) {
+              clearInterval(timerHandler.value)
+            }
+          }
+          function getResults() {
+            clearOutput()
+            startTimer()
+            const data = {
+              "id": Date.now().toString(),
+              "instruction": instruction.value,
+              "schema": JSON.parse(schema.value),
+              "text": text.value,
+              "background": background.value,
+              "ans": {},
+            }
+            const postData = JSON.stringify({
+              "data": [data],
+            })
+            fetch(
+              "/process",
+              {
+                method: "POST",
+                headers: {
+                  'Content-Type': 'application/json',
+                },
+                body: postData,
+              }
+            )
+              .then((response) => response.json())
+              .then((json) => {
+                if (json["ok"] === false) {
+                  alert(json["msg"])
+                } else {
+                  results.value = json["results"][0]["results"]
+                }
+              })
+              .catch((err) => { alert(err) })
+              .finally(() => endTimer())
+          }
+          return {
+            instruction,
+            text,
+            background,
+            schema,
+            results,
+            reset,
+            clearOutput,
+            getResults,
+            searchSecondsString,
+            timerHandler,
+            isNotEmptyObj,
+          }
+        }
+      }
+    ).mount("#app")
+  </script>
+</body>
+</html>