AmitGarage committed on
Commit
7cb24dc
1 Parent(s): 1ec71e5

Upload 2 files

Browse files
Files changed (2) hide show
  1. project.yml +173 -0
  2. requirements.txt +9 -0
project.yml ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ title: "Detecting entities in Medical Records with PyTorch"
2
+ description: "This project uses the [i2b2 (n2c2) 2011 Challenge Dataset](https://portal.dbmi.hms.harvard.edu/projects/n2c2-nlp/) to bootstrap a PyTorch NER model to detect entities in Medical Records. It also demonstrates how to anonymize medical records for annotators in [Prodigy](https://prodi.gy)."
3
+ # Variables can be referenced across the project.yml using ${vars.var_name}
4
+ vars:
5
+ beth_train_tarfile: i2b2_Beth_Train_Release.tar.gz
6
+ partners_train_tarfile: i2b2_Partners_Train_Release.tar.gz
7
+ test_zipfile: Task_1C.zip
8
+ spacy_config: "spacy_config.cfg"
9
+ config: "config.cfg"
10
+ config_trf: "config_trf.cfg"
11
+ config_trf_resume: "config_trf_resume.cfg"
12
+ config_trf_test: "config_trf_test.cfg"
13
+ name: "ner_pytorch_medical"
14
+ version: "0.0.0"
15
+ train: "train"
16
+ dev: "dev"
17
+ test: "test"
18
+ prodigy:
19
+ dataset: "pytorch_ner_medical_correct_anonymous"
20
+ source: "assets/mock_notes.jsonl"
21
+ labels: "person,problem,pronoun,test,treatment"
22
+ azure:
23
+ text_analytics_key: "YOUR_API_KEY"
24
+ text_analytics_base_url: "https://westus2.api.cognitive.microsoft.com/"
25
+
26
+
27
+ # These are the directories that the project needs. The project CLI will make
28
+ # sure that they always exist.
29
+ directories: ["assets", "training", "configs", "scripts", "corpus", "packages"]
30
+
31
+ # Assets that should be downloaded or available in the directory. You can replace
32
+ # this with your own input data.
33
+ assets:
34
+ - dest: "assets/n2c2_2011/${vars.beth_train_tarfile}"
35
+ description: "Tarfile containing original challenge data from the Beth training data split"
36
+ - dest: "assets/n2c2_2011/${vars.partners_train_tarfile}"
37
+ description: "Tarfile containing original challenge data from the Partners training data split"
38
+ - dest: "assets/n2c2_2011/${vars.test_zipfile}"
39
+ description: "Zipfile containing original challenge test data"
40
+ - dest: "assets/mock_notes.jsonl"
41
+ description: "JSONL file with raw mock notes to annotate in prodigy"
42
+
43
+ # Workflows are sequences of commands (see below) executed in order. You can
44
 + # run them via "spacy project run [workflow]". If a command's inputs/outputs
45
+ # haven't changed, it won't be re-run.
46
+ workflows:
47
+ all:
48
+ - preprocess
49
+ - train
50
+ - evaluate
51
+
52
+ # Project commands, specified in a style similar to CI config files (e.g. Azure
53
+ # pipelines). The name is the command name that lets you trigger the command
54
+ # via "spacy project run [command] [path]". The help message is optional and
55
+ # shown when executing "spacy project run [optional command] [path] --help".
56
+ commands:
57
+ - name: "preprocess"
58
+ help: "Convert the data to spaCy's binary format"
59
+ script:
60
+ - "python scripts/preprocess.py assets/n2c2_2011 corpus"
61
+ deps:
62
+ - "assets/n2c2_2011/${vars.beth_train_tarfile}"
63
+ - "assets/n2c2_2011/${vars.partners_train_tarfile}"
64
+ - "assets/n2c2_2011/${vars.test_zipfile}"
65
+ - "scripts/preprocess.py"
66
+ outputs:
67
+ - "corpus/${vars.train}.spacy"
68
+ - "corpus/${vars.dev}.spacy"
69
+ - "corpus/${vars.test}.spacy"
70
+
71
+ - name: "train"
72
+ help: "Train a custom PyTorch named entity recognition model"
73
+ script:
74
+ - "python -m spacy train configs/${vars.config} --output training/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
75
+ deps:
76
+ - "corpus/${vars.train}.spacy"
77
+ - "corpus/${vars.dev}.spacy"
78
+ outputs:
79
+ - "training/model-best"
80
+
81
+ - name: "train-trf"
82
+ help: "Train a custom PyTorch named entity recognition model with transformer"
83
+ script:
84
+ - "python -m spacy train configs/${vars.config_trf} --output training_trf/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
85
+ deps:
86
+ - "corpus/${vars.train}.spacy"
87
+ - "corpus/${vars.dev}.spacy"
88
+ outputs:
89
+ - "training_trf/model-best"
90
+
91
+ - name: "train-trf-test"
92
 + help: "Train a custom PyTorch named entity recognition model with transformer (test configuration)"
93
+ script:
94
+ - "python -m spacy train configs/${vars.config_trf_test} --output training_trf_test/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
95
+ deps:
96
+ - "corpus/${vars.train}.spacy"
97
+ - "corpus/${vars.dev}.spacy"
98
+ outputs:
99
 + - "training_trf_test/model-best"
100
+
101
+ - name: "train-trf-resume"
102
 + help: "Resume training a custom PyTorch named entity recognition model with transformer"
103
+ script:
104
+ - "python -m spacy train configs/${vars.config_trf_resume} --output training_trf/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
105
+ deps:
106
+ - "corpus/${vars.train}.spacy"
107
+ - "corpus/${vars.dev}.spacy"
108
+ outputs:
109
+ - "training_trf/model-best"
110
+
111
+ - name: "evaluate"
112
+ help: "Evaluate the custom PyTorch model and export metrics"
113
+ script:
114
+ - "python -m spacy evaluate training/model-best corpus/${vars.test}.spacy --output training/metrics.json --code scripts/custom_functions.py"
115
+ deps:
116
+ - "corpus/${vars.test}.spacy"
117
+ - "training/model-best"
118
+ outputs:
119
+ - "training/metrics.json"
120
+
121
+ - name: "evaluate-trf"
122
+ help: "Evaluate the custom PyTorch model and export metrics"
123
+ script:
124
+ - "python -m spacy evaluate training_trf/model-best corpus/${vars.test}.spacy --output training_trf/metrics.json --code scripts/custom_functions.py --gpu-id 0"
125
+ deps:
126
+ - "corpus/${vars.test}.spacy"
127
+ - "training_trf/model-best"
128
+ outputs:
129
+ - "training_trf/metrics.json"
130
+
131
+ - name: "evaluate-trf-test"
132
+ help: "Evaluate the custom PyTorch model and export metrics"
133
+ script:
134
+ - "python -m spacy evaluate training_trf_test/model-best corpus/${vars.test}.spacy --output training_trf_test/metrics.json --code scripts/custom_functions.py --gpu-id 0"
135
+ deps:
136
+ - "corpus/${vars.test}.spacy"
137
+ - "training_trf_test/model-best"
138
+ outputs:
139
+ - "training_trf_test/metrics.json"
140
+
141
+ - name: package
142
+ help: "Package the trained model so it can be installed"
143
+ script:
144
+ - "python -m spacy package training/model-best packages --name ${vars.name} --version ${vars.version} --force --code scripts/custom_functions.py"
145
+ deps:
146
+ - "training/model-best"
147
+ outputs_no_cache:
148
+ - "packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}.tar.gz"
149
+
150
+ - name: package-trf
151
+ help: "Package the trained model so it can be installed"
152
+ script:
153
+ - "python -m spacy package training_trf/model-best packages --name ${vars.name} --version ${vars.version} --force --code scripts/torch_ner_model.py,scripts/torch_ner_pipe.py"
154
+ deps:
155
+ - "training_trf/model-best"
156
+ outputs_no_cache:
157
+ - "packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}.tar.gz"
158
+
159
+ - name: visualize-model
160
+ help: Visualize the model's output interactively using Streamlit
161
+ script:
162
+ - "streamlit run scripts/visualize_model.py training_trf/model-best \"The patient had surgery.\""
163
+ deps:
164
+ - "scripts/visualize_model.py"
165
 + - "training_trf/model-best"
166
+
167
+ - name: annotate
168
+ help: Run the custom prodigy recipe to anonymize data for the annotator and update the PyTorch NER model
169
+ script:
170
 + - "prodigy ner.correct.anonymous ${vars.prodigy.dataset} training/model-best ${vars.prodigy.source} --text-analytics-key ${vars.azure.text_analytics_key} --text-analytics-base-url ${vars.azure.text_analytics_base_url} --label ${vars.prodigy.labels} --update -F scripts/prodigy/recipes.py"
171
+ deps:
172
+ - "scripts/prodigy/recipes.py"
173
+ - "training/model-best"
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ spacy-streamlit>=1.0.0a0
2
+ spacy-transformers
3
+ torch==1.8.1
4
+ streamlit
5
+ presidio-analyzer==2.2.1
6
+ presidio-anonymizer==2.2.1
7
+ spacy==3.0.5
8
 + spacy[transformers]==3.0.5
9
+ cupy-cuda11x