Spaces:

AmitGarage
/

Pytorch_clinical_NER

Runtime error

App Files Files Community

Pytorch_clinical_NER / project.yml

AmitGarage

Upload 2 files

7cb24dc almost 2 years ago

raw

history blame

7.71 kB

	# Project metadata: a short title and a longer description (Markdown links).
	title: 'Detecting entities in Medical Records with PyTorch'
	description: 'This project uses the [i2b2 (n2c2) 2011 Challenge Dataset](https://portal.dbmi.hms.harvard.edu/projects/n2c2-nlp/) to bootstrap a PyTorch NER model to detect entities in Medical Records. It also demonstrates how to anonymize medical records for annotators in [Prodigy](https://prodi.gy).'
	# Variables can be referenced across the project.yml using ${vars.var_name}.
	# Nested entries are addressed with dotted paths, e.g. ${vars.prodigy.dataset}.
	vars:
	  # Archive file names, looked up under assets/n2c2_2011/ by "assets" and
	  # by the "preprocess" command.
	  beth_train_tarfile: "i2b2_Beth_Train_Release.tar.gz"
	  partners_train_tarfile: "i2b2_Partners_Train_Release.tar.gz"
	  test_zipfile: "Task_1C.zip"
	  # Training config file names; the train* commands read them as
	  # configs/${vars.<key>}. spacy_config is not referenced by any command
	  # in this file.
	  spacy_config: "spacy_config.cfg"
	  config: "config.cfg"
	  config_trf: "config_trf.cfg"
	  config_trf_resume: "config_trf_resume.cfg"
	  config_trf_test: "config_trf_test.cfg"
	  # Package name and version, passed to "spacy package" by the package* commands.
	  name: "ner_pytorch_medical"
	  version: "0.0.0"
	  # Base names of the corpus splits (corpus/<name>.spacy).
	  train: "train"
	  dev: "dev"
	  test: "test"
	  # Settings consumed by the "annotate" command.
	  prodigy:
	    dataset: "pytorch_ner_medical_correct_anonymous"
	    source: "assets/mock_notes.jsonl"
	    labels: "person,problem,pronoun,test,treatment"
	  azure:
	    # Placeholder value: supply a real key before running "annotate" and
	    # avoid committing it to version control.
	    text_analytics_key: "YOUR_API_KEY"
	    text_analytics_base_url: "https://westus2.api.cognitive.microsoft.com/"


	# Directories required by the project. The project CLI makes sure each of
	# them exists.
	directories:
	- "assets"
	- "training"
	- "configs"
	- "scripts"
	- "corpus"
	- "packages"

	# Assets that should be downloaded or available in the directory. You can replace
	# this with your own input data.
	# No entry declares a download URL, so each file has to be placed at its
	# "dest" path manually.
	assets:
	- dest: "assets/n2c2_2011/${vars.beth_train_tarfile}"
	  description: "Tarfile containing original challenge data from the Beth training data split"
	- dest: "assets/n2c2_2011/${vars.partners_train_tarfile}"
	  description: "Tarfile containing original challenge data from the Partners training data split"
	- dest: "assets/n2c2_2011/${vars.test_zipfile}"
	  description: "Zipfile containing original challenge test data"
	- dest: "assets/mock_notes.jsonl"
	  description: "JSONL file with raw mock notes to annotate in prodigy"

	# Workflows are sequences of commands (see below) executed in order. You can
	# run them via "spacy project run [workflow]". If a command's inputs/outputs
	# haven't changed, it won't be re-run.
	workflows:
	  # Default (non-transformer) pipeline: each step names an entry in "commands".
	  all:
	  - preprocess
	  - train
	  - evaluate

	# Project commands, specified in a style similar to CI config files (e.g. Azure
	# pipelines). The name is the command name that lets you trigger the command
	# via "spacy project run [command] [path]". The help message is optional and
	# shown when executing "spacy project run [optional command] [path] --help".
	commands:
	# Reads the raw challenge archives from assets/n2c2_2011 and writes the
	# train/dev/test corpus files that the train* and evaluate* commands use.
	- name: "preprocess"
	  help: "Convert the data to spaCy's binary format"
	  script:
	  - "python scripts/preprocess.py assets/n2c2_2011 corpus"
	  deps:
	  - "assets/n2c2_2011/${vars.beth_train_tarfile}"
	  - "assets/n2c2_2011/${vars.partners_train_tarfile}"
	  - "assets/n2c2_2011/${vars.test_zipfile}"
	  - "scripts/preprocess.py"
	  outputs:
	  - "corpus/${vars.train}.spacy"
	  - "corpus/${vars.dev}.spacy"
	  - "corpus/${vars.test}.spacy"

	# Trains with configs/${vars.config} on GPU 0 and writes to training/.
	- name: "train"
	  help: "Train a custom PyTorch named entity recognition model"
	  script:
	  - "python -m spacy train configs/${vars.config} --output training/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.train}.spacy"
	  - "corpus/${vars.dev}.spacy"
	  outputs:
	  - "training/model-best"

	# Trains with configs/${vars.config_trf} on GPU 0 and writes to training_trf/.
	- name: "train-trf"
	  help: "Train a custom PyTorch named entity recognition model with transformer"
	  script:
	  - "python -m spacy train configs/${vars.config_trf} --output training_trf/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.train}.spacy"
	  - "corpus/${vars.dev}.spacy"
	  outputs:
	  - "training_trf/model-best"

	# Trains with configs/${vars.config_trf_test} on GPU 0 and writes to
	# training_trf_test/.
	- name: "train-trf-test"
	  help: "Train a custom PyTorch named entity recognition model with transformer"
	  script:
	  - "python -m spacy train configs/${vars.config_trf_test} --output training_trf_test/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.train}.spacy"
	  - "corpus/${vars.dev}.spacy"
	  outputs:
	  # Must match the --output directory above; "evaluate-trf-test" depends
	  # on this exact path.
	  - "training_trf_test/model-best"

	# Trains with configs/${vars.config_trf_resume} on GPU 0. Writes to the same
	# training_trf/ directory as "train-trf".
	- name: "train-trf-resume"
	  help: "Train a custom PyTorch named entity recognition model with transformer"
	  script:
	  - "python -m spacy train configs/${vars.config_trf_resume} --output training_trf/ --paths.train corpus/${vars.train}.spacy --paths.dev corpus/${vars.dev}.spacy --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.train}.spacy"
	  - "corpus/${vars.dev}.spacy"
	  outputs:
	  - "training_trf/model-best"

	# Scores training/model-best on the test split; no --gpu-id is passed here,
	# unlike the evaluate-trf* commands.
	- name: "evaluate"
	  help: "Evaluate the custom PyTorch model and export metrics"
	  script:
	  - "python -m spacy evaluate training/model-best corpus/${vars.test}.spacy --output training/metrics.json --code scripts/custom_functions.py"
	  deps:
	  - "corpus/${vars.test}.spacy"
	  - "training/model-best"
	  outputs:
	  - "training/metrics.json"

	# Scores training_trf/model-best on the test split using GPU 0.
	- name: "evaluate-trf"
	  help: "Evaluate the custom PyTorch model and export metrics"
	  script:
	  - "python -m spacy evaluate training_trf/model-best corpus/${vars.test}.spacy --output training_trf/metrics.json --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.test}.spacy"
	  - "training_trf/model-best"
	  outputs:
	  - "training_trf/metrics.json"

	# Scores training_trf_test/model-best on the test split using GPU 0.
	- name: "evaluate-trf-test"
	  help: "Evaluate the custom PyTorch model and export metrics"
	  script:
	  - "python -m spacy evaluate training_trf_test/model-best corpus/${vars.test}.spacy --output training_trf_test/metrics.json --code scripts/custom_functions.py --gpu-id 0"
	  deps:
	  - "corpus/${vars.test}.spacy"
	  - "training_trf_test/model-best"
	  outputs:
	  - "training_trf_test/metrics.json"

	# Builds an installable package from training/model-best into packages/.
	# --force is passed to "spacy package", and the artifact is listed under
	# outputs_no_cache rather than outputs.
	- name: "package"
	  help: "Package the trained model so it can be installed"
	  script:
	  - "python -m spacy package training/model-best packages --name ${vars.name} --version ${vars.version} --force --code scripts/custom_functions.py"
	  deps:
	  - "training/model-best"
	  outputs_no_cache:
	  - "packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}.tar.gz"

	# Builds an installable package from training_trf/model-best into packages/.
	# NOTE(review): this declares the same output artifact as "package", and
	# both pass --force, so the two presumably overwrite each other - confirm
	# that is intended.
	# NOTE(review): --code lists torch_ner_model.py and torch_ner_pipe.py here,
	# while every other command uses custom_functions.py - confirm.
	- name: "package-trf"
	  help: "Package the trained model so it can be installed"
	  script:
	  - "python -m spacy package training_trf/model-best packages --name ${vars.name} --version ${vars.version} --force --code scripts/torch_ner_model.py,scripts/torch_ner_pipe.py"
	  deps:
	  - "training_trf/model-best"
	  outputs_no_cache:
	  - "packages/en_${vars.name}-${vars.version}/dist/en_${vars.name}-${vars.version}.tar.gz"

	# Launches the Streamlit visualizer on training_trf/model-best with a
	# sample sentence as the second argument.
	- name: "visualize-model"
	  help: "Visualize the model's output interactively using Streamlit"
	  script:
	  - "streamlit run scripts/visualize_model.py training_trf/model-best \"The patient had surgery.\""
	  deps:
	  - "scripts/visualize_model.py"
	  # Tracks the model directory the script actually loads.
	  - "training_trf/model-best"

	# Runs the custom "ner.correct.anonymous" recipe from
	# scripts/prodigy/recipes.py against training/model-best. Dataset, source
	# and labels come from vars.prodigy; the Text Analytics credentials come
	# from vars.azure.
	- name: "annotate"
	  help: "Run the custom prodigy recipe to anonymize data for the annotator and update the PyTorch NER model"
	  script:
	  # Every substitution needs the ${...} form; without the "$" the braces
	  # are passed to Prodigy as literal text.
	  - "prodigy ner.correct.anonymous ${vars.prodigy.dataset} training/model-best ${vars.prodigy.source} --text-analytics-key ${vars.azure.text_analytics_key} --text-analytics-base-url ${vars.azure.text_analytics_base_url} --label ${vars.prodigy.labels} --update -F scripts/prodigy/recipes.py"
	  deps:
	  - "scripts/prodigy/recipes.py"
	  - "training/model-best"