AmitGarage committed
Commit: 3ab8bd6
1 Parent(s): 4439cab

Upload 8 files

scripts/__init__.py ADDED
@@ -0,0 +1 @@
scripts/custom_functions.py ADDED
@@ -0,0 +1,3 @@
from scripts.azure.azure_ner_pipe import make_azure_entity_recognizer
from scripts.torch_ner_model import build_torch_ner_model
from scripts.torch_ner_pipe import make_torch_entity_recognizer
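
These imports exist for their side effects: loading the module registers the custom architecture and pipeline factory with spaCy's registries, which is why such a file is typically passed to spacy train via --code (note the first import also assumes a scripts/azure package that is not part of this upload). A minimal registration check, assuming the project root is on the import path, could look like:

    import scripts.torch_ner_model  # noqa: F401 - registers "TorchEntityRecognizer.v1"
    import scripts.torch_ner_pipe   # noqa: F401 - registers the "torch_ner" factory
    from spacy.language import Language
    from spacy.util import registry

    assert "TorchEntityRecognizer.v1" in registry.architectures.get_all()
    assert Language.has_factory("torch_ner")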
scripts/preprocess.py ADDED
@@ -0,0 +1,214 @@
from collections import defaultdict
import random
from typing import List
import tarfile
import shutil
import typer
from pathlib import Path
import spacy
from spacy.language import Language
from spacy.tokens import Doc, DocBin, Span
from spacy.util import filter_spans
from wasabi import msg
from spacy.tokenizer import Tokenizer
from spacy.util import compile_prefix_regex, compile_infix_regex, compile_suffix_regex
import functools

random.seed(42)


def main(
    input_dir: Path = typer.Argument(..., exists=True),
    output_dir: Path = typer.Argument(...),
    beth_train_tar_name: str = "i2b2_Beth_Train_Release.tar.gz",
    partners_train_tar_name: str = "i2b2_Partners_Train_Release.tar.gz",
    test_zip_name: str = "Task_1C.zip",
    merge_docs: bool = True,
):
    """Extract and preprocess raw n2c2 2011 Challenge data into spaCy DocBin format.
    input_dir (Path): Input directory with raw downloads from Harvard DBMI Portal.
    output_dir (Path): Output directory to save spaCy .docbin files to.
    beth_train_tar_name (str): Filename of downloaded tarfile for Beth Training Data.
    partners_train_tar_name (str): Filename of downloaded tarfile for Partners Training Data.
    test_zip_name (str): Filename of downloaded tarfile for n2c2 Test Data.
    merge_docs (bool): If False, create spaCy docs for each line of each medical record
    """
    # Unpack compressed data files
    msg.info("Extracting raw data.")
    beth_train_tar_path = input_dir / beth_train_tar_name
    partners_train_tar_path = input_dir / partners_train_tar_name
    test_zip_path = input_dir / test_zip_name

    # for path in [beth_train_tar_path, partners_train_tar_path]:
    #     if path.name.endswith("tar.gz"):
    #         msg.text(f"Extracting {path}")
    #         tar = tarfile.open(path, "r:gz")
    #         tar.extractall(path.parent)
    #         tar.close()

    # shutil.unpack_archive(test_zip_path, input_dir / test_zip_name.replace(".zip", ""))

    # preprocess data
    msg.info("Converting to spaCy Doc objects.")

    # Debug log of annotation/tokenization mismatches (note the hard-coded path).
    with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
        fp.write(str((input_dir / "Beth_Train").stem) + '\n')

    beth_train_docs = docs_from_many_clinical_records(
        input_dir / "Beth_Train", merge_docs=merge_docs
    )

    with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
        fp.write(str((input_dir / "Partners_Train").stem) + '\n')

    partners_train_docs = docs_from_many_clinical_records(
        input_dir / "Partners_Train", merge_docs=merge_docs
    )
    train_docs = beth_train_docs + partners_train_docs

    with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
        fp.write(str((input_dir / "Task_1C/i2b2_Test/i2b2_Beth_Test").stem) + '\n')

    beth_test_docs = docs_from_many_clinical_records(
        input_dir / "Task_1C/i2b2_Test/i2b2_Beth_Test", merge_docs=merge_docs
    )

    with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
        fp.write(str((input_dir / "Task_1C/i2b2_Test/i2b2_Partners_Test").stem) + '\n')

    partners_test_docs = docs_from_many_clinical_records(
        input_dir / "Task_1C/i2b2_Test/i2b2_Partners_Test", merge_docs=merge_docs
    )
    test_docs = beth_test_docs + partners_test_docs

    random.shuffle(train_docs)
    split_idx = int(len(train_docs) * 0.8)
    train_docs, dev_docs = train_docs[:split_idx], train_docs[split_idx:]

    msg.good(f"Num Train Docs: {len(train_docs)}")
    msg.good(f"Num Dev Docs: {len(dev_docs)}")
    msg.good(f"Num Test Docs: {len(test_docs)}")

    with msg.loading(f"Saving docs to: {output_dir}..."):
        DocBin(docs=train_docs).to_disk(output_dir / "train.spacy")
        DocBin(docs=dev_docs).to_disk(output_dir / "dev.spacy")
        DocBin(docs=test_docs).to_disk(output_dir / "test.spacy")
    msg.good("Done.")


def docs_from_clinical_record(
    lines: List[str], annotations: List[str], nlp: Language, merge_docs: bool = False
) -> List[Doc]:
    """Create spaCy docs from a single annotated medical record in the n2c2 2011 format
    lines (List[str]): Text of the clinical record as a list separated by newlines
    annotations (List[str]): Raw entity annotations in the n2c2 2011 format
    nlp (Language): spaCy Language object. Defaults to spacy.blank("en").
    merge_docs (bool): If True: merge all lines into a single spaCy doc so
        there is only 1 element in the output array.
        If False: create a spaCy doc for each line in the original record
    RETURNS (List[Doc]): List of spaCy Doc objects with entity spans set
    """
    difference = []
    docs = []
    spans_by_line = defaultdict(list)
    # Drop prefix patterns containing punctuation that would split tokens
    # differently from the n2c2 word-offset annotation scheme.
    nlp.Defaults.prefixes = [
        signs
        for signs in nlp.Defaults.prefixes
        if not any(
            char in signs
            for char in (":", "#", "+", "(", ")", "*", "'", "%", "_", ";", ">", ",", "&", '"', "<")
        )
    ]
    infixes = nlp.Defaults.prefixes + [r"[-]~"]

    infix_re = spacy.util.compile_infix_regex(infixes)

    def custom_tokenizer(nlp):
        return Tokenizer(nlp.vocab, infix_finditer=infix_re.finditer)

    nlp.tokenizer = custom_tokenizer(nlp)

    entities = {}

    for row in annotations:
        row = row.split("||")
        text_info = row[0]
        type_info = row[1]

        offset_start = text_info.split(" ")[-2]
        offset_end = text_info.split(" ")[-1]

        start_line, word_start = offset_start.split(":")
        end_line, word_end = offset_end.split(":")

        label = type_info.split('"')[-2]

        if start_line != end_line:
            # This happens very infrequently (only about 10 times in total)
            # so we just skip these annotations
            continue
        else:
            spans_by_line[int(start_line)].append(
                (int(word_start), int(word_end), label)
            )

        if start_line in entities:
            entities[start_line].append(text_info.split('"')[1])
        else:
            entities[start_line] = [text_info.split('"')[1]]

    extracted_entities = {}

    for i, line in enumerate(lines):
        n = i + 1
        line = line.replace(" ", " ")
        doc = nlp.make_doc(line)
        if n in spans_by_line:
            ents = [
                Span(doc, start, end + 1, label=label)
                for (start, end, label) in spans_by_line[n]
            ]
            ents = [
                e for e in ents if bool(e.text.strip()) and e.text.strip() == e.text
            ]
            doc.ents = filter_spans(ents)
            extracted_entities[str(n)] = [
                e.text for e in ents if bool(e.text.strip()) and e.text.strip() == e.text
            ]

        docs.append(doc)

    # Log annotated entity strings that the extracted spans could not reproduce,
    # so tokenization problems can be reviewed manually in difference.txt.
    for key, value in entities.items():
        if key in extracted_entities:
            if functools.reduce(
                lambda x, y: x and y,
                map(
                    lambda p, q: p.lower() != q.lower(),
                    entities[key],
                    extracted_entities[key],
                ),
                True,
            ):
                difference = difference + [key] + entities[key] + extracted_entities[key]
        else:
            difference = difference + [key + " Key not present"] + entities[key]

    with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
        fp.write('\n'.join(difference))
    return [Doc.from_docs(docs)] if merge_docs else docs


def docs_from_many_clinical_records(
    base_path: Path, nlp: Language = spacy.blank("en"), merge_docs: bool = True
) -> List[Doc]:
    """Convert raw n2c2 annotated clinical records into a list of
    spaCy Doc objects to be ready to be used in training
    base_path (Path): Root path to the raw data
    nlp (Language): spaCy Language object. Defaults to spacy.blank("en").
    merge_docs (bool): If True: merge all lines into a single spaCy doc so
        there is only 1 element in the output array.
        If False: create a spaCy doc for each line in the original record

    RETURNS (List[Doc]): List of spaCy Doc objects with entity spans set
    """
    all_docs = []
    concept_paths = sorted((base_path / "concepts").glob("*.txt.con"))
    document_paths = sorted((base_path / "docs").glob("*.txt"))

    for con_path, doc_path in zip(concept_paths, document_paths):
        with open(r'/notebooks/Clinical_NER/difference.txt', 'a') as fp:
            fp.write('\n' + str(con_path.stem))
        annotations = con_path.open().read().splitlines()
        lines = doc_path.open().read().splitlines()

        docs = docs_from_clinical_record(lines, annotations, nlp, merge_docs=merge_docs)
        all_docs += docs

    return all_docs


if __name__ == "__main__":
    typer.run(main)
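
Once the script has run, the emitted DocBin files can be inspected independently of training. A small sketch, assuming the outputs were written to a corpus/ directory (the path is a placeholder):

    import spacy
    from spacy.tokens import DocBin

    nlp = spacy.blank("en")
    doc_bin = DocBin().from_disk("corpus/train.spacy")
    docs = list(doc_bin.get_docs(nlp.vocab))
    print(f"{len(docs)} training docs")
    print([(ent.text, ent.label_) for ent in docs[0].ents][:5])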
scripts/torch_ner_model.py ADDED
@@ -0,0 +1,203 @@
from collections import OrderedDict
from typing import Optional, List
from thinc.api import (
    with_array,
    chain,
    Model,
    PyTorchWrapper,
    PyTorchLSTM,
)
from thinc.types import Floats2d

from spacy.tokens import Doc
from spacy.util import registry
import torch
from torch import nn


@registry.architectures("TorchEntityRecognizer.v1")
def build_torch_ner_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    hidden_width: int,
    dropout: Optional[float] = None,
    nO: Optional[int] = None,
) -> Model[List[Doc], List[Floats2d]]:
    """Build a tagger model, using a provided token-to-vector component. The tagger
    model simply adds a linear layer with softmax activation to predict scores
    given the token vectors.
    tok2vec (Model[List[Doc], List[Floats2d]]): The token-to-vector subnetwork.
    nO (int or None): The number of tags to output. Inferred from the data if None.
    RETURNS (Model[List[Doc], List[Floats2d]]): Initialized Model
    """
    # print("Entered build_torch_ner_model - ")
    # print(tok2vec.dim_names, tok2vec.name)
    listener = tok2vec.maybe_get_ref("listener")
    # print(listener.maybe_get_dim("nI"))
    t2v_width = listener.maybe_get_dim("nO") if listener else None
    # print(t2v_width, hidden_width, nO, dropout)
    t2v_width = 768
    # print(t2v_width, hidden_width, nO, dropout)
    torch_model = TorchEntityRecognizer(t2v_width, hidden_width, nO, dropout)
    # print("torch_model - ", torch_model)
    wrapped_pt_model = PyTorchWrapper(torch_model)
    # print("wrapped")
    wrapped_pt_model.attrs["set_dropout_rate"] = torch_model.set_dropout_rate
    # print("set dropout")

    model = chain(tok2vec, with_array(wrapped_pt_model))
    # print(model.param_names)
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("torch_model", wrapped_pt_model)
    model.init = init
    # print("Completed build_torch_ner_model")
    return model


def init(
    model: Model[List[Doc], Floats2d],
    X: Optional[List[Doc]] = None,
    Y: Optional[List[str]] = None,
) -> Model[List[Doc], List[Floats2d]]:
    """Dynamically set PyTorch Output Layer shape based on labels data
    model (Model[List[Doc], Floats2d]): Thinc Model wrapping tok2vec and PyTorch model
    X (Optional[List[Doc]], optional): Sample of Doc objects.
    Y (Optional[List[Ints2d]], optional): Available model labels.
    RETURNS (Model[List[Doc], List[Floats2d]]): Initialized Model
    """
    # print("Entered init - ")
    tok2vec = model.get_ref("tok2vec")
    # print(tok2vec.ref_names)
    torch_model = model.get_ref("torch_model")
    # print(torch_model)

    # print("Ref names - ", model.ref_names)
    # print(tok2vec.dim_names, tok2vec.name)
    # print(torch_model.dim_names, torch_model.name)
    listener = tok2vec.maybe_get_ref("listener")
    # print(listener)
    t2v_width = listener.maybe_get_dim("nO") if listener else None
    # print(t2v_width, " - ", Y)
    if t2v_width:
        # print(torch_model.shims[0]._model)
        # print("Searching - ", torch_model.maybe_get_dim("nI"))
        torch_model.shims[0]._model.set_input_shape(t2v_width)
        torch_model.set_dim("nI", t2v_width)
        # print(torch_model.dim_names)

    if Y is not None:
        nO = len(Y)
        # print(nO)
        torch_model.shims[0]._model.set_output_shape(nO)
        torch_model.set_dim("nO", nO)
        # print(torch_model)

    tok2vec = model.get_ref("tok2vec")
    tok2vec.initialize()
    # print(tok2vec)
    torch_model = model.get_ref("torch_model")
    # print("Found - ", torch_model.get_dim("nI"))
    # print("Exit")
    return model


def is_dropout_module(
    module: nn.Module,
    dropout_modules: List[nn.Module] = [nn.Dropout, nn.Dropout2d, nn.Dropout3d],
) -> bool:
    """Detect if a PyTorch Module is a Dropout layer
    module (nn.Module): Module to check
    dropout_modules (List[nn.Module], optional): List of Modules that count as Dropout layers.
    RETURNS (bool): True if module is a Dropout layer.
    """
    # print("Entered is_dropout_module - ")
    for m in dropout_modules:
        if isinstance(module, m):
            return True
    return False


class TorchEntityRecognizer(nn.Module):
    """Torch Entity Recognizer Model Head"""

    def __init__(self, nI: int, nH: int, nO: int, dropout: float):
        """Initialize TorchEntityRecognizer.
        nI (int): Input Dimension
        nH (int): Hidden Dimension Width
        nO (int): Output Dimension Width
        dropout (float): Dropout ratio (0 - 1.0)
        """
        super(TorchEntityRecognizer, self).__init__()

        # Just for initialization of PyTorch layer. Output shape set during Model.init
        # print("Entered TorchEntityRecognizer.__init__ - ")
        nI = nI or 1
        nO = nO or 1

        self.nH = nH
        self.model = nn.Sequential(
            OrderedDict(
                {
                    "input_layer": nn.Linear(nI, nH),
                    "input_activation": nn.ReLU(),
                    "input_dropout": nn.Dropout2d(dropout),
                    "output_layer": nn.Linear(nH, nO),
                    "output_dropout": nn.Dropout2d(dropout),
                    "softmax": nn.Softmax(dim=1),
                }
            )
        )
        # print(self.model)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Forward pass of the model.
        inputs (torch.Tensor): Batch of outputs from spaCy tok2vec layer
        RETURNS (torch.Tensor): Batch of results with a score for each tag for each token
        """
        # print("Entered TorchEntityRecognizer.forward - ")
        return self.model(inputs)

    def _set_layer_shape(self, name: str, nI: int, nO: int):
        """Dynamically set the shape of a layer
        name (str): Layer name
        nI (int): New input shape
        nO (int): New output shape
        """
        # print("Entered TorchEntityRecognizer._set_layer_shape - ", nO, nI)
        with torch.no_grad():
            layer = getattr(self.model, name)
            # print(layer)
            layer.out_features = nO
            layer.weight = nn.Parameter(torch.Tensor(nO, nI))
            # print(layer.weight.shape)
            if layer.bias is not None:
                layer.bias = nn.Parameter(torch.Tensor(nO))
                # print(layer)
            layer.reset_parameters()
            # print(layer.weight.shape)
            # print(layer)

    def set_input_shape(self, nI: int):
        """Dynamically set the shape of the input layer
        nI (int): New input layer shape
        """
        # print("Entered TorchEntityRecognizer.set_input_shape - ", nI, self.nH)
        self._set_layer_shape("input_layer", nI, self.nH)

    def set_output_shape(self, nO: int):
        """Dynamically set the shape of the output layer
        nO (int): New output layer shape
        """
        # print("Entered TorchEntityRecognizer.set_output_shape - ", self.nH, nO)
        self._set_layer_shape("output_layer", self.nH, nO)

    def set_dropout_rate(self, dropout: float):
        """Set the dropout rate of all Dropout layers in the model.
        dropout (float): Dropout rate to set
        """
        # print("Entered TorchEntityRecognizer.set_dropout_rate - ")
        dropout_layers = [
            module for module in self.modules() if is_dropout_module(module)
        ]
        for layer in dropout_layers:
            layer.p = dropout
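
The nn.Module head can be smoke-tested on its own, outside the Thinc wrapper. A minimal sketch with made-up dimensions (768-dim token vectors, 23 tags); eval() keeps the Dropout2d layers inert:

    import torch
    from scripts.torch_ner_model import TorchEntityRecognizer

    head = TorchEntityRecognizer(nI=768, nH=48, nO=23, dropout=0.1)
    head.eval()                     # disable dropout for the smoke test
    vectors = torch.randn(5, 768)   # 5 tokens' worth of tok2vec output
    scores = head(vectors)          # shape (5, 23), softmax over the tag set
    print(scores.shape, scores.sum(dim=1))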
scripts/torch_ner_model_test.py ADDED
@@ -0,0 +1,203 @@
from collections import OrderedDict
from typing import Optional, List
from thinc.api import (
    with_array,
    chain,
    Model,
    PyTorchWrapper,
    PyTorchLSTM,
)
from thinc.types import Floats2d

from spacy.tokens import Doc
from spacy.util import registry
import torch
from torch import nn


@registry.architectures("TorchEntityRecognizer.v1")
def build_torch_ner_model(
    tok2vec: Model[List[Doc], List[Floats2d]],
    hidden_width: int,
    dropout: Optional[float] = None,
    nO: Optional[int] = None,
) -> Model[List[Doc], List[Floats2d]]:
    """Build a tagger model, using a provided token-to-vector component. The tagger
    model simply adds a linear layer with softmax activation to predict scores
    given the token vectors.
    tok2vec (Model[List[Doc], List[Floats2d]]): The token-to-vector subnetwork.
    nO (int or None): The number of tags to output. Inferred from the data if None.
    RETURNS (Model[List[Doc], List[Floats2d]]): Initialized Model
    """
    print("Entered build_torch_ner_model - ")
    print(tok2vec.dim_names, tok2vec.name)
    listener = tok2vec.maybe_get_ref("listener")
    print(listener.maybe_get_dim("nI"))
    t2v_width = listener.maybe_get_dim("nO") if listener else None
    print(t2v_width, hidden_width, nO, dropout)
    t2v_width = 768
    print(t2v_width, hidden_width, nO, dropout)
    torch_model = TorchEntityRecognizer(t2v_width, hidden_width, nO, dropout)
    print("torch_model - ", torch_model)
    wrapped_pt_model = PyTorchWrapper(torch_model)
    print("wrapped")
    wrapped_pt_model.attrs["set_dropout_rate"] = torch_model.set_dropout_rate
    print("set dropout")

    model = chain(tok2vec, with_array(wrapped_pt_model))
    print(model.param_names)
    model.set_ref("tok2vec", tok2vec)
    model.set_ref("torch_model", wrapped_pt_model)
    model.init = init
    print("Completed build_torch_ner_model")
    return model


def init(
    model: Model[List[Doc], Floats2d],
    X: Optional[List[Doc]] = None,
    Y: Optional[List[str]] = None,
) -> Model[List[Doc], List[Floats2d]]:
    """Dynamically set PyTorch Output Layer shape based on labels data
    model (Model[List[Doc], Floats2d]): Thinc Model wrapping tok2vec and PyTorch model
    X (Optional[List[Doc]], optional): Sample of Doc objects.
    Y (Optional[List[Ints2d]], optional): Available model labels.
    RETURNS (Model[List[Doc], List[Floats2d]]): Initialized Model
    """
    print("Entered init - ")
    tok2vec = model.get_ref("tok2vec")
    print(tok2vec.ref_names)
    torch_model = model.get_ref("torch_model")
    print(torch_model)

    print("Ref names - ", model.ref_names)
    print(tok2vec.dim_names, tok2vec.name)
    print(torch_model.dim_names, torch_model.name)
    listener = tok2vec.maybe_get_ref("listener")
    print(listener)
    t2v_width = listener.maybe_get_dim("nO") if listener else None
    print(t2v_width, " - ", Y)
    if t2v_width:
        print(torch_model.shims[0]._model)
        print("Searching - ", torch_model.maybe_get_dim("nI"))
        torch_model.shims[0]._model.set_input_shape(t2v_width)
        torch_model.set_dim("nI", t2v_width)
        print(torch_model.dim_names)

    if Y is not None:
        nO = len(Y)
        print(nO)
        torch_model.shims[0]._model.set_output_shape(nO)
        torch_model.set_dim("nO", nO)
        print(torch_model)

    tok2vec = model.get_ref("tok2vec")
    tok2vec.initialize()
    print(tok2vec)
    torch_model = model.get_ref("torch_model")
    print("Found - ", torch_model.get_dim("nI"))
    print("Exit")
    return model


def is_dropout_module(
    module: nn.Module,
    dropout_modules: List[nn.Module] = [nn.Dropout, nn.Dropout2d, nn.Dropout3d],
) -> bool:
    """Detect if a PyTorch Module is a Dropout layer
    module (nn.Module): Module to check
    dropout_modules (List[nn.Module], optional): List of Modules that count as Dropout layers.
    RETURNS (bool): True if module is a Dropout layer.
    """
    print("Entered is_dropout_module - ")
    for m in dropout_modules:
        if isinstance(module, m):
            return True
    return False


class TorchEntityRecognizer(nn.Module):
    """Torch Entity Recognizer Model Head"""

    def __init__(self, nI: int, nH: int, nO: int, dropout: float):
        """Initialize TorchEntityRecognizer.
        nI (int): Input Dimension
        nH (int): Hidden Dimension Width
        nO (int): Output Dimension Width
        dropout (float): Dropout ratio (0 - 1.0)
        """
        super(TorchEntityRecognizer, self).__init__()

        # Just for initialization of PyTorch layer. Output shape set during Model.init
        print("Entered TorchEntityRecognizer.__init__ - ")
        nI = nI or 1
        nO = nO or 1

        self.nH = nH
        self.model = nn.Sequential(
            OrderedDict(
                {
                    "input_layer": nn.Linear(nI, nH),
                    "input_activation": nn.ReLU(),
                    "input_dropout": nn.Dropout2d(dropout),
                    "output_layer": nn.Linear(nH, nO),
                    "output_dropout": nn.Dropout2d(dropout),
                    "softmax": nn.Softmax(dim=1),
                }
            )
        )
        print(self.model)

    def forward(self, inputs: torch.Tensor) -> torch.Tensor:
        """Forward pass of the model.
        inputs (torch.Tensor): Batch of outputs from spaCy tok2vec layer
        RETURNS (torch.Tensor): Batch of results with a score for each tag for each token
        """
        print("Entered TorchEntityRecognizer.forward - ")
        return self.model(inputs)

    def _set_layer_shape(self, name: str, nI: int, nO: int):
        """Dynamically set the shape of a layer
        name (str): Layer name
        nI (int): New input shape
        nO (int): New output shape
        """
        print("Entered TorchEntityRecognizer._set_layer_shape - ", nO, nI)
        with torch.no_grad():
            layer = getattr(self.model, name)
            print(layer)
            layer.out_features = nO
            layer.weight = nn.Parameter(torch.Tensor(nO, nI))
            print(layer.weight.shape)
            if layer.bias is not None:
                layer.bias = nn.Parameter(torch.Tensor(nO))
                print(layer)
            layer.reset_parameters()
            print(layer.weight.shape)
            print(layer)

    def set_input_shape(self, nI: int):
        """Dynamically set the shape of the input layer
        nI (int): New input layer shape
        """
        print("Entered TorchEntityRecognizer.set_input_shape - ", nI, self.nH)
        self._set_layer_shape("input_layer", nI, self.nH)

    def set_output_shape(self, nO: int):
        """Dynamically set the shape of the output layer
        nO (int): New output layer shape
        """
        print("Entered TorchEntityRecognizer.set_output_shape - ", self.nH, nO)
        self._set_layer_shape("output_layer", self.nH, nO)

    def set_dropout_rate(self, dropout: float):
        """Set the dropout rate of all Dropout layers in the model.
        dropout (float): Dropout rate to set
        """
        print("Entered TorchEntityRecognizer.set_dropout_rate - ")
        dropout_layers = [
            module for module in self.modules() if is_dropout_module(module)
        ]
        for layer in dropout_layers:
            layer.p = dropout
scripts/torch_ner_pipe.py ADDED
@@ -0,0 +1,294 @@
from collections import OrderedDict
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy
from thinc.api import (
    Config,
    Model,
    set_dropout_rate,
    SequenceCategoricalCrossentropy,
    Optimizer,
)
from thinc.types import Ints1d, Floats2d
from itertools import islice

from spacy.tokens.doc import Doc
from spacy.vocab import Vocab

from spacy.training import Example
from spacy.training.iob_utils import biluo_tags_to_spans, biluo_to_iob, iob_to_biluo
from spacy.pipeline.trainable_pipe import TrainablePipe
from spacy.pipeline.pipe import deserialize_config
from spacy.language import Language
from spacy.attrs import POS, ID
from spacy.parts_of_speech import X
from spacy.errors import Errors
from spacy.scorer import get_ner_prf
from spacy.training import validate_examples, validate_get_examples
from spacy import util


def set_torch_dropout_rate(model: Model, dropout_rate: float):
    """Set dropout rate for Thinc and wrapped PyTorch models

    Args:
        model (Model): Thinc Model (with PyTorch sub-modules)
        dropout_rate (float): Dropout rate
    """
    # print("Entered set_torch_dropout_rate - ")
    set_dropout_rate(model, dropout_rate)
    func = model.get_ref("torch_model").attrs["set_dropout_rate"]
    func(dropout_rate)


default_model_config = """
[model]
@architectures = "TorchEntityRecognizer.v1"
hidden_width = 48
dropout = 0.1
nO = null

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
"""
DEFAULT_MODEL = Config().from_str(default_model_config)["model"]


@Language.factory(
    "torch_ner",
    assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
    default_config={"model": DEFAULT_MODEL},
    default_score_weights={
        "ents_f": 1.0,
        "ents_p": 0.0,
        "ents_r": 0.0,
        "ents_per_type": None,
    },
)
def make_torch_entity_recognizer(nlp: Language, name: str, model: Model):
    """Construct a PyTorch based Named Entity Recognition model
    model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
    the tag probabilities. The output vectors should match the number of tags
    in size, and be normalized as probabilities (all scores between 0 and 1,
    with the rows summing to 1).
    """
    # print("Entered make_torch_entity_recognizer - ")
    return TorchEntityRecognizer(nlp.vocab, model, name)


class TorchEntityRecognizer(TrainablePipe):
    """Pipeline component Named Entity Recognition using PyTorch"""

    def __init__(self, vocab: Vocab, model: Model, name: str = "torch_ner"):
        """Initialize the PyTorch entity recognizer component.
        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        """
        # print("Entered pipe TorchEntityRecognizer.__init__ - ")
        self.vocab = vocab
        self.model = model
        self.name = name
        cfg = {"labels": []}
        self.cfg = dict(sorted(cfg.items()))
        # print(self.vocab, self.model, self.name, self.cfg)
        # print(self.model.layers[0].ref_names)
        # print(self.model.layers[1].ref_names)
        # print("Completed pipe TorchEntityRecognizer.__init__ - ")

    @property
    def labels(self) -> Tuple[str, ...]:
        """The labels currently added to the component.
        RETURNS (Tuple[str]): The labels.
        """
        # print("Entered TorchEntityRecognizer.labels - ")
        labels = ["O"]
        for label in self.cfg["labels"]:
            for iob in ["B", "I"]:
                labels.append(f"{iob}-{label}")
        return tuple(labels)

    def predict(self, docs: Iterable[Doc]) -> Iterable[Ints1d]:
        """Apply the pipeline's model to a batch of docs, without modifying them.
        docs (Iterable[Doc]): The documents to predict.
        RETURNS: The model's prediction for each document.
        """
        # print("Entered pipe TorchEntityRecognizer.predict - ")
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            n_labels = len(self.labels)
            guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs]
            assert len(guesses) == len(docs)
            return guesses
        scores = self.model.predict(docs)

        assert len(scores) == len(docs), (len(scores), len(docs))
        guesses = []
        for doc_scores in scores:
            doc_guesses = doc_scores.argmax(axis=1)
            if not isinstance(doc_guesses, numpy.ndarray):
                doc_guesses = doc_guesses.get()
            guesses.append(doc_guesses)
        assert len(guesses) == len(docs)
        return guesses

    def set_annotations(self, docs: Iterable[Doc], preds: Iterable[Ints1d]):
        """Modify a batch of documents, using pre-computed scores.
        docs (Iterable[Doc]): The documents to modify.
        preds (Iterable[Ints1d]): The IDs to set, produced by TorchEntityRecognizer.predict.
        """
        # print("Entered pipe TorchEntityRecognizer.set_annotations - ")
        if isinstance(docs, Doc):
            docs = [docs]
        for doc, tag_ids in zip(docs, preds):
            labels = iob_to_biluo([self.labels[tag_id] for tag_id in tag_ids])
            try:
                spans = biluo_tags_to_spans(doc, labels)
            except ValueError:
                # Note:
                # biluo_tags_to_spans will raise an exception for an invalid tag sequence
                # this could be fixed using a more complex transition system
                # (e.g. a Conditional Random Field model head)
                spans = []
            doc.ents = spans

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optimizer = None,
        losses: Dict[str, float] = None,
    ) -> Dict[str, float]:
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.
        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.
        """
        # print("Entered pipe TorchEntityRecognizer.update - ")
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "TorchEntityRecognizer.update")
        if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return losses
        set_torch_dropout_rate(self.model, drop)
        tag_scores, bp_tag_scores = self.model.begin_update(
            [eg.predicted for eg in examples]
        )
        for sc in tag_scores:
            if self.model.ops.xp.isnan(sc.sum()):
                raise ValueError(Errors.E940)
        loss, d_tag_scores = self.get_loss(examples, tag_scores)
        bp_tag_scores(d_tag_scores)
        if sgd not in (None, False):
            self.finish_update(sgd)

        losses[self.name] += loss
        return losses

    def get_loss(
        self, examples: Iterable[Example], scores: Iterable[Floats2d]
    ) -> Tuple[float, float]:
        """Find the loss and gradient of loss for the batch of documents and
        their predicted scores.
        examples (Iterable[Example]): The batch of examples.
        scores: Scores representing the model's predictions.
        RETURNS (Tuple[float, float]): The loss and the gradient.
        """
        # print("Entered pipe TorchEntityRecognizer.get_loss - ")
        validate_examples(examples, "TorchEntityRecognizer.get_loss")
        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
        truths = []
        for eg in examples:
            eg_truths = [
                tag if tag != "" else None for tag in biluo_to_iob(eg.get_aligned_ner())
            ]
            truths.append(eg_truths)
        d_scores, loss = loss_func(scores, truths)
        if self.model.ops.xp.isnan(loss):
            raise ValueError(Errors.E910.format(name=self.name))
        return float(loss), d_scores

    def initialize(
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
        nlp: Optional[Language] = None,
        labels: Optional[List[str]] = None,
    ):
        """Initialize the pipe for training, using a representative set
        of data examples.
        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        labels (Optional[List[str]]): The labels to add to the component, typically generated by the
            `init labels` command. If no labels are provided, the get_examples
            callback is used to extract the labels from the data.
        """
        # print("Entered pipe TorchEntityRecognizer.initialize - ")
        validate_get_examples(get_examples, "TorchEntityRecognizer.initialize")
        if labels is not None:
            for tag in labels:
                self.add_label(tag)
        else:
            tags = set()
            for example in get_examples():
                for token in example.y:
                    if token.ent_type_:
                        tags.add(token.ent_type_)
            for tag in sorted(tags):
                self.add_label(tag)
        doc_sample = []
        for example in islice(get_examples(), 10):
            doc_sample.append(example.x)

        self._require_labels()
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        # print(nlp.config["components"][self.name]["model"]["nO"])
        # print(nlp.config["components"][self.name]["model"]["nI"])
        self.model.initialize(X=doc_sample, Y=self.labels)
        # print("self.model.initialize exit")
        # print(self.model.name)
        # print(self.model.layers[0].ref_names)
        # print(self.model.layers[1].ref_names)
        # print(self.name)
        nlp.config["components"][self.name]["model"]["nO"] = len(self.labels)
        # nlp.config["components"][self.name]["model"]["nI"] = 768
        # print(nlp.config["components"][self.name]["model"])

    def add_label(self, label: str) -> int:
        """Add a new label to the pipe.
        label (str): The label to add.
        RETURNS (int): 0 if label is already present, otherwise 1.
        """
        # print("Entered pipe TorchEntityRecognizer.add_label - ")
        if not isinstance(label, str):
            raise ValueError(Errors.E187)
        if label in self.labels:
            return 0
        self._allow_extra_label()
        self.cfg["labels"].append(label)
        self.vocab.strings.add(label)
        return 1

    def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
        """Score a batch of examples.
        examples (Iterable[Example]): The examples to score.
        RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
        """
        # print("Entered pipe TorchEntityRecognizer.score - ")
        validate_examples(examples, "TorchEntityRecognizer.score")
        return get_ner_prf(examples)
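
A quick way to confirm the factory wires up is to add it to a blank pipeline. Full training is expected to go through spacy train (the architecture hard-codes a 768-wide input, i.e. a transformer-sized tok2vec), so this sketch only checks construction:

    import spacy
    import scripts.torch_ner_model  # noqa: F401 - registers the architecture
    import scripts.torch_ner_pipe   # noqa: F401 - registers the "torch_ner" factory

    nlp = spacy.blank("en")
    ner = nlp.add_pipe("torch_ner")  # built from default_model_config above
    print(nlp.pipe_names, ner.labels)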
scripts/torch_ner_pipe_test.py ADDED
@@ -0,0 +1,294 @@
from collections import OrderedDict
from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
import numpy
from thinc.api import (
    Config,
    Model,
    set_dropout_rate,
    SequenceCategoricalCrossentropy,
    Optimizer,
)
from thinc.types import Ints1d, Floats2d
from itertools import islice

from spacy.tokens.doc import Doc
from spacy.vocab import Vocab

from spacy.training import Example
from spacy.training.iob_utils import biluo_tags_to_spans, biluo_to_iob, iob_to_biluo
from spacy.pipeline.trainable_pipe import TrainablePipe
from spacy.pipeline.pipe import deserialize_config
from spacy.language import Language
from spacy.attrs import POS, ID
from spacy.parts_of_speech import X
from spacy.errors import Errors
from spacy.scorer import get_ner_prf
from spacy.training import validate_examples, validate_get_examples
from spacy import util


def set_torch_dropout_rate(model: Model, dropout_rate: float):
    """Set dropout rate for Thinc and wrapped PyTorch models

    Args:
        model (Model): Thinc Model (with PyTorch sub-modules)
        dropout_rate (float): Dropout rate
    """
    print("Entered set_torch_dropout_rate - ")
    set_dropout_rate(model, dropout_rate)
    func = model.get_ref("torch_model").attrs["set_dropout_rate"]
    func(dropout_rate)


default_model_config = """
[model]
@architectures = "TorchEntityRecognizer.v1"
hidden_width = 48
dropout = 0.1
nO = null

[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
pretrained_vectors = null
width = 96
depth = 4
embed_size = 2000
window_size = 1
maxout_pieces = 3
subword_features = true
"""
DEFAULT_MODEL = Config().from_str(default_model_config)["model"]


@Language.factory(
    "torch_ner",
    assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
    default_config={"model": DEFAULT_MODEL},
    default_score_weights={
        "ents_f": 1.0,
        "ents_p": 0.0,
        "ents_r": 0.0,
        "ents_per_type": None,
    },
)
def make_torch_entity_recognizer(nlp: Language, name: str, model: Model):
    """Construct a PyTorch based Named Entity Recognition model
    model (Model[List[Doc], List[Floats2d]]): A model instance that predicts
    the tag probabilities. The output vectors should match the number of tags
    in size, and be normalized as probabilities (all scores between 0 and 1,
    with the rows summing to 1).
    """
    print("Entered make_torch_entity_recognizer - ")
    return TorchEntityRecognizer(nlp.vocab, model, name)


class TorchEntityRecognizer(TrainablePipe):
    """Pipeline component Named Entity Recognition using PyTorch"""

    def __init__(self, vocab: Vocab, model: Model, name: str = "torch_ner"):
        """Initialize the PyTorch entity recognizer component.
        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        """
        print("Entered pipe TorchEntityRecognizer.__init__ - ")
        self.vocab = vocab
        self.model = model
        self.name = name
        cfg = {"labels": []}
        self.cfg = dict(sorted(cfg.items()))
        print(self.vocab, self.model, self.name, self.cfg)
        print(self.model.layers[0].ref_names)
        print(self.model.layers[1].ref_names)
        print("Completed pipe TorchEntityRecognizer.__init__ - ")

    @property
    def labels(self) -> Tuple[str, ...]:
        """The labels currently added to the component.
        RETURNS (Tuple[str]): The labels.
        """
        # print("Entered TorchEntityRecognizer.labels - ")
        labels = ["O"]
        for label in self.cfg["labels"]:
            for iob in ["B", "I"]:
                labels.append(f"{iob}-{label}")
        return tuple(labels)

    def predict(self, docs: Iterable[Doc]) -> Iterable[Ints1d]:
        """Apply the pipeline's model to a batch of docs, without modifying them.
        docs (Iterable[Doc]): The documents to predict.
        RETURNS: The model's prediction for each document.
        """
        print("Entered pipe TorchEntityRecognizer.predict - ")
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            n_labels = len(self.labels)
            guesses = [self.model.ops.alloc((0, n_labels)) for doc in docs]
            assert len(guesses) == len(docs)
            return guesses
        scores = self.model.predict(docs)

        assert len(scores) == len(docs), (len(scores), len(docs))
        guesses = []
        for doc_scores in scores:
            doc_guesses = doc_scores.argmax(axis=1)
            if not isinstance(doc_guesses, numpy.ndarray):
                doc_guesses = doc_guesses.get()
            guesses.append(doc_guesses)
        assert len(guesses) == len(docs)
        return guesses

    def set_annotations(self, docs: Iterable[Doc], preds: Iterable[Ints1d]):
        """Modify a batch of documents, using pre-computed scores.
        docs (Iterable[Doc]): The documents to modify.
        preds (Iterable[Ints1d]): The IDs to set, produced by TorchEntityRecognizer.predict.
        """
        print("Entered pipe TorchEntityRecognizer.set_annotations - ")
        if isinstance(docs, Doc):
            docs = [docs]
        for doc, tag_ids in zip(docs, preds):
            labels = iob_to_biluo([self.labels[tag_id] for tag_id in tag_ids])
            try:
                spans = biluo_tags_to_spans(doc, labels)
            except ValueError:
                # Note:
                # biluo_tags_to_spans will raise an exception for an invalid tag sequence
                # this could be fixed using a more complex transition system
                # (e.g. a Conditional Random Field model head)
                spans = []
            doc.ents = spans

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optimizer = None,
        losses: Dict[str, float] = None,
    ) -> Dict[str, float]:
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Delegates to predict and get_loss.
        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.
        """
        print("Entered pipe TorchEntityRecognizer.update - ")
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "TorchEntityRecognizer.update")
        if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return losses
        set_torch_dropout_rate(self.model, drop)
        tag_scores, bp_tag_scores = self.model.begin_update(
            [eg.predicted for eg in examples]
        )
        for sc in tag_scores:
            if self.model.ops.xp.isnan(sc.sum()):
                raise ValueError(Errors.E940)
        loss, d_tag_scores = self.get_loss(examples, tag_scores)
        bp_tag_scores(d_tag_scores)
        if sgd not in (None, False):
            self.finish_update(sgd)

        losses[self.name] += loss
        return losses

    def get_loss(
        self, examples: Iterable[Example], scores: Iterable[Floats2d]
    ) -> Tuple[float, float]:
        """Find the loss and gradient of loss for the batch of documents and
        their predicted scores.
        examples (Iterable[Example]): The batch of examples.
        scores: Scores representing the model's predictions.
        RETURNS (Tuple[float, float]): The loss and the gradient.
        """
        print("Entered pipe TorchEntityRecognizer.get_loss - ")
        validate_examples(examples, "TorchEntityRecognizer.get_loss")
        loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
        truths = []
        for eg in examples:
            eg_truths = [
                tag if tag != "" else None for tag in biluo_to_iob(eg.get_aligned_ner())
            ]
            truths.append(eg_truths)
        d_scores, loss = loss_func(scores, truths)
        if self.model.ops.xp.isnan(loss):
            raise ValueError(Errors.E910.format(name=self.name))
        return float(loss), d_scores

    def initialize(
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
        nlp: Optional[Language] = None,
        labels: Optional[List[str]] = None,
    ):
        """Initialize the pipe for training, using a representative set
        of data examples.
        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        labels (Optional[List[str]]): The labels to add to the component, typically generated by the
            `init labels` command. If no labels are provided, the get_examples
            callback is used to extract the labels from the data.
        """
        print("Entered pipe TorchEntityRecognizer.initialize - ")
        validate_get_examples(get_examples, "TorchEntityRecognizer.initialize")
        if labels is not None:
            for tag in labels:
                self.add_label(tag)
        else:
            tags = set()
            for example in get_examples():
                for token in example.y:
                    if token.ent_type_:
                        tags.add(token.ent_type_)
            for tag in sorted(tags):
                self.add_label(tag)
        doc_sample = []
        for example in islice(get_examples(), 10):
            doc_sample.append(example.x)

        self._require_labels()
        assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
        print(nlp.config["components"][self.name]["model"]["nO"])
        # print(nlp.config["components"][self.name]["model"]["nI"])
        self.model.initialize(X=doc_sample, Y=self.labels)
        print("self.model.initialize exit")
        print(self.model.name)
        print(self.model.layers[0].ref_names)
        print(self.model.layers[1].ref_names)
        print(self.name)
        nlp.config["components"][self.name]["model"]["nO"] = len(self.labels)
        # nlp.config["components"][self.name]["model"]["nI"] = 768
        print(nlp.config["components"][self.name]["model"])

    def add_label(self, label: str) -> int:
        """Add a new label to the pipe.
        label (str): The label to add.
        RETURNS (int): 0 if label is already present, otherwise 1.
        """
        print("Entered pipe TorchEntityRecognizer.add_label - ")
        if not isinstance(label, str):
            raise ValueError(Errors.E187)
        if label in self.labels:
            return 0
        self._allow_extra_label()
        self.cfg["labels"].append(label)
        self.vocab.strings.add(label)
        return 1

    def score(self, examples: Iterable[Example], **kwargs) -> Dict[str, Any]:
        """Score a batch of examples.
        examples (Iterable[Example]): The examples to score.
        RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
        """
        print("Entered pipe TorchEntityRecognizer.score - ")
        validate_examples(examples, "TorchEntityRecognizer.score")
        return get_ner_prf(examples)
scripts/visualize_model.py ADDED
@@ -0,0 +1,19 @@
import spacy_streamlit
import typer
from torch_ner_model import build_torch_ner_model
from torch_ner_pipe import make_torch_entity_recognizer


def main(models: str, default_text: str):
    models = [name.strip() for name in models.split(",")]
    labels = ["person", "problem", "pronoun", "test", "treatment"]
    spacy_streamlit.visualize(
        models, default_text, visualizers=["ner"], ner_labels=labels
    )


if __name__ == "__main__":
    try:
        typer.run(main)
    except SystemExit:
        pass
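
The SystemExit guard is there because typer ends every invocation with sys.exit(), which would otherwise surface as an error inside a Streamlit session. The app is typically launched with something like "streamlit run scripts/visualize_model.py -- <model-path> <default text>" (model path and text are placeholders), passing a comma-separated list of model paths if several pipelines should be compared side by side.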