Spaces:

AmitGarage
/

Pytorch_clinical_NER

Runtime error

App Files Files Community

AmitGarage committed on Nov 29, 2022

Commit

a25ba4b

•

1 Parent(s): 945abe6

Update scripts/torch_ner_pipe.py

Browse files

Files changed (1) hide show

scripts/torch_ner_pipe.py +2 -24

scripts/torch_ner_pipe.py CHANGED Viewed

@@ -34,7 +34,6 @@ def set_torch_dropout_rate(model: Model, dropout_rate: float):
         model (Model): Thinc Model (with PyTorch sub-modules)
         dropout_rate (float): Dropout rate
     """
-    #print("Entered set_torch_dropout_rate  - ")
     set_dropout_rate(model, dropout_rate)
     func = model.get_ref("torch_model").attrs["set_dropout_rate"]
     func(dropout_rate)
@@ -78,7 +77,6 @@ def make_torch_entity_recognizer(nlp: Language, name: str, model: Model):
         in size, and be normalized as probabilities (all scores between 0 and 1,
         with the rows summing to 1).
     """
-    #print("Entered make_torch_entity_recognizer  - ")
     return TorchEntityRecognizer(nlp.vocab, model, name)
@@ -92,23 +90,17 @@ class TorchEntityRecognizer(TrainablePipe):
         name (str): The component instance name, used to add entries to the
             losses during training.
         """
-        #print("Entered pipe TorchEntityRecognizer.__init__ - ")
         self.vocab = vocab
         self.model = model
         self.name = name
         cfg = {"labels": []}
         self.cfg = dict(sorted(cfg.items()))
-        #print(self.vocab,self.model,self.name,self.cfg)
-        #print(self.model.layers[0].ref_names)
-        #print(self.model.layers[1].ref_names)
-        #print("Completed pipe TorchEntityRecognizer.__init__ - ")
     @property
     def labels(self) -> Tuple[str, ...]:
         """The labels currently added to the component.
         RETURNS (Tuple[str]): The labels.
         """
-        ##print("Entered TorchEntityRecognizer.labels - ")
         labels = ["O"]
         for label in self.cfg["labels"]:
             for iob in ["B", "I"]:
@@ -120,7 +112,6 @@ class TorchEntityRecognizer(TrainablePipe):
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: The model's prediction for each document.
         """
-        #print("Entered pipe TorchEntityRecognizer.predict - ")
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.labels)
@@ -144,7 +135,6 @@ class TorchEntityRecognizer(TrainablePipe):
         docs (Iterable[Doc]): The documents to modify.
         preds (Iterable[Ints1d]): The IDs to set, produced by TorchEntityRecognizer.predict.
         """
-        #print("Entered pipe TorchEntityRecognizer.set_annotations - ")
         if isinstance(docs, Doc):
             docs = [docs]
         for doc, tag_ids in zip(docs, preds):
@@ -176,7 +166,6 @@ class TorchEntityRecognizer(TrainablePipe):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
         """
-        #print("Entered pipe TorchEntityRecognizer.update - ")
         if losses is None:
             losses = {}
         losses.setdefault(self.name, 0.0)
@@ -208,7 +197,6 @@ class TorchEntityRecognizer(TrainablePipe):
         scores: Scores representing the model's predictions.
         RETURNS (Tuple[float, float]): The loss and the gradient.
         """
-        #print("Entered pipe TorchEntityRecognizer.get_loss - ")
         validate_examples(examples, "TorchEntityRecognizer.get_loss")
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
         truths = []
@@ -238,7 +226,6 @@ class TorchEntityRecognizer(TrainablePipe):
             `init labels` command. If no labels are provided, the get_examples
             callback is used to extract the labels from the data.
         """
-        #print("Entered pipe TorchEntityRecognizer.initialize - ")
         validate_get_examples(get_examples, "TorchEntityRecognizer.initialize")
         if labels is not None:
             for tag in labels:
@@ -257,24 +244,16 @@ class TorchEntityRecognizer(TrainablePipe):
         self._require_labels()
         assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
-        #print(nlp.config["components"][self.name]["model"]["nO"])
-        ##print(nlp.config["components"][self.name]["model"]["nI"])
         self.model.initialize(X=doc_sample, Y=self.labels)
-        #print("self.model.initialize exit")
-        #print(self.model.name)
-        #print(self.model.layers[0].ref_names)
-        #print(self.model.layers[1].ref_names)
-        #print(self.name)
         nlp.config["components"][self.name]["model"]["nO"] = len(self.labels)
-        #nlp.config["components"][self.name]["model"]["nI"] = 768
-        #print(nlp.config["components"][self.name]["model"])
     def add_label(self, label: str) -> int:
         """Add a new label to the pipe.
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
         """
-        #print("Entered pipe TorchEntityRecognizer.add_label - ")
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
         if label in self.labels:
@@ -289,6 +268,5 @@ class TorchEntityRecognizer(TrainablePipe):
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
         """
-        #print("Entered pipe TorchEntityRecognizer.score - ")
         validate_examples(examples, "TorchEntityRecognizer.score")
         return get_ner_prf(examples)

         model (Model): Thinc Model (with PyTorch sub-modules)
         dropout_rate (float): Dropout rate
     """
     set_dropout_rate(model, dropout_rate)
     func = model.get_ref("torch_model").attrs["set_dropout_rate"]
     func(dropout_rate)
         in size, and be normalized as probabilities (all scores between 0 and 1,
         with the rows summing to 1).
     """
     return TorchEntityRecognizer(nlp.vocab, model, name)
         name (str): The component instance name, used to add entries to the
             losses during training.
         """
         self.vocab = vocab
         self.model = model
         self.name = name
         cfg = {"labels": []}
         self.cfg = dict(sorted(cfg.items()))
     @property
     def labels(self) -> Tuple[str, ...]:
         """The labels currently added to the component.
         RETURNS (Tuple[str]): The labels.
         """
         labels = ["O"]
         for label in self.cfg["labels"]:
             for iob in ["B", "I"]:
         docs (Iterable[Doc]): The documents to predict.
         RETURNS: The model's prediction for each document.
         """
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             n_labels = len(self.labels)
         docs (Iterable[Doc]): The documents to modify.
         preds (Iterable[Ints1d]): The IDs to set, produced by TorchEntityRecognizer.predict.
         """
         if isinstance(docs, Doc):
             docs = [docs]
         for doc, tag_ids in zip(docs, preds):
             Updated using the component name as the key.
         RETURNS (Dict[str, float]): The updated losses dictionary.
         """
         if losses is None:
             losses = {}
         losses.setdefault(self.name, 0.0)
         scores: Scores representing the model's predictions.
         RETURNS (Tuple[float, float]): The loss and the gradient.
         """
         validate_examples(examples, "TorchEntityRecognizer.get_loss")
         loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
         truths = []
             `init labels` command. If no labels are provided, the get_examples
             callback is used to extract the labels from the data.
         """
         validate_get_examples(get_examples, "TorchEntityRecognizer.initialize")
         if labels is not None:
             for tag in labels:
         self._require_labels()
         assert len(doc_sample) > 0, Errors.E923.format(name=self.name)
         self.model.initialize(X=doc_sample, Y=self.labels)
         nlp.config["components"][self.name]["model"]["nO"] = len(self.labels)
+        if self.model.layers[0].maybe_get_ref("listener") != None :
+            nlp.config["components"][self.name]["model"]["width"] = self.model.layers[0].maybe_get_ref("listener").maybe_get_dim("nO")
     def add_label(self, label: str) -> int:
         """Add a new label to the pipe.
         label (str): The label to add.
         RETURNS (int): 0 if label is already present, otherwise 1.
         """
         if not isinstance(label, str):
             raise ValueError(Errors.E187)
         if label in self.labels:
         examples (Iterable[Example]): The examples to score.
         RETURNS (Dict[str, Any]): The NER precision, recall and f-scores.
         """
         validate_examples(examples, "TorchEntityRecognizer.score")
         return get_ner_prf(examples)