arubenruben
/

NER-PT-BERT-CRF-Conll2003

Token Classification

Model card · Files and versions · Community

arubenruben committed on May 29, 2023

Commit

f356772

·

1 Parent(s): fa92478

Update deploy_pipeline.py

Files changed (1) hide show

deploy_pipeline.py +5 -2

deploy_pipeline.py CHANGED Viewed

@@ -21,6 +21,7 @@ class TokenizeAndAlignLabelsStep():
         previous_word_idx = None
         labels_mask = []
         for word_idx in word_ids:  # Set the special tokens to -100.
@@ -28,18 +29,20 @@ class TokenizeAndAlignLabelsStep():
                 labels_mask.append(False)
             # Only label the first token of a given word.
             elif word_idx != previous_word_idx:
-                labels_mask.append(True)
             else:
                 labels_mask.append(False)
             previous_word_idx = word_idx
-        tokenized_inputs["tokens"] = examples
         tokenized_inputs["labels_mask"] = labels_mask
         return tokenized_inputs
 class BERT_CRF_Pipeline(Pipeline):
     def _sanitize_parameters(self, **kwargs):

         previous_word_idx = None
         labels_mask = []
         for word_idx in word_ids:  # Set the special tokens to -100.
                 labels_mask.append(False)
             # Only label the first token of a given word.
             elif word_idx != previous_word_idx:
+                labels_mask.append(True)
             else:
                 labels_mask.append(False)
             previous_word_idx = word_idx
+        tokenized_inputs["tokens"] = tokenizer.decode(tokenized_inputs["input_ids"], skip_special_tokens=True)
         tokenized_inputs["labels_mask"] = labels_mask
         return tokenized_inputs
 class BERT_CRF_Pipeline(Pipeline):
     def _sanitize_parameters(self, **kwargs):