arubenruben committed on
Commit
776fd18
1 Parent(s): 2249e1f

Update deploy_pipeline.py

Browse files
Files changed (1) hide show
  1. deploy_pipeline.py +10 -11
deploy_pipeline.py CHANGED
@@ -13,15 +13,14 @@ class TokenizeAndAlignLabelsStep():
13
 
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
-
17
- tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
-
24
-
25
  labels_mask = []
26
 
27
  for word_idx in word_ids: # Set the special tokens to -100.
@@ -35,7 +34,6 @@ class TokenizeAndAlignLabelsStep():
35
 
36
  previous_word_idx = word_idx
37
 
38
- tokenized_inputs["tokens"] = tokenizer.decode(tokenized_inputs["input_ids"], skip_special_tokens=True)
39
  tokenized_inputs["labels_mask"] = labels_mask
40
 
41
  return tokenized_inputs
@@ -47,15 +45,14 @@ class BERT_CRF_Pipeline(Pipeline):
47
  def _sanitize_parameters(self, **kwargs):
48
  return {}, {}, {}
49
 
50
- def preprocess(self, text):
 
51
 
52
  tokenizer = AutoTokenizer.from_pretrained(
53
  "neuralmind/bert-base-portuguese-cased", do_lower_case=False)
 
 
54
 
55
- TokenizeAndAlignLabelsStep().tokenize_and_align_labels(
56
- examples=text, tokenizer=tokenizer)
57
-
58
- return TokenizeAndAlignLabelsStep().tokenize_and_align_labels(examples=text, tokenizer=tokenizer)
59
 
60
  def _forward(self, tokenizer_results):
61
 
@@ -78,13 +75,15 @@ class BERT_CRF_Pipeline(Pipeline):
78
  return outputs
79
 
80
  def postprocess(self, model_outputs):
 
81
  # From Ner_tags to Ner_labels
82
  for i, label in enumerate(model_outputs[0]):
83
  model_outputs[0][i] = self.model.config.id2label[label]
84
-
85
  return model_outputs[0]
86
 
87
 
 
88
  def main():
89
 
90
  PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-HAREM-Selective-pipeline",
 
13
 
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
+
17
+ tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128, is_split_into_words=True)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
+
 
24
  labels_mask = []
25
 
26
  for word_idx in word_ids: # Set the special tokens to -100.
 
34
 
35
  previous_word_idx = word_idx
36
 
 
37
  tokenized_inputs["labels_mask"] = labels_mask
38
 
39
  return tokenized_inputs
 
45
  def _sanitize_parameters(self, **kwargs):
46
  return {}, {}, {}
47
 
48
+ def preprocess(self, inputs):
49
+ tokens = inputs['tokens']
50
 
51
  tokenizer = AutoTokenizer.from_pretrained(
52
  "neuralmind/bert-base-portuguese-cased", do_lower_case=False)
53
+
54
+ return TokenizeAndAlignLabelsStep().tokenize_and_align_labels(examples=tokens, tokenizer=tokenizer)
55
 
 
 
 
 
56
 
57
  def _forward(self, tokenizer_results):
58
 
 
75
  return outputs
76
 
77
  def postprocess(self, model_outputs):
78
+
79
  # From Ner_tags to Ner_labels
80
  for i, label in enumerate(model_outputs[0]):
81
  model_outputs[0][i] = self.model.config.id2label[label]
82
+
83
  return model_outputs[0]
84
 
85
 
86
+
87
  def main():
88
 
89
  PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-HAREM-Selective-pipeline",