arubenruben committed on
Commit
02dffa0
1 Parent(s): eb14d01

Update deploy_pipeline.py

Browse files
Files changed (1) hide show
  1. deploy_pipeline.py +8 -13
deploy_pipeline.py CHANGED
@@ -14,14 +14,13 @@ class TokenizeAndAlignLabelsStep():
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
 
17
- tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
-
24
- tokens= []
25
  labels_mask = []
26
 
27
  for word_idx in word_ids: # Set the special tokens to -100.
@@ -29,20 +28,17 @@ class TokenizeAndAlignLabelsStep():
29
  labels_mask.append(False)
30
  # Only label the first token of a given word.
31
  elif word_idx != previous_word_idx:
32
- labels_mask.append(True)
33
- tokens.append(tokenized_inputs["input_ids"][word_idx])
34
  else:
35
  labels_mask.append(False)
36
 
37
  previous_word_idx = word_idx
38
 
39
- tokenized_inputs["tokens"] = tokenizer.decode(tokens, skip_special_tokens=True)
40
  tokenized_inputs["labels_mask"] = labels_mask
41
 
42
  return tokenized_inputs
43
 
44
 
45
-
46
  class BERT_CRF_Pipeline(Pipeline):
47
 
48
  def _sanitize_parameters(self, **kwargs):
@@ -76,18 +72,17 @@ class BERT_CRF_Pipeline(Pipeline):
76
  outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
77
  attention_mask=attention_mask, labels=None, labels_mask=labels_mask)
78
 
79
- return {'outputs': outputs, 'tokens': tokenizer_results['tokens']}
80
 
81
- def postprocess(self, outputs):
82
 
83
- model_outputs = outputs['outputs']
84
- tokens = outputs['tokens']
85
-
86
  # From Ner_tags to Ner_labels
87
  for i, label in enumerate(model_outputs[0]):
88
  model_outputs[0][i] = self.model.config.id2label[label]
89
 
90
- return model_outputs[0], tokens
 
 
91
 
92
 
93
  def main():
 
14
  # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
15
  def tokenize_and_align_labels(self, examples, tokenizer):
16
 
17
+ tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128, is_split_into_words=True)
18
 
19
  # Map tokens to their respective word.
20
  word_ids = tokenized_inputs.word_ids()
21
 
22
  previous_word_idx = None
23
+
 
24
  labels_mask = []
25
 
26
  for word_idx in word_ids: # Set the special tokens to -100.
 
28
  labels_mask.append(False)
29
  # Only label the first token of a given word.
30
  elif word_idx != previous_word_idx:
31
+ labels_mask.append(True)
 
32
  else:
33
  labels_mask.append(False)
34
 
35
  previous_word_idx = word_idx
36
 
 
37
  tokenized_inputs["labels_mask"] = labels_mask
38
 
39
  return tokenized_inputs
40
 
41
 
 
42
  class BERT_CRF_Pipeline(Pipeline):
43
 
44
  def _sanitize_parameters(self, **kwargs):
 
72
  outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,
73
  attention_mask=attention_mask, labels=None, labels_mask=labels_mask)
74
 
75
+ return outputs
76
 
77
+ def postprocess(self, model_outputs):
78
 
 
 
 
79
  # From Ner_tags to Ner_labels
80
  for i, label in enumerate(model_outputs[0]):
81
  model_outputs[0][i] = self.model.config.id2label[label]
82
 
83
+ return model_outputs[0]
84
+
85
+
86
 
87
 
88
  def main():