arubenruben
committed on
Commit
•
f356772
1
Parent(s):
fa92478
Update deploy_pipeline.py
Browse files
- deploy_pipeline.py +5 -2
deploy_pipeline.py
CHANGED
@@ -21,6 +21,7 @@ class TokenizeAndAlignLabelsStep():
|
|
21 |
|
22 |
previous_word_idx = None
|
23 |
|
|
|
24 |
labels_mask = []
|
25 |
|
26 |
for word_idx in word_ids: # Set the special tokens to -100.
|
@@ -28,18 +29,20 @@ class TokenizeAndAlignLabelsStep():
|
|
28 |
labels_mask.append(False)
|
29 |
# Only label the first token of a given word.
|
30 |
elif word_idx != previous_word_idx:
|
31 |
-
labels_mask.append(True)
|
32 |
else:
|
33 |
labels_mask.append(False)
|
34 |
|
35 |
previous_word_idx = word_idx
|
36 |
|
37 |
-
tokenized_inputs["tokens"] =
|
38 |
tokenized_inputs["labels_mask"] = labels_mask
|
39 |
|
40 |
return tokenized_inputs
|
41 |
|
42 |
|
|
|
|
|
43 |
class BERT_CRF_Pipeline(Pipeline):
|
44 |
|
45 |
def _sanitize_parameters(self, **kwargs):
|
|
|
21 |
|
22 |
previous_word_idx = None
|
23 |
|
24 |
+
|
25 |
labels_mask = []
|
26 |
|
27 |
for word_idx in word_ids: # Set the special tokens to -100.
|
|
|
29 |
labels_mask.append(False)
|
30 |
# Only label the first token of a given word.
|
31 |
elif word_idx != previous_word_idx:
|
32 |
+
labels_mask.append(True)
|
33 |
else:
|
34 |
labels_mask.append(False)
|
35 |
|
36 |
previous_word_idx = word_idx
|
37 |
|
38 |
+
tokenized_inputs["tokens"] = tokenizer.decode(tokenized_inputs["input_ids"], skip_special_tokens=True)
|
39 |
tokenized_inputs["labels_mask"] = labels_mask
|
40 |
|
41 |
return tokenized_inputs
|
42 |
|
43 |
|
44 |
+
|
45 |
+
|
46 |
class BERT_CRF_Pipeline(Pipeline):
|
47 |
|
48 |
def _sanitize_parameters(self, **kwargs):
|