arubenruben committed on
Commit 1d68afa
1 Parent(s): 38fff5a

Update deploy_pipeline.py

Files changed (1)
  1. deploy_pipeline.py  +10 -12
deploy_pipeline.py CHANGED

@@ -14,7 +14,7 @@ class TokenizeAndAlignLabelsStep():
     # Adapted From : https://huggingface.co/docs/transformers/tasks/token_classification
     def tokenize_and_align_labels(self, examples, tokenizer):

-        tokenized_inputs = tokenizer(examples, padding='max_length', max_length=512)
+        tokenized_inputs = tokenizer(examples, padding='max_length', truncation=True, max_length=128)

         # Map tokens to their respective word.
         word_ids = tokenized_inputs.word_ids()

@@ -34,9 +34,7 @@ class TokenizeAndAlignLabelsStep():

             previous_word_idx = word_idx

-        tokenized_inputs["tokens"] = examples
-        tokenized_inputs["ner_tags"] = []
-        tokenized_inputs["labels"] = []
+        tokenized_inputs["tokens"] = examples
         tokenized_inputs["labels_mask"] = labels_mask

         return tokenized_inputs

@@ -60,16 +58,16 @@ class BERT_CRF_Pipeline(Pipeline):
     def _forward(self, tokenizer_results):

         input_ids = torch.tensor(
-            tokenizer_results['input_ids'], dtype=torch.long).unsqueeze(0)
+            tokenizer_results['input_ids'], dtype=torch.long, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)

         token_type_ids = torch.tensor(
-            tokenizer_results['token_type_ids'], dtype=torch.long).unsqueeze(0)
+            tokenizer_results['token_type_ids'], dtype=torch.long, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)

         attention_mask = torch.tensor(
-            tokenizer_results['attention_mask'], dtype=torch.bool).unsqueeze(0)
+            tokenizer_results['attention_mask'], dtype=torch.bool, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)

         labels_mask = torch.tensor(
-            tokenizer_results['labels_mask'], dtype=torch.bool).unsqueeze(0)
+            tokenizer_results['labels_mask'], dtype=torch.bool, device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")).unsqueeze(0)

         # input_ids, token_type_ids, attention_mask, labels, labels_mask
         outputs = self.model(input_ids=input_ids, token_type_ids=token_type_ids,

@@ -87,12 +85,12 @@ class BERT_CRF_Pipeline(Pipeline):

 def main():

-    PIPELINE_REGISTRY.register_pipeline("arubenruben/PT-BERT-Large-CRF-HAREM-Default-pipeline",
+    PIPELINE_REGISTRY.register_pipeline("PT-BERT-Large-CRF-HAREM-Default-pipeline",
                                         pipeline_class=BERT_CRF_Pipeline,
                                         pt_model=AutoModelForTokenClassification,
                                         )
-    classifier = pipeline("arubenruben/PT-BERT-Large-CRF-HAREM-Default-pipeline", model="arubenruben/PT-BERT-Large-CRF-HAREM-Default",
-                          device='cuda' if torch.cuda.is_available() else 'cpu', trust_remote_code=True)
+    classifier = pipeline("PT-BERT-Large-CRF-HAREM-Default-pipeline", model="arubenruben/PT-BERT-Large-CRF-HAREM-Default",
+                          device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"), trust_remote_code=True)
     out_path = os.path.join(sys.path[0], 'out', 'pipeline')
     repo = Repository(
         out_path, clone_from=f"arubenruben/PT-BERT-Large-CRF-HAREM-Default", use_auth_token=True)

@@ -100,4 +98,4 @@ def main():
     # repo.git_pull()

     classifier.save_pretrained(out_path)
-    repo.push_to_hub()
+    repo.push_to_hub()
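
After this commit, the script registers the custom task as "PT-BERT-Large-CRF-HAREM-Default-pipeline", creates the input tensors directly on CUDA when it is available, and pushes the saved pipeline to the arubenruben/PT-BERT-Large-CRF-HAREM-Default repository. A minimal sketch of how the deployed pipeline could then be consumed, assuming classifier.save_pretrained() and repo.push_to_hub() have placed the custom pipeline metadata in that repo; the Portuguese example sentence is purely illustrative, only the model id and trust_remote_code requirement come from the diff above:

# Sketch only (assumption): consuming the pipeline after push_to_hub() completes.
import torch
from transformers import pipeline

ner = pipeline(
    model="arubenruben/PT-BERT-Large-CRF-HAREM-Default",
    device=torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"),
    trust_remote_code=True,  # needed so the Hub-hosted BERT_CRF_Pipeline class is loaded
)

# Hypothetical input; the updated tokenize step pads/truncates to 128 tokens.
print(ner("A Maria mudou-se de Lisboa para o Porto em 2020."))

Passing device explicitly mirrors the change in _forward: with the tensors now built on the CUDA device whenever it is available, the inputs land on the same device as the model and CPU/GPU mismatch errors are avoided when the pipeline runs on GPU.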