wzkariampuzha committed
Commit 5516b92
1 Parent(s): 30a4f12

specifying batch_size did not work - using training argument specification per HF docs and ChatGPT

Files changed (1):
epi_pipeline.py (+9 -3)
epi_pipeline.py CHANGED
@@ -426,7 +426,7 @@ from torch import nn
 import numpy as np
 from unidecode import unidecode
 import re
-from transformers import BertConfig, AutoModelForTokenClassification, BertTokenizer, Trainer
+from transformers import BertConfig, AutoModelForTokenClassification, BertTokenizer, Trainer, TrainingArguments
 from unidecode import unidecode
 from collections import OrderedDict
 import json
@@ -655,7 +655,13 @@ class NER_Pipeline:
         #model = AutoModelForTokenClassification.from_pretrained(name_or_path_to_model_folder)
         self.config = BertConfig.from_pretrained(name_or_path_to_model_folder)
         self.labels = {re.sub(".-","",label) for label in self.config.label2id.keys() if label != "O"}
-        self.trainer = Trainer(model=AutoModelForTokenClassification.from_pretrained(name_or_path_to_model_folder))
+        # Create the TrainingArguments object and set the batch size
+        training_args = TrainingArguments(
+            per_device_eval_batch_size=16,
+            per_gpu_eval_batch_size=16,
+            # other training arguments...
+        )
+        self.trainer = Trainer(model=AutoModelForTokenClassification.from_pretrained(name_or_path_to_model_folder),args=training_args)
 
     def __str__(self):
         return "Instantiation: pipe = NER_Pipeline(name_or_path_to_model_folder)"+"\n Calling: output_dict = pipe(text)"
@@ -669,7 +675,7 @@ class NER_Pipeline:
         output_dict = {label:[] for label in self.labels}
 
         dataset = NerDataset(text, self.bert_tokenizer, self.config)
-        predictions, label_ids, _ = self.trainer.predict(dataset, batch_size=16)
+        predictions, label_ids, _ = self.trainer.predict(dataset)
         preds_list, _ = self.align_predictions(predictions, label_ids)
         #dataset.ner_inputs.labels = preds_list
         for ner_input, sent_pred_list in zip(dataset.ner_inputs, preds_list):
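For readers unfamiliar with the Trainer API, the reasoning behind the change is that Trainer.predict() has no batch_size parameter; the evaluation batch size is read from the TrainingArguments object passed to the Trainer. Below is a minimal standalone sketch of that pattern, assuming a placeholder model path and dataset variable that are not identifiers from this repository; per_gpu_eval_batch_size, also set in the commit, is just the older, deprecated name for the same setting.

# Minimal sketch (placeholder names, not from epi_pipeline.py): set the evaluation
# batch size on TrainingArguments rather than passing batch_size to predict().
from transformers import AutoModelForTokenClassification, Trainer, TrainingArguments

model = AutoModelForTokenClassification.from_pretrained("path/to/model_folder")  # placeholder path

training_args = TrainingArguments(
    output_dir="tmp_trainer",           # required by most transformers releases
    per_device_eval_batch_size=16,      # used by trainer.predict() and trainer.evaluate()
)

trainer = Trainer(model=model, args=training_args)

# predict() returns a PredictionOutput namedtuple: (predictions, label_ids, metrics).
# predictions, label_ids, _ = trainer.predict(eval_dataset)  # eval_dataset is a placeholder

With this in place, predict() batches the dataset at 16 examples per device, so the batch_size keyword removed in the last hunk above is no longer needed.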