Unable to fine-tune BioGPT for NER task

#21
by tekeshwarhirwani - opened

Hi, I was using the run_ner.py script to fine-tune BioGPT for NER. It raised an error saying the script does not support models with a slow tokenizer and suggested running the legacy run_ner.py script instead (https://github.com/huggingface/transformers/tree/main/examples/legacy/token-classification).
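For reference, this is roughly how I confirmed that BioGPT only ships a slow tokenizer (I'm assuming the `microsoft/biogpt` checkpoint here):

```python
from transformers import AutoTokenizer

# Assuming the "microsoft/biogpt" checkpoint: a slow (pure Python) tokenizer
# reports is_fast == False, which is why the main run_ner.py refuses it.
tokenizer = AutoTokenizer.from_pretrained("microsoft/biogpt")
print(tokenizer.is_fast)  # False -> fall back to the legacy script
```

Now, when I run the legacy script, I am getting the following error: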

/content/transformers/examples/legacy/token-classification/run_ner.py:327 in β”‚
β”‚ β”‚
β”‚ β”‚
β”‚ 324 β”‚
β”‚ 325 β”‚
β”‚ 326 if __name__ == "__main__": β”‚
β”‚ ❱ 327 β”‚ main() β”‚
β”‚ 328 β”‚
β”‚ β”‚
β”‚ /content/transformers/examples/legacy/token-classification/run_ner.py:262 in β”‚
β”‚ main β”‚
β”‚ β”‚
β”‚ 259 β”‚ β”‚
β”‚ 260 β”‚ # Training β”‚
β”‚ 261 β”‚ if training_args.do_train: β”‚
β”‚ ❱ 262 β”‚ β”‚ trainer.train( β”‚
β”‚ 263 β”‚ β”‚ β”‚ model_path=model_args.model_name_or_path if os.path.isdir( β”‚
β”‚ 264 β”‚ β”‚ ) β”‚
β”‚ 265 β”‚ β”‚ trainer.save_model() β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1536 in β”‚
β”‚ train β”‚
β”‚ β”‚
β”‚ 1533 β”‚ β”‚ inner_training_loop = find_executable_batch_size( β”‚
β”‚ 1534 β”‚ β”‚ β”‚ self._inner_training_loop, self._train_batch_size, args.a β”‚
β”‚ 1535 β”‚ β”‚ ) β”‚
β”‚ ❱ 1536 β”‚ β”‚ return inner_training_loop( β”‚
β”‚ 1537 β”‚ β”‚ β”‚ args=args, β”‚
β”‚ 1538 β”‚ β”‚ β”‚ resume_from_checkpoint=resume_from_checkpoint, β”‚
β”‚ 1539 β”‚ β”‚ β”‚ trial=trial, β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/transformers/trainer.py:1779 in β”‚
β”‚ _inner_training_loop β”‚
β”‚ β”‚
β”‚ 1776 β”‚ β”‚ β”‚ β”‚ rng_to_sync = True β”‚
β”‚ 1777 β”‚ β”‚ β”‚ β”‚
β”‚ 1778 β”‚ β”‚ β”‚ step = -1 β”‚
β”‚ ❱ 1779 β”‚ β”‚ β”‚ for step, inputs in enumerate(epoch_iterator): β”‚
β”‚ 1780 β”‚ β”‚ β”‚ β”‚ total_batched_samples += 1 β”‚
β”‚ 1781 β”‚ β”‚ β”‚ β”‚ if rng_to_sync: β”‚
β”‚ 1782 β”‚ β”‚ β”‚ β”‚ β”‚ self._load_rng_state(resume_from_checkpoint) β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/accelerate/data_loader.py:377 in β”‚
β”‚ __iter__ β”‚
β”‚ β”‚
β”‚ 374 β”‚ β”‚ dataloader_iter = super().__iter__() β”‚
β”‚ 375 β”‚ β”‚ # We iterate one batch ahead to check when we are at the end β”‚
β”‚ 376 β”‚ β”‚ try: β”‚
β”‚ ❱ 377 β”‚ β”‚ β”‚ current_batch = next(dataloader_iter) β”‚
β”‚ 378 β”‚ β”‚ except StopIteration: β”‚
β”‚ 379 β”‚ β”‚ β”‚ yield β”‚
β”‚ 380 β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:633 β”‚
β”‚ in __next__ β”‚
β”‚ β”‚
β”‚ 630 β”‚ β”‚ β”‚ if self._sampler_iter is None: β”‚
β”‚ 631 β”‚ β”‚ β”‚ β”‚ # TODO(https://github.com/pytorch/pytorch/issues/7675 β”‚
β”‚ 632 β”‚ β”‚ β”‚ β”‚ self._reset() # type: ignore[call-arg] β”‚
β”‚ ❱ 633 β”‚ β”‚ β”‚ data = self._next_data() β”‚
β”‚ 634 β”‚ β”‚ β”‚ self._num_yielded += 1 β”‚
β”‚ 635 β”‚ β”‚ β”‚ if self._dataset_kind == _DatasetKind.Iterable and \ β”‚
β”‚ 636 β”‚ β”‚ β”‚ β”‚ β”‚ self._IterableDataset_len_called is not None and β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/torch/utils/data/dataloader.py:677 β”‚
β”‚ in _next_data β”‚
β”‚ β”‚
β”‚ 674 β”‚ β”‚
β”‚ 675 β”‚ def _next_data(self): β”‚
β”‚ 676 β”‚ β”‚ index = self._next_index() # may raise StopIteration β”‚
β”‚ ❱ 677 β”‚ β”‚ data = self._dataset_fetcher.fetch(index) # may raise StopIt β”‚
β”‚ 678 β”‚ β”‚ if self._pin_memory: β”‚
β”‚ 679 β”‚ β”‚ β”‚ data = _utils.pin_memory.pin_memory(data, self._pin_memor β”‚
β”‚ 680 β”‚ β”‚ return data β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/torch/utils/data/_utils/fetch.py:54 β”‚
β”‚ in fetch β”‚
β”‚ β”‚
β”‚ 51 β”‚ β”‚ β”‚ β”‚ data = [self.dataset[idx] for idx in possibly_batched_i β”‚
β”‚ 52 β”‚ β”‚ else: β”‚
β”‚ 53 β”‚ β”‚ β”‚ data = self.dataset[possibly_batched_index] β”‚
β”‚ ❱ 54 β”‚ β”‚ return self.collate_fn(data) β”‚
β”‚ 55 β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/transformers/trainer_utils.py:698 in β”‚
β”‚ __call__ β”‚
β”‚ β”‚
β”‚ 695 β”‚ β”‚
β”‚ 696 β”‚ def __call__(self, features: List[dict]): β”‚
β”‚ 697 β”‚ β”‚ features = [self._remove_columns(feature) for feature in featu β”‚
β”‚ ❱ 698 β”‚ β”‚ return self.data_collator(features) β”‚
β”‚ 699 β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/transformers/data/data_collator.py:7 β”‚
β”‚ 0 in default_data_collator β”‚
β”‚ β”‚
β”‚ 67 β”‚ # on the whole batch. β”‚
β”‚ 68 β”‚ β”‚
β”‚ 69 β”‚ if return_tensors == "pt": β”‚
β”‚ ❱ 70 β”‚ β”‚ return torch_default_data_collator(features) β”‚
β”‚ 71 β”‚ elif return_tensors == "tf": β”‚
β”‚ 72 β”‚ β”‚ return tf_default_data_collator(features) β”‚
β”‚ 73 β”‚ elif return_tensors == "np": β”‚
β”‚ β”‚
β”‚ /usr/local/lib/python3.10/dist-packages/transformers/data/data_collator.py:1 β”‚
β”‚ 36 in torch_default_data_collator β”‚
β”‚ β”‚
β”‚ 133 β”‚ β”‚ β”‚ elif isinstance(v, np.ndarray): β”‚
β”‚ 134 β”‚ β”‚ β”‚ β”‚ batch[k] = torch.tensor(np.stack([f[k] for f in featu β”‚
β”‚ 135 β”‚ β”‚ β”‚ else: β”‚
β”‚ ❱ 136 β”‚ β”‚ β”‚ β”‚ batch[k] = torch.tensor([f[k] for f in features]) β”‚
β”‚ 137 β”‚ β”‚
β”‚ 138 β”‚ return batch β”‚
β”‚ 139 β”‚
╰──────────────────────────────────────────────────────────────────────────────╯
RuntimeError: Could not infer dtype of NoneType
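From the last frame it looks like torch_default_data_collator ends up calling torch.tensor on a feature list that contains None values. Here is a minimal sketch that reproduces the same message (just my guess at the cause; the field name and values below are made up):

```python
import torch

# If any example in the batch carries a None feature value (e.g. a label
# that was never filled in for a token), the default collator hits this:
features = [{"labels": 0}, {"labels": None}]
torch.tensor([f["labels"] for f in features])
# RuntimeError: Could not infer dtype of NoneType
```

Any pointers on what is going wrong, or how the data should be preprocessed so no None values reach the collator, would be appreciated.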
