|
Reading metadata...: 2165it [00:00, 13151.66it/s] | 0/30000 [00:00<?, ?it/s] |
|
Reading metadata...: 1650it [00:00, 10631.92it/s] |
|
|
|
[INFO|trainer_utils.py:744] 2023-11-18 11:52:13,133 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message. |
|
[WARNING|logging.py:329] 2023-11-18 11:52:15,242 >> `use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`... |
|
Traceback (most recent call last): |
|
File "/mnt/e/run_speech_recognition_seq2seq_streaming.py", line 679, in <module> |
|
main() |
|
File "/mnt/e/run_speech_recognition_seq2seq_streaming.py", line 628, in main |
|
train_result = trainer.train(resume_from_checkpoint=checkpoint) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 1546, in train |
|
return inner_training_loop( |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop |
|
tr_loss_step = self.training_step(model, inputs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step |
|
loss = self.compute_loss(model, inputs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 2748, in compute_loss |
|
outputs = model(**inputs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/accelerate/utils/operations.py", line 659, in forward |
|
return model_forward(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/accelerate/utils/operations.py", line 647, in __call__ |
|
return convert_to_fp32(self.model_forward(*args, **kwargs)) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast |
|
return func(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1683, in forward |
|
outputs = self.model( |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1559, in forward |
|
decoder_outputs = self.decoder( |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1363, in forward |
|
layer_outputs = self._gradient_checkpointing_func( |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner |
|
return torch._dynamo.disable(fn, recursive)(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 328, in _fn |
|
return fn(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner |
|
return fn(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 451, in checkpoint |
|
return CheckpointFunction.apply(function, preserve, *args) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/autograd/function.py", line 539, in apply |
|
return super().apply(*args, **kwargs) # type: ignore[misc] |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 230, in forward |
|
outputs = run_function(*args) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 841, in forward |
|
hidden_states, cross_attn_weights, cross_attn_present_key_value = self.encoder_attn( |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 385, in forward |
|
value_states = self._shape(self.v_proj(key_value_states), -1, bsz) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl |
|
return self._call_impl(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl |
|
return forward_call(*args, **kwargs) |
|
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward |
|
return F.linear(input, self.weight, self.bias) |
|
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacty of 15.99 GiB of which 0 bytes is free. Including non-PyTorch memory, this process has 17179869184.00 GiB memory in use. Of the allocated memory 14.59 GiB is allocated by PyTorch, and 530.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF |