RASMUS's picture
Upload with huggingface_hub
b0ae254
raw
history blame
7.24 kB
Reading metadata...: 2165it [00:00, 13151.66it/s] | 0/30000 [00:00<?, ?it/s]
Reading metadata...: 1650it [00:00, 10631.92it/s]
[INFO|trainer_utils.py:744] 2023-11-18 11:52:13,133 >> The following columns in the training set don't have a corresponding argument in `WhisperForConditionalGeneration.forward` and have been ignored: input_length. If input_length are not expected by `WhisperForConditionalGeneration.forward`, you can safely ignore this message.
[WARNING|logging.py:329] 2023-11-18 11:52:15,242 >> `use_cache = True` is incompatible with gradient checkpointing. Setting `use_cache = False`...
Traceback (most recent call last):
File "/mnt/e/run_speech_recognition_seq2seq_streaming.py", line 679, in <module>
main()
File "/mnt/e/run_speech_recognition_seq2seq_streaming.py", line 628, in main
train_result = trainer.train(resume_from_checkpoint=checkpoint)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 1546, in train
return inner_training_loop(
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 1860, in _inner_training_loop
tr_loss_step = self.training_step(model, inputs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 2725, in training_step
loss = self.compute_loss(model, inputs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/trainer.py", line 2748, in compute_loss
outputs = model(**inputs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/accelerate/utils/operations.py", line 659, in forward
return model_forward(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/accelerate/utils/operations.py", line 647, in __call__
return convert_to_fp32(self.model_forward(*args, **kwargs))
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/amp/autocast_mode.py", line 16, in decorate_autocast
return func(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1683, in forward
outputs = self.model(
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1559, in forward
decoder_outputs = self.decoder(
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 1363, in forward
layer_outputs = self._gradient_checkpointing_func(
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_compile.py", line 24, in inner
return torch._dynamo.disable(fn, recursive)(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 328, in _fn
return fn(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 451, in checkpoint
return CheckpointFunction.apply(function, preserve, *args)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/autograd/function.py", line 539, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/utils/checkpoint.py", line 230, in forward
outputs = run_function(*args)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 841, in forward
hidden_states, cross_attn_weights, cross_attn_present_key_value = self.encoder_attn(
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/transformers/models/whisper/modeling_whisper.py", line 385, in forward
value_states = self._shape(self.v_proj(key_value_states), -1, bsz)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/rasmus/miniconda3/envs/WhisperFinetuneEnv/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 114, in forward
return F.linear(input, self.weight, self.bias)
torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacty of 15.99 GiB of which 0 bytes is free. Including non-PyTorch memory, this process has 17179869184.00 GiB memory in use. Of the allocated memory 14.59 GiB is allocated by PyTorch, and 530.39 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting max_split_size_mb to avoid fragmentation. See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF