Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!

#15
by Drudkh - opened

Hi,

I need help with this error and have tried the following:

device = "cuda:0"
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to('cuda:0' if torch.cuda.is_available() else 'cpu')

Here's the error:

(paraphraser) user@sn01:~/paraphraser$ python ./test.py
/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:381: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
  warnings.warn(
Traceback (most recent call last):
  File "/home/user/paraphraser/./test.py", line 38, in <module>
    paraphrase("Should I go to church this Sunday?")
  File "/home/user/paraphraser/./test.py", line 27, in paraphrase
    outputs = model.generate(
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
    return func(*args, **kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/utils.py", line 1548, in generate
    model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/utils.py", line 661, in _prepare_encoder_decoder_kwargs_for_generation
    model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1016, in forward
    inputs_embeds = self.embed_tokens(input_ids)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 162, in forward
    return F.embedding(
  File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/functional.py", line 2233, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)

Here's my Python script:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

    outputs = model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return res

paraphrase("Should I go to church this Sunday?")

Your model is on the GPU, but the tokenized input_ids are still on the CPU; they need to be moved to the same device before calling generate. You can follow the code below and it will work fine.

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    # Move the tokenized input_ids to the same device as the model;
    # this is the fix for the "two devices" RuntimeError.
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids.to(device)

    outputs = model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return res

paraphrase("Should I go to church this Sunday?")

Yes!
Thank you so much!

Drudkh changed discussion status to closed
