Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!
Hi,
I need help with this error. I have already tried the following:
device = "cuda:0"
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to('cuda:0' if torch.cuda.is_available() else 'cpu')
Here's the error:
(paraphraser) user@sn01:~/paraphraser$ python ./test.py
/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/configuration_utils.py:381: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.7` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
warnings.warn(
Traceback (most recent call last):
File "/home/user/paraphraser/./test.py", line 38, in <module>
paraphrase("Should I go to church this Sunday?")
File "/home/user/paraphraser/./test.py", line 27, in paraphrase
outputs = model.generate(
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/utils.py", line 1548, in generate
model_kwargs = self._prepare_encoder_decoder_kwargs_for_generation(
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/generation/utils.py", line 661, in _prepare_encoder_decoder_kwargs_for_generation
model_kwargs["encoder_outputs"]: ModelOutput = encoder(**encoder_kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/transformers/models/t5/modeling_t5.py", line 1016, in forward
inputs_embeds = self.embed_tokens(input_ids)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
return forward_call(*args, **kwargs)
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/modules/sparse.py", line 162, in forward
return F.embedding(
File "/home/user/miniconda3/envs/paraphraser/lib/python3.10/site-packages/torch/nn/functional.py", line 2233, in embedding
return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu! (when checking argument for argument index in method wrapper_CUDA__index_select)
Here's my Python script:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
device = "cuda"
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

    outputs = model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return res

paraphrase("Should I go to church this Sunday?")
The problem is that your model is on the GPU but the input_ids tensor returned by the tokenizer is still on the CPU. You need to move the inputs to the same device as the model with .to(device). You can follow the code below and it will work fine.
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)
def paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=5,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=128
):
    input_ids = tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids.to(device)

    outputs = model.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer.batch_decode(outputs, skip_special_tokens=True)
    return res

paraphrase("Should I go to church this Sunday?")
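Two small side notes. First, you can also move the whole tokenizer output at once, since BatchEncoding.to() relocates every tensor it holds (input_ids and attention_mask) to the given device. Second, the UserWarning about temperature in your log is unrelated to this error: diverse beam search (num_beam_groups > 1) never samples, so temperature has no effect and can simply be dropped. A minimal sketch of both, using the same model and generation arguments as above:

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
tokenizer = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

# BatchEncoding.to() moves every tensor in the encoding to the model's device.
inputs = tokenizer(
    "paraphrase: Should I go to church this Sunday?",
    return_tensors="pt", padding="longest",
    max_length=128, truncation=True,
).to(device)

outputs = model.generate(
    **inputs,  # unpacks input_ids and attention_mask
    num_beams=5, num_beam_groups=5, num_return_sequences=5,
    repetition_penalty=10.0, diversity_penalty=3.0,
    no_repeat_ngram_size=2, max_length=128,
    # temperature is omitted: diverse beam search does not sample,
    # so it only triggers the UserWarning you saw.
)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True))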
Yes!
Thank you so much!