MPS does not support cumsum op with int64 input

#79
by adityapotdar - opened

I am trying to run this model on an M2 Max Apple laptop and installed the PyTorch nightly build, but it is giving the error: MPS does not support cumsum op with int64 input

PyTorch version: 2.0.1

Any suggestions?


RuntimeError Traceback (most recent call last)
Cell In[37], line 5
3 input_text = "Write me a poem about Machine Learning."
4 input_ids = tokenizer(input_text, return_tensors="pt").to("mps")
----> 5 outputs = model.generate(**input_ids)
6 print(tokenizer.decode(outputs[0]))

File ~/anaconda3/lib/python3.11/site-packages/torch/utils/_contextlib.py:115, in context_decorator..decorate_context(*args, **kwargs)
112 @functools.wraps(func)
113 def decorate_context(*args, **kwargs):
114 with ctx_factory():
--> 115 return func(*args, **kwargs)

File ~/anaconda3/lib/python3.11/site-packages/transformers/generation/utils.py:1544, in GenerationMixin.generate(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)
1526 return self.assisted_decoding(
1527 input_ids,
1528 candidate_generator=candidate_generator,
(...)
1540 **model_kwargs,
1541 )
1542 if generation_mode == GenerationMode.GREEDY_SEARCH:
1543 # 11. run greedy search
-> 1544 return self.greedy_search(
1545 input_ids,
1546 logits_processor=prepared_logits_processor,
1547 stopping_criteria=prepared_stopping_criteria,
1548 pad_token_id=generation_config.pad_token_id,
1549 eos_token_id=generation_config.eos_token_id,
1550 output_scores=generation_config.output_scores,
1551 output_logits=generation_config.output_logits,
1552 return_dict_in_generate=generation_config.return_dict_in_generate,
1553 synced_gpus=synced_gpus,
1554 streamer=streamer,
1555 **model_kwargs,
1556 )
1558 elif generation_mode == GenerationMode.CONTRASTIVE_SEARCH:
1559 if not model_kwargs["use_cache"]:

File ~/anaconda3/lib/python3.11/site-packages/transformers/generation/utils.py:2401, in GenerationMixin.greedy_search(self, input_ids, logits_processor, stopping_criteria, max_length, pad_token_id, eos_token_id, output_attentions, output_hidden_states, output_scores, output_logits, return_dict_in_generate, synced_gpus, streamer, **model_kwargs)
2398 break
2400 # prepare model inputs
-> 2401 model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
2403 # forward pass to get next token
2404 outputs = self(
2405 **model_inputs,
2406 return_dict=True,
2407 output_attentions=output_attentions,
2408 output_hidden_states=output_hidden_states,
2409 )

File ~/anaconda3/lib/python3.11/site-packages/transformers/models/gemma/modeling_gemma.py:1150, in GemmaForCausalLM.prepare_inputs_for_generation(self, input_ids, past_key_values, attention_mask, inputs_embeds, **kwargs)
1147 position_ids = kwargs.get("position_ids", None)
1148 if attention_mask is not None and position_ids is None:
1149 # create position_ids on the fly for batch generation
-> 1150 position_ids = attention_mask.long().cumsum(-1) - 1
1151 position_ids.masked_fill_(attention_mask == 0, 1)
1152 if past_key_values:

RuntimeError: MPS does not support cumsum op with int64 input

Make sure you have the latest nightly build of PyTorch installed. Alternatively, you can try casting the attention_mask to a data type supported by MPS, such as float32 (or int32), before performing the cumsum operation.

Sign up or log in to comment