Got an error when testing with lm_eval

#1
by xianf - opened
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 416, in <module>
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 416, in <module>
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 416, in <module>
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 416, in <module>
    cli_evaluate()
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 347, in cli_evaluate
    cli_evaluate()
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 347, in cli_evaluate
    cli_evaluate()
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 347, in cli_evaluate
    cli_evaluate()
  File "/workspace/lm_eval_new/lm-evaluation-harness/main.py", line 347, in cli_evaluate
    results = evaluator.simple_evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluator.simple_evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluator.simple_evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluator.simple_evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 256, in simple_evaluate
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 256, in simple_evaluate
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 256, in simple_evaluate
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 256, in simple_evaluate
    results = evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    results = evaluate(
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/utils.py", line 321, in _wrapper
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 406, in evaluate
    return fn(*args, **kwargs)
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 406, in evaluate
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 406, in evaluate
    return fn(*args, **kwargs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/evaluator.py", line 406, in evaluate
    resps = getattr(lm, reqtype)(cloned_reqs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/api/model.py", line 340, in loglikelihood
    resps = getattr(lm, reqtype)(cloned_reqs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/api/model.py", line 340, in loglikelihood
    resps = getattr(lm, reqtype)(cloned_reqs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/api/model.py", line 340, in loglikelihood
    resps = getattr(lm, reqtype)(cloned_reqs)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/api/model.py", line 340, in loglikelihood
    return self._loglikelihood_tokens(new_reqs, disable_tqdm=disable_tqdm)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 1086, in _loglikelihood_tokens
    return self._loglikelihood_tokens(new_reqs, disable_tqdm=disable_tqdm)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 1086, in _loglikelihood_tokens
    return self._loglikelihood_tokens(new_reqs, disable_tqdm=disable_tqdm)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 1086, in _loglikelihood_tokens
    return self._loglikelihood_tokens(new_reqs, disable_tqdm=disable_tqdm)
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 1086, in _loglikelihood_tokens
    self._model_call(batched_inps, **call_kwargs), dim=-1
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 801, in _model_call
    self._model_call(batched_inps, **call_kwargs), dim=-1
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 801, in _model_call
    self._model_call(batched_inps, **call_kwargs), dim=-1
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 801, in _model_call
    return self.model(inps).logits
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    self._model_call(batched_inps, **call_kwargs), dim=-1
  File "/workspace/lm_eval_new/lm-evaluation-harness/lm_eval/models/huggingface.py", line 801, in _model_call
    return self.model(inps).logits
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return self.model(inps).logits
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 870, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 870, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 870, in forward
    return self.model(inps).logits
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 870, in forward
    transformer_outputs = self.transformer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    transformer_outputs = self.transformer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    transformer_outputs = self.transformer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 772, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 772, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 772, in forward
    transformer_outputs = self.transformer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 772, in forward
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 613, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 613, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 613, in forward
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    layer_ret = layer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    layer_ret = layer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    layer_ret = layer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 613, in forward
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 516, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 516, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 516, in forward
    attention_output, kv_cache = self.self_attention(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    attention_output, kv_cache = self.self_attention(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    layer_ret = layer(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 413, in forward
    attention_output, kv_cache = self.self_attention(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 413, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 413, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 516, in forward
    context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 266, in forward
    context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    attention_output, kv_cache = self.self_attention(
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
RuntimeError: shape '[2400, 75, -1]' is invalid for input of size 1536000
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 266, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 266, in forward
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 413, in forward
    value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
RuntimeError: shape '[2912, 91, -1]' is invalid for input of size 1863680
    value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
RuntimeError: shape '[2528, 79, -1]' is invalid for input of size 1617920
    context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
  File "/usr/local/python/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1190, in _call_impl
    return forward_call(*input, **kwargs)
  File "/workspace/lm-evaluation-harness/cache/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 266, in forward
    value_layer = value_layer.view(output_size[0] * output_size[1], value_layer.size(2), -1)
RuntimeError: shape '[2848, 89, -1]' is invalid for input of size 1822720

This looks like either the model is not being loaded correctly on your transformers version, or something is going wrong in how the harness loads it. Can you provide the versions of those modules?

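For reference, something like the following prints the versions in question (the "lm_eval" distribution name here is an assumption; if you run the harness straight from a git checkout, the commit hash is more useful):

# Sketch: report torch / transformers / lm_eval versions.
# importlib.metadata is in the standard library on Python 3.8+.
import importlib.metadata as md

import torch
import transformers

print("torch:", torch.__version__)
print("transformers:", transformers.__version__)
try:
    print("lm_eval:", md.version("lm_eval"))
except md.PackageNotFoundError:
    print("lm_eval: not installed as a package (source checkout)")
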
Here is a simple inference test:

# coding: utf-8
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

def generate_text(prefix, length=50, temperature=1.0, k=0, p=0.9):
    model_path = "/workspace/llm/open_source/glm-4-9b"

    # Load the pretrained model and its tokenizer
    model = AutoModelForCausalLM.from_pretrained(model_path, trust_remote_code=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False, trust_remote_code=True)

    # Encode the input prefix
    input_ids = tokenizer.encode(prefix, return_tensors='pt', add_special_tokens=False)

    # Generate text
    sample_outputs = model.generate(
        input_ids,
        do_sample=True,  # enable sampling
        max_length=length + len(input_ids[0]),
        temperature=temperature,  # controls randomness of generation
        top_k=k,  # top-k sampling
        top_p=p,  # nucleus (top-p) sampling
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode the generated text
    generated_text = tokenizer.decode(sample_outputs[0], skip_special_tokens=True)
    return generated_text

prefix = "腾讯是"  # your input prefix
generated_text = generate_text(prefix)
print(generated_text)

Then the log is:

Loading checkpoint shards: 100%|██████████| 10/10 [01:11<00:00,  7.20s/it]
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Traceback (most recent call last):
  File "/workspace/llm/open_source/glm-4-9b/test_hf_generate.py", line 33, in <module>
    generated_text = generate_text(prefix)
  File "/workspace/llm/open_source/glm-4-9b/test_hf_generate.py", line 18, in generate_text
    sample_outputs = model.generate(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/autograd/grad_mode.py", line 27, in decorate_context
    return func(*args, **kwargs)
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/transformers/generation/utils.py", line 1758, in generate
    result = self._sample(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/transformers/generation/utils.py", line 2397, in _sample
    outputs = self(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 870, in forward
    transformer_outputs = self.transformer(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 772, in forward
    hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 613, in forward
    layer_ret = layer(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 516, in forward
    attention_output, kv_cache = self.self_attention(
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 413, in forward
    context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
  File "/root/miniconda2/envs/py39/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/root/.cache/huggingface/modules/transformers_modules/glm-4-9b/modeling_chatglm.py", line 268, in forward
    attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1)
RuntimeError: shape '[64, 32, -1]' is invalid for input of size 128
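
For what it's worth, the RuntimeError itself is just torch.Tensor.view() refusing an impossible reshape: the attention code asks for leading dimensions 64 * 32 = 2048, which cannot be carved out of a tensor holding only 128 elements. A minimal sketch (numbers copied from the traceback above, not taken from the model) that reproduces the same message:

import torch

# 128 elements cannot be viewed as (64, 32, -1): the first two dimensions
# alone already require a multiple of 2048 elements, so view() raises the
# exact error seen above.
t = torch.zeros(128)
try:
    t.view(64, 32, -1)
except RuntimeError as e:
    print(e)  # shape '[64, 32, -1]' is invalid for input of size 128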
