Adapt to the new transformers update (https://github.com/huggingface/transformers/pull/31116)
![2024-07-02_172230.png](https://cdn-uploads.huggingface.co/production/uploads/6683c7f3e365c0f6667d6bf2/HH5HIGvbvjADxw-9ENW5B.png)
The latest transformers release changed the return value shown in the screenshot: `_extract_past_from_model_output` now returns a `(cache_name, cache)` tuple instead of the cache itself, so the whole tuple gets stored as `past_key_values` and the per-layer unpack `cache_k, cache_v = kv_cache` fails with the error below.
This patch does not handle the case where the returned cache name is `cache_params`; contributions to complete it are welcome (a hedged sketch of one way to cover it follows the diff at the end).
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|█████████████████████████████████████████████████| 10/10 [00:15<00:00, 1.52s/it]
C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py:271: UserWarning: 1Torch was not compiled with flash attention. (Triggered internally at ..\aten\src\ATen\native\transformers\cuda\sdp_utils.cpp:455.)
context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer,
Traceback (most recent call last):
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\runpy.py", line 198, in _run_module_as_main
return _run_code(code, main_globals, None,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\runpy.py", line 88, in _run_code
exec(code, run_globals)
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy\__main__.py", line 39, in <module>
cli.main()
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 430, in main
run()
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher/../..\debugpy/..\debugpy\server\cli.py", line 284, in run_file
runpy.run_path(target, run_name="__main__")
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 321, in run_path
return _run_module_code(code, init_globals, run_name,
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 135, in _run_module_code
_run_code(code, mod_globals, init_globals,
File "c:\Users\Hiber\.vscode\extensions\ms-python.debugpy-2024.6.0-win32-x64\bundled\libs\debugpy\_vendored\pydevd\_pydevd_bundle\pydevd_runpy.py", line 124, in _run_code
exec(code, run_globals)
File "D:\works\gml4-9b-chat\main.py", line 35, in <module>
outputs = model.generate(**inputs, **gen_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\utils\_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\transformers\generation\utils.py", line 1914, in generate
result = self._sample(
^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\transformers\generation\utils.py", line 2651, in _sample
outputs = self(
^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\accelerate\hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 1005, in forward
transformer_outputs = self.transformer(
^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\accelerate\hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 901, in forward
hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\accelerate\hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 726, in forward
layer_ret = layer(
^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\accelerate\hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 629, in forward
attention_output, kv_cache = self.self_attention(
^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1532, in _wrapped_call_impl
return self._call_impl(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\torch\nn\modules\module.py", line 1541, in _call_impl
return forward_call(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Users\Hiber\.pyenv\pyenv-win\versions\3.11.2\Lib\site-packages\accelerate\hooks.py", line 166, in new_forward
output = module._old_forward(*args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Users\Hiber\.cache\huggingface\modules\transformers_modules\model\modeling_chatglm.py", line 494, in forward
cache_k, cache_v = kv_cache
^^^^^^^^^^^^^^^^
ValueError: too many values to unpack (expected 2)
- modeling_chatglm.py +10 -1

@@ -936,9 +936,18 @@ class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
         standardize_cache_format: bool = False,
     ) -> Dict[str, Any]:
         # update past_key_values
-        model_kwargs["past_key_values"] = self._extract_past_from_model_output(
+        past_output = self._extract_past_from_model_output(
             outputs, standardize_cache_format=standardize_cache_format
         )
+        # adapt transformers update (https://github.com/huggingface/transformers/pull/31116)
+        if(type(past_output) is tuple and type(past_output[0]) is str):
+            if past_output[0]=="past_key_values":
+                model_kwargs["past_key_values"] = past_output[1]
+            else:
+                model_kwargs["past_key_values"] = None
+                print(f"WARN: Get \"{past_output[0]}\" during self._extract_past_from_model_output, not \"past_key_values\"")
+        else:
+            model_kwargs["past_key_values"] = past_output

         # update attention mask
         if "attention_mask" in model_kwargs:
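For the open `cache_params` case mentioned at the top, here is a minimal, hypothetical sketch (not part of this commit) of storing the extracted cache under whichever name transformers reports instead of only printing a warning. It assumes `_extract_past_from_model_output` returns either the cache itself (older transformers) or a `(cache_name, cache)` tuple where `cache_name` may be `"past_key_values"` or `"cache_params"` (after PR #31116); the helper name is made up for illustration.

```python
from typing import Any, Dict, Tuple, Union

# Hypothetical helper, not part of this commit: a sketch of how the extracted
# cache could be stored under whichever key transformers reports, so that
# "cache_params" (used by Mamba-style caches) is kept instead of dropped.
def store_extracted_cache(model_kwargs: Dict[str, Any],
                          past_output: Union[Any, Tuple[str, Any]]) -> Dict[str, Any]:
    if (isinstance(past_output, tuple)
            and len(past_output) == 2
            and isinstance(past_output[0], str)):
        # New transformers behaviour (PR #31116): a (cache_name, cache) tuple,
        # where cache_name is "past_key_values" or "cache_params".
        cache_name, cache = past_output
        model_kwargs[cache_name] = cache
    else:
        # Old behaviour: the cache object is returned directly.
        model_kwargs["past_key_values"] = past_output
    return model_kwargs
```

Inside `_update_model_kwargs_for_generation`, the `if`/`else` block from the diff above could then be replaced by a single call such as `store_extracted_cache(model_kwargs, past_output)`.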