Gunulhona committed on
Commit 29463e6 · verified · 1 Parent(s): 017a757

Upload folder using huggingface_hub

This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50)
  1. .gitattributes +1 -0
  2. MoRA/README.md +68 -0
  3. MoRA/config.py +4 -0
  4. MoRA/model.py +4 -0
  5. MoRA/peft_mora/__init__.py +90 -0
  6. MoRA/peft_mora/__pycache__/__init__.cpython-312.pyc +0 -0
  7. MoRA/peft_mora/__pycache__/auto.cpython-312.pyc +0 -0
  8. MoRA/peft_mora/__pycache__/config.cpython-312.pyc +0 -0
  9. MoRA/peft_mora/__pycache__/import_utils.cpython-312.pyc +0 -0
  10. MoRA/peft_mora/__pycache__/mapping.cpython-312.pyc +0 -0
  11. MoRA/peft_mora/__pycache__/mixed_model.cpython-312.pyc +0 -0
  12. MoRA/peft_mora/__pycache__/peft_model.cpython-312.pyc +0 -0
  13. MoRA/peft_mora/auto.py +170 -0
  14. MoRA/peft_mora/config.py +270 -0
  15. MoRA/peft_mora/helpers.py +113 -0
  16. MoRA/peft_mora/import_utils.py +73 -0
  17. MoRA/peft_mora/mapping.py +168 -0
  18. MoRA/peft_mora/mixed_model.py +402 -0
  19. MoRA/peft_mora/peft_model.py +1929 -0
  20. MoRA/peft_mora/py.typed +0 -0
  21. MoRA/peft_mora/tuners/__init__.py +32 -0
  22. MoRA/peft_mora/tuners/__pycache__/__init__.cpython-312.pyc +0 -0
  23. MoRA/peft_mora/tuners/__pycache__/lycoris_utils.cpython-312.pyc +0 -0
  24. MoRA/peft_mora/tuners/__pycache__/tuners_utils.cpython-312.pyc +0 -0
  25. MoRA/peft_mora/tuners/adalora/__init__.py +37 -0
  26. MoRA/peft_mora/tuners/adalora/__pycache__/__init__.cpython-312.pyc +0 -0
  27. MoRA/peft_mora/tuners/adalora/__pycache__/config.cpython-312.pyc +0 -0
  28. MoRA/peft_mora/tuners/adalora/__pycache__/gptq.cpython-312.pyc +0 -0
  29. MoRA/peft_mora/tuners/adalora/__pycache__/layer.cpython-312.pyc +0 -0
  30. MoRA/peft_mora/tuners/adalora/__pycache__/model.cpython-312.pyc +0 -0
  31. MoRA/peft_mora/tuners/adalora/bnb.py +145 -0
  32. MoRA/peft_mora/tuners/adalora/config.py +52 -0
  33. MoRA/peft_mora/tuners/adalora/gptq.py +72 -0
  34. MoRA/peft_mora/tuners/adalora/layer.py +346 -0
  35. MoRA/peft_mora/tuners/adalora/model.py +346 -0
  36. MoRA/peft_mora/tuners/adaption_prompt/__init__.py +19 -0
  37. MoRA/peft_mora/tuners/adaption_prompt/__pycache__/__init__.cpython-312.pyc +0 -0
  38. MoRA/peft_mora/tuners/adaption_prompt/__pycache__/config.cpython-312.pyc +0 -0
  39. MoRA/peft_mora/tuners/adaption_prompt/__pycache__/layer.cpython-312.pyc +0 -0
  40. MoRA/peft_mora/tuners/adaption_prompt/__pycache__/model.cpython-312.pyc +0 -0
  41. MoRA/peft_mora/tuners/adaption_prompt/__pycache__/utils.cpython-312.pyc +0 -0
  42. MoRA/peft_mora/tuners/adaption_prompt/config.py +73 -0
  43. MoRA/peft_mora/tuners/adaption_prompt/layer.py +120 -0
  44. MoRA/peft_mora/tuners/adaption_prompt/model.py +161 -0
  45. MoRA/peft_mora/tuners/adaption_prompt/utils.py +111 -0
  46. MoRA/peft_mora/tuners/ia3/__init__.py +36 -0
  47. MoRA/peft_mora/tuners/ia3/__pycache__/__init__.cpython-312.pyc +0 -0
  48. MoRA/peft_mora/tuners/ia3/__pycache__/config.cpython-312.pyc +0 -0
  49. MoRA/peft_mora/tuners/ia3/__pycache__/layer.cpython-312.pyc +0 -0
  50. MoRA/peft_mora/tuners/ia3/__pycache__/model.cpython-312.pyc +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
MoRA/README.md ADDED
@@ -0,0 +1,68 @@
1
+ # [MoRA: High-Rank Updating for Parameter-Efficient Fine-Tuning](https://arxiv.org/abs/2405.12130)
2
+
3
+ ## Setup
4
+
5
+ We implement MoRA in `peft-mora`, which is based on HF peft; the core changes live in [`apply_mora`](https://github.com/kongds/MoRA/blob/main/peft-mora/src/peft/tuners/lora/layer.py#L229) and [`get_delta_weight`](https://github.com/kongds/MoRA/blob/main/peft-mora/src/peft/tuners/lora/layer.py#L514).
6
+ ``` sh
7
+ pip install -e ./peft-mora
8
+ ```
9
+
10
+ After installation, it can be used as follows:
11
+
12
+ ``` python
13
+ from peft import LoraConfig, get_peft_model
14
+ config = LoraConfig(
15
+ # enable MoRA
16
+ use_mora=True,
17
+ # type 1 (Sharing) for large lora ranks, Eq. 6 in paper
18
+ # type 6 (RoPE based) for small lora ranks, Eq. 9 in paper
19
+ mora_type=6,
20
+ # LoRA rank here; the corresponding $\hat{r}$ used by MoRA is computed from it
21
+ r=lora_r,
22
+ # MoRA does not use lora_alpha
23
+ # lora_alpha=lora_alpha,
24
+ target_modules=lora_target_modules,
25
+ lora_dropout=lora_dropout,
26
+ task_type="CAUSAL_LM",
27
+ **kwargs,
28
+ )
29
+ model = get_peft_model(model, config)
30
+
31
+ # training here...
32
+
33
+ # the adapter can be merged into the model via `merge_and_unload`, as with LoRA
34
+ model = model.merge_and_unload()
35
+ ```
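For intuition, here is a rough, self-contained sketch of the idea behind MoRA's update: instead of a low-rank product $BA$, a single square matrix $M \in \mathbb{R}^{\hat{r} \times \hat{r}}$ is trained, with non-parametric compression/decompression mapping the layer input into and out of the $\hat{r}$-dimensional space. The chunk-and-sum and tile-and-truncate operators below are only illustrative stand-ins for the paper's Eq. 6/Eq. 9 variants, not the repository's actual `apply_mora` implementation.

``` python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class MoRASketchLinear(nn.Module):
    """Toy illustration: a frozen linear layer plus a trainable square matrix M."""

    def __init__(self, base: nn.Linear, r: int):
        super().__init__()
        self.base = base
        for p in self.base.parameters():
            p.requires_grad_(False)
        in_f, out_f = base.in_features, base.out_features
        # pick r_hat so that r_hat^2 ~= r * (in_f + out_f), i.e. the square matrix
        # holds roughly as many trainable parameters as a rank-r LoRA pair A/B
        self.r_hat = int(math.sqrt((in_f + out_f) * r))
        self.M = nn.Parameter(torch.zeros(self.r_hat, self.r_hat))  # zero init => no change at start
        self.out_f = out_f

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # stand-in "compression": fold the input into r_hat-wide chunks and sum them
        pad = (-x.shape[-1]) % self.r_hat
        x_c = F.pad(x, (0, pad)).view(*x.shape[:-1], -1, self.r_hat).sum(dim=-2)
        delta = x_c @ self.M.T  # the square (potentially full-rank) update
        # stand-in "decompression": tile back up to the output width and truncate
        reps = -(-self.out_f // self.r_hat)  # ceiling division
        delta = torch.cat([delta] * reps, dim=-1)[..., : self.out_f]
        return self.base(x) + delta


layer = MoRASketchLinear(nn.Linear(1024, 1024), r=8)
print(layer(torch.randn(2, 1024)).shape, layer.r_hat)  # torch.Size([2, 1024]) 128
```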
36
+
37
+ ## Examples
38
+ ### fine-tuning MetaMath with MoRA
39
+
40
+ ``` sh
41
+ RANK=8
42
+ deepspeed --num_gpus=8 --num_nodes=2 train.py \
43
+ --base_model <LLAMA-2> --micro_batch_size 4 \
44
+ --wandb_run_name mora_math_r8 --lora_target_modules q_proj,k_proj,v_proj,o_proj,gate_proj,down_proj,up_proj \
45
+ --num_epochs 3 --deepspeed ds.config --wandb_project lora-math --lora_r $RANK --batch_size 128 \
46
+ --data_path meta-math/MetaMath \
47
+ --save_steps 3000 \
48
+ --learning_rate 3e-4 --mora_type 6 \
49
+ --logging_steps 5 --use_bf16 --use_16bit --use_mora
50
+ ```
51
+
52
+ ### pretraining
53
+
54
+ ``` sh
55
+ deepspeed --num_gpus=8 --num_nodes=4 train.py \
56
+ --micro_batch_size 16 --wandb_run_name mora-pretrain250m-r128 \
57
+ --num_epochs 1 --wandb_project lora-pretrain --batch_size 1024 \
58
+ --data_path <processed C4> --logging_steps 1 \
59
+ --lora_target_modules q_proj,k_proj,v_proj,o_proj,gate_proj,down_proj,up_proj \
60
+ --lora_r 128 --lora_alpha 64 --warmup_steps 1000 \
61
+ --force_tqdm_update --lr_scheduler_type cosine \
62
+ --max_steps 10000 --pretrain 250m \
63
+ --train_embhead --learning_rate 5e-4 \
64
+ --use_mora --use_relora --use_relora_step 2000 # ReMoRA merge per 2000 steps
65
+ ```
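The `--use_relora --use_relora_step 2000` flags refer to the ReMoRA schedule: every N steps the current adapter is merged into the base weights and a fresh adapter is started, so successive updates can accumulate into a higher-rank change. The loop below is only a hedged sketch of that pattern, not the repository's `train.py`; the function and argument names are assumptions.

``` python
from peft import LoraConfig, get_peft_model  # peft_mora exposes the same API


def remora_train(base_model, make_config, make_optimizer, data_iter, total_steps, merge_every=2000):
    model = get_peft_model(base_model, make_config())
    optimizer = make_optimizer(model.parameters())
    for step, batch in zip(range(total_steps), data_iter):
        loss = model(**batch).loss  # assumes batches carry labels
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        if (step + 1) % merge_every == 0:
            # fold the current adapter into the base weights, then start a fresh adapter
            base_model = model.merge_and_unload()
            model = get_peft_model(base_model, make_config())
            optimizer = make_optimizer(model.parameters())  # optimizer state is reset here
    return model
```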
66
+
67
+ ## Acknowledgement
68
+ Our code is based on peft, alpaca-lora, and ReLoRA.
MoRA/config.py ADDED
@@ -0,0 +1,4 @@
1
+ from peft_mora import LoraConfig
2
+
3
+ class MoRAModelForCausalLM(LoraConfig):
4
+ pass
MoRA/model.py ADDED
@@ -0,0 +1,4 @@
1
+ from peft_mora import PeftModelForCausalLM
2
+
3
+ class MoRAModelForCausalLM(PeftModelForCausalLM):
4
+ pass
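Since `MoRAModelForCausalLM` is a thin alias over `PeftModelForCausalLM`, it inherits the usual PEFT loading interface. A minimal sketch, assuming this repository is importable as a package; the base model id and adapter directory are placeholders:

``` python
from transformers import AutoModelForCausalLM, AutoTokenizer
from MoRA.model import MoRAModelForCausalLM  # assumes this repo layout is on PYTHONPATH

base_id = "meta-llama/Llama-2-7b-hf"   # placeholder base model
adapter_dir = "./mora_adapter"          # placeholder path to a trained MoRA adapter

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id)
model = MoRAModelForCausalLM.from_pretrained(base, adapter_dir).eval()

inputs = tokenizer("2 + 2 =", return_tensors="pt")
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=8)[0]))
```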
MoRA/peft_mora/__init__.py ADDED
@@ -0,0 +1,90 @@
1
+ # flake8: noqa
2
+ # There's no way to ignore "F401 '...' imported but unused" warnings in this
3
+ # module, but to preserve other warnings. So, don't check this module at all.
4
+
5
+ # coding=utf-8
6
+ # Copyright 2023-present the HuggingFace Inc. team.
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+
20
+ __version__ = "0.9.0"
21
+
22
+ from .auto import (
23
+ AutoPeftModel,
24
+ AutoPeftModelForCausalLM,
25
+ AutoPeftModelForSequenceClassification,
26
+ AutoPeftModelForSeq2SeqLM,
27
+ AutoPeftModelForTokenClassification,
28
+ AutoPeftModelForQuestionAnswering,
29
+ AutoPeftModelForFeatureExtraction,
30
+ )
31
+ from .mapping import (
32
+ MODEL_TYPE_TO_PEFT_MODEL_MAPPING,
33
+ PEFT_TYPE_TO_CONFIG_MAPPING,
34
+ get_peft_config,
35
+ get_peft_model,
36
+ inject_adapter_in_model,
37
+ )
38
+ from .mixed_model import PeftMixedModel
39
+ from .peft_model import (
40
+ PeftModel,
41
+ PeftModelForCausalLM,
42
+ PeftModelForSeq2SeqLM,
43
+ PeftModelForSequenceClassification,
44
+ PeftModelForTokenClassification,
45
+ PeftModelForQuestionAnswering,
46
+ PeftModelForFeatureExtraction,
47
+ )
48
+ from .tuners import (
49
+ AdaptionPromptConfig,
50
+ AdaptionPromptModel,
51
+ LoraConfig,
52
+ LoftQConfig,
53
+ LoraModel,
54
+ LoHaConfig,
55
+ LoHaModel,
56
+ LoKrConfig,
57
+ LoKrModel,
58
+ IA3Config,
59
+ IA3Model,
60
+ AdaLoraConfig,
61
+ AdaLoraModel,
62
+ PrefixEncoder,
63
+ PrefixTuningConfig,
64
+ PromptEmbedding,
65
+ PromptEncoder,
66
+ PromptEncoderConfig,
67
+ PromptEncoderReparameterizationType,
68
+ PromptTuningConfig,
69
+ PromptTuningInit,
70
+ MultitaskPromptTuningConfig,
71
+ MultitaskPromptTuningInit,
72
+ OFTConfig,
73
+ OFTModel,
74
+ PolyConfig,
75
+ PolyModel,
76
+ )
77
+ from .utils import (
78
+ TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
79
+ PeftType,
80
+ TaskType,
81
+ bloom_model_postprocess_past_key_value,
82
+ get_peft_model_state_dict,
83
+ prepare_model_for_int8_training,
84
+ prepare_model_for_kbit_training,
85
+ set_peft_model_state_dict,
86
+ shift_tokens_right,
87
+ load_peft_weights,
88
+ cast_mixed_precision_params,
89
+ )
90
+ from .config import PeftConfig, PromptLearningConfig
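The package exports the same public names as upstream `peft`, so existing code can typically switch by changing only the import. A short sketch, assuming the vendored `LoraConfig` exposes the `use_mora`/`mora_type` flags described in the README:

``` python
# Drop-in replacement for `peft` imports, carrying the MoRA changes.
from peft_mora import LoraConfig, TaskType, get_peft_model

config = LoraConfig(
    r=8,
    use_mora=True,   # MoRA flag added in this fork (per the README)
    mora_type=6,
    target_modules=["q_proj", "v_proj"],
    task_type=TaskType.CAUSAL_LM,
)
```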
MoRA/peft_mora/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (2.31 kB). View file
 
MoRA/peft_mora/__pycache__/auto.cpython-312.pyc ADDED
Binary file (6.68 kB). View file
 
MoRA/peft_mora/__pycache__/config.cpython-312.pyc ADDED
Binary file (11.7 kB). View file
 
MoRA/peft_mora/__pycache__/import_utils.cpython-312.pyc ADDED
Binary file (2.85 kB). View file
 
MoRA/peft_mora/__pycache__/mapping.cpython-312.pyc ADDED
Binary file (5.64 kB). View file
 
MoRA/peft_mora/__pycache__/mixed_model.cpython-312.pyc ADDED
Binary file (18.5 kB). View file
 
MoRA/peft_mora/__pycache__/peft_model.cpython-312.pyc ADDED
Binary file (82.2 kB). View file
 
MoRA/peft_mora/auto.py ADDED
@@ -0,0 +1,170 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import importlib
18
+ import os
19
+ from typing import Optional
20
+
21
+ from transformers import (
22
+ AutoModel,
23
+ AutoModelForCausalLM,
24
+ AutoModelForQuestionAnswering,
25
+ AutoModelForSeq2SeqLM,
26
+ AutoModelForSequenceClassification,
27
+ AutoModelForTokenClassification,
28
+ AutoTokenizer,
29
+ )
30
+
31
+ from .config import PeftConfig
32
+ from .mapping import MODEL_TYPE_TO_PEFT_MODEL_MAPPING
33
+ from .peft_model import (
34
+ PeftModel,
35
+ PeftModelForCausalLM,
36
+ PeftModelForFeatureExtraction,
37
+ PeftModelForQuestionAnswering,
38
+ PeftModelForSeq2SeqLM,
39
+ PeftModelForSequenceClassification,
40
+ PeftModelForTokenClassification,
41
+ )
42
+ from .utils.constants import TOKENIZER_CONFIG_NAME
43
+ from .utils.other import check_file_exists_on_hf_hub
44
+
45
+
46
+ class _BaseAutoPeftModel:
47
+ _target_class = None
48
+ _target_peft_class = None
49
+
50
+ def __init__(self, *args, **kwargs):
51
+ # For consistency with transformers: https://github.com/huggingface/transformers/blob/91d7df58b6537d385e90578dac40204cb550f706/src/transformers/models/auto/auto_factory.py#L400
52
+ raise EnvironmentError( # noqa: UP024
53
+ f"{self.__class__.__name__} is designed to be instantiated "
54
+ f"using the `{self.__class__.__name__}.from_pretrained(pretrained_model_name_or_path)` or "
55
+ f"`{self.__class__.__name__}.from_config(config)` methods."
56
+ )
57
+
58
+ @classmethod
59
+ def from_pretrained(
60
+ cls,
61
+ pretrained_model_name_or_path,
62
+ adapter_name: str = "default",
63
+ is_trainable: bool = False,
64
+ config: Optional[PeftConfig] = None,
65
+ **kwargs,
66
+ ):
67
+ r"""
68
+ A wrapper around all the preprocessing steps a user needs to perform in order to load a PEFT model. The kwargs
69
+ are passed along to `PeftConfig` that automatically takes care of filtering the kwargs of the Hub methods and
70
+ the config object init.
71
+ """
72
+ peft_config = PeftConfig.from_pretrained(pretrained_model_name_or_path, **kwargs)
73
+ base_model_path = peft_config.base_model_name_or_path
74
+
75
+ task_type = getattr(peft_config, "task_type", None)
76
+
77
+ if cls._target_class is not None:
78
+ target_class = cls._target_class
79
+ elif cls._target_class is None and task_type is not None:
80
+ # this is only in the case where we use `AutoPeftModel`
81
+ raise ValueError(
82
+ "Cannot use `AutoPeftModel` with a task type, please use a specific class for your task type. (e.g. `AutoPeftModelForCausalLM` for `task_type='CAUSAL_LM'`)"
83
+ )
84
+
85
+ if task_type is not None:
86
+ expected_target_class = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[task_type]
87
+ if cls._target_peft_class.__name__ != expected_target_class.__name__:
88
+ raise ValueError(
89
+ f"Expected target PEFT class: {expected_target_class.__name__}, but you have asked for: {cls._target_peft_class.__name__ }"
90
+ " make sure that you are loading the correct model for your task type."
91
+ )
92
+ elif task_type is None and getattr(peft_config, "auto_mapping", None) is not None:
93
+ auto_mapping = getattr(peft_config, "auto_mapping", None)
94
+ base_model_class = auto_mapping["base_model_class"]
95
+ parent_library_name = auto_mapping["parent_library"]
96
+
97
+ parent_library = importlib.import_module(parent_library_name)
98
+ target_class = getattr(parent_library, base_model_class)
99
+ else:
100
+ raise ValueError(
101
+ "Cannot infer the auto class from the config, please make sure that you are loading the correct model for your task type."
102
+ )
103
+
104
+ base_model = target_class.from_pretrained(base_model_path, **kwargs)
105
+
106
+ tokenizer_exists = False
107
+ if os.path.exists(os.path.join(pretrained_model_name_or_path, TOKENIZER_CONFIG_NAME)):
108
+ tokenizer_exists = True
109
+ else:
110
+ token = kwargs.get("token", None)
111
+ if token is None:
112
+ token = kwargs.get("use_auth_token", None)
113
+
114
+ tokenizer_exists = check_file_exists_on_hf_hub(
115
+ repo_id=pretrained_model_name_or_path,
116
+ filename=TOKENIZER_CONFIG_NAME,
117
+ revision=kwargs.get("revision", None),
118
+ repo_type=kwargs.get("repo_type", None),
119
+ token=token,
120
+ )
121
+
122
+ if tokenizer_exists:
123
+ tokenizer = AutoTokenizer.from_pretrained(
124
+ pretrained_model_name_or_path, trust_remote_code=kwargs.get("trust_remote_code", False)
125
+ )
126
+ base_model.resize_token_embeddings(len(tokenizer))
127
+
128
+ return cls._target_peft_class.from_pretrained(
129
+ base_model,
130
+ pretrained_model_name_or_path,
131
+ adapter_name=adapter_name,
132
+ is_trainable=is_trainable,
133
+ config=config,
134
+ **kwargs,
135
+ )
136
+
137
+
138
+ class AutoPeftModel(_BaseAutoPeftModel):
139
+ _target_class = None
140
+ _target_peft_class = PeftModel
141
+
142
+
143
+ class AutoPeftModelForCausalLM(_BaseAutoPeftModel):
144
+ _target_class = AutoModelForCausalLM
145
+ _target_peft_class = PeftModelForCausalLM
146
+
147
+
148
+ class AutoPeftModelForSeq2SeqLM(_BaseAutoPeftModel):
149
+ _target_class = AutoModelForSeq2SeqLM
150
+ _target_peft_class = PeftModelForSeq2SeqLM
151
+
152
+
153
+ class AutoPeftModelForSequenceClassification(_BaseAutoPeftModel):
154
+ _target_class = AutoModelForSequenceClassification
155
+ _target_peft_class = PeftModelForSequenceClassification
156
+
157
+
158
+ class AutoPeftModelForTokenClassification(_BaseAutoPeftModel):
159
+ _target_class = AutoModelForTokenClassification
160
+ _target_peft_class = PeftModelForTokenClassification
161
+
162
+
163
+ class AutoPeftModelForQuestionAnswering(_BaseAutoPeftModel):
164
+ _target_class = AutoModelForQuestionAnswering
165
+ _target_peft_class = PeftModelForQuestionAnswering
166
+
167
+
168
+ class AutoPeftModelForFeatureExtraction(_BaseAutoPeftModel):
169
+ _target_class = AutoModel
170
+ _target_peft_class = PeftModelForFeatureExtraction
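The Auto classes resolve the base model from the adapter's saved config and wrap it in the matching Peft class. A minimal sketch of the intended use; the adapter path is a placeholder:

``` python
from peft_mora import AutoPeftModelForCausalLM

# "./mora_adapter" stands in for a local directory (or Hub repo id) holding an
# adapter_config.json; the base model is loaded from its base_model_name_or_path.
model = AutoPeftModelForCausalLM.from_pretrained("./mora_adapter", is_trainable=False)
model.eval()
```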
MoRA/peft_mora/config.py ADDED
@@ -0,0 +1,270 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import inspect
15
+ import json
16
+ import os
17
+ from dataclasses import asdict, dataclass, field
18
+ from typing import Dict, Optional, Union
19
+
20
+ from huggingface_hub import hf_hub_download
21
+ from transformers.utils import PushToHubMixin
22
+
23
+ from .utils import CONFIG_NAME, PeftType, TaskType
24
+
25
+
26
+ @dataclass
27
+ class PeftConfigMixin(PushToHubMixin):
28
+ r"""
29
+ This is the base configuration class for PEFT adapter models. It contains all the methods that are common to all
30
+ PEFT adapter models. This class inherits from [`~transformers.utils.PushToHubMixin`] which contains the methods to
31
+ push your model to the Hub. The method `save_pretrained` will save the configuration of your adapter model in a
32
+ directory. The method `from_pretrained` will load the configuration of your adapter model from a directory.
33
+
34
+ Args:
35
+ peft_type (Union[[`~peft.utils.config.PeftType`], `str`]): The type of Peft method to use.
36
+ """
37
+
38
+ peft_type: Optional[PeftType] = field(default=None, metadata={"help": "The type of PEFT model."})
39
+ auto_mapping: Optional[dict] = field(
40
+ default=None, metadata={"help": "An auto mapping dict to help retrieve the base model class if needed."}
41
+ )
42
+
43
+ def to_dict(self) -> Dict:
44
+ r"""
45
+ Returns the configuration for your adapter model as a dictionary.
46
+ """
47
+ return asdict(self)
48
+
49
+ def save_pretrained(self, save_directory: str, **kwargs) -> None:
50
+ r"""
51
+ This method saves the configuration of your adapter model in a directory.
52
+
53
+ Args:
54
+ save_directory (`str`):
55
+ The directory where the configuration will be saved.
56
+ kwargs (additional keyword arguments, *optional*):
57
+ Additional keyword arguments passed along to the [`~transformers.utils.PushToHubMixin.push_to_hub`]
58
+ method.
59
+ """
60
+ if os.path.isfile(save_directory):
61
+ raise AssertionError(f"Provided path ({save_directory}) should be a directory, not a file")
62
+
63
+ os.makedirs(save_directory, exist_ok=True)
64
+ auto_mapping_dict = kwargs.pop("auto_mapping_dict", None)
65
+
66
+ output_dict = asdict(self)
67
+ # converting set type to list
68
+ for key, value in output_dict.items():
69
+ if isinstance(value, set):
70
+ output_dict[key] = list(value)
71
+
72
+ output_path = os.path.join(save_directory, CONFIG_NAME)
73
+
74
+ # Add auto mapping details for custom models.
75
+ if auto_mapping_dict is not None:
76
+ output_dict["auto_mapping"] = auto_mapping_dict
77
+
78
+ # save it
79
+ with open(output_path, "w") as writer:
80
+ writer.write(json.dumps(output_dict, indent=2, sort_keys=True))
81
+
82
+ @classmethod
83
+ def from_peft_type(cls, **kwargs):
84
+ r"""
85
+ This method loads the configuration of your adapter model from a set of kwargs.
86
+
87
+ The appropriate configuration type is determined by the `peft_type` argument. If `peft_type` is not provided,
88
+ the calling class type is instantiated.
89
+
90
+ Args:
91
+ kwargs (configuration keyword arguments):
92
+ Keyword arguments passed along to the configuration initialization.
93
+ """
94
+ # Avoid circular dependency .. TODO: fix this with a larger refactor
95
+ from peft_mora.mapping import PEFT_TYPE_TO_CONFIG_MAPPING
96
+
97
+ # TODO: this hack is needed to fix the following issue (on commit 702f937):
98
+ # if someone saves a default config and loads it back with `PeftConfig` class it yields to
99
+ # not loading the correct config class.
100
+
101
+ # from peft import AdaLoraConfig, PeftConfig
102
+ # peft_config = AdaLoraConfig()
103
+ # print(peft_config)
104
+ # >>> AdaLoraConfig(peft_type=<PeftType.ADALORA: 'ADALORA'>, auto_mapping=None, base_model_name_or_path=None,
105
+ # revision=None, task_type=None, inference_mode=False, r=8, target_modules=None, lora_alpha=8, lora_dropout=0.0, ...
106
+ #
107
+ # peft_config.save_pretrained("./test_config")
108
+ # peft_config = PeftConfig.from_pretrained("./test_config")
109
+ # print(peft_config)
110
+ # >>> PeftConfig(peft_type='ADALORA', auto_mapping=None, base_model_name_or_path=None, revision=None, task_type=None, inference_mode=False)
111
+
112
+ if "peft_type" in kwargs:
113
+ peft_type = kwargs["peft_type"]
114
+ config_cls = PEFT_TYPE_TO_CONFIG_MAPPING[peft_type]
115
+ else:
116
+ config_cls = cls
117
+
118
+ return config_cls(**kwargs)
119
+
120
+ @classmethod
121
+ def from_pretrained(cls, pretrained_model_name_or_path: str, subfolder: Optional[str] = None, **kwargs):
122
+ r"""
123
+ This method loads the configuration of your adapter model from a directory.
124
+
125
+ Args:
126
+ pretrained_model_name_or_path (`str`):
127
+ The directory or the Hub repository id where the configuration is saved.
128
+ kwargs (additional keyword arguments, *optional*):
129
+ Additional keyword arguments passed along to the child class initialization.
130
+ """
131
+ path = (
132
+ os.path.join(pretrained_model_name_or_path, subfolder)
133
+ if subfolder is not None
134
+ else pretrained_model_name_or_path
135
+ )
136
+
137
+ hf_hub_download_kwargs, class_kwargs, _ = cls._split_kwargs(kwargs)
138
+
139
+ if os.path.isfile(os.path.join(path, CONFIG_NAME)):
140
+ config_file = os.path.join(path, CONFIG_NAME)
141
+ else:
142
+ try:
143
+ config_file = hf_hub_download(
144
+ pretrained_model_name_or_path, CONFIG_NAME, subfolder=subfolder, **hf_hub_download_kwargs
145
+ )
146
+ except Exception:
147
+ raise ValueError(f"Can't find '{CONFIG_NAME}' at '{pretrained_model_name_or_path}'")
148
+
149
+ loaded_attributes = cls.from_json_file(config_file)
150
+ kwargs = {**class_kwargs, **loaded_attributes}
151
+ return cls.from_peft_type(**kwargs)
152
+
153
+ @classmethod
154
+ def from_json_file(cls, path_json_file: str, **kwargs):
155
+ r"""
156
+ Loads a configuration file from a json file.
157
+
158
+ Args:
159
+ path_json_file (`str`):
160
+ The path to the json file.
161
+ """
162
+ with open(path_json_file) as file:
163
+ json_object = json.load(file)
164
+
165
+ return json_object
166
+
167
+ @classmethod
168
+ def _split_kwargs(cls, kwargs):
169
+ hf_hub_download_kwargs = {}
170
+ class_kwargs = {}
171
+ other_kwargs = {}
172
+
173
+ for key, value in kwargs.items():
174
+ if key in inspect.signature(hf_hub_download).parameters:
175
+ hf_hub_download_kwargs[key] = value
176
+ elif key in list(cls.__annotations__):
177
+ class_kwargs[key] = value
178
+ else:
179
+ other_kwargs[key] = value
180
+
181
+ return hf_hub_download_kwargs, class_kwargs, other_kwargs
182
+
183
+ @classmethod
184
+ def _get_peft_type(
185
+ cls,
186
+ model_id: str,
187
+ **hf_hub_download_kwargs,
188
+ ):
189
+ subfolder = hf_hub_download_kwargs.get("subfolder", None)
190
+
191
+ path = os.path.join(model_id, subfolder) if subfolder is not None else model_id
192
+
193
+ if os.path.isfile(os.path.join(path, CONFIG_NAME)):
194
+ config_file = os.path.join(path, CONFIG_NAME)
195
+ else:
196
+ try:
197
+ config_file = hf_hub_download(
198
+ model_id,
199
+ CONFIG_NAME,
200
+ **hf_hub_download_kwargs,
201
+ )
202
+ except Exception:
203
+ raise ValueError(f"Can't find '{CONFIG_NAME}' at '{model_id}'")
204
+
205
+ loaded_attributes = cls.from_json_file(config_file)
206
+ return loaded_attributes["peft_type"]
207
+
208
+ @property
209
+ def is_prompt_learning(self) -> bool:
210
+ r"""
211
+ Utility method to check if the configuration is for prompt learning.
212
+ """
213
+ return False
214
+
215
+ @property
216
+ def is_adaption_prompt(self) -> bool:
217
+ """Return True if this is an adaption prompt config."""
218
+ return False
219
+
220
+
221
+ @dataclass
222
+ class PeftConfig(PeftConfigMixin):
223
+ """
224
+ This is the base configuration class to store the configuration of a [`PeftModel`].
225
+
226
+ Args:
227
+ peft_type (Union[[`~peft.utils.config.PeftType`], `str`]): The type of Peft method to use.
228
+ task_type (Union[[`~peft.utils.config.TaskType`], `str`]): The type of task to perform.
229
+ inference_mode (`bool`, defaults to `False`): Whether to use the Peft model in inference mode.
230
+ """
231
+
232
+ base_model_name_or_path: Optional[str] = field(
233
+ default=None, metadata={"help": "The name of the base model to use."}
234
+ )
235
+ revision: Optional[str] = field(default=None, metadata={"help": "The specific model version to use."})
236
+ peft_type: Optional[Union[str, PeftType]] = field(default=None, metadata={"help": "Peft type"})
237
+ task_type: Optional[Union[str, TaskType]] = field(default=None, metadata={"help": "Task type"})
238
+ inference_mode: bool = field(default=False, metadata={"help": "Whether to use inference mode"})
239
+
240
+
241
+ @dataclass
242
+ class PromptLearningConfig(PeftConfig):
243
+ """
244
+ This is the base configuration class to store the configuration of [`PrefixTuning`], [`PromptEncoder`], or
245
+ [`PromptTuning`].
246
+
247
+ Args:
248
+ num_virtual_tokens (`int`): The number of virtual tokens to use.
249
+ token_dim (`int`): The hidden embedding dimension of the base transformer model.
250
+ num_transformer_submodules (`int`): The number of transformer submodules in the base transformer model.
251
+ num_attention_heads (`int`): The number of attention heads in the base transformer model.
252
+ num_layers (`int`): The number of layers in the base transformer model.
253
+ """
254
+
255
+ num_virtual_tokens: int = field(default=None, metadata={"help": "Number of virtual tokens"})
256
+ token_dim: int = field(
257
+ default=None, metadata={"help": "The hidden embedding dimension of the base transformer model"}
258
+ )
259
+ num_transformer_submodules: Optional[int] = field(
260
+ default=None, metadata={"help": "Number of transformer submodules"}
261
+ )
262
+ num_attention_heads: Optional[int] = field(default=None, metadata={"help": "Number of attention heads"})
263
+ num_layers: Optional[int] = field(default=None, metadata={"help": "Number of transformer layers"})
264
+
265
+ @property
266
+ def is_prompt_learning(self) -> bool:
267
+ r"""
268
+ Utility method to check if the configuration is for prompt learning.
269
+ """
270
+ return True
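The `from_peft_type` indirection is what makes `PeftConfig.from_pretrained` return the concrete subclass recorded in `peft_type`, rather than the class it was called on. A small sketch of the round trip; the directory name is arbitrary:

``` python
from peft_mora import LoraConfig, PeftConfig

cfg = LoraConfig(r=8, target_modules=["q_proj", "v_proj"])
cfg.save_pretrained("./lora_cfg")            # writes adapter_config.json

reloaded = PeftConfig.from_pretrained("./lora_cfg")
# from_peft_type dispatches on the stored peft_type, so this is a LoraConfig again
print(type(reloaded).__name__, reloaded.r)
```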
MoRA/peft_mora/helpers.py ADDED
@@ -0,0 +1,113 @@
1
+ import inspect
2
+ from copy import deepcopy
3
+ from functools import update_wrapper
4
+ from types import MethodType
5
+
6
+ from .peft_model import PeftModel
7
+
8
+
9
+ def update_forward_signature(model: PeftModel) -> None:
10
+ """
11
+ Updates the forward signature of the PeftModel to include the parent class's signature.
12
+ Args:
13
+ model (`PeftModel`): the PEFT model whose forward signature will be updated.
14
+ Example:
15
+
16
+ ```python
17
+ >>> from transformers import WhisperForConditionalGeneration
18
+ >>> from peft import get_peft_model, LoraConfig, update_forward_signature
19
+
20
+ >>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
21
+ >>> peft_config = LoraConfig(r=8, lora_alpha=32, lora_dropout=0.1, target_modules=["q_proj", "v_proj"])
22
+
23
+ >>> peft_model = get_peft_model(model, peft_config)
24
+ >>> update_forward_signature(peft_model)
25
+ ```
26
+ """
27
+
28
+ # Only update signature when the current forward signature only has *args and **kwargs
29
+ current_signature = inspect.signature(model.forward)
30
+ if (
31
+ len(current_signature.parameters) == 2
32
+ and "args" in current_signature.parameters
33
+ and "kwargs" in current_signature.parameters
34
+ ):
35
+ forward = deepcopy(model.forward.__func__)
36
+ update_wrapper(
37
+ forward, type(model.get_base_model()).forward, assigned=("__doc__", "__name__", "__annotations__")
38
+ )
39
+ model.forward = MethodType(forward, model)
40
+
41
+
42
+ def update_generate_signature(model: PeftModel) -> None:
43
+ """
44
+ Updates the generate signature of a PeftModel whose `generate` is overridden, to include the parent class's signature.
45
+ Args:
46
+ model (`PeftModel`): the PEFT model whose generate signature will be updated.
47
+ Example:
48
+
49
+ ```python
50
+ >>> from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
51
+ >>> from peft import get_peft_model, LoraConfig, TaskType, update_generate_signature
52
+
53
+ >>> model_name_or_path = "bigscience/mt0-large"
54
+ >>> tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
55
+ >>> model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
56
+
57
+ >>> peft_config = LoraConfig(
58
+ ... task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
59
+ ... )
60
+ >>> peft_model = get_peft_model(model, peft_config)
61
+ >>> update_generate_signature(peft_model)
62
+ >>> help(peft_model.generate)
63
+ ```
64
+ """
65
+ if not hasattr(model, "generate"):
66
+ return
67
+ current_signature = inspect.signature(model.generate)
68
+ if (
69
+ len(current_signature.parameters) == 2
70
+ and "args" in current_signature.parameters
71
+ and "kwargs" in current_signature.parameters
72
+ ) or (len(current_signature.parameters) == 1 and "kwargs" in current_signature.parameters):
73
+ generate = deepcopy(model.generate.__func__)
74
+ update_wrapper(
75
+ generate,
76
+ type(model.get_base_model()).generate,
77
+ assigned=("__doc__", "__name__", "__annotations__"),
78
+ )
79
+ model.generate = MethodType(generate, model)
80
+
81
+
82
+ def update_signature(model: PeftModel, method: str = "all") -> None:
83
+ """
84
+ Updates the forward and/or generate signature of a PeftModel to include the parent class's signature.
85
+ Args:
86
+ model (`PeftModel`): the PEFT model whose signature will be updated.
87
+ method (`str`): which signature to update; one of "forward", "generate", "all".
88
+ Example:
89
+ ```python
90
+ >>> from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
91
+ >>> from peft import get_peft_model, LoraConfig, TaskType, update_signature
92
+
93
+ >>> model_name_or_path = "bigscience/mt0-large"
94
+ >>> tokenizer = AutoTokenizer.from_pretrained(model_name_or_path)
95
+ >>> model = AutoModelForSeq2SeqLM.from_pretrained(model_name_or_path)
96
+
97
+ >>> peft_config = LoraConfig(
98
+ ... task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=8, lora_alpha=32, lora_dropout=0.1
99
+ ... )
100
+ >>> peft_model = get_peft_model(model, peft_config)
101
+ >>> update_signature(peft_model)
102
+ >>> help(peft_model.generate)
103
+ ```
104
+ """
105
+ if method == "forward":
106
+ update_forward_signature(model)
107
+ elif method == "generate":
108
+ update_generate_signature(model)
109
+ elif method == "all":
110
+ update_forward_signature(model)
111
+ update_generate_signature(model)
112
+ else:
113
+ raise ValueError(f"method {method} is not supported please choose one of ['forward', 'generate', 'all']")
MoRA/peft_mora/import_utils.py ADDED
@@ -0,0 +1,73 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import importlib
15
+ import importlib.metadata as importlib_metadata
16
+ from functools import lru_cache
17
+
18
+ import packaging.version
19
+
20
+
21
+ def is_bnb_available() -> bool:
22
+ return importlib.util.find_spec("bitsandbytes") is not None
23
+
24
+
25
+ def is_bnb_4bit_available() -> bool:
26
+ if not is_bnb_available():
27
+ return False
28
+
29
+ import bitsandbytes as bnb
30
+
31
+ return hasattr(bnb.nn, "Linear4bit")
32
+
33
+
34
+ def is_auto_gptq_available():
35
+ if importlib.util.find_spec("auto_gptq") is not None:
36
+ AUTOGPTQ_MINIMUM_VERSION = packaging.version.parse("0.5.0")
37
+ version_autogptq = packaging.version.parse(importlib_metadata.version("auto_gptq"))
38
+ if AUTOGPTQ_MINIMUM_VERSION <= version_autogptq:
39
+ return True
40
+ else:
41
+ raise ImportError(
42
+ f"Found an incompatible version of auto-gptq. Found version {version_autogptq}, "
43
+ f"but only versions above {AUTOGPTQ_MINIMUM_VERSION} are supported"
44
+ )
45
+
46
+
47
+ def is_optimum_available() -> bool:
48
+ return importlib.util.find_spec("optimum") is not None
49
+
50
+
51
+ @lru_cache
52
+ def is_torch_tpu_available(check_device=True):
53
+ "Checks if `torch_xla` is installed and potentially if a TPU is in the environment"
54
+ if importlib.util.find_spec("torch_xla") is not None:
55
+ if check_device:
56
+ # We need to check if `xla_device` can be found, will raise a RuntimeError if not
57
+ try:
58
+ import torch_xla.core.xla_model as xm
59
+
60
+ _ = xm.xla_device()
61
+ return True
62
+ except RuntimeError:
63
+ return False
64
+ return True
65
+ return False
66
+
67
+
68
+ def is_aqlm_available():
69
+ return importlib.util.find_spec("aqlm") is not None
70
+
71
+
72
+ def is_auto_awq_available():
73
+ return importlib.util.find_spec("awq") is not None
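These probes are typically used to gate optional integrations. A short sketch of how a caller might use one of them (not code from this repository):

``` python
from transformers import BitsAndBytesConfig

from peft_mora.import_utils import is_bnb_4bit_available

model_kwargs = {}
if is_bnb_4bit_available():
    # only request 4-bit loading when bitsandbytes with Linear4bit support is installed
    model_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)
```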
MoRA/peft_mora/mapping.py ADDED
@@ -0,0 +1,168 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ from typing import TYPE_CHECKING, Any
18
+
19
+ import torch
20
+
21
+ from .config import PeftConfig
22
+ from .mixed_model import PeftMixedModel
23
+ from .peft_model import (
24
+ PeftModel,
25
+ PeftModelForCausalLM,
26
+ PeftModelForFeatureExtraction,
27
+ PeftModelForQuestionAnswering,
28
+ PeftModelForSeq2SeqLM,
29
+ PeftModelForSequenceClassification,
30
+ PeftModelForTokenClassification,
31
+ )
32
+ from .tuners import (
33
+ AdaLoraConfig,
34
+ AdaLoraModel,
35
+ AdaptionPromptConfig,
36
+ IA3Config,
37
+ IA3Model,
38
+ LoHaConfig,
39
+ LoHaModel,
40
+ LoKrConfig,
41
+ LoKrModel,
42
+ LoraConfig,
43
+ LoraModel,
44
+ MultitaskPromptTuningConfig,
45
+ OFTConfig,
46
+ OFTModel,
47
+ PolyConfig,
48
+ PolyModel,
49
+ PrefixTuningConfig,
50
+ PromptEncoderConfig,
51
+ PromptTuningConfig,
52
+ )
53
+ from .utils import _prepare_prompt_learning_config
54
+
55
+
56
+ if TYPE_CHECKING:
57
+ from transformers import PreTrainedModel
58
+
59
+
60
+ MODEL_TYPE_TO_PEFT_MODEL_MAPPING: dict[str, PeftModel] = {
61
+ "SEQ_CLS": PeftModelForSequenceClassification,
62
+ "SEQ_2_SEQ_LM": PeftModelForSeq2SeqLM,
63
+ "CAUSAL_LM": PeftModelForCausalLM,
64
+ "TOKEN_CLS": PeftModelForTokenClassification,
65
+ "QUESTION_ANS": PeftModelForQuestionAnswering,
66
+ "FEATURE_EXTRACTION": PeftModelForFeatureExtraction,
67
+ }
68
+
69
+ PEFT_TYPE_TO_CONFIG_MAPPING: dict[str, PeftConfig] = {
70
+ "ADAPTION_PROMPT": AdaptionPromptConfig,
71
+ "PROMPT_TUNING": PromptTuningConfig,
72
+ "PREFIX_TUNING": PrefixTuningConfig,
73
+ "P_TUNING": PromptEncoderConfig,
74
+ "LORA": LoraConfig,
75
+ "LOHA": LoHaConfig,
76
+ "LOKR": LoKrConfig,
77
+ "ADALORA": AdaLoraConfig,
78
+ "IA3": IA3Config,
79
+ "MULTITASK_PROMPT_TUNING": MultitaskPromptTuningConfig,
80
+ "OFT": OFTConfig,
81
+ "POLY": PolyConfig,
82
+ }
83
+
84
+ PEFT_TYPE_TO_TUNER_MAPPING = {
85
+ "LORA": LoraModel,
86
+ "LOHA": LoHaModel,
87
+ "LOKR": LoKrModel,
88
+ "ADALORA": AdaLoraModel,
89
+ "IA3": IA3Model,
90
+ "OFT": OFTModel,
91
+ "POLY": PolyModel,
92
+ }
93
+
94
+
95
+ def get_peft_config(config_dict: dict[str, Any]) -> PeftConfig:
96
+ """
97
+ Returns a Peft config object from a dictionary.
98
+
99
+ Args:
100
+ config_dict (`Dict[str, Any]`): Dictionary containing the configuration parameters.
101
+ """
102
+
103
+ return PEFT_TYPE_TO_CONFIG_MAPPING[config_dict["peft_type"]](**config_dict)
104
+
105
+
106
+ def get_peft_model(
107
+ model: PreTrainedModel, peft_config: PeftConfig, adapter_name: str = "default", mixed: bool = False
108
+ ) -> PeftModel | PeftMixedModel:
109
+ """
110
+ Returns a Peft model object from a model and a config.
111
+
112
+ Args:
113
+ model ([`transformers.PreTrainedModel`]):
114
+ Model to be wrapped.
115
+ peft_config ([`PeftConfig`]):
116
+ Configuration object containing the parameters of the Peft model.
117
+ adapter_name (`str`, `optional`, defaults to `"default"`):
118
+ The name of the adapter to be injected, if not provided, the default adapter name is used ("default").
119
+ mixed (`bool`, `optional`, defaults to `False`):
120
+ Whether to allow mixing different (compatible) adapter types.
121
+ """
122
+ model_config = getattr(model, "config", {"model_type": "custom"})
123
+ if hasattr(model_config, "to_dict"):
124
+ model_config = model_config.to_dict()
125
+
126
+ peft_config.base_model_name_or_path = model.__dict__.get("name_or_path", None)
127
+
128
+ if mixed:
129
+ return PeftMixedModel(model, peft_config, adapter_name=adapter_name)
130
+
131
+ if peft_config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys() and not peft_config.is_prompt_learning:
132
+ return PeftModel(model, peft_config, adapter_name=adapter_name)
133
+
134
+ if peft_config.is_prompt_learning:
135
+ peft_config = _prepare_prompt_learning_config(peft_config, model_config)
136
+ return MODEL_TYPE_TO_PEFT_MODEL_MAPPING[peft_config.task_type](model, peft_config, adapter_name=adapter_name)
137
+
138
+
139
+ def inject_adapter_in_model(
140
+ peft_config: PeftConfig, model: torch.nn.Module, adapter_name: str = "default"
141
+ ) -> torch.nn.Module:
142
+ r"""
143
+ A simple API to create and inject adapter in-place into a model. Currently the API does not support prompt learning
144
+ methods and adaption prompt. Make sure to have the correct `target_names` set in the `peft_config` object. The API
145
+ calls `get_peft_model` under the hood but would be restricted only to non-prompt learning methods.
146
+
147
+ Args:
148
+ peft_config (`PeftConfig`):
149
+ Configuration object containing the parameters of the Peft model.
150
+ model (`torch.nn.Module`):
151
+ The input model where the adapter will be injected.
152
+ adapter_name (`str`, `optional`, defaults to `"default"`):
153
+ The name of the adapter to be injected, if not provided, the default adapter name is used ("default").
154
+ """
155
+ if peft_config.is_prompt_learning or peft_config.is_adaption_prompt:
156
+ raise ValueError("`create_and_replace` does not support prompt learning and adaption prompt yet.")
157
+
158
+ if peft_config.peft_type not in PEFT_TYPE_TO_TUNER_MAPPING.keys():
159
+ raise ValueError(
160
+ f"`inject_adapter_in_model` does not support {peft_config.peft_type} yet. Please use `get_peft_model`."
161
+ )
162
+
163
+ tuner_cls = PEFT_TYPE_TO_TUNER_MAPPING[peft_config.peft_type]
164
+
165
+ # By instantiating a peft model we are injecting randomly initialized LoRA layers into the model's modules.
166
+ peft_model = tuner_cls(model, peft_config, adapter_name=adapter_name)
167
+
168
+ return peft_model.model
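`inject_adapter_in_model` is the low-level path: it mutates the module in place and returns the bare `torch.nn.Module` rather than a `PeftModel` wrapper. A minimal sketch on a toy model; the module and layer names are illustrative:

``` python
import torch

from peft_mora import LoraConfig, inject_adapter_in_model


class Toy(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = torch.nn.Linear(16, 16)

    def forward(self, x):
        return self.linear(x)


config = LoraConfig(r=4, target_modules=["linear"])
model = inject_adapter_in_model(config, Toy())
# only the injected adapter parameters are left trainable
print([name for name, p in model.named_parameters() if p.requires_grad])
```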
MoRA/peft_mora/mixed_model.py ADDED
@@ -0,0 +1,402 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import os
18
+ from contextlib import contextmanager
19
+ from typing import Any, Optional, Union
20
+
21
+ import torch
22
+ from accelerate.hooks import remove_hook_from_submodules
23
+ from torch import nn
24
+ from transformers.utils import PushToHubMixin
25
+
26
+ from peft_mora.tuners.mixed import COMPATIBLE_TUNER_TYPES
27
+
28
+ from .config import PeftConfig
29
+ from .peft_model import PeftModel
30
+ from .tuners import (
31
+ AdaLoraModel,
32
+ IA3Model,
33
+ LoHaModel,
34
+ LoKrModel,
35
+ LoraModel,
36
+ MixedModel,
37
+ OFTModel,
38
+ )
39
+ from .utils import PeftType, _set_adapter, _set_trainable
40
+
41
+
42
+ PEFT_TYPE_TO_MODEL_MAPPING = {
43
+ PeftType.LORA: LoraModel,
44
+ PeftType.LOHA: LoHaModel,
45
+ PeftType.LOKR: LoKrModel,
46
+ PeftType.ADALORA: AdaLoraModel,
47
+ PeftType.IA3: IA3Model,
48
+ PeftType.OFT: OFTModel,
49
+ }
50
+
51
+
52
+ def _prepare_model_for_gradient_checkpointing(model: nn.Module) -> None:
53
+ r"""
54
+ Prepares the model for gradient checkpointing if necessary
55
+ """
56
+ # Note: same as PeftModel._prepare_model_for_gradient_checkpointing
57
+ if not getattr(model, "is_gradient_checkpointing", True):
58
+ return model
59
+
60
+ if not (
61
+ getattr(model, "is_loaded_in_8bit", False)
62
+ or getattr(model, "is_loaded_in_4bit", False)
63
+ or getattr(model, "is_quantized", False)
64
+ ):
65
+ if hasattr(model, "enable_input_require_grads"):
66
+ model.enable_input_require_grads()
67
+ elif hasattr(model, "get_input_embeddings"):
68
+
69
+ def make_inputs_require_grad(module, input, output):
70
+ output.requires_grad_(True)
71
+
72
+ model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
73
+
74
+
75
+ def _check_config_compatible(peft_config: PeftConfig) -> None:
76
+ if peft_config.peft_type not in COMPATIBLE_TUNER_TYPES:
77
+ raise ValueError(
78
+ f"The provided `peft_type` '{peft_config.peft_type.value}' is not compatible with the `PeftMixedModel`. "
79
+ f"Compatible types are: {COMPATIBLE_TUNER_TYPES}"
80
+ )
81
+
82
+
83
+ class PeftMixedModel(PushToHubMixin, torch.nn.Module):
84
+ """
85
+ PeftMixedModel for loading mixing different types of adapters for inference.
86
+
87
+ This class does not support loading/saving, and it shouldn't usually be initialized directly. Instead, use
88
+ `get_peft_model` with the argument `mixed=True`.
89
+
90
+ <Tip>
91
+
92
+ Read the [Mixed adapter types](https://huggingface.co/docs/peft/en/developer_guides/mixed_models) guide to learn
93
+ more about using different adapter types.
94
+
95
+ </Tip>
96
+
97
+ Example:
98
+
99
+ ```py
100
+ >>> from peft import get_peft_model
101
+
102
+ >>> base_model = ... # load the base model, e.g. from transformers
103
+ >>> peft_model = PeftMixedModel.from_pretrained(base_model, path_to_adapter1, "adapter1").eval()
104
+ >>> peft_model.load_adapter(path_to_adapter2, "adapter2")
105
+ >>> peft_model.set_adapter(["adapter1", "adapter2"]) # activate both adapters
106
+ >>> peft_model(data) # forward pass using both adapters
107
+ ```
108
+
109
+ Args:
110
+ model (`torch.nn.Module`):
111
+ The model to be tuned.
112
+ config (`PeftConfig`):
113
+ The config of the model to be tuned. The adapter type must be compatible.
114
+ adapter_name (`str`, `optional`, defaults to `"default"`):
115
+ The name of the first adapter.
116
+ """
117
+
118
+ def __init__(self, model: nn.Module, peft_config: PeftConfig, adapter_name: str = "default") -> None:
119
+ super().__init__()
120
+ _check_config_compatible(peft_config)
121
+ _prepare_model_for_gradient_checkpointing(model)
122
+ self.modules_to_save = None
123
+ self.base_model = MixedModel(model, {adapter_name: peft_config}, adapter_name)
124
+ self.set_modules_to_save(peft_config, adapter_name)
125
+
126
+ self.config = getattr(model, "config", {"model_type": "custom"})
127
+
128
+ # the `pretraining_tp` is set for some models to simulate Tensor Parallelism during inference to avoid
129
+ # numerical differences, https://github.com/pytorch/pytorch/issues/76232 - to avoid any unexpected
130
+ # behavior we disable that in this line.
131
+ if hasattr(self.base_model, "config") and hasattr(self.base_model.config, "pretraining_tp"):
132
+ self.base_model.config.pretraining_tp = 1
133
+
134
+ @property
135
+ def peft_config(self) -> dict[str, PeftConfig]:
136
+ return self.base_model.peft_config
137
+
138
+ @property
139
+ def active_adapter(self) -> str:
140
+ return self.base_model.active_adapter
141
+
142
+ @property
143
+ def active_adapters(self) -> list[str]:
144
+ return self.base_model.active_adapters
145
+
146
+ def get_nb_trainable_parameters(self):
147
+ r"""
148
+ Returns the number of trainable parameters and number of all parameters in the model.
149
+ """
150
+ # note: same as PeftModel.get_nb_trainable_parameters
151
+ trainable_params = 0
152
+ all_param = 0
153
+ for _, param in self.named_parameters():
154
+ num_params = param.numel()
155
+ # if using DS Zero 3 and the weights are initialized empty
156
+ if num_params == 0 and hasattr(param, "ds_numel"):
157
+ num_params = param.ds_numel
158
+
159
+ # Due to the design of 4bit linear layers from bitsandbytes
160
+ # one needs to multiply the number of parameters by 2 to get
161
+ # the correct number of parameters
162
+ if param.__class__.__name__ == "Params4bit":
163
+ num_params = num_params * 2
164
+
165
+ all_param += num_params
166
+ if param.requires_grad:
167
+ trainable_params += num_params
168
+
169
+ return trainable_params, all_param
170
+
171
+ def print_trainable_parameters(self):
172
+ """
173
+ Prints the number of trainable parameters in the model.
174
+ """
175
+ # note: same as PeftModel.print_trainable_parameters
176
+ trainable_params, all_param = self.get_nb_trainable_parameters()
177
+
178
+ print(
179
+ f"trainable params: {trainable_params:,d} || "
180
+ f"all params: {all_param:,d} || "
181
+ f"trainable%: {100 * trainable_params / all_param:.4f}"
182
+ )
183
+
184
+ def __getattr__(self, name: str):
185
+ """Forward missing attributes to the wrapped module."""
186
+ try:
187
+ return super().__getattr__(name) # defer to nn.Module's logic
188
+ except AttributeError:
189
+ return getattr(self.base_model, name)
190
+
191
+ def forward(self, *args: Any, **kwargs: Any):
192
+ """
193
+ Forward pass of the model.
194
+ """
195
+ return self.base_model(*args, **kwargs)
196
+
197
+ def generate(self, *args: Any, **kwargs: Any):
198
+ """
199
+ Generate output.
200
+ """
201
+ return self.base_model.generate(*args, **kwargs)
202
+
203
+ @contextmanager
204
+ def disable_adapter(self):
205
+ """
206
+ Disables the adapter module.
207
+ """
208
+ try:
209
+ self.base_model.disable_adapter_layers()
210
+ yield
211
+ finally:
212
+ self.base_model.enable_adapter_layers()
213
+
214
+ def add_adapter(self, adapter_name: str, peft_config: PeftConfig):
215
+ _check_config_compatible(peft_config)
216
+
217
+ try:
218
+ self.peft_config[adapter_name] = peft_config
219
+ self.base_model.inject_adapter(self, adapter_name)
220
+ except Exception: # something went wrong, roll back
221
+ if adapter_name in self.peft_config:
222
+ del self.peft_config[adapter_name]
223
+ raise
224
+
225
+ self.set_modules_to_save(peft_config, adapter_name)
226
+
227
+ def set_modules_to_save(self, peft_config: PeftConfig, adapter_name: str) -> None:
228
+ if (modules_to_save := getattr(peft_config, "modules_to_save", None)) is None:
229
+ return
230
+
231
+ if self.modules_to_save is None:
232
+ self.modules_to_save = set(modules_to_save)
233
+ else:
234
+ self.modules_to_save.update(modules_to_save)
235
+ _set_trainable(self, adapter_name)
236
+
237
+ def set_adapter(self, adapter_name: Union[str, list[str]]) -> None:
238
+ """
239
+ Sets the active adapter(s) for the model.
240
+
241
+ Note that the order in which the adapters are applied during the forward pass may not be the same as the order
242
+ in which they are passed to this function. Instead, the order during the forward pass is determined by the
243
+ order in which the adapters were loaded into the model. The active adapters only determine which adapters are
244
+ active during the forward pass, but not the order in which they are applied.
245
+
246
+ Additionally, this function will set the specified adapters to trainable (i.e., requires_grad=True). If this is
247
+ not desired, use the following code.
248
+
249
+ ```py
250
+ >>> for name, param in model_peft.named_parameters():
251
+ ... if ...: # some check on name (ex. if 'lora' in name)
252
+ ... param.requires_grad = False
253
+ ```
254
+
255
+ Args:
256
+ adapter_name (`str` or `List[str]`):
257
+ The name of the adapter(s) to be activated.
258
+ """
259
+ if isinstance(adapter_name, str):
260
+ adapter_name = [adapter_name]
261
+
262
+ mismatched = set(adapter_name) - set(self.peft_config.keys())
263
+ if mismatched:
264
+ raise ValueError(
265
+ f"Adapter(s) {sorted(mismatched)} not found, available adapters: {sorted(self.peft_config.keys())}"
266
+ )
267
+
268
+ self.base_model.set_adapter(adapter_name)
269
+ _set_adapter(self, adapter_name)
270
+
271
+ def delete_adapter(self, adapter_name: Union[str, list[str]]) -> None:
272
+ if isinstance(adapter_name, str):
273
+ adapter_name = [adapter_name]
274
+
275
+ mismatched = set(adapter_name) - set(self.peft_config.keys())
276
+ if mismatched:
277
+ raise ValueError(
278
+ f"Adapter(s) {sorted(mismatched)} not found, available adapters: {sorted(self.peft_config.keys())}"
279
+ )
280
+
281
+ self.base_model.delete_adapter(adapter_name)
282
+
283
+ def merge_and_unload(self, *args: Any, **kwargs: Any):
284
+ r"""
285
+ This method merges the adapter layers into the base model. This is needed if someone wants to use the base
286
+ model as a standalone model.
287
+
288
+ Args:
289
+ progressbar (`bool`):
290
+ whether to show a progressbar indicating the unload and merge process
291
+ safe_merge (`bool`):
292
+ whether to activate the safe merging check to check if there is any potential Nan in the adapter
293
+ weights
294
+ adapter_names (`List[str]`, *optional*):
295
+ The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
296
+ to `None`.
297
+ """
298
+ return self.base_model.merge_and_unload(*args, **kwargs)
299
+
300
+ def unload(self, *args: Any, **kwargs: Any):
301
+ """
302
+ Gets back the base model by removing all the adapter modules without merging. This gives back the original base
303
+ model.
304
+ """
305
+ return self.base_model.unload(*args, **kwargs)
306
+
307
+ @classmethod
308
+ def _split_kwargs(cls, kwargs: dict[str, Any]):
309
+ return PeftModel._split_kwargs(kwargs)
310
+
311
+ def load_adapter(self, model_id: str, adapter_name: str, *args: Any, **kwargs: Any):
312
+ output = PeftModel.load_adapter(self, model_id, adapter_name, *args, **kwargs)
313
+ # TODO: not quite clear why this is necessary but tests fail without it
314
+ self.set_adapter(self.active_adapters)
315
+ return output
316
+
317
+ def create_or_update_model_card(self, output_dir: str):
318
+ raise NotImplementedError(f"Model card creation is not supported for {self.__class__.__name__} (yet).")
319
+
320
+ def save_pretrained(
321
+ self,
322
+ save_directory: str,
323
+ safe_serialization: bool = False,
324
+ selected_adapters: Optional[list[str]] = None,
325
+ **kwargs: Any,
326
+ ):
327
+ raise NotImplementedError(f"Saving is not supported for {self.__class__.__name__} (yet).")
328
+
329
+ @classmethod
330
+ def from_pretrained(
331
+ cls,
332
+ model: nn.Module,
333
+ model_id: str | os.PathLike,
334
+ adapter_name: str = "default",
335
+ is_trainable: bool = False,
336
+ config: Optional[PeftConfig] = None,
337
+ **kwargs: Any,
338
+ ):
339
+ r"""
340
+ Instantiate a PEFT mixed model from a pretrained model and loaded PEFT weights.
341
+
342
+ Note that the passed `model` may be modified inplace.
343
+
344
+ Args:
345
+ model (`nn.Module`):
346
+ The model to be adapted.
347
+ model_id (`str` or `os.PathLike`):
348
+ The name of the PEFT configuration to use. Can be either:
349
+ - A string, the `model id` of a PEFT configuration hosted inside a model repo on the Hugging Face
350
+ Hub.
351
+ - A path to a directory containing a PEFT configuration file saved using the `save_pretrained`
352
+ method (`./my_peft_config_directory/`).
353
+ adapter_name (`str`, *optional*, defaults to `"default"`):
354
+ The name of the adapter to be loaded. This is useful for loading multiple adapters.
355
+ is_trainable (`bool`, *optional*, defaults to `False`):
356
+ Whether the adapter should be trainable or not. If `False`, the adapter will be frozen and use for
357
+ inference
358
+ config ([`~peft.PeftConfig`], *optional*):
359
+ The configuration object to use instead of an automatically loaded configuration. This configuration
360
+ object is mutually exclusive with `model_id` and `kwargs`. This is useful when configuration is already
361
+ loaded before calling `from_pretrained`.
362
+ kwargs: (`optional`):
363
+ Additional keyword arguments passed along to the specific PEFT configuration class.
364
+ """
365
+ # note: adapted from PeftModel.from_pretrained
366
+ from .mapping import PEFT_TYPE_TO_CONFIG_MAPPING
367
+
368
+ # load the config
369
+ if config is None:
370
+ config = PEFT_TYPE_TO_CONFIG_MAPPING[
371
+ PeftConfig._get_peft_type(
372
+ model_id,
373
+ subfolder=kwargs.get("subfolder", None),
374
+ revision=kwargs.get("revision", None),
375
+ cache_dir=kwargs.get("cache_dir", None),
376
+ use_auth_token=kwargs.get("use_auth_token", None),
377
+ )
378
+ ].from_pretrained(model_id, **kwargs)
379
+ elif isinstance(config, PeftConfig):
380
+ config.inference_mode = not is_trainable
381
+ else:
382
+ raise ValueError(f"The input config must be a PeftConfig, got {config.__class__}")
383
+
384
+ # note: this is different from PeftModel.from_pretrained
385
+ if config.peft_type not in PEFT_TYPE_TO_MODEL_MAPPING:
386
+ raise ValueError(f"Adapter of type {config.peft_type} is not supported for mixed models.")
387
+
388
+ if (getattr(model, "hf_device_map", None) is not None) and len(
389
+ set(model.hf_device_map.values()).intersection({"cpu", "disk"})
390
+ ) > 0:
391
+ remove_hook_from_submodules(model)
392
+
393
+ if config.is_prompt_learning and is_trainable:
394
+ # note: should not be possible to reach, but just in case
395
+ raise ValueError("Cannot set a prompt learning adapter to trainable when loading pretrained adapter.")
396
+ else:
397
+ config.inference_mode = not is_trainable
398
+
399
+ # note: this is different from PeftModel.from_pretrained, we always return a PeftMixedModel
400
+ model = cls(model, config, adapter_name)
401
+ model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs)
402
+ return model
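
For reference, a minimal usage sketch of the mixed-model API above, assuming the `peft_mora` import path exposes `PeftMixedModel`; the base model name and adapter paths are placeholders:

```py
# Minimal sketch (placeholders throughout): wrap a base model, load two adapters,
# and activate both at once via the mixed-model API shown above.
from transformers import AutoModelForCausalLM

from peft_mora import PeftMixedModel  # assumed export of this fork

base = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder base model
# Load the first adapter; the path may be a Hub repo id or a local directory.
model = PeftMixedModel.from_pretrained(base, "path/to/adapter_a", adapter_name="adapter_a")
# Further adapters can be loaded and activated together.
model.load_adapter("path/to/adapter_b", adapter_name="adapter_b")
model.set_adapter(["adapter_a", "adapter_b"])
```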
MoRA/peft_mora/peft_model.py ADDED
@@ -0,0 +1,1929 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from __future__ import annotations
16
+
17
+ import collections
18
+ import inspect
19
+ import os
20
+ import warnings
21
+ from contextlib import contextmanager
22
+ from copy import deepcopy
23
+ from typing import Any, Optional, Union
24
+
25
+ import packaging.version
26
+ import torch
27
+ import transformers
28
+ from accelerate import dispatch_model, infer_auto_device_map
29
+ from accelerate.hooks import AlignDevicesHook, add_hook_to_module, remove_hook_from_submodules
30
+ from accelerate.utils import get_balanced_memory
31
+ from huggingface_hub import ModelCard, ModelCardData, hf_hub_download
32
+ from safetensors.torch import save_file as safe_save_file
33
+ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
34
+ from transformers import PreTrainedModel
35
+ from transformers.modeling_outputs import QuestionAnsweringModelOutput, SequenceClassifierOutput, TokenClassifierOutput
36
+ from transformers.utils import PushToHubMixin
37
+
38
+ from . import __version__
39
+ from .config import PeftConfig
40
+ from .tuners import (
41
+ AdaLoraModel,
42
+ AdaptionPromptModel,
43
+ IA3Model,
44
+ LoHaModel,
45
+ LoKrModel,
46
+ LoraModel,
47
+ MultitaskPromptEmbedding,
48
+ OFTModel,
49
+ PolyModel,
50
+ PrefixEncoder,
51
+ PromptEmbedding,
52
+ PromptEncoder,
53
+ )
54
+ from .utils import (
55
+ SAFETENSORS_WEIGHTS_NAME,
56
+ TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
57
+ WEIGHTS_NAME,
58
+ PeftType,
59
+ TaskType,
60
+ _get_batch_size,
61
+ _prepare_prompt_learning_config,
62
+ _set_adapter,
63
+ _set_trainable,
64
+ get_peft_model_state_dict,
65
+ id_tensor_storage,
66
+ infer_device,
67
+ load_peft_weights,
68
+ set_peft_model_state_dict,
69
+ shift_tokens_right,
70
+ )
71
+
72
+
73
+ PEFT_TYPE_TO_MODEL_MAPPING = {
74
+ PeftType.LORA: LoraModel,
75
+ PeftType.LOHA: LoHaModel,
76
+ PeftType.LOKR: LoKrModel,
77
+ PeftType.PROMPT_TUNING: PromptEmbedding,
78
+ PeftType.P_TUNING: PromptEncoder,
79
+ PeftType.PREFIX_TUNING: PrefixEncoder,
80
+ PeftType.ADALORA: AdaLoraModel,
81
+ PeftType.ADAPTION_PROMPT: AdaptionPromptModel,
82
+ PeftType.IA3: IA3Model,
83
+ PeftType.OFT: OFTModel,
84
+ PeftType.POLY: PolyModel,
85
+ }
86
+
87
+
88
+ class PeftModel(PushToHubMixin, torch.nn.Module):
89
+ """
90
+ Base model encompassing various Peft methods.
91
+
92
+ Args:
93
+ model ([`~transformers.PreTrainedModel`]): The base transformer model used for Peft.
94
+ peft_config ([`PeftConfig`]): The configuration of the Peft model.
95
+ adapter_name (`str`, *optional*): The name of the adapter, defaults to `"default"`.
96
+
97
+ **Attributes**:
98
+ - **base_model** ([`torch.nn.Module`]) -- The base transformer model used for Peft.
99
+ - **peft_config** ([`PeftConfig`]) -- The configuration of the Peft model.
100
+ - **modules_to_save** (`list` of `str`) -- The list of sub-module names to save when
101
+ saving the model.
102
+ - **prompt_encoder** ([`PromptEncoder`]) -- The prompt encoder used for Peft if
103
+ using [`PromptLearningConfig`].
104
+ - **prompt_tokens** (`torch.Tensor`) -- The virtual prompt tokens used for Peft if
105
+ using [`PromptLearningConfig`].
106
+ - **transformer_backbone_name** (`str`) -- The name of the transformer
107
+ backbone in the base model if using [`PromptLearningConfig`].
108
+ - **word_embeddings** (`torch.nn.Embedding`) -- The word embeddings of the transformer backbone
109
+ in the base model if using [`PromptLearningConfig`].
110
+ """
111
+
112
+ def __init__(self, model: PreTrainedModel, peft_config: PeftConfig, adapter_name: str = "default") -> None:
113
+ super().__init__()
114
+ self.modules_to_save = None
115
+ self.active_adapter = adapter_name
116
+ self.peft_type = peft_config.peft_type
117
+
118
+ self._is_prompt_learning = peft_config.is_prompt_learning
119
+ if self._is_prompt_learning:
120
+ self._peft_config = {adapter_name: peft_config}
121
+ self.base_model = model
122
+ self.add_adapter(adapter_name, peft_config)
123
+ else:
124
+ self._peft_config = None
125
+ cls = PEFT_TYPE_TO_MODEL_MAPPING[peft_config.peft_type]
126
+ self.base_model = cls(model, {adapter_name: peft_config}, adapter_name)
127
+ self.set_additional_trainable_modules(peft_config, adapter_name)
128
+
129
+ if getattr(model, "is_gradient_checkpointing", True):
130
+ model = self._prepare_model_for_gradient_checkpointing(model)
131
+
132
+ # the `pretraining_tp` is set for some models to simulate Tensor Parallelism during inference to avoid
133
+ # numerical differences, https://github.com/pytorch/pytorch/issues/76232 - to avoid any unexpected
134
+ # behavior we disable that in this line.
135
+ if hasattr(self.base_model, "config") and hasattr(self.base_model.config, "pretraining_tp"):
136
+ self.base_model.config.pretraining_tp = 1
137
+
138
+ @property
139
+ def peft_config(self) -> dict[str, PeftConfig]:
140
+ if self._is_prompt_learning:
141
+ return self._peft_config
142
+ return self.base_model.peft_config
143
+
144
+ @property
145
+ def active_adapters(self) -> list[str]:
146
+ try:
147
+ adapters = self.base_model.active_adapters
148
+ except AttributeError:
149
+ adapters = self.active_adapter
150
+ if isinstance(adapters, str):
151
+ adapters = [adapters]
152
+ return adapters
153
+
154
+ @peft_config.setter
155
+ def peft_config(self, value: dict[str, PeftConfig]):
156
+ if self._is_prompt_learning:
157
+ self._peft_config = value
158
+ else:
159
+ self.base_model.peft_config = value
160
+
161
+ def save_pretrained(
162
+ self,
163
+ save_directory: str,
164
+ safe_serialization: bool = True,
165
+ selected_adapters: Optional[list[str]] = None,
166
+ save_embedding_layers: Union[str, bool] = "auto",
167
+ is_main_process: bool = True,
168
+ **kwargs: Any,
169
+ ) -> None:
170
+ r"""
171
+ This function saves the adapter model and the adapter configuration files to a directory, so that it can be
172
+ reloaded using the [`PeftModel.from_pretrained`] class method, and also used by the [`PeftModel.push_to_hub`]
173
+ method.
174
+
175
+ Args:
176
+ save_directory (`str`):
177
+ Directory where the adapter model and configuration files will be saved (will be created if it does not
178
+ exist).
179
+ safe_serialization (`bool`, *optional*):
180
+ Whether to save the adapter files in safetensors format, defaults to `True`.
181
+ selected_adapters (`List[str]`, *optional*):
182
+ A list of adapters to be saved. If `None`, will default to all adapters.
183
+ save_embedding_layers (`Union[bool, str]`, *optional*, defaults to `"auto"`):
184
+ If `True`, save the embedding layers in addition to adapter weights. If `auto`, checks the common
185
+ embedding layers `peft.utils.other.EMBEDDING_LAYER_NAMES` in config's `target_modules` when available,
186
+ and automatically sets the boolean flag. This only works for 🤗 transformers models.
187
+ is_main_process (`bool`, *optional*):
188
+ Whether the process calling this is the main process or not. Will default to `True`. Will not save the
189
+ checkpoint if not on the main process, which is important for multi device setups (e.g. DDP).
190
+ kwargs (additional keyword arguments, *optional*):
191
+ Additional keyword arguments passed along to the `push_to_hub` method.
192
+ """
193
+ if os.path.isfile(save_directory):
194
+ raise ValueError(f"Provided path ({save_directory}) should be a directory, not a file")
195
+
196
+ if selected_adapters is None:
197
+ selected_adapters = list(self.peft_config.keys())
198
+ else:
199
+ if any(
200
+ selected_adapter_name not in list(self.peft_config.keys())
201
+ for selected_adapter_name in selected_adapters
202
+ ):
203
+ raise ValueError(
204
+ f"You passed an invalid `selected_adapters` arguments, current supported adapter names are"
205
+ f" {list(self.peft_config.keys())} - got {selected_adapters}."
206
+ )
207
+
208
+ if is_main_process:
209
+ os.makedirs(save_directory, exist_ok=True)
210
+ self.create_or_update_model_card(save_directory)
211
+
212
+ for adapter_name in selected_adapters:
213
+ peft_config = self.peft_config[adapter_name]
214
+ # save only the trainable weights
215
+ output_state_dict = get_peft_model_state_dict(
216
+ self,
217
+ state_dict=kwargs.get("state_dict", None),
218
+ adapter_name=adapter_name,
219
+ save_embedding_layers=save_embedding_layers,
220
+ )
221
+ output_dir = os.path.join(save_directory, adapter_name) if adapter_name != "default" else save_directory
222
+ os.makedirs(output_dir, exist_ok=True)
223
+
224
+ if is_main_process and safe_serialization:
225
+ # Section copied from: https://github.com/huggingface/transformers/blob/main/src/transformers/modeling_utils.py#L2111-L2134
226
+ # Safetensors does not allow tensor aliasing.
227
+ # We're going to remove aliases before saving
228
+ ptrs = collections.defaultdict(list)
229
+ for name, tensor in output_state_dict.items():
230
+ # Sometimes in the state_dict we have non-tensor objects.
231
+ # e.g. in bitsandbytes we have some `str` objects in the state_dict
232
+ if isinstance(tensor, torch.Tensor):
233
+ ptrs[id_tensor_storage(tensor)].append(name)
234
+ else:
235
+ # In the non-tensor case, fall back to the pointer of the object itself
236
+ ptrs[id(tensor)].append(name)
237
+
238
+ # These are all the pointers of shared tensors.
239
+ shared_ptrs = {ptr: names for ptr, names in ptrs.items() if len(names) > 1}
240
+
241
+ for _, names in shared_ptrs.items():
242
+ # Here we just clone the shared tensors to avoid tensor aliasing which is
243
+ # not supported in safetensors.
244
+ for shared_tensor_name in names[1:]:
245
+ output_state_dict[shared_tensor_name] = output_state_dict[shared_tensor_name].clone()
246
+
247
+ safe_save_file(
248
+ output_state_dict,
249
+ os.path.join(output_dir, SAFETENSORS_WEIGHTS_NAME),
250
+ metadata={"format": "pt"},
251
+ )
252
+ elif is_main_process:
253
+ torch.save(output_state_dict, os.path.join(output_dir, WEIGHTS_NAME))
254
+
255
+ # save the config and change the inference mode to `True`
256
+ if peft_config.base_model_name_or_path is None:
257
+ peft_config.base_model_name_or_path = (
258
+ self.base_model.__dict__.get("name_or_path", None)
259
+ if peft_config.is_prompt_learning
260
+ else self.base_model.model.__dict__.get("name_or_path", None)
261
+ )
262
+ inference_mode = peft_config.inference_mode
263
+ peft_config.inference_mode = True
264
+
265
+ if peft_config.task_type is None:
266
+ # deal with auto mapping
267
+ base_model_class = self._get_base_model_class(
268
+ is_prompt_tuning=peft_config.is_prompt_learning,
269
+ )
270
+ parent_library = base_model_class.__module__
271
+
272
+ auto_mapping_dict = {
273
+ "base_model_class": base_model_class.__name__,
274
+ "parent_library": parent_library,
275
+ }
276
+ else:
277
+ auto_mapping_dict = None
278
+
279
+ if is_main_process:
280
+ peft_config.save_pretrained(output_dir, auto_mapping_dict=auto_mapping_dict)
281
+ peft_config.inference_mode = inference_mode
282
+
283
+ @classmethod
284
+ def from_pretrained(
285
+ cls,
286
+ model: torch.nn.Module,
287
+ model_id: Union[str, os.PathLike],
288
+ adapter_name: str = "default",
289
+ is_trainable: bool = False,
290
+ config: Optional[PeftConfig] = None,
291
+ **kwargs: Any,
292
+ ) -> PeftModel:
293
+ r"""
294
+ Instantiate a PEFT model from a pretrained model and loaded PEFT weights.
295
+
296
+ Note that the passed `model` may be modified inplace.
297
+
298
+ Args:
299
+ model ([`torch.nn.Module`]):
300
+ The model to be adapted. For 🤗 Transformers models, the model should be initialized with the
301
+ [`~transformers.PreTrainedModel.from_pretrained`].
302
+ model_id (`str` or `os.PathLike`):
303
+ The name of the PEFT configuration to use. Can be either:
304
+ - A string, the `model id` of a PEFT configuration hosted inside a model repo on the Hugging Face
305
+ Hub.
306
+ - A path to a directory containing a PEFT configuration file saved using the `save_pretrained`
307
+ method (`./my_peft_config_directory/`).
308
+ adapter_name (`str`, *optional*, defaults to `"default"`):
309
+ The name of the adapter to be loaded. This is useful for loading multiple adapters.
310
+ is_trainable (`bool`, *optional*, defaults to `False`):
311
+ Whether the adapter should be trainable or not. If `False`, the adapter will be frozen and can only be
312
+ used for inference.
313
+ config ([`~peft.PeftConfig`], *optional*):
314
+ The configuration object to use instead of an automatically loaded configuration. This configuration
315
+ object is mutually exclusive with `model_id` and `kwargs`. This is useful when configuration is already
316
+ loaded before calling `from_pretrained`.
317
+ kwargs: (`optional`):
318
+ Additional keyword arguments passed along to the specific PEFT configuration class.
319
+ """
320
+ from .mapping import MODEL_TYPE_TO_PEFT_MODEL_MAPPING, PEFT_TYPE_TO_CONFIG_MAPPING
321
+
322
+ # load the config
323
+ if config is None:
324
+ config = PEFT_TYPE_TO_CONFIG_MAPPING[
325
+ PeftConfig._get_peft_type(
326
+ model_id,
327
+ subfolder=kwargs.get("subfolder", None),
328
+ revision=kwargs.get("revision", None),
329
+ cache_dir=kwargs.get("cache_dir", None),
330
+ use_auth_token=kwargs.get("use_auth_token", None),
331
+ token=kwargs.get("token", None),
332
+ )
333
+ ].from_pretrained(model_id, **kwargs)
334
+ elif isinstance(config, PeftConfig):
335
+ config.inference_mode = not is_trainable
336
+ else:
337
+ raise ValueError(f"The input config must be a PeftConfig, got {config.__class__}")
338
+
339
+ if (getattr(model, "hf_device_map", None) is not None) and len(
340
+ set(model.hf_device_map.values()).intersection({"cpu", "disk"})
341
+ ) > 0:
342
+ remove_hook_from_submodules(model)
343
+
344
+ if config.is_prompt_learning and is_trainable:
345
+ raise ValueError("Cannot set a prompt learning adapter to trainable when loading pretrained adapter.")
346
+ else:
347
+ config.inference_mode = not is_trainable
348
+
349
+ if config.task_type not in MODEL_TYPE_TO_PEFT_MODEL_MAPPING.keys():
350
+ model = cls(model, config, adapter_name)
351
+ else:
352
+ model = MODEL_TYPE_TO_PEFT_MODEL_MAPPING[config.task_type](model, config, adapter_name)
353
+ model.load_adapter(model_id, adapter_name, is_trainable=is_trainable, **kwargs)
354
+ return model
355
+
356
+ def _setup_prompt_encoder(self, adapter_name: str):
357
+ config = self.peft_config[adapter_name]
358
+ if not hasattr(self, "prompt_encoder"):
359
+ self.prompt_encoder = torch.nn.ModuleDict({})
360
+ self.prompt_tokens = {}
361
+ transformer_backbone = None
362
+ for name, module in self.base_model.named_children():
363
+ for param in module.parameters():
364
+ param.requires_grad = False
365
+ if isinstance(module, PreTrainedModel):
366
+ # Make sure to freeze the Transformers model
367
+ if transformer_backbone is None:
368
+ transformer_backbone = module
369
+ self.transformer_backbone_name = name
370
+ if transformer_backbone is None:
371
+ transformer_backbone = self.base_model
372
+
373
+ if config.num_transformer_submodules is None:
374
+ config.num_transformer_submodules = 2 if config.task_type == TaskType.SEQ_2_SEQ_LM else 1
375
+
376
+ for named_param, value in list(transformer_backbone.named_parameters()):
377
+ # for ZeRO-3, the tensor is sharded across accelerators and deepspeed modifies it to a tensor with shape [0]
378
+ # the actual unsharded shape is stored in "ds_shape" attribute
379
+ # special handling is needed in case the model is initialized in deepspeed.zero.Init() context or HfDeepSpeedConfig
380
+ # has been called before
381
+ # For reference refer to issue: https://github.com/huggingface/peft/issues/996
382
+ deepspeed_distributed_tensor_shape = getattr(value, "ds_shape", None)
383
+
384
+ if value.shape[0] == self.base_model.config.vocab_size or (
385
+ deepspeed_distributed_tensor_shape is not None
386
+ and deepspeed_distributed_tensor_shape[0] == self.base_model.config.vocab_size
387
+ ):
388
+ self.word_embeddings = transformer_backbone.get_submodule(named_param.replace(".weight", ""))
389
+ break
390
+
391
+ if config.peft_type == PeftType.PROMPT_TUNING:
392
+ prompt_encoder = PromptEmbedding(config, self.word_embeddings)
393
+ elif config.peft_type == PeftType.MULTITASK_PROMPT_TUNING:
394
+ prompt_encoder = MultitaskPromptEmbedding(config, self.word_embeddings)
395
+ elif config.peft_type == PeftType.P_TUNING:
396
+ prompt_encoder = PromptEncoder(config)
397
+ elif config.peft_type == PeftType.PREFIX_TUNING:
398
+ prompt_encoder = PrefixEncoder(config)
399
+ else:
400
+ raise ValueError("Not supported")
401
+
402
+ prompt_encoder = prompt_encoder.to(self.device)
403
+ self.prompt_encoder.update(torch.nn.ModuleDict({adapter_name: prompt_encoder}))
404
+ self.prompt_tokens[adapter_name] = torch.arange(
405
+ config.num_virtual_tokens * config.num_transformer_submodules
406
+ ).long()
407
+
408
+ def _prepare_model_for_gradient_checkpointing(self, model: PreTrainedModel):
409
+ r"""
410
+ Prepares the model for gradient checkpointing if necessary
411
+ """
412
+ if not (
413
+ getattr(model, "is_loaded_in_8bit", False)
414
+ or getattr(model, "is_loaded_in_4bit", False)
415
+ or getattr(model, "is_quantized", False)
416
+ ):
417
+ if hasattr(model, "enable_input_require_grads"):
418
+ model.enable_input_require_grads()
419
+ elif hasattr(model, "get_input_embeddings"):
420
+
421
+ def make_inputs_require_grad(module, input, output):
422
+ output.requires_grad_(True)
423
+
424
+ model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
425
+ return model
426
+
427
+ def get_prompt_embedding_to_save(self, adapter_name: str) -> torch.Tensor:
428
+ """
429
+ Returns the prompt embedding to save when saving the model. Only applicable when using a prompt learning
430
+ method.
431
+ """
432
+ prompt_encoder = self.prompt_encoder[adapter_name]
433
+ prompt_tokens = (
434
+ self.prompt_tokens[adapter_name].unsqueeze(0).expand(1, -1).to(prompt_encoder.embedding.weight.device)
435
+ )
436
+ if self.peft_config[adapter_name].peft_type == PeftType.PREFIX_TUNING:
437
+ prompt_tokens = prompt_tokens[:, : self.peft_config[adapter_name].num_virtual_tokens]
438
+
439
+ if self.peft_config[adapter_name].peft_type == PeftType.MULTITASK_PROMPT_TUNING:
440
+ prompt_embeddings = super(MultitaskPromptEmbedding, prompt_encoder).forward(prompt_tokens)
441
+ else:
442
+ prompt_embeddings = prompt_encoder(prompt_tokens)
443
+
444
+ return prompt_embeddings[0].detach().cpu()
445
+
446
+ def get_prompt(self, batch_size: int, task_ids: Optional[torch.Tensor] = None) -> torch.Tensor:
447
+ """
448
+ Returns the virtual prompts to use for Peft. Only applicable when using a prompt learning method.
449
+ """
450
+ peft_config = self.active_peft_config
451
+ prompt_encoder = self.prompt_encoder[self.active_adapter]
452
+ prompt_tokens = (
453
+ self.prompt_tokens[self.active_adapter]
454
+ .unsqueeze(0)
455
+ .expand(batch_size, -1)
456
+ .to(prompt_encoder.embedding.weight.device)
457
+ )
458
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
459
+ prompt_tokens = prompt_tokens[:, : peft_config.num_virtual_tokens]
460
+ if peft_config.inference_mode:
461
+ past_key_values = prompt_encoder.embedding.weight.repeat(batch_size, 1, 1)
462
+ else:
463
+ past_key_values = prompt_encoder(prompt_tokens)
464
+ if self.base_model_torch_dtype is not None:
465
+ past_key_values = past_key_values.to(self.base_model_torch_dtype)
466
+ past_key_values = past_key_values.view(
467
+ batch_size,
468
+ peft_config.num_virtual_tokens,
469
+ peft_config.num_layers * 2,
470
+ peft_config.num_attention_heads,
471
+ peft_config.token_dim // peft_config.num_attention_heads,
472
+ )
473
+ if peft_config.num_transformer_submodules == 2:
474
+ past_key_values = torch.cat([past_key_values, past_key_values], dim=2)
475
+ past_key_values = past_key_values.permute([2, 0, 3, 1, 4]).split(
476
+ peft_config.num_transformer_submodules * 2
477
+ )
478
+ if TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING.get(self.config.model_type, None) is not None:
479
+ post_process_fn = TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING[self.config.model_type]
480
+ past_key_values = post_process_fn(past_key_values)
481
+ return past_key_values
482
+ else:
483
+ if peft_config.peft_type == PeftType.MULTITASK_PROMPT_TUNING:
484
+ prompts = prompt_encoder(prompt_tokens, task_ids)
485
+ else:
486
+ if peft_config.inference_mode:
487
+ prompts = prompt_encoder.embedding.weight.repeat(batch_size, 1, 1)
488
+ else:
489
+ prompts = prompt_encoder(prompt_tokens)
490
+ return prompts
491
+
492
+ def get_nb_trainable_parameters(self) -> tuple[int, int]:
493
+ r"""
494
+ Returns the number of trainable parameters and the number of all parameters in the model.
495
+ """
496
+ trainable_params = 0
497
+ all_param = 0
498
+ for _, param in self.named_parameters():
499
+ num_params = param.numel()
500
+ # if using DS Zero 3 and the weights are initialized empty
501
+ if num_params == 0 and hasattr(param, "ds_numel"):
502
+ num_params = param.ds_numel
503
+
504
+ # Due to the design of 4bit linear layers from bitsandbytes
505
+ # one needs to multiply the number of parameters by 2 to get
506
+ # the correct number of parameters
507
+ if param.__class__.__name__ == "Params4bit":
508
+ num_params = num_params * 2
509
+
510
+ all_param += num_params
511
+ if param.requires_grad:
512
+ trainable_params += num_params
513
+
514
+ return trainable_params, all_param
515
+
516
+ def print_trainable_parameters(self) -> None:
517
+ """
518
+ Prints the number of trainable parameters in the model.
519
+ """
520
+ trainable_params, all_param = self.get_nb_trainable_parameters()
521
+
522
+ print(
523
+ f"trainable params: {trainable_params:,d} || all params: {all_param:,d} || trainable%: {100 * trainable_params / all_param}"
524
+ )
525
+
526
+ def __getattr__(self, name: str):
527
+ """Forward missing attributes to the wrapped module."""
528
+ try:
529
+ return super().__getattr__(name) # defer to nn.Module's logic
530
+ except AttributeError:
531
+ return getattr(self.base_model, name)
532
+
533
+ def forward(self, *args: Any, **kwargs: Any):
534
+ """
535
+ Forward pass of the model.
536
+ """
537
+ return self.get_base_model()(*args, **kwargs)
538
+
539
+ def _get_base_model_class(self, is_prompt_tuning=False):
540
+ """
541
+ Returns the base model class.
542
+ """
543
+ if not is_prompt_tuning:
544
+ return self.base_model.model.__class__
545
+ return self.base_model.__class__
546
+
547
+ @contextmanager
548
+ def disable_adapter(self):
549
+ """
550
+ Context manager that disables the adapter module. Use this to run inference on the base model.
551
+
552
+ Example:
553
+
554
+ ```py
555
+ >>> with model.disable_adapter():
556
+ ... model(inputs)
557
+ ```
558
+ """
559
+ try:
560
+ if self.peft_config[self.active_adapter].is_prompt_learning:
561
+ # TODO: consider replacing this patching of methods with a more robust mechanism: setting a flag and
562
+ # letting the underlying methods deal with it, same as how LoRA does it.
563
+ old_forward = self.forward
564
+ self.forward = self.base_model.forward
565
+ old_prepare_inputs_for_generation = self.prepare_inputs_for_generation
566
+ self.prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation
567
+ else:
568
+ self.base_model.disable_adapter_layers()
569
+ yield
570
+ finally:
571
+ if self.peft_config[self.active_adapter].is_prompt_learning:
572
+ self.forward = old_forward
573
+ self.prepare_inputs_for_generation = old_prepare_inputs_for_generation
574
+ else:
575
+ self.base_model.enable_adapter_layers()
576
+
577
+ def get_base_model(self) -> torch.nn.Module:
578
+ """
579
+ Returns the base model.
580
+ """
581
+ return (
582
+ self.base_model
583
+ if (self.active_peft_config.is_prompt_learning or self.peft_type == PeftType.POLY)
584
+ else self.base_model.model
585
+ )
586
+
587
+ def add_adapter(self, adapter_name: str, peft_config: PeftConfig) -> None:
588
+ """
589
+ Add an adapter to the model based on the passed configuration.
590
+
591
+ The name for the new adapter should be unique.
592
+
593
+ The new adapter is not automatically set as the active adapter. Use [`PeftModel.set_adapter`] to set the active
594
+ adapter.
595
+
596
+ Args:
597
+ adapter_name (`str`):
598
+ The name of the adapter to be added.
599
+ peft_config ([`PeftConfig`]):
600
+ The configuration of the adapter to be added.
601
+ """
602
+ if peft_config.peft_type != self.peft_type:
603
+ raise ValueError(
604
+ f"Cannot combine adapters with different peft types. "
605
+ f"Found {self.peft_type} and {peft_config.peft_type}."
606
+ )
607
+
608
+ try:
609
+ if peft_config.is_prompt_learning:
610
+ self.peft_config[adapter_name] = peft_config
611
+ if hasattr(self.config, "to_dict"):
612
+ dict_config = self.config.to_dict()
613
+ else:
614
+ dict_config = self.config
615
+
616
+ peft_config = _prepare_prompt_learning_config(peft_config, dict_config)
617
+ self._setup_prompt_encoder(adapter_name)
618
+ elif peft_config.is_adaption_prompt:
619
+ self.base_model.add_adapter(adapter_name, peft_config)
620
+ else:
621
+ self.peft_config[adapter_name] = peft_config
622
+ self.base_model.inject_adapter(self.base_model.model, adapter_name)
623
+ except Exception: # something went wrong, roll back
624
+ if adapter_name in self.peft_config:
625
+ del self.peft_config[adapter_name]
626
+ raise
627
+
628
+ self.set_additional_trainable_modules(peft_config, adapter_name)
629
+
630
+ def set_additional_trainable_modules(self, peft_config, adapter_name):
631
+ if getattr(peft_config, "modules_to_save", None) is not None:
632
+ if self.modules_to_save is None:
633
+ self.modules_to_save = set(peft_config.modules_to_save)
634
+ else:
635
+ self.modules_to_save.update(peft_config.modules_to_save)
636
+ _set_trainable(self, adapter_name)
637
+
638
+ @classmethod
639
+ def _split_kwargs(cls, kwargs: dict[str, Any]):
640
+ _kwargs_not_in_hf_hub_download_signature = ("use_auth_token",)
641
+ hf_hub_download_kwargs = {}
642
+ other_kwargs = {}
643
+
644
+ for key, value in kwargs.items():
645
+ if key in inspect.signature(hf_hub_download).parameters or key in _kwargs_not_in_hf_hub_download_signature:
646
+ hf_hub_download_kwargs[key] = value
647
+ else:
648
+ other_kwargs[key] = value
649
+
650
+ return hf_hub_download_kwargs, other_kwargs
651
+
652
+ def load_adapter(self, model_id: str, adapter_name: str, is_trainable: bool = False, **kwargs: Any):
653
+ """
654
+ Load a trained adapter into the model.
655
+
656
+ The name for the new adapter should be unique.
657
+
658
+ The new adapter is not automatically set as the active adapter. Use [`PeftModel.set_adapter`] to set the active
659
+ adapter.
660
+
661
+ Args:
662
+ model_id (`str` or `os.PathLike`):
663
+ The name of the adapter model on the Hugging Face Hub, or a local path to the saved adapter.
664
+ adapter_name (`str`):
665
+ The name of the adapter to be added.
666
+ is_trainable (`bool`, *optional*, defaults to `False`):
667
+ Whether the adapter should be trainable or not. If `False`, the adapter will be frozen and can only be
668
+ used for inference.
669
+ kwargs: (`optional`):
670
+ Additional arguments to modify the way the adapter is loaded, e.g. the token for Hugging Face Hub.
671
+ """
672
+ from .mapping import PEFT_TYPE_TO_CONFIG_MAPPING
673
+
674
+ hf_hub_download_kwargs, kwargs = self._split_kwargs(kwargs)
675
+ torch_device = infer_device()
676
+
677
+ if adapter_name not in self.peft_config:
678
+ # load the config
679
+ peft_config = PEFT_TYPE_TO_CONFIG_MAPPING[
680
+ PeftConfig._get_peft_type(
681
+ model_id,
682
+ **hf_hub_download_kwargs,
683
+ )
684
+ ].from_pretrained(
685
+ model_id,
686
+ **hf_hub_download_kwargs,
687
+ )
688
+ if peft_config.is_prompt_learning and is_trainable:
689
+ raise ValueError("Cannot set a prompt learning adapter to trainable when loading pretrained adapter.")
690
+ else:
691
+ peft_config.inference_mode = not is_trainable
692
+ self.add_adapter(adapter_name, peft_config)
693
+
694
+ adapters_weights = load_peft_weights(model_id, device=torch_device, **hf_hub_download_kwargs)
695
+
696
+ # load the weights into the model
697
+ load_result = set_peft_model_state_dict(self, adapters_weights, adapter_name=adapter_name)
698
+ if (
699
+ (getattr(self, "hf_device_map", None) is not None)
700
+ and (len(set(self.hf_device_map.values()).intersection({"cpu", "disk"})) > 0)
701
+ and len(self.peft_config) == 1
702
+ ):
703
+ device_map = kwargs.get("device_map", "auto")
704
+ max_memory = kwargs.get("max_memory", None)
705
+ offload_dir = kwargs.get("offload_folder", None)
706
+ offload_index = kwargs.get("offload_index", None)
707
+
708
+ dispatch_model_kwargs = {}
709
+ # Safety checker for previous `accelerate` versions
710
+ # `offload_index` was introduced in https://github.com/huggingface/accelerate/pull/873/
711
+ if "offload_index" in inspect.signature(dispatch_model).parameters:
712
+ dispatch_model_kwargs["offload_index"] = offload_index
713
+
714
+ no_split_module_classes = self._no_split_modules
715
+
716
+ if device_map != "sequential":
717
+ max_memory = get_balanced_memory(
718
+ self,
719
+ max_memory=max_memory,
720
+ no_split_module_classes=no_split_module_classes,
721
+ low_zero=(device_map == "balanced_low_0"),
722
+ )
723
+ if isinstance(device_map, str):
724
+ device_map = infer_auto_device_map(
725
+ self, max_memory=max_memory, no_split_module_classes=no_split_module_classes
726
+ )
727
+ dispatch_model(
728
+ self,
729
+ device_map=device_map,
730
+ offload_dir=offload_dir,
731
+ **dispatch_model_kwargs,
732
+ )
733
+ hook = AlignDevicesHook(io_same_device=True)
734
+ if self.peft_config[adapter_name].is_prompt_learning:
735
+ remove_hook_from_submodules(self.prompt_encoder)
736
+ add_hook_to_module(self.get_base_model(), hook)
737
+
738
+ # Set model in evaluation mode to deactivate Dropout modules by default
739
+ if not is_trainable:
740
+ self.eval()
741
+ return load_result
742
+
743
+ def set_adapter(self, adapter_name: str) -> None:
744
+ """
745
+ Sets the active adapter.
746
+
747
+ Only one adapter can be active at a time.
748
+
749
+ Additionally, this function will set the specified adapter to trainable (i.e., requires_grad=True). If this is
750
+ not desired, use the following code.
751
+
752
+ ```py
753
+ >>> for name, param in model_peft.named_parameters():
754
+ ... if ...: # some check on name (ex. if 'lora' in name)
755
+ ... param.requires_grad = False
756
+ ```
757
+
758
+ Args:
759
+ adapter_name (`str`):
760
+ The name of the adapter to be set as active. The adapter must be loaded first.
761
+ """
762
+ if adapter_name not in self.peft_config:
763
+ raise ValueError(f"Adapter {adapter_name} not found.")
764
+ self.active_adapter = adapter_name
765
+ if not self.peft_config[adapter_name].is_prompt_learning:
766
+ self.base_model.set_adapter(adapter_name)
767
+ _set_adapter(self, adapter_name)
768
+
769
+ @property
770
+ def base_model_torch_dtype(self):
771
+ return getattr(self.base_model, "dtype", None)
772
+
773
+ @property
774
+ def active_peft_config(self):
775
+ return self.peft_config[self.active_adapter]
776
+
777
+ def create_or_update_model_card(self, output_dir: str):
778
+ """
779
+ Updates or creates the model card to include information about peft:
780
+ 1. Adds `peft` library tag
781
+ 2. Adds peft version
782
+ 3. Adds base model info
783
+ 4. Adds quantization information if it was used
784
+ """
785
+
786
+ filename = os.path.join(output_dir, "README.md")
787
+
788
+ card = ModelCard.load(filename) if os.path.exists(filename) else ModelCard.from_template(ModelCardData())
789
+
790
+ card.data["library_name"] = "peft"
791
+
792
+ model_config = getattr(self, "config", None)
793
+ if hasattr(model_config, "to_dict"):
794
+ model_config = model_config.to_dict()
795
+ if model_config is not None and "_name_or_path" in model_config:
796
+ card.data["base_model"] = model_config["_name_or_path"]
797
+
798
+ lines = card.text.splitlines()
799
+
800
+ quantization_config = None
801
+ if hasattr(model_config, "quantization_config"):
802
+ quantization_config = self.config.quantization_config.to_dict()
803
+ training_config_text = ""
804
+ quantization_prefix = "The following `bitsandbytes` quantization config was used during training:"
805
+ # Adds quantization information if it was used
806
+ if quantization_config is not None:
807
+ training_config_text += f"\n{quantization_prefix}\n"
808
+ training_config_text += "\n".join([f"- {name}: {value}" for name, value in quantization_config.items()])
809
+ training_config_text += "\n"
810
+
811
+ training_procedure_heading = "## Training procedure"
812
+ if quantization_prefix not in lines and bool(training_config_text):
813
+ if training_procedure_heading in lines:
814
+ lines.insert(lines.index(training_procedure_heading) + 2, training_config_text)
815
+ else:
816
+ lines.append(f"{training_procedure_heading}\n{training_config_text}")
817
+
818
+ # Adds peft version
819
+ framework_block_heading = "### Framework versions"
820
+ if f"- PEFT {__version__}" not in lines:
821
+ if framework_block_heading in lines:
822
+ lines.insert(lines.index(framework_block_heading) + 2, f"- PEFT {__version__}")
823
+ else:
824
+ lines.append(f"{framework_block_heading}\n\n- PEFT {__version__}")
825
+
826
+ card.text = "\n".join(lines)
827
+ card.save(filename)
828
+
829
+
830
+ class PeftModelForSequenceClassification(PeftModel):
831
+ """
832
+ Peft model for sequence classification tasks.
833
+
834
+ Args:
835
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
836
+ peft_config ([`PeftConfig`]): Peft config.
837
+
838
+ **Attributes**:
839
+ - **config** ([`~transformers.PretrainedConfig`]) -- The configuration object of the base model.
840
+ - **cls_layer_name** (`str`) -- The name of the classification layer.
841
+
842
+ Example:
843
+
844
+ ```py
845
+ >>> from transformers import AutoModelForSequenceClassification
846
+ >>> from peft import PeftModelForSequenceClassification, get_peft_config
847
+
848
+ >>> config = {
849
+ ... "peft_type": "PREFIX_TUNING",
850
+ ... "task_type": "SEQ_CLS",
851
+ ... "inference_mode": False,
852
+ ... "num_virtual_tokens": 20,
853
+ ... "token_dim": 768,
854
+ ... "num_transformer_submodules": 1,
855
+ ... "num_attention_heads": 12,
856
+ ... "num_layers": 12,
857
+ ... "encoder_hidden_size": 768,
858
+ ... "prefix_projection": False,
859
+ ... "postprocess_past_key_value_function": None,
860
+ ... }
861
+
862
+ >>> peft_config = get_peft_config(config)
863
+ >>> model = AutoModelForSequenceClassification.from_pretrained("bert-base-cased")
864
+ >>> peft_model = PeftModelForSequenceClassification(model, peft_config)
865
+ >>> peft_model.print_trainable_parameters()
866
+ trainable params: 370178 || all params: 108680450 || trainable%: 0.3406113979101117
867
+ ```
868
+ """
869
+
870
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig, adapter_name: str = "default") -> None:
871
+ super().__init__(model, peft_config, adapter_name)
872
+ if self.modules_to_save is None:
873
+ self.modules_to_save = {"classifier", "score"}
874
+ else:
875
+ self.modules_to_save.update({"classifier", "score"})
876
+
877
+ for name, _ in self.base_model.named_children():
878
+ if any(module_name in name for module_name in self.modules_to_save):
879
+ self.cls_layer_name = name
880
+ break
881
+
882
+ # to make sure classifier layer is trainable
883
+ _set_trainable(self, adapter_name)
884
+
885
+ def forward(
886
+ self,
887
+ input_ids=None,
888
+ attention_mask=None,
889
+ inputs_embeds=None,
890
+ labels=None,
891
+ output_attentions=None,
892
+ output_hidden_states=None,
893
+ return_dict=None,
894
+ task_ids=None,
895
+ **kwargs,
896
+ ):
897
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
898
+ peft_config = self.active_peft_config
899
+ if not peft_config.is_prompt_learning:
900
+ if peft_config.peft_type == PeftType.POLY:
901
+ kwargs["task_ids"] = task_ids
902
+ return self.base_model(
903
+ input_ids=input_ids,
904
+ attention_mask=attention_mask,
905
+ inputs_embeds=inputs_embeds,
906
+ labels=labels,
907
+ output_attentions=output_attentions,
908
+ output_hidden_states=output_hidden_states,
909
+ return_dict=return_dict,
910
+ **kwargs,
911
+ )
912
+
913
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
914
+ if attention_mask is not None:
915
+ # concat prompt attention mask
916
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(attention_mask.device)
917
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
918
+ if kwargs.get("position_ids", None) is not None:
919
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
920
+ kwargs["position_ids"] = None
921
+ kwargs.update(
922
+ {
923
+ "attention_mask": attention_mask,
924
+ "labels": labels,
925
+ "output_attentions": output_attentions,
926
+ "output_hidden_states": output_hidden_states,
927
+ "return_dict": return_dict,
928
+ }
929
+ )
930
+
931
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
932
+ return self._prefix_tuning_forward(input_ids=input_ids, **kwargs)
933
+ else:
934
+ if kwargs.get("token_type_ids", None) is not None:
935
+ kwargs["token_type_ids"] = torch.cat(
936
+ (
937
+ torch.zeros(batch_size, peft_config.num_virtual_tokens).to(self.word_embeddings.weight.device),
938
+ kwargs["token_type_ids"],
939
+ ),
940
+ dim=1,
941
+ ).long()
942
+ if inputs_embeds is None:
943
+ inputs_embeds = self.word_embeddings(input_ids)
944
+ prompts = self.get_prompt(batch_size=batch_size, task_ids=task_ids)
945
+ prompts = prompts.to(inputs_embeds.dtype)
946
+ inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
947
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
948
+
949
+ def _prefix_tuning_forward(
950
+ self,
951
+ input_ids=None,
952
+ attention_mask=None,
953
+ inputs_embeds=None,
954
+ labels=None,
955
+ output_attentions=None,
956
+ output_hidden_states=None,
957
+ return_dict=None,
958
+ **kwargs,
959
+ ):
960
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
961
+ past_key_values = self.get_prompt(batch_size)
962
+ fwd_params = list(inspect.signature(self.base_model.forward).parameters.keys())
963
+ kwargs.update(
964
+ {
965
+ "input_ids": input_ids,
966
+ "attention_mask": attention_mask,
967
+ "inputs_embeds": inputs_embeds,
968
+ "output_attentions": output_attentions,
969
+ "output_hidden_states": output_hidden_states,
970
+ "return_dict": return_dict,
971
+ "past_key_values": past_key_values,
972
+ }
973
+ )
974
+ if "past_key_values" in fwd_params:
975
+ return self.base_model(labels=labels, **kwargs)
976
+ else:
977
+ transformer_backbone_name = self.base_model.get_submodule(self.transformer_backbone_name)
978
+ fwd_params = list(inspect.signature(transformer_backbone_name.forward).parameters.keys())
979
+ if "past_key_values" not in fwd_params:
980
+ raise ValueError("Model does not support past key values which are required for prefix tuning.")
981
+ outputs = transformer_backbone_name(**kwargs)
982
+ pooled_output = outputs[1] if len(outputs) > 1 else outputs[0]
983
+ if "dropout" in [name for name, _ in list(self.base_model.named_children())]:
984
+ pooled_output = self.base_model.dropout(pooled_output)
985
+ logits = self.base_model.get_submodule(self.cls_layer_name)(pooled_output)
986
+
987
+ loss = None
988
+ if labels is not None:
989
+ if self.config.problem_type is None:
990
+ if self.base_model.num_labels == 1:
991
+ self.config.problem_type = "regression"
992
+ elif self.base_model.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
993
+ self.config.problem_type = "single_label_classification"
994
+ else:
995
+ self.config.problem_type = "multi_label_classification"
996
+
997
+ if self.config.problem_type == "regression":
998
+ loss_fct = MSELoss()
999
+ if self.base_model.num_labels == 1:
1000
+ loss = loss_fct(logits.squeeze(), labels.squeeze())
1001
+ else:
1002
+ loss = loss_fct(logits, labels)
1003
+ elif self.config.problem_type == "single_label_classification":
1004
+ loss_fct = CrossEntropyLoss()
1005
+ loss = loss_fct(logits.view(-1, self.base_model.num_labels), labels.view(-1))
1006
+ elif self.config.problem_type == "multi_label_classification":
1007
+ loss_fct = BCEWithLogitsLoss()
1008
+ loss = loss_fct(logits, labels)
1009
+ if not return_dict:
1010
+ output = (logits,) + outputs[2:]
1011
+ return ((loss,) + output) if loss is not None else output
1012
+
1013
+ return SequenceClassifierOutput(
1014
+ loss=loss,
1015
+ logits=logits,
1016
+ hidden_states=outputs.hidden_states,
1017
+ attentions=outputs.attentions,
1018
+ )
1019
+
1020
+
1021
+ class PeftModelForCausalLM(PeftModel):
1022
+ """
1023
+ Peft model for causal language modeling.
1024
+
1025
+ Args:
1026
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
1027
+ peft_config ([`PeftConfig`]): Peft config.
1028
+
1029
+
1030
+ Example:
1031
+
1032
+ ```py
1033
+ >>> from transformers import AutoModelForCausalLM
1034
+ >>> from peft import PeftModelForCausalLM, get_peft_config
1035
+
1036
+ >>> config = {
1037
+ ... "peft_type": "PREFIX_TUNING",
1038
+ ... "task_type": "CAUSAL_LM",
1039
+ ... "inference_mode": False,
1040
+ ... "num_virtual_tokens": 20,
1041
+ ... "token_dim": 1280,
1042
+ ... "num_transformer_submodules": 1,
1043
+ ... "num_attention_heads": 20,
1044
+ ... "num_layers": 36,
1045
+ ... "encoder_hidden_size": 1280,
1046
+ ... "prefix_projection": False,
1047
+ ... "postprocess_past_key_value_function": None,
1048
+ ... }
1049
+
1050
+ >>> peft_config = get_peft_config(config)
1051
+ >>> model = AutoModelForCausalLM.from_pretrained("gpt2-large")
1052
+ >>> peft_model = PeftModelForCausalLM(model, peft_config)
1053
+ >>> peft_model.print_trainable_parameters()
1054
+ trainable params: 1843200 || all params: 775873280 || trainable%: 0.23756456724479544
1055
+ ```
1056
+ """
1057
+
1058
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig, adapter_name: str = "default") -> None:
1059
+ super().__init__(model, peft_config, adapter_name)
1060
+ self.base_model_prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation
1061
+
1062
+ def forward(
1063
+ self,
1064
+ input_ids=None,
1065
+ attention_mask=None,
1066
+ inputs_embeds=None,
1067
+ labels=None,
1068
+ output_attentions=None,
1069
+ output_hidden_states=None,
1070
+ return_dict=None,
1071
+ task_ids=None,
1072
+ **kwargs,
1073
+ ):
1074
+ peft_config = self.active_peft_config
1075
+ if not peft_config.is_prompt_learning:
1076
+ if self.base_model.config.model_type == "mpt":
1077
+ if inputs_embeds is not None:
1078
+ raise AssertionError("forward in MPTForCausalLM does not support inputs_embeds")
1079
+ return self.base_model(
1080
+ input_ids=input_ids,
1081
+ attention_mask=attention_mask,
1082
+ labels=labels,
1083
+ output_attentions=output_attentions,
1084
+ output_hidden_states=output_hidden_states,
1085
+ return_dict=return_dict,
1086
+ **kwargs,
1087
+ )
1088
+
1089
+ if peft_config.peft_type == PeftType.POLY:
1090
+ kwargs["task_ids"] = task_ids
1091
+ return self.base_model(
1092
+ input_ids=input_ids,
1093
+ attention_mask=attention_mask,
1094
+ inputs_embeds=inputs_embeds,
1095
+ labels=labels,
1096
+ output_attentions=output_attentions,
1097
+ output_hidden_states=output_hidden_states,
1098
+ return_dict=return_dict,
1099
+ **kwargs,
1100
+ )
1101
+
1102
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1103
+ if attention_mask is not None:
1104
+ # concat prompt attention mask
1105
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(attention_mask.device)
1106
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1107
+
1108
+ if kwargs.get("position_ids", None) is not None:
1109
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1110
+ kwargs["position_ids"] = None
1111
+ if kwargs.get("token_type_ids", None) is not None:
1112
+ warnings.warn("Token type ids are not supported for parameter efficient tuning. Ignoring token type ids")
1113
+ kwargs["token_type_ids"] = None
1114
+ kwargs.update(
1115
+ {
1116
+ "attention_mask": attention_mask,
1117
+ "labels": labels,
1118
+ "output_attentions": output_attentions,
1119
+ "output_hidden_states": output_hidden_states,
1120
+ "return_dict": return_dict,
1121
+ }
1122
+ )
1123
+
1124
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1125
+ past_key_values = self.get_prompt(batch_size)
1126
+ return self.base_model(
1127
+ input_ids=input_ids, inputs_embeds=inputs_embeds, past_key_values=past_key_values, **kwargs
1128
+ )
1129
+ else:
1130
+ if inputs_embeds is None:
1131
+ inputs_embeds = self.word_embeddings(input_ids)
1132
+ # concat prompt labels
1133
+ if labels is not None:
1134
+ prefix_labels = torch.full((batch_size, peft_config.num_virtual_tokens), -100).to(labels.device)
1135
+ kwargs["labels"] = torch.cat((prefix_labels, labels), dim=1)
1136
+ prompts = self.get_prompt(batch_size=batch_size, task_ids=task_ids)
1137
+ prompts = prompts.to(inputs_embeds.dtype)
1138
+ inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
1139
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
1140
+
1141
+ def generate(self, *args, **kwargs):
1142
+ self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
1143
+ if hasattr(self.base_model, "model"):
1144
+ self.base_model.model.generation_config = self.generation_config
1145
+ else:
1146
+ self.base_model.generation_config = self.generation_config
1147
+ try:
1148
+ outputs = self.base_model.generate(*args, **kwargs)
1149
+ except:
1150
+ self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
1151
+ raise
1152
+ else:
1153
+ self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
1154
+ return outputs
1155
+
1156
+ def prepare_inputs_for_generation(self, *args, task_ids: Optional[torch.Tensor] = None, **kwargs):
1157
+ peft_config = self.active_peft_config
1158
+ model_kwargs = self.base_model_prepare_inputs_for_generation(*args, **kwargs)
1159
+
1160
+ # https://github.com/huggingface/transformers/pull/26681/ introduced new cache format
1161
+ # for some architectures which requires a special fix for prompt tuning etc.
1162
+ # TODO: starting with transformers 4.38, all architectures should support caching.
1163
+ uses_transformers_4_38 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.38.0")
1164
+ uses_transformers_4_36 = packaging.version.parse(transformers.__version__) >= packaging.version.parse("4.36.0")
1165
+ transformers_new_cache_archs = ["llama", "mistral", "persimmon", "phi"]
1166
+ uses_cache = uses_transformers_4_38 or (
1167
+ uses_transformers_4_36 and self.base_model.config.model_type in transformers_new_cache_archs
1168
+ )
1169
+
1170
+ if peft_config.peft_type == PeftType.POLY:
1171
+ model_kwargs["task_ids"] = task_ids
1172
+ if peft_config.is_prompt_learning:
1173
+ if uses_cache and (model_kwargs["past_key_values"] is not None):
1174
+ # change in the logic of `prepare_inputs_for_generation` makes the below code necessary
1175
+ # In prompt learning methods, past key values are longer when compared to the `input_ids`.
1176
+ # As such only consider the last input ids in the autoregressive generation phase.
1177
+ if model_kwargs["past_key_values"][0][0].shape[-2] >= model_kwargs["input_ids"].shape[1]:
1178
+ model_kwargs["input_ids"] = model_kwargs["input_ids"][:, -1:]
1179
+
1180
+ if model_kwargs.get("attention_mask", None) is not None:
1181
+ size = model_kwargs["input_ids"].shape[0], peft_config.num_virtual_tokens
1182
+ prefix_attention_mask = torch.ones(size).to(model_kwargs["input_ids"].device)
1183
+ model_kwargs["attention_mask"] = torch.cat(
1184
+ (prefix_attention_mask, model_kwargs["attention_mask"]), dim=1
1185
+ )
1186
+
1187
+ if model_kwargs.get("position_ids", None) is not None:
1188
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1189
+ model_kwargs["position_ids"] = None
1190
+
1191
+ if kwargs.get("token_type_ids", None) is not None:
1192
+ warnings.warn(
1193
+ "Token type ids are not supported for parameter efficient tuning. Ignoring token type ids"
1194
+ )
1195
+ kwargs["token_type_ids"] = None
1196
+
1197
+ if model_kwargs["past_key_values"] is None and peft_config.peft_type == PeftType.PREFIX_TUNING:
1198
+ past_key_values = self.get_prompt(batch_size=model_kwargs["input_ids"].shape[0])
1199
+ model_kwargs["past_key_values"] = past_key_values
1200
+ else:
1201
+ if model_kwargs["past_key_values"] is None:
1202
+ inputs_embeds = self.word_embeddings(model_kwargs["input_ids"])
1203
+ prompts = self.get_prompt(batch_size=model_kwargs["input_ids"].shape[0], task_ids=task_ids)
1204
+ prompts = prompts.to(inputs_embeds.dtype)
1205
+ model_kwargs["inputs_embeds"] = torch.cat((prompts, inputs_embeds), dim=1)
1206
+ model_kwargs["input_ids"] = None
1207
+
1208
+ # For transformers>=4.38.0 - for some architectures such as Llama, `cache_position` is
1209
+ # passed in the forward pass to keep track of the position ids of the cache. We have to
1210
+ # pop that from `model_kwargs` as `cache_position` is properly created by the model, using the passed
1211
+ # `inputs_embeds`: https://github.com/huggingface/transformers/blob/593230f0a1150ea9c0477b9d859f25daf73c8c33/src/transformers/models/llama/modeling_llama.py#L956
1212
+ _ = model_kwargs.pop("cache_position", None)
1213
+
1214
+ return model_kwargs
1215
+
1216
+
1217
+ class PeftModelForSeq2SeqLM(PeftModel):
1218
+ """
1219
+ Peft model for sequence-to-sequence language modeling.
1220
+
1221
+ Args:
1222
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
1223
+ peft_config ([`PeftConfig`]): Peft config.
1224
+
1225
+
1226
+ Example:
1227
+
1228
+ ```py
1229
+ >>> from transformers import AutoModelForSeq2SeqLM
1230
+ >>> from peft import PeftModelForSeq2SeqLM, get_peft_config
1231
+
1232
+ >>> config = {
1233
+ ... "peft_type": "LORA",
1234
+ ... "task_type": "SEQ_2_SEQ_LM",
1235
+ ... "inference_mode": False,
1236
+ ... "r": 8,
1237
+ ... "target_modules": ["q", "v"],
1238
+ ... "lora_alpha": 32,
1239
+ ... "lora_dropout": 0.1,
1240
+ ... "fan_in_fan_out": False,
1241
+ ... "enable_lora": None,
1242
+ ... "bias": "none",
1243
+ ... }
1244
+
1245
+ >>> peft_config = get_peft_config(config)
1246
+ >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
1247
+ >>> peft_model = PeftModelForSeq2SeqLM(model, peft_config)
1248
+ >>> peft_model.print_trainable_parameters()
1249
+ trainable params: 884736 || all params: 223843584 || trainable%: 0.3952474242013566
1250
+ ```
1251
+ """
1252
+
1253
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig, adapter_name: str = "default") -> None:
1254
+ super().__init__(model, peft_config, adapter_name)
1255
+ self.base_model_prepare_inputs_for_generation = self.base_model.prepare_inputs_for_generation
1256
+ self.base_model_prepare_encoder_decoder_kwargs_for_generation = (
1257
+ self.base_model._prepare_encoder_decoder_kwargs_for_generation
1258
+ )
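+ # note: the base model's generation hooks are saved here so that `generate` can
+ # temporarily swap in the prompt-learning variants and restore the originals afterwards.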
1259
+
1260
+ def forward(
1261
+ self,
1262
+ input_ids=None,
1263
+ attention_mask=None,
1264
+ inputs_embeds=None,
1265
+ decoder_input_ids=None,
1266
+ decoder_attention_mask=None,
1267
+ decoder_inputs_embeds=None,
1268
+ labels=None,
1269
+ output_attentions=None,
1270
+ output_hidden_states=None,
1271
+ return_dict=None,
1272
+ task_ids=None,
1273
+ **kwargs,
1274
+ ):
1275
+ peft_config = self.active_peft_config
1276
+ if not peft_config.is_prompt_learning:
1277
+ if peft_config.peft_type == PeftType.POLY:
1278
+ kwargs["task_ids"] = task_ids
1279
+ return self.base_model(
1280
+ input_ids=input_ids,
1281
+ attention_mask=attention_mask,
1282
+ inputs_embeds=inputs_embeds,
1283
+ decoder_input_ids=decoder_input_ids,
1284
+ decoder_attention_mask=decoder_attention_mask,
1285
+ decoder_inputs_embeds=decoder_inputs_embeds,
1286
+ labels=labels,
1287
+ output_attentions=output_attentions,
1288
+ output_hidden_states=output_hidden_states,
1289
+ return_dict=return_dict,
1290
+ **kwargs,
1291
+ )
1292
+
1293
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1294
+ if decoder_attention_mask is not None:
1295
+ # concat prompt attention mask
1296
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(
1297
+ decoder_attention_mask.device
1298
+ )
1299
+ if peft_config.peft_type not in [PeftType.PROMPT_TUNING, PeftType.P_TUNING]:
1300
+ decoder_attention_mask = torch.cat((prefix_attention_mask, decoder_attention_mask), dim=1)
1301
+
1302
+ if kwargs.get("position_ids", None) is not None:
1303
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1304
+ kwargs["position_ids"] = None
1305
+ if kwargs.get("token_type_ids", None) is not None:
1306
+ warnings.warn("Token type ids are not supported for parameter efficient tuning. Ignoring token type ids")
1307
+ kwargs["token_type_ids"] = None
1308
+ kwargs.update(
1309
+ {
1310
+ "attention_mask": attention_mask,
1311
+ "decoder_attention_mask": decoder_attention_mask,
1312
+ "labels": labels,
1313
+ "output_attentions": output_attentions,
1314
+ "output_hidden_states": output_hidden_states,
1315
+ "return_dict": return_dict,
1316
+ }
1317
+ )
1318
+
1319
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1320
+ past_key_values = self.get_prompt(batch_size)
1321
+ return self.base_model(
1322
+ input_ids=input_ids,
1323
+ decoder_input_ids=decoder_input_ids,
1324
+ decoder_inputs_embeds=decoder_inputs_embeds,
1325
+ past_key_values=past_key_values,
1326
+ **kwargs,
1327
+ )
1328
+ elif peft_config.peft_type in [PeftType.PROMPT_TUNING, PeftType.P_TUNING]:
1329
+ if inputs_embeds is None:
1330
+ inputs_embeds = self.word_embeddings(input_ids)
1331
+
1332
+ if attention_mask is not None:
1333
+ # concat prompt attention mask
1334
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(
1335
+ attention_mask.device
1336
+ )
1337
+ kwargs["attention_mask"] = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1338
+
1339
+ prompts = self.get_prompt(batch_size=batch_size)
1340
+ prompts = prompts.to(inputs_embeds.dtype)
1341
+ inputs_embeds = torch.cat((prompts[:, : peft_config.num_virtual_tokens], inputs_embeds), dim=1)
1342
+
1343
+ return self.base_model(
1344
+ inputs_embeds=inputs_embeds,
1345
+ decoder_input_ids=decoder_input_ids,
1346
+ decoder_inputs_embeds=decoder_inputs_embeds,
1347
+ **kwargs,
1348
+ )
1349
+ else:
1350
+ if inputs_embeds is None:
1351
+ inputs_embeds = self.word_embeddings(input_ids)
1352
+ if decoder_inputs_embeds is None and decoder_input_ids is None:
1353
+ decoder_input_ids = shift_tokens_right(
1354
+ labels, self.config.pad_token_id, self.config.decoder_start_token_id
1355
+ )
1356
+ decoder_inputs_embeds = self.word_embeddings(decoder_input_ids)
1357
+
1358
+ if attention_mask is not None:
1359
+ # concat prompt attention mask
1360
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(
1361
+ attention_mask.device
1362
+ )
1363
+ kwargs["attention_mask"] = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1364
+ # concat prompt labels
1365
+ if labels is not None:
1366
+ if peft_config.num_transformer_submodules == 1:
1367
+ kwargs["labels"] = labels
1368
+ elif peft_config.num_transformer_submodules == 2:
1369
+ prefix_labels = torch.full((batch_size, peft_config.num_virtual_tokens), -100).to(labels.device)
1370
+ kwargs["labels"] = torch.cat((prefix_labels, labels), dim=1)
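+ # note: -100 is the default ignore_index of CrossEntropyLoss, so the virtual prompt
+ # positions prepended to the labels do not contribute to the loss.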
1371
+ prompts = self.get_prompt(batch_size=batch_size, task_ids=task_ids)
1372
+ prompts = prompts.to(inputs_embeds.dtype)
1373
+ inputs_embeds = torch.cat((prompts[:, : peft_config.num_virtual_tokens], inputs_embeds), dim=1)
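+ # note: with two transformer submodules (encoder + decoder), `get_prompt` returns
+ # 2 * num_virtual_tokens prompt vectors; the first half is prepended to the encoder
+ # inputs here and the second half to the decoder inputs below.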
1374
+ if peft_config.num_transformer_submodules == 1:
1375
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
1376
+ elif peft_config.num_transformer_submodules == 2:
1377
+ decoder_inputs_embeds = torch.cat(
1378
+ (prompts[:, peft_config.num_virtual_tokens :], decoder_inputs_embeds), dim=1
1379
+ )
1380
+ return self.base_model(
1381
+ inputs_embeds=inputs_embeds, decoder_inputs_embeds=decoder_inputs_embeds, **kwargs
1382
+ )
1383
+
1384
+ def generate(self, **kwargs):
1385
+ peft_config = self.active_peft_config
1386
+ self.base_model.prepare_inputs_for_generation = self.prepare_inputs_for_generation
1387
+ self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
1388
+ self._prepare_encoder_decoder_kwargs_for_generation
1389
+ )
1390
+ try:
1391
+ if not peft_config.is_prompt_learning:
1392
+ outputs = self.base_model.generate(**kwargs)
1393
+ else:
1394
+ if "input_ids" not in kwargs:
1395
+ raise ValueError("input_ids must be provided for Peft model generation")
1396
+ if kwargs.get("position_ids", None) is not None:
1397
+ warnings.warn(
1398
+ "Position ids are not supported for parameter efficient tuning. Ignoring position ids."
1399
+ )
1400
+ kwargs["position_ids"] = None
1401
+ if kwargs.get("token_type_ids", None) is not None:
1402
+ warnings.warn(
1403
+ "Token type ids are not supported for parameter efficient tuning. Ignoring token type ids"
1404
+ )
1405
+ kwargs["token_type_ids"] = None
1406
+
1407
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1408
+ outputs = self.base_model.generate(**kwargs)
1409
+ elif peft_config.peft_type in [
1410
+ PeftType.PROMPT_TUNING,
1411
+ PeftType.P_TUNING,
1412
+ PeftType.MULTITASK_PROMPT_TUNING,
1413
+ ]:
1414
+ kwargs = deepcopy(kwargs)
1415
+
1416
+ if "encoder_outputs" in kwargs:
1417
+ del kwargs["encoder_outputs"]
1418
+ warnings.warn(
1419
+ "`encoder_outputs` should not be passed to `generate` when using prompt tuning. Ignoring it."
1420
+ )
1421
+
1422
+ input_ids = kwargs.pop("input_ids")
1423
+ inputs_embeds = self.word_embeddings(input_ids)
1424
+ batch_size = inputs_embeds.shape[0]
1425
+ prompts = self.get_prompt(batch_size=batch_size, task_ids=kwargs.pop("task_ids", None))
1426
+ prompts = prompts.to(inputs_embeds.dtype)
1427
+
1428
+ inputs_embeds = torch.cat((prompts[:, : peft_config.num_virtual_tokens], inputs_embeds), dim=1)
1429
+ kwargs["inputs_embeds"] = inputs_embeds
1430
+
1431
+ if "attention_mask" in kwargs:
1432
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(
1433
+ kwargs["attention_mask"].device
1434
+ )
1435
+ kwargs["attention_mask"] = torch.cat((prefix_attention_mask, kwargs["attention_mask"]), dim=1)
1436
+
1437
+ return self.base_model.generate(**kwargs)
1438
+ else:
1439
+ raise NotImplementedError
1440
+ except:
1441
+ self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
1442
+ self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
1443
+ self.base_model_prepare_encoder_decoder_kwargs_for_generation
1444
+ )
1445
+ raise
1446
+ else:
1447
+ self.base_model.prepare_inputs_for_generation = self.base_model_prepare_inputs_for_generation
1448
+ self.base_model._prepare_encoder_decoder_kwargs_for_generation = (
1449
+ self.base_model_prepare_encoder_decoder_kwargs_for_generation
1450
+ )
1451
+ return outputs
1452
+
1453
+ def prepare_inputs_for_generation(self, *args, task_ids: torch.Tensor = None, **kwargs):
1454
+ peft_config = self.active_peft_config
1455
+ model_kwargs = self.base_model_prepare_inputs_for_generation(*args, **kwargs)
1456
+ if peft_config.peft_type == PeftType.POLY:
1457
+ model_kwargs["task_ids"] = task_ids
1458
+ if model_kwargs["past_key_values"] is None and peft_config.peft_type == PeftType.PREFIX_TUNING:
1459
+ batch_size = model_kwargs["decoder_input_ids"].shape[0]
1460
+ past_key_values = self.get_prompt(batch_size)
1461
+ model_kwargs["past_key_values"] = past_key_values
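+ # note: for prefix tuning the virtual tokens enter as precomputed key/value states on
+ # the first decoding step only; later steps reuse the growing cache as usual.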
1462
+
1463
+ return model_kwargs
1464
+
1465
+
1466
+ class PeftModelForTokenClassification(PeftModel):
1467
+ """
1468
+ Peft model for token classification tasks.
1469
+
1470
+ Args:
1471
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
1472
+ peft_config ([`PeftConfig`]): Peft config.
1473
+
1474
+ **Attributes**:
1475
+ - **config** ([`~transformers.PretrainedConfig`]) -- The configuration object of the base model.
1476
+ - **cls_layer_name** (`str`) -- The name of the classification layer.
1477
+
1478
+ Example:
1479
+
1480
+ ```py
1481
+ >>> from transformers import AutoModelForTokenClassification
1482
+ >>> from peft import PeftModelForTokenClassification, get_peft_config
1483
+
1484
+ >>> config = {
1485
+ ... "peft_type": "PREFIX_TUNING",
1486
+ ... "task_type": "TOKEN_CLS",
1487
+ ... "inference_mode": False,
1488
+ ... "num_virtual_tokens": 20,
1489
+ ... "token_dim": 768,
1490
+ ... "num_transformer_submodules": 1,
1491
+ ... "num_attention_heads": 12,
1492
+ ... "num_layers": 12,
1493
+ ... "encoder_hidden_size": 768,
1494
+ ... "prefix_projection": False,
1495
+ ... "postprocess_past_key_value_function": None,
1496
+ ... }
1497
+
1498
+ >>> peft_config = get_peft_config(config)
1499
+ >>> model = AutoModelForTokenClassification.from_pretrained("bert-base-cased")
1500
+ >>> peft_model = PeftModelForTokenClassification(model, peft_config)
1501
+ >>> peft_model.print_trainable_parameters()
1502
+ trainable params: 370178 || all params: 108680450 || trainable%: 0.3406113979101117
1503
+ ```
1504
+ """
1505
+
1506
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig = None, adapter_name: str = "default") -> None:
1507
+ super().__init__(model, peft_config, adapter_name)
1508
+ if self.modules_to_save is None:
1509
+ self.modules_to_save = {"classifier", "score"}
1510
+ else:
1511
+ self.modules_to_save.update({"classifier", "score"})
1512
+
1513
+ for name, _ in self.base_model.named_children():
1514
+ if any(module_name in name for module_name in self.modules_to_save):
1515
+ self.cls_layer_name = name
1516
+ break
1517
+
1518
+ # to make sure classifier layer is trainable
1519
+ _set_trainable(self, adapter_name)
1520
+
1521
+ def forward(
1522
+ self,
1523
+ input_ids=None,
1524
+ attention_mask=None,
1525
+ inputs_embeds=None,
1526
+ labels=None,
1527
+ output_attentions=None,
1528
+ output_hidden_states=None,
1529
+ return_dict=None,
1530
+ task_ids=None,
1531
+ **kwargs,
1532
+ ):
1533
+ peft_config = self.active_peft_config
1534
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1535
+
1536
+ if not peft_config.is_prompt_learning:
1537
+ if peft_config.peft_type == PeftType.POLY:
1538
+ kwargs["task_ids"] = task_ids
1539
+ return self.base_model(
1540
+ input_ids=input_ids,
1541
+ attention_mask=attention_mask,
1542
+ inputs_embeds=inputs_embeds,
1543
+ labels=labels,
1544
+ output_attentions=output_attentions,
1545
+ output_hidden_states=output_hidden_states,
1546
+ return_dict=return_dict,
1547
+ **kwargs,
1548
+ )
1549
+
1550
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1551
+ if attention_mask is not None:
1552
+ # concat prompt attention mask
1553
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(attention_mask.device)
1554
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1555
+ if kwargs.get("position_ids", None) is not None:
1556
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1557
+ kwargs["position_ids"] = None
1558
+ kwargs.update(
1559
+ {
1560
+ "attention_mask": attention_mask,
1561
+ "labels": labels,
1562
+ "output_attentions": output_attentions,
1563
+ "output_hidden_states": output_hidden_states,
1564
+ "return_dict": return_dict,
1565
+ }
1566
+ )
1567
+
1568
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1569
+ return self._prefix_tuning_forward(input_ids=input_ids, **kwargs)
1570
+ else:
1571
+ if kwargs.get("token_type_ids", None) is not None:
1572
+ kwargs["token_type_ids"] = torch.cat(
1573
+ (
1574
+ torch.zeros(batch_size, peft_config.num_virtual_tokens).to(self.word_embeddings.weight.device),
1575
+ kwargs["token_type_ids"],
1576
+ ),
1577
+ dim=1,
1578
+ ).long()
1579
+ if inputs_embeds is None:
1580
+ inputs_embeds = self.word_embeddings(input_ids)
1581
+ prompts = self.get_prompt(batch_size=batch_size, task_ids=task_ids)
1582
+ prompts = prompts.to(inputs_embeds.dtype)
1583
+ inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
1584
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
1585
+
1586
+ def _prefix_tuning_forward(
1587
+ self,
1588
+ input_ids=None,
1589
+ attention_mask=None,
1590
+ inputs_embeds=None,
1591
+ labels=None,
1592
+ output_attentions=None,
1593
+ output_hidden_states=None,
1594
+ return_dict=None,
1595
+ **kwargs,
1596
+ ):
1597
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1598
+ past_key_values = self.get_prompt(batch_size)
1599
+ fwd_params = list(inspect.signature(self.base_model.forward).parameters.keys())
1600
+ kwargs.update(
1601
+ {
1602
+ "input_ids": input_ids,
1603
+ "attention_mask": attention_mask,
1604
+ "inputs_embeds": inputs_embeds,
1605
+ "output_attentions": output_attentions,
1606
+ "output_hidden_states": output_hidden_states,
1607
+ "return_dict": return_dict,
1608
+ "past_key_values": past_key_values,
1609
+ }
1610
+ )
1611
+ if "past_key_values" in fwd_params:
1612
+ return self.base_model(labels=labels, **kwargs)
1613
+ else:
1614
+ transformer_backbone_name = self.base_model.get_submodule(self.transformer_backbone_name)
1615
+ fwd_params = list(inspect.signature(transformer_backbone_name.forward).parameters.keys())
1616
+ if "past_key_values" not in fwd_params:
1617
+ raise ValueError("Model does not support past key values which are required for prefix tuning.")
1618
+ outputs = transformer_backbone_name(**kwargs)
1619
+ sequence_output = outputs[0]
1620
+ if "dropout" in [name for name, _ in list(self.base_model.named_children())]:
1621
+ sequence_output = self.base_model.dropout(sequence_output)
1622
+ logits = self.base_model.get_submodule(self.cls_layer_name)(sequence_output)
1623
+
1624
+ loss = None
1625
+ if labels is not None:
1626
+ loss_fct = CrossEntropyLoss()
1627
+ loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
1628
+
1629
+ if not return_dict:
1630
+ output = (logits,) + outputs[2:]
1631
+ return ((loss,) + output) if loss is not None else output
1632
+
1633
+ return TokenClassifierOutput(
1634
+ loss=loss,
1635
+ logits=logits,
1636
+ hidden_states=outputs.hidden_states,
1637
+ attentions=outputs.attentions,
1638
+ )
1639
+
1640
+
1641
+ class PeftModelForQuestionAnswering(PeftModel):
1642
+ """
1643
+ Peft model for extractive question answering.
1644
+
1645
+ Args:
1646
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
1647
+ peft_config ([`PeftConfig`]): Peft config.
1648
+
1649
+ **Attributes**:
1650
+ - **config** ([`~transformers.PretrainedConfig`]) -- The configuration object of the base model.
1651
+ - **cls_layer_name** (`str`) -- The name of the classification layer.
1652
+
1653
+ Example:
1654
+
1655
+ ```py
1656
+ >>> from transformers import AutoModelForQuestionAnswering
1657
+ >>> from peft import PeftModelForQuestionAnswering, get_peft_config
1658
+
1659
+ >>> config = {
1660
+ ... "peft_type": "LORA",
1661
+ ... "task_type": "QUESTION_ANS",
1662
+ ... "inference_mode": False,
1663
+ ... "r": 16,
1664
+ ... "target_modules": ["query", "value"],
1665
+ ... "lora_alpha": 32,
1666
+ ... "lora_dropout": 0.05,
1667
+ ... "fan_in_fan_out": False,
1668
+ ... "bias": "none",
1669
+ ... }
1670
+
1671
+ >>> peft_config = get_peft_config(config)
1672
+ >>> model = AutoModelForQuestionAnswering.from_pretrained("bert-base-cased")
1673
+ >>> peft_model = PeftModelForQuestionAnswering(model, peft_config)
1674
+ >>> peft_model.print_trainable_parameters()
1675
+ trainable params: 592900 || all params: 108312580 || trainable%: 0.5473971721475013
1676
+ ```
1677
+ """
1678
+
1679
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig, adapter_name: str = "default") -> None:
1680
+ super().__init__(model, peft_config, adapter_name)
1681
+ if self.modules_to_save is None:
1682
+ self.modules_to_save = {"qa_outputs"}
1683
+ else:
1684
+ self.modules_to_save.update({"qa_outputs"})
1685
+
1686
+ for name, _ in self.base_model.named_children():
1687
+ if any(module_name in name for module_name in self.modules_to_save):
1688
+ self.cls_layer_name = name
1689
+ break
1690
+
1691
+ # to make sure classifier layer is trainable
1692
+ _set_trainable(self, adapter_name)
1693
+
1694
+ def forward(
1695
+ self,
1696
+ input_ids=None,
1697
+ attention_mask=None,
1698
+ token_type_ids=None,
1699
+ position_ids=None,
1700
+ inputs_embeds=None,
1701
+ start_positions=None,
1702
+ end_positions=None,
1703
+ output_attentions=None,
1704
+ output_hidden_states=None,
1705
+ return_dict=None,
1706
+ task_ids=None,
1707
+ **kwargs,
1708
+ ):
1709
+ peft_config = self.active_peft_config
1710
+ return_dict = return_dict if return_dict is not None else self.config.use_return_dict
1711
+
1712
+ if not peft_config.is_prompt_learning:
1713
+ if peft_config.peft_type == PeftType.POLY:
1714
+ kwargs["task_ids"] = task_ids
1715
+ return self.base_model(
1716
+ input_ids=input_ids,
1717
+ attention_mask=attention_mask,
1718
+ inputs_embeds=inputs_embeds,
1719
+ start_positions=start_positions,
1720
+ end_positions=end_positions,
1721
+ output_attentions=output_attentions,
1722
+ output_hidden_states=output_hidden_states,
1723
+ return_dict=return_dict,
1724
+ **kwargs,
1725
+ )
1726
+
1727
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1728
+ if attention_mask is not None:
1729
+ # concat prompt attention mask
1730
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(attention_mask.device)
1731
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1732
+ if kwargs.get("position_ids", None) is not None:
1733
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1734
+ kwargs["position_ids"] = None
1735
+ kwargs.update(
1736
+ {
1737
+ "attention_mask": attention_mask,
1738
+ "start_positions": start_positions,
1739
+ "end_positions": end_positions,
1740
+ "output_attentions": output_attentions,
1741
+ "output_hidden_states": output_hidden_states,
1742
+ "return_dict": return_dict,
1743
+ }
1744
+ )
1745
+
1746
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1747
+ return self._prefix_tuning_forward(input_ids=input_ids, **kwargs)
1748
+ else:
1749
+ if kwargs.get("token_type_ids", None) is not None:
1750
+ kwargs["token_type_ids"] = torch.cat(
1751
+ (
1752
+ torch.zeros(batch_size, peft_config.num_virtual_tokens).to(self.word_embeddings.weight.device),
1753
+ kwargs["token_type_ids"],
1754
+ ),
1755
+ dim=1,
1756
+ ).long()
1757
+ if inputs_embeds is None:
1758
+ inputs_embeds = self.word_embeddings(input_ids)
1759
+ prompts = self.get_prompt(batch_size=batch_size)
1760
+ prompts = prompts.to(inputs_embeds.dtype)
1761
+ inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
1762
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
1763
+
1764
+ def _prefix_tuning_forward(
1765
+ self,
1766
+ input_ids=None,
1767
+ attention_mask=None,
1768
+ inputs_embeds=None,
1769
+ start_positions=None,
1770
+ end_positions=None,
1771
+ output_attentions=None,
1772
+ output_hidden_states=None,
1773
+ return_dict=None,
1774
+ **kwargs,
1775
+ ):
1776
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1777
+ past_key_values = self.get_prompt(batch_size)
1778
+ fwd_params = list(inspect.signature(self.base_model.forward).parameters.keys())
1779
+ kwargs.update(
1780
+ {
1781
+ "input_ids": input_ids,
1782
+ "attention_mask": attention_mask,
1783
+ "inputs_embeds": inputs_embeds,
1784
+ "output_attentions": output_attentions,
1785
+ "output_hidden_states": output_hidden_states,
1786
+ "return_dict": return_dict,
1787
+ "past_key_values": past_key_values,
1788
+ }
1789
+ )
1790
+ if "past_key_values" in fwd_params:
1791
+ return self.base_model(start_positions=start_positions, end_positions=end_positions, **kwargs)
1792
+ else:
1793
+ transformer_backbone_name = self.base_model.get_submodule(self.transformer_backbone_name)
1794
+ fwd_params = list(inspect.signature(transformer_backbone_name.forward).parameters.keys())
1795
+ if "past_key_values" not in fwd_params:
1796
+ raise ValueError("Model does not support past key values which are required for prefix tuning.")
1797
+ outputs = transformer_backbone_name(**kwargs)
1798
+ sequence_output = outputs[0]
1799
+ if "dropout" in [name for name, _ in list(self.base_model.named_children())]:
1800
+ sequence_output = self.base_model.dropout(sequence_output)
1801
+ logits = self.base_model.get_submodule(self.cls_layer_name)(sequence_output)
1802
+ start_logits, end_logits = logits.split(1, dim=-1)
1803
+ start_logits = start_logits.squeeze(-1).contiguous()
1804
+ end_logits = end_logits.squeeze(-1).contiguous()
1805
+
1806
+ total_loss = None
1807
+ if start_positions is not None and end_positions is not None:
1808
+ # If we are on multi-GPU, splitting adds an extra dimension; squeeze it away
1809
+ if len(start_positions.size()) > 1:
1810
+ start_positions = start_positions.squeeze(-1)
1811
+ if len(end_positions.size()) > 1:
1812
+ end_positions = end_positions.squeeze(-1)
1813
+ # sometimes the start/end positions are outside our model inputs, we ignore these terms
1814
+ ignored_index = start_logits.size(1)
1815
+ start_positions = start_positions.clamp(0, ignored_index)
1816
+ end_positions = end_positions.clamp(0, ignored_index)
1817
+
1818
+ loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
1819
+ start_loss = loss_fct(start_logits, start_positions)
1820
+ end_loss = loss_fct(end_logits, end_positions)
1821
+ total_loss = (start_loss + end_loss) / 2
1822
+
1823
+ if not return_dict:
1824
+ output = (start_logits, end_logits) + outputs[2:]
1825
+ return ((total_loss,) + output) if total_loss is not None else output
1826
+
1827
+ return QuestionAnsweringModelOutput(
1828
+ loss=total_loss,
1829
+ start_logits=start_logits,
1830
+ end_logits=end_logits,
1831
+ hidden_states=outputs.hidden_states,
1832
+ attentions=outputs.attentions,
1833
+ )
1834
+
1835
+
1836
+ class PeftModelForFeatureExtraction(PeftModel):
1837
+ """
1838
+ Peft model for extracting features/embeddings from transformer models
1839
+
1840
+ Args:
1841
+ model ([`~transformers.PreTrainedModel`]): Base transformer model.
1842
+ peft_config ([`PeftConfig`]): Peft config.
1843
+
1844
+ **Attributes**:
1845
+ - **config** ([`~transformers.PretrainedConfig`]) -- The configuration object of the base model.
1846
+
1847
+ Example:
1848
+
1849
+ ```py
1850
+ >>> from transformers import AutoModel
1851
+ >>> from peft import PeftModelForFeatureExtraction, get_peft_config
1852
+
1853
+ >>> config = {
1854
+ ... "peft_type": "LORA",
1855
+ ... "task_type": "FEATURE_EXTRACTION",
1856
+ ... "inference_mode": False,
1857
+ ... "r": 16,
1858
+ ... "target_modules": ["query", "value"],
1859
+ ... "lora_alpha": 32,
1860
+ ... "lora_dropout": 0.05,
1861
+ ... "fan_in_fan_out": False,
1862
+ ... "bias": "none",
1863
+ ... }
1864
+ >>> peft_config = get_peft_config(config)
1865
+ >>> model = AutoModel.from_pretrained("bert-base-cased")
1866
+ >>> peft_model = PeftModelForFeatureExtraction(model, peft_config)
1867
+ >>> peft_model.print_trainable_parameters()
1868
+ ```
1869
+ """
1870
+
1871
+ def __init__(self, model: torch.nn.Module, peft_config: PeftConfig, adapter_name: str = "default"):
1872
+ super().__init__(model, peft_config, adapter_name)
1873
+
1874
+ def forward(
1875
+ self,
1876
+ input_ids=None,
1877
+ attention_mask=None,
1878
+ inputs_embeds=None,
1879
+ output_attentions=None,
1880
+ output_hidden_states=None,
1881
+ return_dict=None,
1882
+ task_ids=None,
1883
+ **kwargs,
1884
+ ):
1885
+ peft_config = self.active_peft_config
1886
+ if not peft_config.is_prompt_learning:
1887
+ if peft_config.peft_type == PeftType.POLY:
1888
+ kwargs["task_ids"] = task_ids
1889
+ return self.base_model(
1890
+ input_ids=input_ids,
1891
+ attention_mask=attention_mask,
1892
+ inputs_embeds=inputs_embeds,
1893
+ output_attentions=output_attentions,
1894
+ output_hidden_states=output_hidden_states,
1895
+ return_dict=return_dict,
1896
+ **kwargs,
1897
+ )
1898
+
1899
+ batch_size = _get_batch_size(input_ids, inputs_embeds)
1900
+ if attention_mask is not None:
1901
+ # concat prompt attention mask
1902
+ prefix_attention_mask = torch.ones(batch_size, peft_config.num_virtual_tokens).to(attention_mask.device)
1903
+ attention_mask = torch.cat((prefix_attention_mask, attention_mask), dim=1)
1904
+
1905
+ if kwargs.get("position_ids", None) is not None:
1906
+ warnings.warn("Position ids are not supported for parameter efficient tuning. Ignoring position ids.")
1907
+ kwargs["position_ids"] = None
1908
+ if kwargs.get("token_type_ids", None) is not None:
1909
+ warnings.warn("Token type ids are not supported for parameter efficient tuning. Ignoring token type ids")
1910
+ kwargs["token_type_ids"] = None
1911
+ kwargs.update(
1912
+ {
1913
+ "attention_mask": attention_mask,
1914
+ "output_attentions": output_attentions,
1915
+ "output_hidden_states": output_hidden_states,
1916
+ "return_dict": return_dict,
1917
+ }
1918
+ )
1919
+
1920
+ if peft_config.peft_type == PeftType.PREFIX_TUNING:
1921
+ past_key_values = self.get_prompt(batch_size)
1922
+ return self.base_model(input_ids=input_ids, past_key_values=past_key_values, **kwargs)
1923
+ else:
1924
+ if inputs_embeds is None:
1925
+ inputs_embeds = self.word_embeddings(input_ids)
1926
+ prompts = self.get_prompt(batch_size=batch_size)
1927
+ prompts = prompts.to(inputs_embeds.dtype)
1928
+ inputs_embeds = torch.cat((prompts, inputs_embeds), dim=1)
1929
+ return self.base_model(inputs_embeds=inputs_embeds, **kwargs)
MoRA/peft_mora/py.typed ADDED
File without changes
MoRA/peft_mora/tuners/__init__.py ADDED
@@ -0,0 +1,32 @@
1
+ # flake8: noqa
2
+ # There's no way to ignore "F401 '...' imported but unused" warnings in this
3
+ # module, but to preserve other warnings. So, don't check this module at all
4
+
5
+ # coding=utf-8
6
+ # Copyright 2023-present the HuggingFace Inc. team.
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+
20
+ from .adaption_prompt import AdaptionPromptConfig, AdaptionPromptModel
21
+ from .lora import LoraConfig, LoraModel, LoftQConfig
22
+ from .loha import LoHaConfig, LoHaModel
23
+ from .lokr import LoKrConfig, LoKrModel
24
+ from .ia3 import IA3Config, IA3Model
25
+ from .adalora import AdaLoraConfig, AdaLoraModel
26
+ from .p_tuning import PromptEncoder, PromptEncoderConfig, PromptEncoderReparameterizationType
27
+ from .prefix_tuning import PrefixEncoder, PrefixTuningConfig
28
+ from .prompt_tuning import PromptEmbedding, PromptTuningConfig, PromptTuningInit
29
+ from .multitask_prompt_tuning import MultitaskPromptEmbedding, MultitaskPromptTuningConfig, MultitaskPromptTuningInit
30
+ from .oft import OFTConfig, OFTModel
31
+ from .mixed import MixedModel
32
+ from .poly import PolyConfig, PolyModel
MoRA/peft_mora/tuners/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.24 kB).
 
MoRA/peft_mora/tuners/__pycache__/lycoris_utils.cpython-312.pyc ADDED
Binary file (19.9 kB).
 
MoRA/peft_mora/tuners/__pycache__/tuners_utils.cpython-312.pyc ADDED
Binary file (29.6 kB).
 
MoRA/peft_mora/tuners/adalora/__init__.py ADDED
@@ -0,0 +1,37 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from peft_mora.import_utils import is_bnb_4bit_available, is_bnb_available
16
+
17
+ from .config import AdaLoraConfig
18
+ from .gptq import SVDQuantLinear
19
+ from .layer import AdaLoraLayer, RankAllocator, SVDLinear
20
+ from .model import AdaLoraModel
21
+
22
+
23
+ __all__ = ["AdaLoraConfig", "AdaLoraLayer", "AdaLoraModel", "SVDLinear", "RankAllocator", "SVDQuantLinear"]
24
+
25
+
26
+ def __getattr__(name):
27
+ if (name == "SVDLinear8bitLt") and is_bnb_available():
28
+ from .bnb import SVDLinear8bitLt
29
+
30
+ return SVDLinear8bitLt
31
+
32
+ if (name == "SVDLinear4bit") and is_bnb_4bit_available():
33
+ from .bnb import SVDLinear4bit
34
+
35
+ return SVDLinear4bit
36
+
37
+ raise AttributeError(f"module {__name__} has no attribute {name}")
MoRA/peft_mora/tuners/adalora/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (1.02 kB).
 
MoRA/peft_mora/tuners/adalora/__pycache__/config.cpython-312.pyc ADDED
Binary file (2.82 kB).
 
MoRA/peft_mora/tuners/adalora/__pycache__/gptq.cpython-312.pyc ADDED
Binary file (2.66 kB).
 
MoRA/peft_mora/tuners/adalora/__pycache__/layer.cpython-312.pyc ADDED
Binary file (19.8 kB).
 
MoRA/peft_mora/tuners/adalora/__pycache__/model.cpython-312.pyc ADDED
Binary file (15.8 kB).
 
MoRA/peft_mora/tuners/adalora/bnb.py ADDED
@@ -0,0 +1,145 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Any
16
+
17
+ import torch
18
+
19
+ from peft_mora.import_utils import is_bnb_4bit_available, is_bnb_available
20
+
21
+ from .layer import AdaLoraLayer
22
+
23
+
24
+ if is_bnb_available():
25
+
26
+ class SVDLinear8bitLt(torch.nn.Module, AdaLoraLayer):
27
+ # Low-rank matrix for SVD-based adaptation
28
+ def __init__(
29
+ self,
30
+ base_layer: torch.nn.Module,
31
+ adapter_name: str,
32
+ r: int = 0,
33
+ lora_alpha: int = 1,
34
+ lora_dropout: float = 0.0,
35
+ init_lora_weights: bool = True,
36
+ **kwargs,
37
+ ) -> None:
38
+ super().__init__()
39
+ AdaLoraLayer.__init__(self, base_layer)
40
+ # Freezing the pre-trained weight matrix
41
+ self.get_base_layer().weight.requires_grad = False
42
+
43
+ self._active_adapter = adapter_name
44
+ self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights)
45
+
46
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
47
+ # note: no check for self.merged because merging is not supported (yet)
48
+ result = self.base_layer(x)
49
+
50
+ if self.disable_adapters:
51
+ return result
52
+
53
+ for active_adapter in self.active_adapters:
54
+ if active_adapter not in self.lora_A.keys():
55
+ continue
56
+ requires_conversion = not torch.is_autocast_enabled()
57
+ if requires_conversion:
58
+ expected_dtype = result.dtype
59
+ if x.dtype != torch.float32:
60
+ x = x.float()
61
+
62
+ lora_A = self.lora_A[active_adapter]
63
+ lora_B = self.lora_B[active_adapter]
64
+ lora_E = self.lora_E[active_adapter]
65
+ dropout = self.lora_dropout[active_adapter]
66
+ scaling = self.scaling[active_adapter]
67
+ ranknum = self.ranknum[active_adapter] + 1e-5
68
+
69
+ output = dropout(x) @ (lora_A * lora_E).T @ lora_B.T
70
+ if requires_conversion:
71
+ output = output.to(expected_dtype)
72
+ output = output * scaling / ranknum
73
+ # inplace operation on view is forbidden for MatMul8bitLtBackward, so avoid it
74
+ result = result + output
75
+ return result
76
+
77
+ def __repr__(self) -> str:
78
+ rep = super().__repr__()
79
+ return "adalora." + rep
80
+
81
+
82
+ if is_bnb_4bit_available():
83
+
84
+ class SVDLinear4bit(torch.nn.Module, AdaLoraLayer):
85
+ # Low-rank matrix for SVD-based adaptation
86
+ def __init__(
87
+ self,
88
+ base_layer: torch.nn.Module,
89
+ adapter_name: str,
90
+ r: int = 0,
91
+ lora_alpha: int = 1,
92
+ lora_dropout: float = 0.0,
93
+ init_lora_weights: bool = True,
94
+ **kwargs,
95
+ ) -> None:
96
+ super().__init__()
97
+ AdaLoraLayer.__init__(self, base_layer)
98
+ # Freezing the pre-trained weight matrix
99
+ self.get_base_layer().weight.requires_grad = False
100
+
101
+ self._active_adapter = adapter_name
102
+ self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights)
103
+
104
+ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
105
+ # note: no check for self.merged because merging is not supported (yet)
106
+ result = self.base_layer(x, *args, **kwargs)
107
+
108
+ if self.disable_adapters:
109
+ return result
110
+
111
+ # As per Tim Dettmers, for 4bit, we need to defensively clone here.
112
+ # The reason is that in some cases, an error can occur that backprop
113
+ # does not work on a manipulated view. This issue may be solved with
114
+ # newer PyTorch versions but this would need extensive testing to be
115
+ # sure.
116
+ result = result.clone()
117
+
118
+ for active_adapter in self.active_adapters:
119
+ if active_adapter not in self.lora_A.keys():
120
+ continue
121
+
122
+ lora_A = self.lora_A[active_adapter]
123
+ lora_B = self.lora_B[active_adapter]
124
+ lora_E = self.lora_E[active_adapter]
125
+ dropout = self.lora_dropout[active_adapter]
126
+ scaling = self.scaling[active_adapter]
127
+ ranknum = self.ranknum[active_adapter] + 1e-5
128
+
129
+ requires_conversion = not torch.is_autocast_enabled()
130
+ if requires_conversion:
131
+ expected_dtype = result.dtype
132
+ compute_dtype = lora_A.dtype
133
+ if x.dtype != compute_dtype:
134
+ x = x.to(compute_dtype)
135
+
136
+ output = dropout(x) @ (lora_A * lora_E).T @ lora_B.T
137
+ if requires_conversion:
138
+ output = output.to(expected_dtype)
139
+ output = output * scaling / ranknum
140
+ result += output
141
+ return result
142
+
143
+ def __repr__(self) -> str:
144
+ rep = super().__repr__()
145
+ return "adalora." + rep
MoRA/peft_mora/tuners/adalora/config.py ADDED
@@ -0,0 +1,52 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from dataclasses import dataclass, field
16
+ from typing import Optional
17
+
18
+ from peft_mora.tuners.lora import LoraConfig
19
+ from peft_mora.utils import PeftType
20
+
21
+
22
+ @dataclass
23
+ class AdaLoraConfig(LoraConfig):
24
+ """
25
+ This is the configuration class to store the configuration of a [`~peft.AdaLora`].
26
+
27
+ Args:
28
+ target_r (`int`): The target average rank of incremental matrix.
29
+ init_r (`int`): The initial rank for each incremental matrix.
30
+ tinit (`int`): The steps of initial fine-tuning warmup.
31
+ tfinal (`int`): The step of final fine-tuning.
32
+ deltaT (`int`): The time interval between two budget allocations.
33
+ beta1 (`float`): The hyperparameter of EMA for sensitivity smoothing.
34
+ beta2 (`float`): The hyperparameter of EMA for uncertainty quantification.
35
+ orth_reg_weight (`float`): The coefficient of orthogonal regularization.
36
+ total_step (`int`): The total training steps that should be specified before training.
37
+ rank_pattern (`list`): The allocated rank for each weight matrix by RankAllocator.
38
+ """
39
+
40
+ target_r: int = field(default=8, metadata={"help": "Target Lora matrix dimension."})
41
+ init_r: int = field(default=12, metadata={"help": "Initial Lora matrix dimension."})
42
+ tinit: int = field(default=0, metadata={"help": "The steps of initial warmup."})
43
+ tfinal: int = field(default=0, metadata={"help": "The steps of final warmup."})
44
+ deltaT: int = field(default=1, metadata={"help": "Step interval of rank allocation."})
45
+ beta1: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
46
+ beta2: float = field(default=0.85, metadata={"help": "Hyperparameter of EMA."})
47
+ orth_reg_weight: float = field(default=0.5, metadata={"help": "The orthogonal regularization coefficient."})
48
+ total_step: Optional[int] = field(default=None, metadata={"help": "The total training steps."})
49
+ rank_pattern: Optional[dict] = field(default=None, metadata={"help": "The saved rank pattern."})
50
+
51
+ def __post_init__(self):
52
+ self.peft_type = PeftType.ADALORA
MoRA/peft_mora/tuners/adalora/gptq.py ADDED
@@ -0,0 +1,72 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import torch
15
+
16
+ from .layer import AdaLoraLayer
17
+
18
+
19
+ class SVDQuantLinear(torch.nn.Module, AdaLoraLayer):
20
+ def __init__(
21
+ self,
22
+ base_layer,
23
+ adapter_name,
24
+ r: int = 0,
25
+ lora_alpha: int = 1,
26
+ lora_dropout: float = 0.0,
27
+ init_lora_weights: bool = True,
28
+ **kwargs,
29
+ ) -> None:
30
+ super().__init__()
31
+ AdaLoraLayer.__init__(self, base_layer)
32
+
33
+ # self.base_layer and self.quant_linear_module are the same; we need the former for consistency and the latter
34
+ # for backwards compatibility
35
+ self.quant_linear_module = base_layer
36
+ self._active_adapter = adapter_name
37
+ self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights)
38
+
39
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
40
+ result = self.quant_linear_module(x)
41
+
42
+ if self.disable_adapters:
43
+ return result
44
+
45
+ for active_adapter in self.active_adapters:
46
+ if active_adapter not in self.lora_A.keys():
47
+ continue
48
+ lora_A = self.lora_A[active_adapter]
49
+ lora_B = self.lora_B[active_adapter]
50
+ lora_E = self.lora_E[active_adapter]
51
+ dropout = self.lora_dropout[active_adapter]
52
+ scaling = self.scaling[active_adapter]
53
+ ranknum = self.ranknum[active_adapter] + 1e-5
54
+
55
+ requires_conversion = not torch.is_autocast_enabled()
56
+ if requires_conversion:
57
+ expected_dtype = result.dtype
58
+ if x.dtype != torch.float32:
59
+ x = x.float()
60
+
61
+ output = (dropout(x) @ (lora_A * lora_E).T @ lora_B.T) * scaling / ranknum
62
+ # TODO: here, the dtype conversion is applied on the *whole expression*,
63
+ # not the intermediate result, unlike for SVDLinear8bitLt and
64
+ # SVDLinear4bit, is that correct?
65
+ if requires_conversion:
66
+ output = output.to(expected_dtype)
67
+ result += output
68
+ return result
69
+
70
+ def __repr__(self) -> str:
71
+ rep = super().__repr__()
72
+ return "adalora." + rep
MoRA/peft_mora/tuners/adalora/layer.py ADDED
@@ -0,0 +1,346 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import warnings
16
+ from typing import Any, List, Optional
17
+
18
+ import torch
19
+ from torch import nn
20
+
21
+ from peft_mora.tuners.lora import LoraLayer
22
+ from peft_mora.tuners.tuners_utils import check_adapters_to_merge
23
+ from peft_mora.utils import transpose
24
+
25
+
26
+ class AdaLoraLayer(LoraLayer):
27
+ # List all names of layers that may contain adapter weights
28
+ # Note: ranknum doesn't need to be included as it is not an nn.Module
29
+ adapter_layer_names = ("lora_A", "lora_B", "lora_E", "lora_embedding_A", "lora_embedding_B")
30
+ # other_param_names is defined in LoraLayer
31
+
32
+ def __init__(self, base_layer: nn.Module) -> None:
33
+ super().__init__(base_layer)
34
+ self.lora_E = nn.ParameterDict({})
35
+ self.lora_A = nn.ParameterDict({})
36
+ self.lora_B = nn.ParameterDict({})
37
+ self.ranknum = nn.ParameterDict({})
38
+
39
+ def update_layer(self, adapter_name, r, lora_alpha, lora_dropout, init_lora_weights):
40
+ if r <= 0:
41
+ raise ValueError(f"`r` should be a positive integer value but the value passed is {r}")
42
+
43
+ self.r[adapter_name] = r
44
+ self.lora_alpha[adapter_name] = lora_alpha
45
+ if lora_dropout > 0.0:
46
+ lora_dropout_layer = nn.Dropout(p=lora_dropout)
47
+ else:
48
+ lora_dropout_layer = nn.Identity()
49
+
50
+ self.lora_dropout[adapter_name] = lora_dropout_layer
51
+ # Actual trainable parameters
52
+ # Right singular vectors
53
+ self.lora_A[adapter_name] = nn.Parameter(torch.randn(r, self.in_features))
54
+ # Singular values
55
+ self.lora_E[adapter_name] = nn.Parameter(torch.randn(r, 1))
56
+ # Left singular vectors
57
+ self.lora_B[adapter_name] = nn.Parameter(torch.randn(self.out_features, r))
58
+ # The current rank
59
+ self.ranknum[adapter_name] = nn.Parameter(torch.randn(1), requires_grad=False)
60
+ self.ranknum[adapter_name].data.fill_(float(r))
61
+ self.ranknum[adapter_name].requires_grad = False
62
+ self.scaling[adapter_name] = lora_alpha if lora_alpha > 0 else float(r)
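+ # note: `ranknum` stores the current (float) rank and is kept out of gradient updates;
+ # the adapter output is scaled by scaling / ranknum, mirroring lora_alpha / r in vanilla LoRA.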
63
+ if init_lora_weights:
64
+ self.reset_lora_parameters(adapter_name)
65
+
66
+ if hasattr(self.get_base_layer(), "qweight"):
67
+ # QuantLinear
68
+ self.to(self.get_base_layer().qweight.device)
69
+ else:
70
+ self.to(self.get_base_layer().weight.device)
71
+ self.set_adapter(self.active_adapters)
72
+
73
+ def reset_lora_parameters(self, adapter_name):
74
+ if adapter_name in self.lora_A.keys():
75
+ nn.init.normal_(self.lora_E[adapter_name], mean=0.0, std=0.02)
76
+ nn.init.normal_(self.lora_A[adapter_name], mean=0.0, std=0.02)
77
+ nn.init.normal_(self.lora_B[adapter_name], mean=0.0, std=0.02)
78
+
79
+
80
+ class SVDLinear(nn.Module, AdaLoraLayer):
81
+ # SVD-based adaptation by a dense layer
82
+ def __init__(
83
+ self,
84
+ base_layer: nn.Module,
85
+ adapter_name: str,
86
+ r: int = 0,
87
+ lora_alpha: int = 1,
88
+ lora_dropout: float = 0.0,
89
+ fan_in_fan_out: bool = False,
90
+ init_lora_weights: bool = True,
91
+ **kwargs,
92
+ ) -> None:
93
+ super().__init__()
94
+ AdaLoraLayer.__init__(self, base_layer)
95
+ # Freezing the pre-trained weight matrix
96
+ self.get_base_layer().weight.requires_grad = False
97
+
98
+ self.fan_in_fan_out = fan_in_fan_out
99
+ self._active_adapter = adapter_name
100
+ self.update_layer(adapter_name, r, lora_alpha, lora_dropout, init_lora_weights)
101
+
102
+ def merge(self, safe_merge: bool = False, adapter_names: Optional[List[str]] = None) -> None:
103
+ """
104
+ Merge the active adapter weights into the base weights
105
+
106
+ Args:
107
+ safe_merge (`bool`, *optional*):
108
+ If True, the merge operation will be performed in a copy of the original weights and check for NaNs
109
+ before merging the weights. This is useful if you want to check if the merge operation will produce
110
+ NaNs. Defaults to `False`.
111
+ adapter_names (`List[str]`, *optional*):
112
+ The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
113
+ to `None`.
114
+ """
115
+ adapter_names = check_adapters_to_merge(self, adapter_names)
116
+ if not adapter_names:
117
+ # no adapter to merge
118
+ return
119
+
120
+ for active_adapter in adapter_names:
121
+ base_layer = self.get_base_layer()
122
+ if active_adapter in self.lora_A.keys():
123
+ if safe_merge:
124
+ # Note that safe_merge will be slower than the normal merge
125
+ # because of the copy operation.
126
+ orig_weights = base_layer.weight.data.clone()
127
+ orig_weights += self.get_delta_weight(active_adapter)
128
+
129
+ if not torch.isfinite(orig_weights).all():
130
+ raise ValueError(
131
+ f"NaNs detected in the merged weights. The adapter {active_adapter} seems to be broken"
132
+ )
133
+
134
+ base_layer.weight.data = orig_weights
135
+ else:
136
+ base_layer.weight.data += self.get_delta_weight(active_adapter)
137
+ self.merged_adapters.append(active_adapter)
138
+
139
+ def unmerge(self) -> None:
140
+ """
141
+ This method unmerges all merged adapter layers from the base weights.
142
+ """
143
+ if not self.merged:
144
+ warnings.warn("Already unmerged. Nothing to do.")
145
+ return
146
+ while len(self.merged_adapters) > 0:
147
+ active_adapter = self.merged_adapters.pop()
148
+ if active_adapter in self.lora_A.keys():
149
+ self.get_base_layer().weight.data -= self.get_delta_weight(active_adapter)
150
+
151
+ def get_delta_weight(self, adapter) -> torch.Tensor:
152
+ return (
153
+ transpose(self.lora_B[adapter] @ (self.lora_A[adapter] * self.lora_E[adapter]), self.fan_in_fan_out)
154
+ * self.scaling[adapter]
155
+ / (self.ranknum[adapter] + 1e-5)
156
+ )
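+ # note: this is the SVD-style parametrization delta_W = B @ (E * A), with lora_E acting as
+ # the (prunable) singular values, scaled by lora_alpha / ranknum.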
157
+
158
+ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
159
+ if self.disable_adapters:
160
+ if self.merged:
161
+ self.unmerge()
162
+ result = self.base_layer(x, *args, **kwargs)
163
+ elif self.merged:
164
+ result = self.base_layer(x, *args, **kwargs)
165
+ else:
166
+ result = self.base_layer(x, *args, **kwargs)
167
+ for active_adapter in self.active_adapters:
168
+ if active_adapter not in self.lora_A.keys():
169
+ continue
170
+ lora_A = self.lora_A[active_adapter]
171
+ lora_B = self.lora_B[active_adapter]
172
+ lora_E = self.lora_E[active_adapter]
173
+ dropout = self.lora_dropout[active_adapter]
174
+ scaling = self.scaling[active_adapter]
175
+ ranknum = self.ranknum[active_adapter] + 1e-5
176
+
177
+ x = x.to(lora_A.dtype)
178
+ result += (dropout(x) @ (lora_A * lora_E).T @ lora_B.T) * scaling / ranknum
179
+
180
+ return result
181
+
182
+ def __repr__(self) -> str:
183
+ rep = super().__repr__()
184
+ return "adalora." + rep
185
+
186
+
187
+ class RankAllocator:
188
+ """
189
+ The RankAllocator for AdaLoraModel. Paper: https://openreview.net/pdf?id=lq62uWRJjiY
190
+
191
+ Args:
192
+ config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
193
+ model: the model that we apply AdaLoRA to.
194
+
195
+ """
196
+
197
+ def __init__(self, model, peft_config, adapter_name):
198
+ self.peft_config = peft_config
199
+ self.adapter_name = adapter_name
200
+ self.beta1 = peft_config.beta1
201
+ self.beta2 = peft_config.beta2
202
+ assert self.beta1 > 0 and self.beta1 < 1
203
+ assert self.beta2 > 0 and self.beta2 < 1
204
+
205
+ self.reset_ipt()
206
+ self._set_budget_scheduler(model)
207
+
208
+ def set_total_step(self, total_step):
209
+ self.peft_config.total_step = total_step
210
+
211
+ def reset_ipt(self):
212
+ self.ipt = {}
213
+ self.exp_avg_ipt = {}
214
+ self.exp_avg_unc = {}
215
+
216
+ def _set_budget_scheduler(self, model):
217
+ self.init_bgt = 0
218
+ self.name_set = set()
219
+ for n, p in model.named_parameters():
220
+ if f"lora_A.{self.adapter_name}" in n:
221
+ self.init_bgt += p.size(0)
222
+ self.name_set.add(n.replace("lora_A", "%s"))
223
+ self.name_set = sorted(self.name_set)
224
+ # The total final rank budget
225
+ self.target_bgt = self.peft_config.target_r * len(self.name_set)
226
+
227
+ def budget_schedule(self, step: int):
228
+ tinit = self.peft_config.tinit
229
+ tfinal = self.peft_config.tfinal
230
+ total_step = self.peft_config.total_step
231
+ # Initial warmup
232
+ if step <= tinit:
233
+ budget = self.init_bgt
234
+ mask_ind = False
235
+ # Final fine-tuning
236
+ elif step > total_step - tfinal:
237
+ budget = self.target_bgt
238
+ mask_ind = True
239
+ else:
240
+ # Budget decreasing with a cubic scheduler
241
+ mul_coeff = 1 - (step - tinit) / (total_step - tfinal - tinit)
242
+ budget = int((self.init_bgt - self.target_bgt) * (mul_coeff**3) + self.target_bgt)
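+ # note: the budget interpolates from init_bgt down to target_bgt with a cubic decay over
+ # the steps between tinit and total_step - tfinal.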
243
+ mask_ind = True if step % self.peft_config.deltaT == 0 else False
244
+ return budget, mask_ind
245
+
246
+ def update_ipt(self, model):
247
+ # Update the sensitivity and uncertainty for every weight
248
+ for n, p in model.named_parameters():
249
+ if "lora_" in n and self.adapter_name in n:
250
+ if n not in self.ipt:
251
+ self.ipt[n] = torch.zeros_like(p)
252
+ self.exp_avg_ipt[n] = torch.zeros_like(p)
253
+ self.exp_avg_unc[n] = torch.zeros_like(p)
254
+ with torch.no_grad():
255
+ self.ipt[n] = (p * p.grad).abs().detach()
256
+ # Sensitivity smoothing
257
+ self.exp_avg_ipt[n] = self.beta1 * self.exp_avg_ipt[n] + (1 - self.beta1) * self.ipt[n]
258
+ # Uncertainty quantification
259
+ self.exp_avg_unc[n] = (
260
+ self.beta2 * self.exp_avg_unc[n] + (1 - self.beta2) * (self.ipt[n] - self.exp_avg_ipt[n]).abs()
261
+ )
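+ # note: exp_avg_ipt is an EMA of the first-order importance |w * grad|, and exp_avg_unc an
+ # EMA of how far the instantaneous importance deviates from it; _element_score multiplies
+ # the two, so weights that are consistently important score highest.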
262
+
263
+ def _element_score(self, n):
264
+ return self.exp_avg_ipt[n] * self.exp_avg_unc[n]
265
+
266
+ def _combine_ipt(self, ipt_E, ipt_AB):
267
+ ipt_AB = ipt_AB.sum(dim=1, keepdim=False)
268
+ sum_ipt = ipt_E.view(-1) + ipt_AB.view(-1)
269
+ return sum_ipt
270
+
271
+ def mask_to_budget(self, model, budget):
272
+ value_ipt = {}
273
+ vector_ipt = {}
274
+ triplet_ipt = {}
275
+ # Get the importance score for A, E, B
276
+ for n, p in model.named_parameters():
277
+ if f"lora_A.{self.adapter_name}" in n:
278
+ entry_ipt = self._element_score(n)
279
+ comb_ipt = torch.mean(entry_ipt, dim=1, keepdim=True)
280
+ name_m = n.replace("lora_A", "%s")
281
+ if name_m not in vector_ipt:
282
+ vector_ipt[name_m] = [comb_ipt]
283
+ else:
284
+ vector_ipt[name_m].append(comb_ipt)
285
+ if f"lora_B.{self.adapter_name}" in n:
286
+ entry_ipt = self._element_score(n)
287
+ comb_ipt = torch.mean(entry_ipt, dim=0, keepdim=False).view(-1, 1)
288
+ name_m = n.replace("lora_B", "%s")
289
+ if name_m not in vector_ipt:
290
+ vector_ipt[name_m] = [comb_ipt]
291
+ else:
292
+ vector_ipt[name_m].append(comb_ipt)
293
+ if f"lora_E.{self.adapter_name}" in n:
294
+ entry_ipt = self._element_score(n)
295
+ name_m = n.replace("lora_E", "%s")
296
+ value_ipt[name_m] = entry_ipt
297
+
298
+ all_score = []
299
+ # Calculate the score for each triplet
300
+ for name_m in vector_ipt:
301
+ ipt_E = value_ipt[name_m]
302
+ ipt_AB = torch.cat(vector_ipt[name_m], dim=1)
303
+ sum_ipt = self._combine_ipt(ipt_E, ipt_AB)
304
+ name_E = name_m % "lora_E"
305
+ triplet_ipt[name_E] = sum_ipt.view(-1, 1)
306
+ all_score.append(sum_ipt.view(-1))
307
+
308
+ # Get the threshold by ranking ipt
309
+ mask_threshold = torch.kthvalue(
310
+ torch.cat(all_score),
311
+ k=self.init_bgt - budget,
312
+ )[0].item()
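+ # note: taking the k-th smallest combined score with k = init_bgt - budget leaves
+ # (up to ties) `budget` triplets above the threshold; the rest have their lora_E entries zeroed below.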
313
+
314
+ rank_pattern = {}
315
+ # Mask the unimportant triplets
316
+ with torch.no_grad():
317
+ for n, p in model.named_parameters():
318
+ if f"lora_E.{self.adapter_name}" in n:
319
+ p.masked_fill_(triplet_ipt[n] <= mask_threshold, 0.0)
320
+ rank_pattern[n] = (~(triplet_ipt[n] <= mask_threshold)).view(-1).tolist()
321
+ return rank_pattern
322
+
323
+ def update_and_allocate(self, model, global_step, force_mask=False):
324
+ # Update the importance score and allocate the budget
325
+ if global_step < self.peft_config.total_step - self.peft_config.tfinal:
326
+ self.update_ipt(model)
327
+ budget, mask_ind = self.budget_schedule(global_step)
328
+ # Allocate the budget according to importance scores
329
+ if mask_ind or force_mask:
330
+ rank_pattern = self.mask_to_budget(model, budget)
331
+ else:
332
+ rank_pattern = None
333
+ return budget, rank_pattern
334
+
335
+ def mask_using_rank_pattern(self, model, rank_pattern):
336
+ # Mask the unimportant triplets
337
+ is_adapter_name_truncated = False
338
+ if self.adapter_name not in next(iter(rank_pattern.keys())):
339
+ is_adapter_name_truncated = True
340
+
341
+ with torch.no_grad():
342
+ for n, p in model.named_parameters():
343
+ if f"lora_E.{self.adapter_name}" in n:
344
+ key = n if not is_adapter_name_truncated else n.replace(f".{self.adapter_name}", "")
345
+ mask = torch.Tensor(rank_pattern[key]).unsqueeze(-1).to(p.device)
346
+ p.masked_fill_(~mask.bool(), 0.0)
MoRA/peft_mora/tuners/adalora/model.py ADDED
@@ -0,0 +1,346 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import warnings
16
+
17
+ import torch
18
+ from transformers.pytorch_utils import Conv1D
19
+
20
+ from peft_mora.import_utils import is_bnb_4bit_available, is_bnb_available
21
+ from peft_mora.tuners.lora import LoraConfig, LoraModel
22
+ from peft_mora.tuners.tuners_utils import BaseTunerLayer
23
+ from peft_mora.utils import (
24
+ TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
25
+ _freeze_adapter,
26
+ _get_submodules,
27
+ get_auto_gptq_quant_linear,
28
+ get_quantization_config,
29
+ )
30
+
31
+ from .gptq import SVDQuantLinear
32
+ from .layer import AdaLoraLayer, RankAllocator, SVDLinear
33
+
34
+
35
+ class AdaLoraModel(LoraModel):
36
+ """
37
+ Creates AdaLoRA (Adaptive LoRA) model from a pretrained transformers model. Paper:
38
+ https://openreview.net/forum?id=lq62uWRJjiY
39
+
40
+ Args:
41
+ model ([`transformers.PreTrainedModel`]): The model to be adapted.
42
+ config ([`AdaLoraConfig`]): The configuration of the AdaLora model.
43
+ adapter_name (`str`): The name of the adapter, defaults to `"default"`.
44
+
45
+ Returns:
46
+ `torch.nn.Module`: The AdaLora model.
47
+
48
+ Example::
49
+
50
+ >>> from transformers import AutoModelForSeq2SeqLM >>> from peft import AdaLoraModel, AdaLoraConfig
51
+ >>> config = AdaLoraConfig(
52
+ peft_type="ADALORA", task_type="SEQ_2_SEQ_LM", r=8, lora_alpha=32, target_modules=["q", "v"],
53
+ lora_dropout=0.01,
54
+ )
55
+ >>> model = AutoModelForSeq2SeqLM.from_pretrained("t5-base") >>> model = AdaLoraModel(model, config, "default")
56
+
57
+ **Attributes**:
58
+ - **model** ([`transformers.PreTrainedModel`]) -- The model to be adapted.
59
+ - **peft_config** ([`AdaLoraConfig`]): The configuration of the AdaLora model.
60
+ """
61
+
62
+ # Note: don't redefine prefix here, it should be inherited from LoraModel
63
+
64
+ def __init__(self, model, config, adapter_name):
65
+ super().__init__(model, config, adapter_name)
66
+
67
+ trainable_mode_counter = 0
68
+ for config in self.peft_config.values():
69
+ if not config.inference_mode:
70
+ trainable_mode_counter += 1
71
+
72
+ if trainable_mode_counter > 1:
73
+ raise ValueError(
74
+ "AdaLoraModel supports only 1 trainable adapter. "
75
+ "When using multiple adapters, set inference_mode to True for all adapters except the one you want to train."
76
+ )
77
+
78
+ if self.peft_config[adapter_name].inference_mode:
79
+ _freeze_adapter(self.model, adapter_name)
80
+ else:
81
+ self.trainable_adapter_name = adapter_name
82
+ self.rankallocator = RankAllocator(self.model, self.peft_config[adapter_name], self.trainable_adapter_name)
83
+
84
+ def _check_new_adapter_config(self, config: LoraConfig) -> None:
85
+ """
86
+ A helper method to check the config when a new adapter is being added.
87
+
88
+ Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
89
+
90
+ """
91
+ super()._check_new_adapter_config(config)
92
+
93
+ trainable_mode_counter = 0
94
+ for config_ in self.peft_config.values():
95
+ if not config_.inference_mode:
96
+ traininable_mode_counter += 1
97
+
98
+ if trainable_mode_counter > 1:
99
+ raise ValueError(
100
+ f"{self.__class__.__name__} supports only 1 trainable adapter. "
101
+ "When using multiple adapters, set inference_mode to True for all adapters except the one "
102
+ "you want to train."
103
+ )
104
+
105
+ def _create_and_replace(
106
+ self,
107
+ lora_config,
108
+ adapter_name,
109
+ target,
110
+ target_name,
111
+ parent,
112
+ current_key,
113
+ ):
114
+ kwargs = {
115
+ "r": lora_config.init_r,
116
+ "lora_alpha": lora_config.lora_alpha,
117
+ "lora_dropout": lora_config.lora_dropout,
118
+ "fan_in_fan_out": lora_config.fan_in_fan_out,
119
+ "init_lora_weights": lora_config.init_lora_weights,
120
+ "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False),
121
+ "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False),
122
+ }
123
+ if (kwargs["loaded_in_8bit"] or kwargs["loaded_in_4bit"]) and not is_bnb_available():
124
+ raise ImportError(
125
+ "To use AdaLora with 8-bit quantization, please install the `bitsandbytes` package. "
126
+ "You can install it with `pip install bitsandbytes`."
127
+ )
128
+
129
+ quantization_config = get_quantization_config(self.model, method="gptq")
130
+ if quantization_config is not None:
131
+ kwargs["gptq_quantization_config"] = quantization_config
132
+
133
+ # If it is not an AdaLoraLayer, create a new module, else update it with new adapters
134
+ if not isinstance(target, AdaLoraLayer):
135
+ new_module = self._create_new_module(lora_config, adapter_name, target, **kwargs)
136
+ if adapter_name != self.active_adapter:
137
+ # adding an additional adapter: it is not automatically trainable
138
+ new_module.requires_grad_(False)
139
+ self._replace_module(parent, target_name, new_module, target)
140
+ else:
141
+ target.update_layer(
142
+ adapter_name,
143
+ lora_config.init_r,
144
+ lora_config.lora_alpha,
145
+ lora_config.lora_dropout,
146
+ lora_config.init_lora_weights,
147
+ )
148
+
149
+ @staticmethod
150
+ def _create_new_module(lora_config, adapter_name, target, **kwargs):
151
+ # avoid eager bnb import
152
+ if is_bnb_available():
153
+ import bitsandbytes as bnb
154
+
155
+ from .bnb import SVDLinear8bitLt
156
+ if is_bnb_4bit_available():
157
+ from .bnb import SVDLinear4bit
158
+
159
+ gptq_quantization_config = kwargs.get("gptq_quantization_config", None)
160
+ AutoGPTQQuantLinear = get_auto_gptq_quant_linear(gptq_quantization_config)
161
+
162
+ loaded_in_8bit = kwargs.pop("loaded_in_8bit", False)
163
+ loaded_in_4bit = kwargs.pop("loaded_in_4bit", False)
164
+
165
+ if isinstance(target, BaseTunerLayer):
166
+ target_base_layer = target.get_base_layer()
167
+ else:
168
+ target_base_layer = target
169
+
170
+ if loaded_in_8bit and isinstance(target_base_layer, bnb.nn.Linear8bitLt):
171
+ kwargs.update(
172
+ {
173
+ "has_fp16_weights": target_base_layer.state.has_fp16_weights,
174
+ "memory_efficient_backward": target_base_layer.state.memory_efficient_backward,
175
+ "threshold": target_base_layer.state.threshold,
176
+ "index": target_base_layer.index,
177
+ }
178
+ )
179
+ new_module = SVDLinear8bitLt(target, adapter_name, **kwargs)
180
+ elif loaded_in_4bit and is_bnb_4bit_available() and isinstance(target_base_layer, bnb.nn.Linear4bit):
181
+ fourbit_kwargs = kwargs.copy()
182
+ fourbit_kwargs.update(
183
+ {
184
+ "compute_dtype": target_base_layer.compute_dtype,
185
+ "compress_statistics": target_base_layer.weight.compress_statistics,
186
+ "quant_type": target_base_layer.weight.quant_type,
187
+ }
188
+ )
189
+ new_module = SVDLinear4bit(target, adapter_name, **fourbit_kwargs)
190
+ elif AutoGPTQQuantLinear is not None and isinstance(target, AutoGPTQQuantLinear):
191
+ new_module = SVDQuantLinear(target, adapter_name, **kwargs)
192
+ else:
193
+ if isinstance(target_base_layer, torch.nn.Linear):
194
+ if kwargs["fan_in_fan_out"]:
195
+ warnings.warn(
196
+ "fan_in_fan_out is set to True but the target module is `torch.nn.Linear`. "
197
+ "Setting fan_in_fan_out to False."
198
+ )
199
+ kwargs["fan_in_fan_out"] = lora_config.fan_in_fan_out = False
200
+ elif isinstance(target_base_layer, Conv1D):
201
+ if not kwargs["fan_in_fan_out"]:
202
+ warnings.warn(
203
+ "fan_in_fan_out is set to False but the target module is `Conv1D`. "
204
+ "Setting fan_in_fan_out to True."
205
+ )
206
+ kwargs["fan_in_fan_out"] = lora_config.fan_in_fan_out = True
207
+ else:
208
+ raise ValueError(
209
+ f"Target module {target} is not supported. "
210
+ f"Currently, only `torch.nn.Linear` and `Conv1D` are supported."
211
+ )
212
+ new_module = SVDLinear(target, adapter_name, **kwargs)
213
+
214
+ return new_module
215
+
216
+ @staticmethod
217
+ def _prepare_adapter_config(peft_config, model_config):
218
+ if peft_config.target_modules is None:
219
+ if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING:
220
+ raise ValueError("Please specify `target_modules` in `peft_config`")
221
+ peft_config.target_modules = TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING[
222
+ model_config["model_type"]
223
+ ]
224
+ return peft_config
225
+
226
+ def __getattr__(self, name: str):
227
+ """Forward missing attributes to the wrapped module."""
228
+ try:
229
+ return super().__getattr__(name) # defer to nn.Module's logic
230
+ except AttributeError:
231
+ return getattr(self.model, name)
232
+
233
+ def forward(self, *args, **kwargs):
234
+ outputs = self.model.forward(*args, **kwargs)
235
+
236
+ if (getattr(outputs, "loss", None) is not None) and isinstance(outputs.loss, torch.Tensor):
237
+ # Calculate the orthogonal regularization
238
+ orth_reg_weight = self.peft_config[self.trainable_adapter_name].orth_reg_weight
239
+
240
+ if orth_reg_weight <= 0:
241
+ raise ValueError("orth_reg_weight should be greater than 0. ")
242
+
243
+ regu_loss = 0
244
+ num_param = 0
245
+ for n, p in self.model.named_parameters():
246
+ if ("lora_A" in n or "lora_B" in n) and self.trainable_adapter_name in n:
247
+ para_cov = p @ p.T if "lora_A" in n else p.T @ p
248
+ I = torch.eye(*para_cov.size(), out=torch.empty_like(para_cov)) # noqa: E741
249
+ I.requires_grad = False
250
+ num_param += 1
251
+ regu_loss += torch.norm(para_cov - I, p="fro")
252
+ if num_param > 0:
253
+ regu_loss = regu_loss / num_param
254
+ else:
255
+ regu_loss = 0
256
+ outputs.loss += orth_reg_weight * regu_loss
257
+ return outputs
258
+
259
+ def resize_modules_by_rank_pattern(self, rank_pattern, adapter_name):
260
+ lora_config = self.peft_config[adapter_name]
261
+ for name, rank_idx in rank_pattern.items():
262
+ if isinstance(rank_idx, list):
263
+ rank = sum(rank_idx)
264
+ elif isinstance(rank_idx, torch.Tensor):
265
+ rank_idx = rank_idx.view(-1)
266
+ rank = rank_idx.sum().item()
267
+ else:
268
+ raise ValueError("Unexpected type of rank_idx")
269
+ key = ".".join(name.split(".")[0:-2]) if adapter_name in name else ".".join(name.split(".")[0:-1])
270
+ _, target, _ = _get_submodules(self.model, key)
271
+ lora_E_weights = target.lora_E[adapter_name][rank_idx]
272
+ lora_A_weights = target.lora_A[adapter_name][rank_idx]
273
+ lora_B_weights = target.lora_B[adapter_name][:, rank_idx]
274
+ ranknum = target.ranknum[adapter_name]
275
+ target.update_layer(
276
+ adapter_name,
277
+ rank,
278
+ lora_config.lora_alpha,
279
+ lora_config.lora_dropout,
280
+ lora_config.init_lora_weights,
281
+ )
282
+ with torch.no_grad():
283
+ if rank > 0:
284
+ target.lora_E[adapter_name].copy_(lora_E_weights)
285
+ target.lora_A[adapter_name].copy_(lora_A_weights)
286
+ target.lora_B[adapter_name].copy_(lora_B_weights)
287
+ # The scaling is exactly as the previous
288
+ target.ranknum[adapter_name].copy_(ranknum)
289
+
290
+ def resize_state_dict_by_rank_pattern(self, rank_pattern, state_dict, adapter_name):
291
+ for name, rank_idx in rank_pattern.items():
292
+ rank = sum(rank_idx)
293
+ prefix = ".".join(name.split(".")[0:-2]) if adapter_name in name else ".".join(name.split(".")[0:-1])
294
+ for layer in ["lora_E", "lora_A", "lora_B"]:
295
+ key = f"base_model.model.{prefix}.{layer}.{adapter_name}"
296
+ if layer != "lora_B":
297
+ state_dict[key] = (
298
+ state_dict[key][rank_idx] if rank != state_dict[key].shape[0] else state_dict[key]
299
+ )
300
+ else:
301
+ state_dict[key] = (
302
+ state_dict[key][:, rank_idx] if rank != state_dict[key].shape[1] else state_dict[key]
303
+ )
304
+ return state_dict
305
+
306
+ def update_and_allocate(self, global_step):
307
+ """
308
+ This method updates the AdaLoRA budget and mask.
309
+
310
+ This should be called in every training step after `loss.backward()` and before `zero_grad()`.
311
+
312
+ `tinit`, `tfinal` and `deltaT` are handled within the method.
313
+
314
+ Args:
315
+ global_step (`int`): The current training step; it is used to calculate the AdaLoRA budget.
316
+
317
+ Example:
318
+
319
+ ```python
320
+ >>> loss = model(**input).loss
321
+ >>> loss.backward()
322
+ >>> optimizer.step()
323
+ >>> model.base_model.update_and_allocate(i_step)
324
+ >>> optimizer.zero_grad()
325
+ ```
326
+ """
327
+ lora_config = self.peft_config[self.trainable_adapter_name]
328
+ # Update the importance score and allocate the budget
329
+ if global_step < lora_config.total_step - lora_config.tfinal:
330
+ _, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step)
331
+ if rank_pattern:
332
+ lora_config.rank_pattern = rank_pattern
333
+ # Finalize the budget allocation
334
+ elif global_step == lora_config.total_step - lora_config.tfinal:
335
+ _, rank_pattern = self.rankallocator.update_and_allocate(self.model, global_step, force_mask=True)
336
+ # for some reason, this freezes the trainable parameters and nothing gets updates
337
+ # self.resize_modules_by_rank_pattern(rank_pattern, self.trainable_adapter_name)
338
+ lora_config.rank_pattern = rank_pattern
339
+ self.rankallocator.reset_ipt()
340
+ # Currently using inefficient way to mask the unimportant weights using the rank pattern
341
+ # due to problem mentioned above
342
+ elif global_step > lora_config.total_step - lora_config.tfinal:
343
+ self.rankallocator.mask_using_rank_pattern(self.model, lora_config.rank_pattern)
344
+ # Pass the function and do forward propagation
345
+ else:
346
+ return None
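For orientation, here is a hedged end-to-end sketch of how `update_and_allocate` fits into a training loop. It is not part of the uploaded file; the model name, schedule values, and the `dataloader` are placeholders, and it assumes `AdaLoraConfig` and `get_peft_model` are exposed from `peft_mora` the same way they are in upstream peft:

```python
import torch
from transformers import AutoModelForSeq2SeqLM
from peft_mora import AdaLoraConfig, get_peft_model

base = AutoModelForSeq2SeqLM.from_pretrained("t5-base")
config = AdaLoraConfig(
    task_type="SEQ_2_SEQ_LM",
    init_r=12, target_r=8,
    tinit=200, tfinal=500, total_step=3000,
    target_modules=["q", "v"],
)
model = get_peft_model(base, config)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

for step, batch in enumerate(dataloader):       # `dataloader` is assumed to exist
    loss = model(**batch).loss                  # forward() adds the orthogonal regularization
    loss.backward()
    optimizer.step()
    model.base_model.update_and_allocate(step)  # after backward(), before zero_grad()
    optimizer.zero_grad()
```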
MoRA/peft_mora/tuners/adaption_prompt/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ from .config import AdaptionPromptConfig
15
+ from .layer import AdaptedAttention
16
+ from .model import AdaptionPromptModel
17
+
18
+
19
+ __all__ = ["AdaptionPromptConfig", "AdaptedAttention", "AdaptionPromptModel"]
MoRA/peft_mora/tuners/adaption_prompt/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (390 Bytes). View file
 
MoRA/peft_mora/tuners/adaption_prompt/__pycache__/config.cpython-312.pyc ADDED
Binary file (2.57 kB). View file
 
MoRA/peft_mora/tuners/adaption_prompt/__pycache__/layer.cpython-312.pyc ADDED
Binary file (5.89 kB). View file
 
MoRA/peft_mora/tuners/adaption_prompt/__pycache__/model.cpython-312.pyc ADDED
Binary file (8.35 kB). View file
 
MoRA/peft_mora/tuners/adaption_prompt/__pycache__/utils.cpython-312.pyc ADDED
Binary file (5.71 kB). View file
 
MoRA/peft_mora/tuners/adaption_prompt/config.py ADDED
@@ -0,0 +1,73 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from collections import namedtuple
16
+ from dataclasses import dataclass, field
17
+
18
+ from peft_mora.config import PeftConfig
19
+ from peft_mora.utils import PeftType
20
+
21
+ from .utils import llama_compute_query_states
22
+
23
+
24
+ @dataclass
25
+ class AdaptionPromptConfig(PeftConfig):
26
+ """Stores the configuration of an [`AdaptionPromptModel`]."""
27
+
28
+ target_modules: str = field(
29
+ default=None, metadata={"help": "Name of the attention submodules to insert adaption prompts into."}
30
+ )
31
+ adapter_len: int = field(default=None, metadata={"help": "Number of adapter tokens to insert"})
32
+ adapter_layers: int = field(default=None, metadata={"help": "Number of adapter layers (from the top)"})
33
+
34
+ def __post_init__(self):
35
+ self.peft_type = PeftType.ADAPTION_PROMPT
36
+
37
+ @property
38
+ def is_adaption_prompt(self) -> bool:
39
+ """Return True if this is an adaption prompt config."""
40
+ return True
41
+
42
+
43
+ # Contains the config that is specific to a transformers model type.
44
+ ModelTypeConfig = namedtuple(
45
+ "ModelTypeConfig", ["compute_query_states", "target_modules", "k_proj_layer", "v_proj_layer", "o_proj_layer"]
46
+ )
47
+
48
+ # Mapping of transformers model types to their specific configuration.
49
+ TRANSFORMERS_MODEL_CONFIG = {
50
+ "llama": ModelTypeConfig(
51
+ compute_query_states=llama_compute_query_states,
52
+ target_modules="self_attn",
53
+ k_proj_layer="k_proj",
54
+ v_proj_layer="v_proj",
55
+ o_proj_layer="o_proj",
56
+ ),
57
+ }
58
+
59
+
60
+ def prepare_config(
61
+ peft_config: AdaptionPromptConfig,
62
+ model,
63
+ ) -> AdaptionPromptConfig:
64
+ """Prepare the config based on the llama model type."""
65
+ if model.config.model_type not in TRANSFORMERS_MODEL_CONFIG:
66
+ raise ValueError("Unsupported model type for adaption prompt: '{model.config.model_type}'.")
67
+
68
+ model_config = TRANSFORMERS_MODEL_CONFIG[model.config.model_type]
69
+
70
+ if peft_config.target_modules is None:
71
+ peft_config.target_modules = model_config.target_modules
72
+
73
+ return peft_config
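As a small illustration of `prepare_config` above (not part of the uploaded file; the config values are arbitrary and the stand-in model only exposes the one attribute that is inspected), leaving `target_modules` unset lets the llama defaults from `TRANSFORMERS_MODEL_CONFIG` fill it in:

```python
from types import SimpleNamespace
from peft_mora.tuners.adaption_prompt.config import AdaptionPromptConfig, prepare_config

# Stand-in for a llama-style model: only .config.model_type is read by prepare_config.
dummy_model = SimpleNamespace(config=SimpleNamespace(model_type="llama"))

config = AdaptionPromptConfig(adapter_len=10, adapter_layers=30, task_type="CAUSAL_LM")
config = prepare_config(config, dummy_model)
print(config.target_modules)  # "self_attn", filled in from TRANSFORMERS_MODEL_CONFIG["llama"]
```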
MoRA/peft_mora/tuners/adaption_prompt/layer.py ADDED
@@ -0,0 +1,120 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import math
16
+
17
+ import torch
18
+ import torch.nn as nn
19
+ import torch.nn.functional as F
20
+
21
+ from .config import TRANSFORMERS_MODEL_CONFIG
22
+
23
+
24
+ class AdaptedAttention(nn.Module):
25
+ """This module wraps a LLamaAttention module and injects adaption prompts."""
26
+
27
+ def __init__(self, model_type: str, adapter_len: int, model):
28
+ """
29
+ Initialize object.
30
+
31
+ Args:
32
+ model_type: The transformer model type. This is used to retrieve the right method to
33
+ compute query states.
34
+ adapter_len: The length of the adaption prompt to insert.
35
+ model: The original transformer attention module that is being wrapped.
36
+ """
37
+ assert not isinstance(model, AdaptedAttention)
38
+ super().__init__()
39
+ self.model_type = model_type
40
+ self.model = model
41
+ self.adapter_len = adapter_len
42
+ # Assume all parameters of the attention model we are wrapping are on the same device.
43
+ device = next(model.parameters()).device
44
+ # Don't think this was specified in the paper, but we follow the official repo which used an Embedding
45
+ # which initializes the tokens with standard normal values.
46
+ # https://github.com/ZrrSkywalker/LLaMA-Adapter/blob/41c3546fe1997ab8a65809dc8d8f9252b19d9faf/llama/model.py#L234
47
+ # (bsz, adapter_len, hidden_size)
48
+ target_dtype = (
49
+ model.q_proj.weight.dtype if model.q_proj.weight.dtype not in [torch.int8, torch.uint8] else torch.float32
50
+ )
51
+ self.adaption_prompt = nn.Parameter(
52
+ torch.empty(1, adapter_len, self.model.hidden_size, device=device, dtype=target_dtype).normal_()
53
+ )
54
+ # Initialize the gate to 0 as this is "zero-init".
55
+ self.adaption_gate = nn.Parameter(torch.zeros(1, device=device, dtype=target_dtype))
56
+
57
+ def forward(self, **kwargs):
58
+ """
59
+ Forward pass for the adapter which wraps the original LlamaAttention module.
60
+
61
+ "Official" paper implementation:
62
+ https://github.com/ZrrSkywalker/LLaMA-Adapter/blob/41c3546fe1997ab8a65809dc8d8f9252b19d9faf/llama/model.py#L141
63
+
64
+ Args:
65
+ kwargs: See the original LlamaAttention module.
66
+ """
67
+ if kwargs.get("output_attention", False):
68
+ raise NotImplementedError("output_attention is not currently supported.")
69
+
70
+ output, _, past_key_value = self.model(**kwargs)
71
+ bsz = output.shape[0]
72
+ q_len = output.shape[1]
73
+ embed_dim = output.shape[2]
74
+ k_proj_layer = TRANSFORMERS_MODEL_CONFIG[self.model_type].k_proj_layer
75
+ v_proj_layer = TRANSFORMERS_MODEL_CONFIG[self.model_type].v_proj_layer
76
+ o_proj_layer = TRANSFORMERS_MODEL_CONFIG[self.model_type].o_proj_layer
77
+
78
+ if k_proj_layer == v_proj_layer:
79
+ _, key, value = getattr(self.model, k_proj_layer)(self.adaption_prompt).split(embed_dim, dim=2)
80
+ else:
81
+ key = getattr(self.model, k_proj_layer)(self.adaption_prompt)
82
+ value = getattr(self.model, v_proj_layer)(self.adaption_prompt)
83
+ # (bsz, num_heads, adapter_len, head_dim)
84
+ adapter_k = (
85
+ key.view(1, self.adapter_len, self.model.num_heads, self.model.head_dim)
86
+ .repeat(bsz, 1, 1, 1)
87
+ .transpose(1, 2)
88
+ )
89
+ # (bsz, num_heads, adapter_len, head_dim)
90
+ adapter_v = (
91
+ value.view(1, self.adapter_len, self.model.num_heads, self.model.head_dim)
92
+ .repeat(bsz, 1, 1, 1)
93
+ .transpose(1, 2)
94
+ )
95
+
96
+ # Recompute query states.
97
+ compute_query_states = TRANSFORMERS_MODEL_CONFIG[self.model_type].compute_query_states
98
+ # (bsz, num_heads, q_len, head_dim)
99
+ query_states = compute_query_states(model=self.model, **kwargs)
100
+
101
+ previous_dtype = query_states.dtype
102
+ # (bsz, num_heads, q_len, adapter_len)
103
+ scores = torch.matmul(query_states, adapter_k.transpose(2, 3).to(previous_dtype)) / math.sqrt(
104
+ self.model.head_dim
105
+ )
106
+ # Upcast attention to fp32
107
+ # (bsz, num_heads, q_len, adapter_len)
108
+ scores = self.adaption_gate * F.softmax(scores, dim=-1, dtype=torch.float32).to(previous_dtype)
109
+ # (bsz, q_len, num_heads * head_dim)
110
+ adapter_output = torch.matmul(scores, adapter_v).transpose(1, 2).reshape(bsz, q_len, -1)
111
+ # (bsz, q_len, hidden_size)
112
+ if o_proj_layer is not None:
113
+ adapter_output = getattr(self.model, o_proj_layer)(adapter_output)
114
+
115
+ # Add adaption prompt output to original output.
116
+ output = output + adapter_output
117
+
118
+ # Restore original dtype.
119
+ output = output.to(previous_dtype)
120
+ return output, None, past_key_value
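The zero-initialized `adaption_gate` above means the adapter branch contributes nothing at the start of training, so the wrapped attention initially behaves exactly like the original module. A toy check of that property (shapes and values below are arbitrary, not taken from the file):

```python
import math
import torch
import torch.nn.functional as F

bsz, num_heads, q_len, adapter_len, head_dim = 2, 4, 5, 3, 8
query_states = torch.randn(bsz, num_heads, q_len, head_dim)
adapter_k = torch.randn(bsz, num_heads, adapter_len, head_dim)
adapter_v = torch.randn(bsz, num_heads, adapter_len, head_dim)
adaption_gate = torch.zeros(1)  # "zero-init", as in AdaptedAttention.__init__

scores = torch.matmul(query_states, adapter_k.transpose(2, 3)) / math.sqrt(head_dim)
scores = adaption_gate * F.softmax(scores, dim=-1)
adapter_output = torch.matmul(scores, adapter_v)  # zeros everywhere at initialization
assert torch.all(adapter_output == 0)
```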
MoRA/peft_mora/tuners/adaption_prompt/model.py ADDED
@@ -0,0 +1,161 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict, List
16
+
17
+ import torch.nn as nn
18
+
19
+ from peft_mora.utils import _freeze_adapter, _get_submodules
20
+
21
+ from .config import AdaptionPromptConfig, prepare_config
22
+ from .layer import AdaptedAttention
23
+ from .utils import is_adaption_prompt_trainable
24
+
25
+
26
+ class AdaptionPromptModel(nn.Module):
27
+ """
28
+ Implements adaption prompts as described in https://arxiv.org/pdf/2303.16199.pdf.
29
+
30
+ The top L attention modules are replaced with AdaptedAttention modules that wrap the original ones, but insert
31
+ trainable prompts with gates (for zero init).
32
+
33
+ Notes on the multi-adapter pattern:
34
+ - We store the states of different adapters by keeping a dictionary of AdaptedAttention modules indexed by adapter
35
+ name.
36
+ - Every time we switch adapters, we remove the modules of the currently active adapter from the model, store them
37
+ in the dictionary, and replace them with the modules of the new adapter.
38
+ - To avoid duplicated and potentially inconsistent state, the currently active adapter is always removed from the
39
+ dictionary.
40
+ - Disabling the adapter would also result in the modules being removed from the model.
41
+ """
42
+
43
+ def __init__(self, model, configs: Dict, adapter_name: str):
44
+ super().__init__()
45
+ self.model = model
46
+ # Store adapter configs by name.
47
+ self.peft_config: Dict[str, AdaptionPromptConfig] = {}
48
+ # Store lists of the parents of the affected attention modules by adapter name.
49
+ # We keep references to the parents so we can swap the adapters in-and-out of the model.
50
+ self._parents: Dict[str, List[nn.Module]] = {}
51
+ # Store lists of cached AdaptedAttention modules by name.
52
+ self._cached_adapters: Dict[str, List] = {}
53
+ # The name of the currently active adapter.
54
+ self._active_adapter = None
55
+ # Whether the adapter is enabled.
56
+ self._enabled = True
57
+ self.forward = self.model.forward
58
+ self.add_adapter(adapter_name, configs[adapter_name])
59
+ self._mark_only_adaption_prompts_as_trainable(self.model)
60
+
61
+ def add_adapter(self, adapter_name: str, config: AdaptionPromptConfig) -> None:
62
+ """Add an adapter with the given name and config."""
63
+ config = prepare_config(config, self.model)
64
+ if adapter_name in self.peft_config:
65
+ raise ValueError(f"Adapter with name '{adapter_name}' already exists.")
66
+
67
+ parents = []
68
+ for name, _ in self.model.named_modules():
69
+ if name.endswith(config.target_modules):
70
+ par, _, _ = _get_submodules(self.model, name)
71
+ parents.append(par)
72
+ if len(parents) < config.adapter_layers:
73
+ raise ValueError(
74
+ f"Config specifies more adapter layers '{config.adapter_layers}'"
75
+ f" than the model has '{len(parents)}'."
76
+ )
77
+ # Note that if the target modules are not in Sequential, ModuleList, or
78
+ # some other PyTorch ordered container, the behavior is undefined as we
79
+ # assume here that the order of the modules is the same as the order of
80
+ # the transformer decoder layers.
81
+ parents = parents[-config.adapter_layers :]
82
+ self._parents[adapter_name] = parents
83
+
84
+ # It is only None during initialization.
85
+ # If it is disabled, we don't have to remove the modules.
86
+ if self._active_adapter is not None and self._enabled:
87
+ self._remove_adapted_attentions(self._active_adapter)
88
+ self._active_adapter = adapter_name
89
+ self.peft_config[adapter_name] = config
90
+ self._create_adapted_attentions(config, parents)
91
+ if not self._enabled:
92
+ self._remove_adapted_attentions(self._active_adapter)
93
+
94
+ if config.inference_mode:
95
+ _freeze_adapter(self.model, adapter_name)
96
+
97
+ def set_adapter(self, adapter_name: str) -> None:
98
+ """Set the model to use the adapter with the given name."""
99
+ if self._active_adapter == adapter_name:
100
+ return
101
+ if adapter_name not in self.peft_config:
102
+ raise ValueError(f"Adapter with name '{adapter_name}' does not exist.")
103
+
104
+ if self._enabled:
105
+ self._remove_adapted_attentions(self._active_adapter)
106
+ self._set_adapted_attentions(adapter_name)
107
+
108
+ self._active_adapter = adapter_name
109
+
110
+ def enable_adapter_layers(self):
111
+ """Enable adapter layers by swapping in cached AdaptedAttention modules."""
112
+ self._enabled = True
113
+ self._set_adapted_attentions(self._active_adapter)
114
+
115
+ def disable_adapter_layers(self):
116
+ """Disable adapter layers by swapping out AdaptedAttention modules."""
117
+ self._enabled = False
118
+ self._remove_adapted_attentions(self._active_adapter)
119
+
120
+ def _create_adapted_attentions(self, config: AdaptionPromptConfig, parents: List[nn.Module]) -> None:
121
+ """Wrap LlamaAttention modules with newly created AdaptedAttention modules."""
122
+ for par in parents:
123
+ attn = AdaptedAttention(
124
+ model_type=self.model.config.model_type,
125
+ adapter_len=config.adapter_len,
126
+ model=getattr(par, config.target_modules),
127
+ )
128
+ setattr(par, config.target_modules, attn)
129
+
130
+ def _set_adapted_attentions(self, adapter_name: str) -> None:
131
+ """Replace LlamaAttention modules with cached AdaptedAttention modules."""
132
+ cached = self._cached_adapters[adapter_name]
133
+ del self._cached_adapters[adapter_name]
134
+ config = self.peft_config[adapter_name]
135
+ for i, par in enumerate(self._parents[adapter_name]):
136
+ setattr(par, config.target_modules, cached[i])
137
+
138
+ def _remove_adapted_attentions(self, adapter_name: str) -> None:
139
+ """Remove AdaptedAttention modules from the model and store them in the cache."""
140
+ config = self.peft_config[adapter_name]
141
+ adapted_attentions = []
142
+ for par in self._parents[adapter_name]:
143
+ attn = getattr(par, config.target_modules)
144
+ adapted_attentions.append(attn)
145
+ setattr(par, config.target_modules, attn.model)
146
+ self._cached_adapters[adapter_name] = adapted_attentions
147
+
148
+ def _mark_only_adaption_prompts_as_trainable(self, model: nn.Module) -> None:
149
+ """Freeze all parameters of the model except the adaption prompts."""
150
+ for n, p in model.named_parameters():
151
+ if not is_adaption_prompt_trainable(n):
152
+ p.requires_grad = False
153
+
154
+ def __getattr__(self, name: str):
155
+ """Forward missing attributes to the wrapped module."""
156
+ try:
157
+ return super().__getattr__(name) # defer to nn.Module's logic
158
+ except AttributeError:
159
+ # This is necessary as e.g. causal models have various methods that we
160
+ # don't want to re-implement here.
161
+ return getattr(self.model, name)
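A hedged usage sketch of the swap-in/swap-out pattern described in the `AdaptionPromptModel` docstring. It is not part of the uploaded file; `model` stands for a llama-style causal LM that is assumed to already exist, and the adapter sizes are placeholders:

```python
from peft_mora.tuners.adaption_prompt import AdaptionPromptConfig, AdaptionPromptModel

cfg_a = AdaptionPromptConfig(adapter_len=10, adapter_layers=30, task_type="CAUSAL_LM")
cfg_b = AdaptionPromptConfig(adapter_len=4, adapter_layers=16, task_type="CAUSAL_LM")

peft_model = AdaptionPromptModel(model, {"default": cfg_a}, adapter_name="default")
peft_model.add_adapter("small", cfg_b)   # "default" is cached, "small" becomes active
peft_model.set_adapter("default")        # swap the cached "default" modules back in
peft_model.disable_adapter_layers()      # restore the original attention modules
```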
MoRA/peft_mora/tuners/adaption_prompt/utils.py ADDED
@@ -0,0 +1,111 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import inspect
15
+
16
+ import torch
17
+ import torch.nn as nn
18
+
19
+
20
+ def llama_rotate_half(x: torch.Tensor) -> torch.Tensor:
21
+ """
22
+ Rotate half the hidden dims of the input.
23
+
24
+ This function was duplicated verbatim from:
25
+ https://github.com/huggingface/transformers/blob/1de8ce9ee1191ba761a593ac15d9ccbf5851bfc5/src/transformers/models/llama/modeling_llama.py#L126
26
+
27
+ This was done to eliminate the Llama transformers implementation as a dependency of this file. Note that some other
28
+ functions were also adapted from the transformers implementation but were modified.
29
+ """
30
+ x1 = x[..., : x.shape[-1] // 2]
31
+ x2 = x[..., x.shape[-1] // 2 :]
32
+ return torch.cat((-x2, x1), dim=-1)
33
+
34
+
35
+ def llama_apply_rotary_pos_emb(q, cos, sin, position_ids):
36
+ """
37
+ Apply rotary position embedding to query states in the Llama model.
38
+
39
+ This function was adapted from:
40
+ https://github.com/huggingface/transformers/blob/1de8ce9ee1191ba761a593ac15d9ccbf5851bfc5/src/transformers/models/llama/modeling_llama.py#L133
41
+
42
+ It was modified to remove unnecessary processing of key states. The method is compatible with transformers <=
43
+ 4.34.2 and also with the latest version (>=4.35).
44
+ """
45
+ # In previous transformers version cos/sin cached had a shape of 4D
46
+ if len(cos.shape) == 4:
47
+ gather_indices = position_ids[:, None, :, None] # [bs, 1, seq_len, 1]
48
+ gather_indices = gather_indices.repeat(1, cos.shape[1], 1, cos.shape[3])
49
+ cos = torch.gather(cos.repeat(gather_indices.shape[0], 1, 1, 1), 2, gather_indices)
50
+ sin = torch.gather(sin.repeat(gather_indices.shape[0], 1, 1, 1), 2, gather_indices)
51
+ # In the new version, it is 2D so we fall back to the new implementation
52
+ # https://github.com/huggingface/transformers/blame/eef7ea98c31a333bacdc7ae7a2372bde772be8e4/src/transformers/models/llama/modeling_llama.py#L222-L226
53
+ else:
54
+ cos = cos[position_ids].unsqueeze(1)
55
+ sin = sin[position_ids].unsqueeze(1)
56
+ q_embed = (q * cos) + (llama_rotate_half(q) * sin)
57
+ return q_embed
58
+
59
+
60
+ def llama_compute_query_states(model: nn.Module, **kwargs) -> torch.Tensor:
61
+ """
62
+ Compute query states for Llama models specifically. They need to be recomputed as the forward() method of the
63
+ original LlamaModel in the transformers library does not return them. See the related discussion in the PR:
64
+ https://github.com/huggingface/peft/pull/268
65
+ """
66
+ hidden_states = kwargs.get("hidden_states")
67
+ position_ids = kwargs.get("position_ids")
68
+ past_key_value = kwargs.get("past_key_value")
69
+ bsz, q_len, _ = hidden_states.size()
70
+ query_states = model.q_proj(hidden_states).view(bsz, q_len, model.num_heads, model.head_dim).transpose(1, 2)
71
+ value_states = model.v_proj(hidden_states).view(bsz, q_len, model.num_heads, model.head_dim).transpose(1, 2)
72
+ seq_len = q_len
73
+
74
+ if past_key_value is not None:
75
+ if isinstance(past_key_value, tuple):
76
+ # for transformers <= 4.35
77
+ seq_len += past_key_value[0].shape[-2]
78
+ else:
79
+ # since transformers 4.36, this is a DynamicCache instance
80
+ seq_len += past_key_value.get_seq_length(model.layer_idx)
81
+
82
+ # For transformers > 4.37.2 `position_ids` became a required argument in the rotary embedding's forward pass.
83
+ if "position_ids" not in inspect.signature(model.rotary_emb.forward).parameters:
84
+ # TODO we assume that position_ids is not None here, not sure if that is safe but the old code also did that
85
+ cos, sin = model.rotary_emb(value_states, seq_len=seq_len)
86
+ return llama_apply_rotary_pos_emb(query_states, cos, sin, position_ids)
87
+
88
+ past_seen_tokens = 0
89
+ if position_ids is None:
90
+ # Compute position_ids, since they are required for transformers > 4.37.2
91
+ if past_key_value is None:
92
+ new_cache_positions = torch.arange(q_len, q_len + q_len, device=value_states.device)
93
+ else:
94
+ past_seen_tokens = past_key_value.get_usable_length(q_len, model.layer_idx)
95
+ new_cache_positions = torch.arange(past_seen_tokens, past_seen_tokens + q_len, device=value_states.device)
96
+ position_ids = new_cache_positions.unsqueeze(0)
97
+
98
+ cos, sin = model.rotary_emb(value_states, seq_len=q_len + past_seen_tokens, position_ids=position_ids)
99
+
100
+ # For batched inference unsqueeze it on the correct dim
101
+ # since: https://github.com/huggingface/transformers/pull/29109
102
+ if len(cos.shape) == 3:
103
+ cos = cos.unsqueeze(1)
104
+ sin = sin.unsqueeze(1)
105
+
106
+ return (query_states * cos) + (llama_rotate_half(query_states) * sin)
107
+
108
+
109
+ def is_adaption_prompt_trainable(params: str) -> bool:
110
+ """Return True if module is trainable under adaption prompt fine-tuning."""
111
+ return params.split(".")[-1].startswith("adaption_")
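For readers unfamiliar with the rotary-embedding helpers above, this standalone snippet (not from the uploaded file; the cos/sin values are toy constants rather than real rotary caches) shows what `llama_rotate_half` does to the last dimension and how it feeds the `(q * cos) + (rotate_half(q) * sin)` combination:

```python
import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    # Same operation as llama_rotate_half: split the last dim and recombine as (-x2, x1).
    x1 = x[..., : x.shape[-1] // 2]
    x2 = x[..., x.shape[-1] // 2 :]
    return torch.cat((-x2, x1), dim=-1)

q = torch.tensor([[1.0, 2.0, 3.0, 4.0]])
print(rotate_half(q))                # tensor([[-3., -4.,  1.,  2.]])

cos = torch.ones_like(q) * 0.5       # toy values; real ones come from the rotary cache
sin = torch.ones_like(q) * 0.5
q_embed = (q * cos) + (rotate_half(q) * sin)   # as in llama_apply_rotary_pos_emb
```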
MoRA/peft_mora/tuners/ia3/__init__.py ADDED
@@ -0,0 +1,36 @@
1
+ # Copyright 2023-present the HuggingFace Inc. team.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from peft_mora.import_utils import is_bnb_4bit_available, is_bnb_available
16
+
17
+ from .config import IA3Config
18
+ from .layer import Conv2d, IA3Layer, Linear
19
+ from .model import IA3Model
20
+
21
+
22
+ __all__ = ["Conv2d", "IA3Config", "IA3Layer", "IA3Model", "Linear"]
23
+
24
+
25
+ def __getattr__(name):
26
+ if (name == "Linear8bitLt") and is_bnb_available():
27
+ from .bnb import Linear8bitLt
28
+
29
+ return Linear8bitLt
30
+
31
+ if (name == "Linear4bit") and is_bnb_4bit_available():
32
+ from .bnb import Linear4bit
33
+
34
+ return Linear4bit
35
+
36
+ raise AttributeError(f"module {__name__} has no attribute {name}")
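The module-level `__getattr__` above relies on PEP 562 so that `bitsandbytes` is imported only when a quantized layer class is actually requested. A minimal sketch (assuming `bitsandbytes` is installed; otherwise the lookup raises `AttributeError`):

```python
from peft_mora.tuners import ia3

# The bitsandbytes import happens only at this point, inside ia3.__getattr__.
Linear8bitLt = ia3.Linear8bitLt
```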
MoRA/peft_mora/tuners/ia3/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (937 Bytes). View file
 
MoRA/peft_mora/tuners/ia3/__pycache__/config.cpython-312.pyc ADDED
Binary file (5.09 kB). View file
 
MoRA/peft_mora/tuners/ia3/__pycache__/layer.cpython-312.pyc ADDED
Binary file (15.8 kB). View file
 
MoRA/peft_mora/tuners/ia3/__pycache__/model.cpython-312.pyc ADDED
Binary file (18.4 kB). View file