| | """ |
| | 2025.3.17 |
| | 2025.3.19 |
| | 4.50.0 |
| | 0.15.2 |
| | __UNSLOTH_VERSIONING__ |
| | """ |
| |
|
torch_compile_options = {'epilogue_fusion': True, 'max_autotune': False, 'shape_padding': True, 'trace.enabled': False, 'triton.cudagraphs': False}

from torch import Tensor
import torch
import torch.nn as nn
from torch.nn import functional as F
# Re-import names that the PEFT LoRA forward copied below expects to resolve in this module.
from peft.tuners.lora.layer import (Any, F, nn, torch)
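
# --- Illustrative sketch, not emitted by the cache generator -----------------
# torch_compile_options above is shaped for torch.compile's `options=` argument
# (inductor backend). The decorator below only demonstrates that usage; the
# helper name `_example_compiled_scale` and the `dynamic=True` flag are
# assumptions made for illustration.
@torch.compile(dynamic = True, options = torch_compile_options)
def _example_compiled_scale(x: Tensor, scaling: float) -> Tensor:
    # Trivial function compiled with the same inductor options.
    return x * scaling
pass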

# Local aliases to the fused torch ops used by lora_forward below.
torch_addmm = torch.addmm
torch_add = torch.add

def lora_forward(result, lora_A, lora_B, dropout, x, scaling):
    # Fused LoRA update: result + scaling * (dropout(x) @ A^T) @ B^T,
    # computed with a single addmm over flattened 2D views.
    xA = dropout(x) @ lora_A.weight.t()

    shape = result.shape
    output = torch_addmm(
        result.view(-1, shape[-1]),
        xA.view(-1, xA.shape[-1]),
        lora_B.weight.t(),
        alpha = scaling,
        beta = 1,
    ).view(shape)

    # If lora_B carries a bias, scale it by the same LoRA scaling factor.
    bias = lora_B.bias
    if bias is not None:
        output = torch_add(
            output,
            bias,
            alpha = scaling,
        )
    return output
pass
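
# --- Illustrative sketch, not part of the generated cache --------------------
# A naive, unfused reference for what lora_forward computes, assuming the usual
# PEFT layout in which lora_A has no bias. The helper name
# `_reference_lora_forward` is hypothetical and exists only to document the
# math that the fused addmm above implements.
def _reference_lora_forward(result, lora_A, lora_B, dropout, x, scaling):
    # result + scaling * B(A(dropout(x))), including any lora_B bias (scaled).
    return result + scaling * lora_B(lora_A(dropout(x)))
pass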
def unsloth_forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
    # Optional per-sample adapter routing (mixed-batch inference).
    adapter_names = kwargs.pop("adapter_names", None)

    if self.disable_adapters:
        # Adapters disabled: undo any merged weights and run the base layer only.
        if self.merged:
            self.unmerge()
        result = self.base_layer(x, *args, **kwargs)
    elif adapter_names is not None:
        result = self._mixed_batch_forward(x, *args, adapter_names=adapter_names, **kwargs)
    elif self.merged:
        # LoRA weights already merged into the base layer.
        result = self.base_layer(x, *args, **kwargs)
    else:
        result = self.base_layer(x, *args, **kwargs)
        torch_result_dtype = result.dtype

        lora_A_keys = self.lora_A.keys()
        for active_adapter in self.active_adapters:
            if active_adapter not in lora_A_keys:
                continue

            lora_A = self.lora_A[active_adapter]
            lora_B = self.lora_B[active_adapter]
            dropout = self.lora_dropout[active_adapter]
            scaling = self.scaling[active_adapter]
            if not torch.is_autocast_enabled():
                result, x = result.to(lora_A.weight.dtype), x.to(lora_A.weight.dtype)

            if not self.use_dora[active_adapter]:
                # Fast path (plain LoRA): fused addmm update, returned directly.
                return lora_forward(result, lora_A, lora_B, dropout, x, scaling)
            else:
                # DoRA path: apply dropout here during training so the magnitude
                # vector module recomputes the base result; otherwise reuse it.
                if isinstance(dropout, nn.Identity) or not self.training:
                    base_result = result
                else:
                    x = dropout(x)
                    base_result = None

                result = result + self.lora_magnitude_vector[active_adapter](
                    x,
                    lora_A=lora_A,
                    lora_B=lora_B,
                    scaling=scaling,
                    base_layer=self.get_base_layer(),
                    base_result=base_result,
                )

        # Restore the base layer's original output dtype.
        result = result.to(torch_result_dtype)

    return result
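
# --- Illustrative sketch, not part of the generated cache --------------------
# unsloth_forward mirrors peft.tuners.lora.layer.Linear.forward, so installing
# it amounts to assigning it over that method. Unsloth's patcher performs the
# real installation elsewhere; the helper below and its name are assumptions
# sketched only to show the idea.
def _example_install_unsloth_forward():
    from peft.tuners.lora.layer import Linear as LoraLinear
    # Replace the stock LoRA Linear forward with the fused version defined above.
    LoraLinear.forward = unsloth_forward
pass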