ENH: Tie weights for target_modules in Lora (#2864) #2879
@@ -26,11 +26,7 @@
 from transformers.modeling_layers import GradientCheckpointingLayer

 from peft.import_utils import is_bnb_4bit_available, is_bnb_available
-from peft.tuners.tuners_utils import (
-    BaseTuner,
-    BaseTunerLayer,
-    replicate_layers,
-)
+from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, find_parameter_name_by_tensor, replicate_layers
 from peft.utils import (
     TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
     AuxiliaryTrainingWrapper,
@@ -201,6 +197,17 @@ def _create_and_replace(
         r = lora_config.rank_pattern.get(r_key, lora_config.r)
         alpha = lora_config.alpha_pattern.get(alpha_key, lora_config.lora_alpha)

+        # Check whether the target is marked as a tied layer.
+        # If so, add a reference to the LoRA adapters of the embedding layer in `tied_adapters`.
+        is_tied = target_name in (getattr(lora_config, "target_modules_to_tie", []) or [])
+
+        tied_adapters = {}
+        if is_tied:
+            tied_module = self.model.get_input_embeddings()
+            emb_A = tied_module.lora_embedding_A[adapter_name]
+            emb_B = tied_module.lora_embedding_B[adapter_name]
+
+            tied_adapters = {"lora_A": emb_B.t(), "lora_B": emb_A.t()}

         kwargs = {
             "r": r,
             "lora_alpha": alpha,
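Assuming PEFT's usual shape conventions (for `lora.Embedding` the update is the transpose of `lora_embedding_B @ lora_embedding_A`, while for `lora.Linear` it is `lora_B @ lora_A`), the swap-and-transpose above makes the tied layer's update identical to the embedding's update. A minimal shape check with plain tensors, not real PEFT modules, and made-up sizes:

```python
import torch

# Made-up sizes, for illustration only.
num_embeddings, embedding_dim, r = 32, 8, 4

# Embedding-side LoRA factors, following the shapes PEFT uses for lora.Embedding:
# lora_embedding_A: (r, num_embeddings), lora_embedding_B: (embedding_dim, r).
emb_A = torch.randn(r, num_embeddings)
emb_B = torch.randn(embedding_dim, r)

# Update applied to the embedding weight (num_embeddings, embedding_dim).
delta_embedding = (emb_B @ emb_A).T

# Tied lm_head factors as built above: lora_A = emb_B.t(), lora_B = emb_A.t().
lora_A = emb_B.t()  # (r, in_features=embedding_dim)
lora_B = emb_A.t()  # (out_features=num_embeddings, r)

# Update applied to the (tied) lm_head weight (num_embeddings, embedding_dim).
delta_lm_head = lora_B @ lora_A

# With these assignments the two updates coincide, which is what keeps the weights tied.
assert torch.allclose(delta_embedding, delta_lm_head)
```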
@@ -218,6 +225,7 @@ def _create_and_replace(
             "loaded_in_8bit": getattr(self.model, "is_loaded_in_8bit", False),
             "loaded_in_4bit": getattr(self.model, "is_loaded_in_4bit", False),
             "parameter_name": parameter_name,
+            "tied_adapters": tied_adapters,
         }

         # for torchao merging, we need the get_apply_tensor_subclass from the quantization config
@@ -263,6 +271,7 @@ def _create_and_replace(
         if adapter_name not in self.active_adapters:
             # adding an additional adapter: it is not automatically trainable
             new_module.requires_grad_(False)
+
         self._replace_module(parent, target_name, new_module, target)

     def _replace_module(self, parent, child_name, new_module, child):
@@ -861,8 +870,63 @@ def subtract_mutated_init(self, output_state_dict: dict[str, torch.Tensor], adap

         return tensors_lora

-    def _add_modules_to_tie(self, peft_config, tied_weight_keys):
-        modules_to_save = set(getattr(peft_config, "modules_to_save", []) or [])
-        missing_keys = set(tied_weight_keys) - modules_to_save
-        peft_config.modules_to_tie = missing_keys
+    def _add_modules_to_tie(self, peft_config: LoraConfig, tied_weight_keys: list[str]):
+        """
+        `tied_weight_keys` contains the layers tied to the embedding layer. Add the embedding layer to
+        `modules_to_save` and remove the rest of the tied layers from it. Maintain a separate set for the layers
+        to be tied.
+
+        Args:
+            peft_config (LoraConfig)
+            tied_weight_keys (list[str])
+        """
+        tied_weight_keys = set(tied_weight_keys)
+        peft_config.modules_to_tie = tied_weight_keys
+
+        modules_to_save = getattr(peft_config, "modules_to_save", []) or []
+
+        embed_layer_name = find_parameter_name_by_tensor(self.model, self.model.get_input_embeddings().weight)

Collaborator: I think there is no guarantee that this will return the name of the embedding layer. It could also return the name of a layer tied to the embedding layer. It is probably safer to compare module identity instead (even though for transformers <5 this will also be flaky for models like T5).
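A sketch of the module-identity lookup suggested here; `find_embedding_module_name` is a hypothetical helper, not part of this PR or of PEFT:

```python
def find_embedding_module_name(model) -> str | None:
    # Hypothetical helper: locate the input embedding by module identity
    # instead of by comparing weight tensors, so a layer that merely shares
    # the tied weight cannot be returned by mistake.
    input_embeddings = model.get_input_embeddings()
    for name, module in model.named_modules():
        if module is input_embeddings:
            return name
    return None
```

The returned name could then replace the `find_parameter_name_by_tensor` call plus the string-stripping step below.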
+        # find_parameter_name_by_tensor returns the parameter name, so we need to strip the weight from the name
+        embed_layer_name = embed_layer_name.replace(".weight", "").replace("model.", "")
+
+        if embed_layer_name not in modules_to_save:
+            modules_to_save.append(embed_layer_name)
+
+        for m in tied_weight_keys:
+            if m in modules_to_save:
+                modules_to_save.remove(m)

Collaborator (on lines +903 to +904): I'm not sure how often this will generate a match. If I understand correctly, …
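To illustrate the concern: `modules_to_save` entries are typically short suffixes (e.g. `"lm_head"`), while tied weight keys tend to be fully qualified (e.g. `"lm_head.weight"`), so an exact membership test rarely fires. A hedged sketch of a suffix-aware check, shown only to make the reviewer's point concrete (hypothetical helper, not what the PR does):

```python
def drop_tied_entries(modules_to_save: list[str], tied_weight_keys: set[str]) -> list[str]:
    # Hypothetical suffix-aware variant of the exact membership test above:
    # modules_to_save entries are usually short suffixes such as "lm_head",
    # while tied weight keys tend to be fully qualified such as "lm_head.weight".
    def is_tied(entry: str) -> bool:
        return any(
            key == entry or key.startswith(entry + ".") or key.endswith("." + entry)
            for key in tied_weight_keys
        )

    return [entry for entry in modules_to_save if not is_tied(entry)]


# An exact check would keep "lm_head"; the suffix-aware one drops it.
print(drop_tied_entries(["lm_head", "embed_tokens"], {"lm_head.weight"}))  # ['embed_tokens']
```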
+
+        peft_config.modules_to_save = modules_to_save
+
+    def _add_targets_to_tie(self, peft_config: LoraConfig, tied_weight_keys: list[str]):
+        """
+        `tied_weight_keys` contains the layers tied to the embedding layer. Add the embedding layer to
+        `target_modules` and remove the rest of the tied layers from it. Maintain a separate set for the layers
+        to be tied.
+
+        Args:
+            peft_config (LoraConfig)
+            tied_weight_keys (list[str])
+        """
+        tied_weight_keys = set(tied_weight_keys)
+        peft_config.target_modules_to_tie = tied_weight_keys
+
+        raw_target_modules = getattr(peft_config, "target_modules", None)

Contributor (author): @BenjaminBossan Please review this logic. I know this is a bit hacky! I am open to suggestions.

Member: Hmm yeah, this is rough. We can't really operate on the string like this, as there are too many possible ways that the regex could be formed. I wonder if we should just leave it be and deal with the tied module edge case in …

Contributor (author): It should be possible, it would just make the flow very convoluted.

Contributor (author): I redid this a bit. We just need to make sure that …
+        embed_layer_name = find_parameter_name_by_tensor(self.model, self.model.get_input_embeddings().weight)
+        # find_parameter_name_by_tensor returns the parameter name, so we need to strip the weight from the name
+        embed_layer_name = embed_layer_name.replace(".weight", "").replace("model.", "")
+
+        if isinstance(raw_target_modules, str):
+            # The way weight tying is handled for adapters, we always want to add
+            # LoRA adapters to the input embedding layer (embed_tokens)
+            # instead of the output embedding layer.
+            raw_target_modules = rf"(?:{raw_target_modules}|.*{embed_layer_name}$)"
+            peft_config.target_modules = raw_target_modules
+            return
+
+        target_modules = set(raw_target_modules or [])
+        target_modules.add(embed_layer_name)
+
+        for m in tied_weight_keys:
+            if m in target_modules:
+                target_modules.remove(m)
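To make the regex branch concrete: PEFT full-matches a string `target_modules` against fully qualified module names, so the extended pattern keeps matching the original targets and additionally matches the input embedding layer. The pattern and module names below are assumptions for the example, not values taken from the PR:

```python
import re

# Assumed user-supplied regex and embedding-layer name; purely illustrative.
raw_target_modules = r".*\.(q_proj|v_proj)"
embed_layer_name = "embed_tokens"

# Same construction as in _add_targets_to_tie above.
pattern = rf"(?:{raw_target_modules}|.*{embed_layer_name}$)"

# The extended pattern still matches the original targets ...
assert re.fullmatch(pattern, "model.layers.0.self_attn.q_proj")
# ... and now also matches the input embedding layer ...
assert re.fullmatch(pattern, "model.embed_tokens")
# ... while unrelated modules are still excluded.
assert re.fullmatch(pattern, "model.layers.0.mlp.down_proj") is None
```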
Collaborator (on lines +936 to +938): This will also only occasionally match, right? Only if users supply the fully-qualified module names.
+
+        peft_config.target_modules = target_modules
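As background on where the tied weight keys come from, here is a minimal sketch of inspecting weight tying on a transformers model. The checkpoint name is only an example, and `_tied_weights_keys` is a private transformers attribute that may differ across versions:

```python
from transformers import AutoModelForCausalLM

# Example checkpoint; any model with tie_word_embeddings=True behaves the same way.
model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B")

print(model.config.tie_word_embeddings)            # True for tied checkpoints
print(getattr(model, "_tied_weights_keys", None))  # e.g. ["lm_head.weight"]

# The tie itself: input and output embeddings share a single parameter.
print(model.get_input_embeddings().weight is model.get_output_embeddings().weight)
```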