Fix SuperAnimal / pretrained load for RTMPose: implement convert_weights on RTMCCHead (#3270)

deruyter92 · C-Achard · MMathisLab · web-flow · commit 7bde4ccd62cc · 2026-04-13T15:26:28.000+02:00
* Fix SuperAnimal / pretrained load for RTMPose: implement convert_weights on RTMCCHead

* update RTMCC convert_weights: add flag for optional gau.w omission; ensure deterministic init.

---------

Co-authored-by: Cyril Achard <cyril.achard@epfl.ch>
Co-authored-by: Mackenzie Mathis <mathis@rowland.harvard.edu>
diff --git a/deeplabcut/pose_estimation_pytorch/models/heads/rtmcc_head.py b/deeplabcut/pose_estimation_pytorch/models/heads/rtmcc_head.py
@@ -26,6 +26,7 @@
 from deeplabcut.pose_estimation_pytorch.models.heads.base import (
     HEADS,
     BaseHead,
+    WeightConversionMixin,
 )
 from deeplabcut.pose_estimation_pytorch.models.modules import (
     GatedAttentionUnit,
@@ -37,7 +38,7 @@
 
 
 @HEADS.register_module
-class RTMCCHead(BaseHead):
+class RTMCCHead(WeightConversionMixin, BaseHead):
     """RTMPose Coordinate Classification head.
 
     The RTMCC head is itself adapted from the SimCC head. For more information, see
@@ -136,6 +137,64 @@ def forward(self, x: torch.Tensor) -> dict[str, torch.Tensor]:
         x, y = self.cls_x(feats), self.cls_y(feats)
         return dict(x=x, y=y)
 
+    @staticmethod
+    def convert_weights(
+        state_dict: dict[str, torch.Tensor],
+        module_prefix: str,
+        conversion: torch.Tensor,
+        *,
+        omit_gau_w: bool = False,
+    ) -> dict[str, torch.Tensor]:
+        """Re-order / subset bodypart (token) channels for transfer from SuperAnimal.
+
+        Args:
+            state_dict: State dict for this head; modified in place and returned.
+            module_prefix: Prefix for state-dict keys.
+            conversion: Mapping from new bodyparts to source bodyparts; entry ``i``
+                is the source index read for new bodypart ``i``.
+            omit_gau_w: If True, remove ``gau.w`` instead of remapping it; loading
+                then requires ``strict=False`` because the key is missing. Prefer
+                this when source/target keypoint ordering semantics differ.
+        """
+        conv = conversion.long()
+        k_new = int(conv.shape[0])
+
+        # Index dim 0 of final_layer weight/bias with conv; absent keys are skipped.
+        fl_w = f"{module_prefix}final_layer.weight"
+        fl_b = f"{module_prefix}final_layer.bias"
+        if fl_w in state_dict:
+            state_dict[fl_w] = state_dict[fl_w][conv]
+        if fl_b in state_dict:
+            state_dict[fl_b] = state_dict[fl_b][conv]
+
+        # gau.w is dropped when omit_gau_w is True, else rebuilt at length 2*k_new-1.
+        w_key = f"{module_prefix}gau.w"
+        if w_key in state_dict:
+            if omit_gau_w:
+                state_dict.pop(w_key, None)
+                return state_dict
+            # w is read as one value per token offset d, stored at index d + center.
+            w_old = state_dict[w_key]
+            k_old = (w_old.shape[0] + 1) // 2
+            old_center = k_old - 1
+            new_center = k_new - 1
+            # New entry d = mean of w_old at source offsets conv[i] - conv[j] over all
+            # pairs with i - j == d; offsets with no valid pair get the mean of w_old.
+            default_val = w_old.mean()
+            w_new = torch.empty(2 * k_new - 1, dtype=w_old.dtype, device=w_old.device)
+            for idx_new, d in enumerate(range(-new_center, new_center + 1)):
+                old_vals = []
+                for i in range(k_new):
+                    j = i - d
+                    if not (0 <= j < k_new):
+                        continue
+                    old_idx = int(conv[i] - conv[j]) + old_center
+                    if 0 <= old_idx < w_old.shape[0]:
+                        old_vals.append(w_old[old_idx])
+                w_new[idx_new] = torch.stack(old_vals).mean() if old_vals else default_val
+            state_dict[w_key] = w_new
+        return state_dict
+
     @staticmethod
     def update_input_size(model_cfg: dict, input_size: tuple[int, int]) -> None:
         """Updates an RTMPose model configuration file for a new image input size.