feat: Simplify TRM forward to one L_level update each for z_L and z_H (drop H/L cycle loops and no-grad warm-up)

ValerianRey · ValerianRey · commit 97f907167b85 · 2025-12-22T16:57:09.000+01:00
diff --git a/src/recursion/models/recursive_reasoning/trm.py b/src/recursion/models/recursive_reasoning/trm.py
@@ -282,18 +282,11 @@ def forward(
 
         # Input encoding
         input_embeddings = self._input_embeddings(batch["inputs"], batch["puzzle_identifiers"])
+        # shape: [batch_size, 97, 512] (97 tokens of dim 512)
 
         # Forward iterations
         z_H, z_L = carry.z_H, carry.z_L
-        # H_cycles-1 without grad
-        with torch.no_grad():
-            for _H_step in range(self.config.H_cycles - 1):
-                for _L_step in range(self.config.L_cycles):
-                    z_L = self.L_level(z_L, z_H + input_embeddings, **seq_info)
-                z_H = self.L_level(z_H, z_L, **seq_info)
-        # 1 with grad
-        for _L_step in range(self.config.L_cycles):
-            z_L = self.L_level(z_L, z_H + input_embeddings, **seq_info)
+        z_L = self.L_level(z_L, z_H + input_embeddings, **seq_info)
         z_H = self.L_level(z_H, z_L, **seq_info)
 
         # LM Outputs