Kernels
wyldecat, Claude Opus 4.6 committed
Commit 7e33533 · 1 Parent(s): 3f5cf49

Update fast path comment to reflect current behavior [skip-build]

torch-ext/optimizer/distributed/utils.py CHANGED
```diff
@@ -163,9 +163,10 @@ def construct_shard_mesh(
     assert mesh.mesh.device.type == 'cpu'
 
     # -- Fast path: 1D all-shard mesh → reuse existing PG. ----------------
-    # This avoids a non-collective dist.new_group() call, which would
-    # deadlock when only a subset of ranks call this function (e.g. expert
-    # DTensors on a TP submesh where ranks 0-3 and 4-7 call separately).
+    # Reuses the mesh's existing ProcessGroup directly, avoiding the
+    # overhead of dist.new_group(). The standard path below also handles
+    # subset calls safely via use_local_synchronization=True, but this
+    # fast path is still beneficial for the common 1D shard case.
     if mesh.ndim == 1 and len(placements) == 1 and _is_shard(placements[0]):
         key = (*mesh.mesh.shape, *mesh.mesh.flatten().tolist())
         if key not in _ranks_to_dist_cache:
```
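For context, here is a minimal sketch of the two paths the updated comment describes. `get_or_create_group` is a hypothetical wrapper (the diff only shows a fragment of `construct_shard_mesh`), and the `_is_shard` body, the import paths, and the cache type are assumptions; only the fast-path condition and the cache key are taken verbatim from the diff.

```python
import torch.distributed as dist
from torch.distributed.device_mesh import DeviceMesh
from torch.distributed.tensor import Placement, Shard

# Hypothetical module-level cache, mirroring the `_ranks_to_dist_cache`
# referenced in the diff: keyed on mesh shape plus flattened rank list.
_ranks_to_dist_cache: dict[tuple, dist.ProcessGroup] = {}


def _is_shard(placement: Placement) -> bool:
    # Assumption: the real helper simply checks for a Shard placement.
    return isinstance(placement, Shard)


def get_or_create_group(
    mesh: DeviceMesh, placements: tuple[Placement, ...]
) -> dist.ProcessGroup:
    # Fast path: a 1D all-shard mesh already carries a ProcessGroup, so we
    # can hand it back (and cache it) instead of building a new one.
    if mesh.ndim == 1 and len(placements) == 1 and _is_shard(placements[0]):
        key = (*mesh.mesh.shape, *mesh.mesh.flatten().tolist())
        if key not in _ranks_to_dist_cache:
            _ranks_to_dist_cache[key] = mesh.get_group()
        return _ranks_to_dist_cache[key]

    # Standard path: create a fresh group. With
    # use_local_synchronization=True, only the ranks that belong to the new
    # group must call new_group(), so a subset of ranks reaching this code
    # (e.g. expert DTensors on a TP submesh) no longer deadlocks.
    ranks = mesh.mesh.flatten().tolist()
    return dist.new_group(ranks=ranks, use_local_synchronization=True)
```

The key point behind the comment change: `dist.new_group(..., use_local_synchronization=True)` (available since PyTorch 2.1) requires only the new group's own members to participate in the call, which is why the fast path is now framed as an overhead optimization rather than a deadlock workaround.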