Commit 1b0bddc

aporialiao authored and facebook-github-bot committed
Add unsharded module property to sharded modules and EBC
Summary: Add a simple unsharded-module reference to sharded modules. This will be used in dynamic sharding by DistributedModelParallel (DMP) to reshard an already-sharded module. Because DMP was created with only a one-way relationship in mind, accessing the unsharded module type helps determine which sharder to use when resharding. See the comment under `types.py`. Differential Revision: D73537260
1 parent 0981db6 commit 1b0bddc
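
To illustrate the intended use described in the summary (this sketch is not part of the commit): a minimal example of how resharding code could map an already-sharded module back to a sharder. The helper names build_sharder_map and reshard_module are hypothetical, introduced here only for illustration.

# Minimal sketch, not part of this commit. Assumes each sharder exposes the
# unsharded module type it handles (TorchRec sharders do, via `module_type`);
# `build_sharder_map` and `reshard_module` are hypothetical helper names.
from typing import Dict, Type

import torch.nn as nn


def build_sharder_map(sharders) -> Dict[Type[nn.Module], object]:
    # Key each sharder by the unsharded module type it knows how to shard,
    # similar to how DMP organizes its sharders internally.
    return {sharder.module_type: sharder for sharder in sharders}


def reshard_module(sharded_module, sharder_map, new_plan):
    # An already-sharded module is no longer an instance of its unsharded
    # type, so the new `unsharded_module_type` property is what lets us
    # recover the matching sharder.
    sharder = sharder_map[sharded_module.unsharded_module_type]
    # ... hand `sharded_module`, `sharder`, and `new_plan` to the actual
    # resharding logic (out of scope for this sketch).
    return sharder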

File tree: 4 files changed (+48, -2 lines)


torchrec/distributed/embedding_types.py (+22, -1)

@@ -11,7 +11,18 @@
 import copy
 from dataclasses import dataclass
 from enum import Enum, unique
-from typing import Any, Dict, Generic, Iterator, List, Optional, Tuple, TypeVar, Union
+from typing import (
+    Any,
+    Dict,
+    Generic,
+    Iterator,
+    List,
+    Optional,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+)
 
 import torch
 from fbgemm_gpu.split_table_batched_embeddings_ops_training import EmbeddingLocation
@@ -399,6 +410,16 @@ def train(self, mode: bool = True):  # pyre-ignore[3]
 
         return self
 
+    @property
+    def unsharded_module_type(self) -> Type[nn.Module]:
+        """
+        As this is the generic ShardedEmbeddingModule class, simply
+        return the generic nn.Module type. In the inherited classes of
+        ShardedEmbeddingModule, the specific unsharded module type will
+        be returned.
+        """
+        return nn.Module
+
 
 M = TypeVar("M", bound=nn.Module)

torchrec/distributed/embeddingbag.py (+8, -0)

@@ -1627,6 +1627,10 @@ def create_context(self) -> EmbeddingBagCollectionContext:
     def extend_shard_name(shard_name: str) -> str:
         return f"embedding_bags.{shard_name}.weight"
 
+    @property
+    def unsharded_module_type(self) -> Type[EmbeddingBagCollection]:
+        return EmbeddingBagCollection
+
 
 class EmbeddingBagCollectionSharder(BaseEmbeddingSharder[EmbeddingBagCollection]):
     """
@@ -1916,6 +1920,10 @@ def fused_optimizer(self) -> KeyedOptimizer:
     def create_context(self) -> NullShardedModuleContext:
         return NullShardedModuleContext()
 
+    @property
+    def unsharded_module_type(self) -> Type[nn.EmbeddingBag]:
+        return nn.EmbeddingBag
+
 
 class EmbeddingBagSharder(BaseEmbeddingSharder[nn.EmbeddingBag]):
     """

torchrec/distributed/object_pool.py (+5, -1)

@@ -8,7 +8,7 @@
 # pyre-strict
 
 from abc import abstractmethod
-from typing import Generic
+from typing import Generic, Type
 
 import torch
 from torch._prims_common import is_integer_dtype
@@ -144,3 +144,7 @@ def compute(self, ctx: ShrdCtx, dist_input: torch.Tensor) -> DistOut:
     # `None`.
     def output_dist(self, ctx: ShrdCtx, output: DistOut) -> LazyAwaitable[Out]:
         pass
+
+    @property
+    def unsharded_module_type(self) -> Type[ObjectPool[Out]]:
+        return ObjectPool[Out]

torchrec/distributed/types.py (+13, -0)

@@ -1034,6 +1034,19 @@ def sharded_parameter_names(self, prefix: str = "") -> Iterator[str]:
         for key, _ in self.named_parameters(prefix):
             yield key
 
+    @property
+    @abc.abstractmethod
+    def unsharded_module_type(self) -> Type[nn.Module]:
+        """
+        This property is added as part of the dynamic sharding implementation.
+
+        When resharding an already-sharded module wrapped in DMP, the unsharded
+        module type is needed to identify the proper sharder to use. This is
+        because DistributedModelParallel (DMP) references module sharders by
+        their unsharded module type.
+        """
+        ...
+
 
 def get_tensor_size_bytes(t: torch.Tensor) -> int:
     b: int = t.numel() * t.element_size()
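
A self-contained toy (not TorchRec code) showing the contract this abstract property creates: any concrete sharded-module class must now declare its unsharded counterpart, as the EmbeddingBagCollection change above does. Class names here are made up.

# Toy illustration of the abstract-property contract; class names are invented.
import abc
from typing import Type

import torch.nn as nn


class ShardedBase(abc.ABC):
    @property
    @abc.abstractmethod
    def unsharded_module_type(self) -> Type[nn.Module]:
        ...


class ShardedLinearLike(ShardedBase):
    @property
    def unsharded_module_type(self) -> Type[nn.Module]:
        return nn.Linear


# ShardedBase() raises TypeError (abstract property not implemented);
# ShardedLinearLike().unsharded_module_type is nn.Linear.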
