diff --git a/docs/source/data.rst b/docs/source/data.rst
index 3dffeb8977..c95659bc6e 100644
--- a/docs/source/data.rst
+++ b/docs/source/data.rst
@@ -160,6 +160,9 @@ DistributedSampler
 ~~~~~~~~~~~~~~~~~~
 .. autoclass:: monai.data.DistributedSampler
 
+DistributedWeightedRandomSampler
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: monai.data.DistributedWeightedRandomSampler
 
 Decathlon Datalist
 ~~~~~~~~~~~~~~~~~~
diff --git a/monai/data/__init__.py b/monai/data/__init__.py
index 9fa5c935e2..54beb53e3f 100644
--- a/monai/data/__init__.py
+++ b/monai/data/__init__.py
@@ -30,10 +30,10 @@
 from .nifti_writer import write_nifti
 from .png_saver import PNGSaver
 from .png_writer import write_png
+from .samplers import DistributedSampler, DistributedWeightedRandomSampler
 from .synthetic import create_test_image_2d, create_test_image_3d
 from .thread_buffer import ThreadBuffer
 from .utils import (
-    DistributedSampler,
     compute_importance_map,
     compute_shape_offset,
     correct_nifti_header_if_necessary,
diff --git a/monai/data/samplers.py b/monai/data/samplers.py
new file mode 100644
index 0000000000..5fea6959de
--- /dev/null
+++ b/monai/data/samplers.py
@@ -0,0 +1,122 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Optional, Sequence
+
+import torch
+from torch.utils.data import Dataset
+from torch.utils.data import DistributedSampler as _TorchDistributedSampler
+
+__all__ = ["DistributedSampler", "DistributedWeightedRandomSampler"]
+
+
+class DistributedSampler(_TorchDistributedSampler):
+    """
+    Enhance PyTorch DistributedSampler to support non-evenly divisible sampling.
+
+    Args:
+        dataset: Dataset used for sampling.
+        even_divisible: if False, different ranks can have different data length.
+            for example, input data: [1, 2, 3, 4, 5], rank 0: [1, 3, 5], rank 1: [2, 4].
+        num_replicas: number of processes participating in distributed training.
+            by default, `world_size` is retrieved from the current distributed group.
+        rank: rank of the current process within `num_replicas`. by default,
+            `rank` is retrieved from the current distributed group.
+        shuffle: if `True`, sampler will shuffle the indices, default to True.
+        kwargs: additional arguments for `DistributedSampler` super class, can be `seed` and `drop_last`.
+
+    More information about DistributedSampler, please check:
+    https://github.com/pytorch/pytorch/blob/master/torch/utils/data/distributed.py
+
+    """
+
+    def __init__(
+        self,
+        dataset: Dataset,
+        even_divisible: bool = True,
+        num_replicas: Optional[int] = None,
+        rank: Optional[int] = None,
+        shuffle: bool = True,
+        **kwargs,
+    ):
+        super().__init__(dataset=dataset, num_replicas=num_replicas, rank=rank, shuffle=shuffle, **kwargs)
+
+        if not even_divisible:
+            data_len = len(dataset)  # type: ignore
+            extra_size = self.total_size - data_len
+            if self.rank + extra_size >= self.num_replicas:
+                self.num_samples -= 1
+            self.total_size = data_len
+
+
+class DistributedWeightedRandomSampler(DistributedSampler):
+    """
+    Extend the `DistributedSampler` to support weighted sampling.
+    Refer to `torch.utils.data.WeightedRandomSampler`, for more details please check:
+    https://github.com/pytorch/pytorch/blob/master/torch/utils/data/sampler.py#L150
+
+    Args:
+        dataset: Dataset used for sampling.
+        weights: a sequence of weights, not necessary summing up to one, length should exactly
+            match the full dataset.
+        num_samples_per_rank: number of samples to draw for every rank, sample from
+            the distributed subset of dataset.
+            if None, default to the length of dataset split by DistributedSampler.
+        replacement: if ``True``, samples are drawn with replacement, otherwise, they are
+            drawn without replacement, which means that when a sample index is drawn for a row,
+            it cannot be drawn again for that row, default to True.
+        generator: PyTorch Generator used in sampling.
+        even_divisible: if False, different ranks can have different data length.
+            for example, input data: [1, 2, 3, 4, 5], rank 0: [1, 3, 5], rank 1: [2, 4].'
+        num_replicas: number of processes participating in distributed training.
+            by default, `world_size` is retrieved from the current distributed group.
+        rank: rank of the current process within `num_replicas`. by default,
+            `rank` is retrieved from the current distributed group.
+        shuffle: if `True`, sampler will shuffle the indices, default to True.
+        kwargs: additional arguments for `DistributedSampler` super class, can be `seed` and `drop_last`.
+
+    """
+
+    def __init__(
+        self,
+        dataset: Dataset,
+        weights: Sequence[float],
+        num_samples_per_rank: Optional[int] = None,
+        replacement: bool = True,
+        generator: Optional[torch.Generator] = None,
+        even_divisible: bool = True,
+        num_replicas: Optional[int] = None,
+        rank: Optional[int] = None,
+        shuffle: bool = True,
+        **kwargs,
+    ):
+        super().__init__(
+            dataset=dataset,
+            even_divisible=even_divisible,
+            num_replicas=num_replicas,
+            rank=rank,
+            shuffle=shuffle,
+            **kwargs,
+        )
+        self.weights = weights
+        self.num_samples_per_rank = num_samples_per_rank
+        self.replacement = replacement
+        self.generator = generator
+
+    def __iter__(self):
+        indices = list(super().__iter__())
+        num_samples = self.num_samples_per_rank if self.num_samples_per_rank is not None else self.num_samples
+        weights = torch.as_tensor([self.weights[i] for i in indices], dtype=torch.double)
+        # sample based on the provided weights
+        rand_tensor = torch.multinomial(weights, num_samples, self.replacement, generator=self.generator)
+
+        for i in rand_tensor:
+            yield indices[i]
diff --git a/monai/data/utils.py b/monai/data/utils.py
index 2e2f8c00cb..1db2f6676f 100644
--- a/monai/data/utils.py
+++ b/monai/data/utils.py
@@ -22,7 +22,6 @@
 
 import numpy as np
 import torch
-from torch.utils.data import DistributedSampler as _TorchDistributedSampler
 from torch.utils.data._utils.collate import default_collate
 
 from monai.networks.layers.simplelayers import GaussianFilter
@@ -61,7 +60,6 @@
     "partition_dataset",
     "partition_dataset_classes",
     "select_cross_validation_folds",
-    "DistributedSampler",
     "json_hashing",
     "pickle_hashing",
     "sorted_dict",
@@ -921,30 +919,6 @@ def select_cross_validation_folds(partitions: Sequence[Iterable], folds: Union[S
     return [data_item for fold_id in ensure_tuple(folds) for data_item in partitions[fold_id]]
 
 
-class DistributedSampler(_TorchDistributedSampler):
-    """
-    Enhance PyTorch DistributedSampler to support non-evenly divisible sampling.
-
-    Args:
-        even_divisible: if False, different ranks can have different data length.
-        for example, input data: [1, 2, 3, 4, 5], rank 0: [1, 3, 5], rank 1: [2, 4].
-
-    More information about DistributedSampler, please check:
-    https://github.com/pytorch/pytorch/blob/master/torch/utils/data/distributed.py
-
-    """
-
-    def __init__(self, even_divisible: bool = True, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        if not even_divisible:
-            data_len = len(kwargs["dataset"])
-            extra_size = self.total_size - data_len
-            if self.rank + extra_size >= self.num_replicas:
-                self.num_samples -= 1
-            self.total_size = data_len
-
-
 def json_hashing(item) -> bytes:
     """
 
diff --git a/tests/test_distributed_weighted_random_sampler.py b/tests/test_distributed_weighted_random_sampler.py
new file mode 100644
index 0000000000..6e27e78d4c
--- /dev/null
+++ b/tests/test_distributed_weighted_random_sampler.py
@@ -0,0 +1,64 @@
+# Copyright 2020 - 2021 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import torch
+import torch.distributed as dist
+
+from monai.data import DistributedWeightedRandomSampler
+from tests.utils import DistCall, DistTestCase
+
+
+class DistributedWeightedRandomSamplerTest(DistTestCase):
+    @DistCall(nnodes=1, nproc_per_node=2)
+    def test_replacement(self):
+        data = [1, 2, 3, 4, 5]
+        weights = [1, 2, 3, 4, 5]
+        sampler = DistributedWeightedRandomSampler(
+            weights=weights,
+            replacement=True,
+            dataset=data,
+            shuffle=False,
+            generator=torch.Generator().manual_seed(0),
+        )
+        samples = np.array([data[i] for i in list(sampler)])
+
+        if dist.get_rank() == 0:
+            np.testing.assert_allclose(samples, np.array([5, 5, 5]))
+
+        if dist.get_rank() == 1:
+            np.testing.assert_allclose(samples, np.array([1, 4, 4]))
+
+    @DistCall(nnodes=1, nproc_per_node=2)
+    def test_num_samples(self):
+        data = [1, 2, 3, 4, 5]
+        weights = [1, 2, 3, 4, 5]
+        sampler = DistributedWeightedRandomSampler(
+            weights=weights,
+            num_samples_per_rank=5,
+            replacement=True,
+            dataset=data,
+            shuffle=False,
+            generator=torch.Generator().manual_seed(123),
+        )
+        samples = np.array([data[i] for i in list(sampler)])
+
+        if dist.get_rank() == 0:
+            np.testing.assert_allclose(samples, np.array([3, 1, 5, 1, 5]))
+
+        if dist.get_rank() == 1:
+            np.testing.assert_allclose(samples, np.array([4, 2, 4, 2, 4]))
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/test_rotated.py b/tests/test_rotated.py
index 82bc4aed40..e0c1a27e98 100644
--- a/tests/test_rotated.py
+++ b/tests/test_rotated.py
@@ -79,7 +79,8 @@ def test_correct_results(self, angle, keep_size, mode, padding_mode, align_corne
         expected = scipy.ndimage.rotate(
             self.imt[0, 0], np.rad2deg(angle), (0, 2), not keep_size, order=_order, mode=_mode, prefilter=False
         )
-        np.testing.assert_allclose(expected.astype(np.float32), rotated["img"][0], atol=1e-3)
+        good = np.sum(np.isclose(expected.astype(np.float32), rotated["img"][0], atol=1e-3))
+        self.assertLessEqual(np.abs(good - expected.size), 5, "diff at most 5 voxels.")
 
         expected = scipy.ndimage.rotate(
             self.segn[0, 0], np.rad2deg(angle), (0, 2), not keep_size, order=0, mode=_mode, prefilter=False