diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml
index bc5d667a52..b6ed2274ee 100644
--- a/.github/workflows/cron.yml
+++ b/.github/workflows/cron.yml
@@ -62,7 +62,7 @@ jobs:
     if: github.repository == 'Project-MONAI/MONAI'
     strategy:
       matrix:
-        container: ["pytorch:21.02", "pytorch:21.10"]  # 21.02 for backward comp.
+        container: ["pytorch:21.02", "pytorch:22.02"]  # 21.02 for backward comp.
     container:
       image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
       options: "--gpus all"
@@ -106,7 +106,7 @@ jobs:
     if: github.repository == 'Project-MONAI/MONAI'
     strategy:
       matrix:
-        container: ["pytorch:21.02", "pytorch:21.10"]  # 21.02 for backward comp.
+        container: ["pytorch:21.02", "pytorch:22.02"]  # 21.02 for backward comp.
     container:
       image: nvcr.io/nvidia/${{ matrix.container }}-py3  # testing with the latest pytorch base image
       options: "--gpus all"
diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 6f3965d112..8ed9790dca 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -7,7 +7,7 @@ on:
 jobs:
   integration-py3:
     container:
-      image: nvcr.io/nvidia/pytorch:21.10-py3  # CUDA 11.4
+      image: nvcr.io/nvidia/pytorch:21.12-py3  # CUDA 11.5
       options: --gpus all
     runs-on: [self-hosted, linux, x64, common]
     steps:
@@ -34,7 +34,7 @@ jobs:
         which python
         python -m pip install --upgrade pip wheel
         python -m pip uninstall -y torch torchvision
-        python -m pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
+        python -m pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 -f https://download.pytorch.org/whl/torch_stable.html
         python -m pip install -r requirements-dev.txt
     - name: Run integration tests
       run: |
diff --git a/Dockerfile b/Dockerfile
index 2d889c8b45..4171309c70 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -11,7 +11,7 @@
 
 # To build with a different base image
 # please run `docker build` using the `--build-arg PYTORCH_IMAGE=...` flag.
-ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.10-py3
+ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.02-py3
 FROM ${PYTORCH_IMAGE}
 
 LABEL maintainer="monai.contact@gmail.com"
diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py
index a41615c908..502c6fb93b 100644
--- a/monai/data/image_reader.py
+++ b/monai/data/image_reader.py
@@ -678,7 +678,7 @@ def get_data(self, img):
         It computes `spatial_shape` and stores it in meta dict.
         When loading a list of files, they are stacked together at a new dimension as the first dimension,
         and the meta data of the first image is used to represent the output meta data.
-        Note that it will switch axis 0 and 1 after loading the array because the `HW` definition in PIL
+        Note that it will swap axes 0 and 1 after loading the array because the `HW` definition in PIL
         is different from other common medical packages.
 
         Args:
diff --git a/tests/test_integration_workers.py b/tests/test_integration_workers.py
new file mode 100644
index 0000000000..1f12f81712
--- /dev/null
+++ b/tests/test_integration_workers.py
@@ -0,0 +1,53 @@
+# Copyright (c) MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import torch
+
+from monai.data import DataLoader
+from monai.utils import set_determinism
+from tests.utils import DistTestCase, TimedCall, skip_if_no_cuda, skip_if_quick
+
+
+def run_loading_test(num_workers=50, device="cuda:0" if torch.cuda.is_available() else "cpu", pw=False):
+    """Stress-test a multi-worker DataLoader for two passes; return the last item of each pass's final batch."""
+    set_determinism(seed=0)
+    train_ds = list(range(10000))
+    train_loader = DataLoader(train_ds, batch_size=300, shuffle=True, num_workers=num_workers, persistent_workers=pw)
+    answer = []
+    for _ in range(2):  # two full passes over the same loader
+        np.testing.assert_equal(torch.cuda.memory_allocated(), 0)  # nothing may be allocated when a pass starts
+        for batch_data in train_loader:
+            x = batch_data.to(device)
+            mem = torch.cuda.memory_allocated()
+            np.testing.assert_equal(mem > 0 and mem < 5000, True)  # allocated CUDA memory must stay in (0, 5000)
+        answer.append(x[-1].item())  # last element of the final batch of this pass
+        del x  # release the last batch before the zero-allocation check of the next pass
+    return answer
+
+
+@skip_if_quick
+@skip_if_no_cuda
+class IntegrationLoading(DistTestCase):  # multi-worker DataLoader integration test
+    def tearDown(self):
+        set_determinism(seed=None)  # presumably undoes the seed=0 set in run_loading_test -- TODO confirm
+
+    @TimedCall(seconds=5000, skip_timing=not torch.cuda.is_available(), daemon=False)
+    def test_timing(self):  # runs the loading test with persistent_workers off, then on
+        for pw, expected in zip((False, True), ((6966, 7714), (6966, 4112))):
+            result = run_loading_test(pw=pw)
+            np.testing.assert_allclose(result, expected)  # expected: last batch element of each of the two passes
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/utils.py b/tests/utils.py
index 0f22f65d33..3065f9b3df 100644
--- a/tests/utils.py
+++ b/tests/utils.py
@@ -245,11 +245,20 @@ def __call__(self, obj):
         )(obj)
 
 
+def is_main_test_process():  # True only when the current process's name starts with "Main"
+    ps = torch.multiprocessing.current_process()
+    if not ps or not hasattr(ps, "name"):  # no usable process object: treat as not the main process
+        return False
+    return ps.name.startswith("Main")
+
+
 def has_cupy():
     """
     Returns True if the user has installed a version of cupy.
     """
     cp, has_cp = optional_import("cupy")
+    if not is_main_test_process():
+        return has_cp  # skip the installation check when not running in the main test process
     if not has_cp:
         return False
     try:  # test cupy installation with a basic example
@@ -258,7 +267,10 @@ def has_cupy():
         kernel = cp.ElementwiseKernel(
             "float32 x, float32 y", "float32 z", """ if (x - 2 > y) { z = x * y; } else { z = x + y; } """, "my_kernel"
         )
-        return kernel(x, y)[0, 0] == 0
+        flag = kernel(x, y)[0, 0] == 0
+        del x, y, kernel
+        cp.get_default_memory_pool().free_all_blocks()
+        return flag
     except Exception:
         return False