diff --git a/.github/workflows/cron.yml b/.github/workflows/cron.yml index bc5d667a52..b6ed2274ee 100644 --- a/.github/workflows/cron.yml +++ b/.github/workflows/cron.yml @@ -62,7 +62,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.10"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:22.02"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" @@ -106,7 +106,7 @@ jobs: if: github.repository == 'Project-MONAI/MONAI' strategy: matrix: - container: ["pytorch:21.02", "pytorch:21.10"] # 21.02 for backward comp. + container: ["pytorch:21.02", "pytorch:22.02"] # 21.02 for backward comp. container: image: nvcr.io/nvidia/${{ matrix.container }}-py3 # testing with the latest pytorch base image options: "--gpus all" diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml index 6f3965d112..8ed9790dca 100644 --- a/.github/workflows/integration.yml +++ b/.github/workflows/integration.yml @@ -7,7 +7,7 @@ on: jobs: integration-py3: container: - image: nvcr.io/nvidia/pytorch:21.10-py3 # CUDA 11.4 + image: nvcr.io/nvidia/pytorch:21.12-py3 # CUDA 11.5 options: --gpus all runs-on: [self-hosted, linux, x64, common] steps: @@ -34,7 +34,7 @@ jobs: which python python -m pip install --upgrade pip wheel python -m pip uninstall -y torch torchvision - python -m pip install torch==1.11.0+cu113 torchvision==0.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html + python -m pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 -f https://download.pytorch.org/whl/torch_stable.html python -m pip install -r requirements-dev.txt - name: Run integration tests run: | diff --git a/Dockerfile b/Dockerfile index 2d889c8b45..4171309c70 100644 --- a/Dockerfile +++ b/Dockerfile @@ -11,7 +11,7 @@ # To build with a different base image # please run `docker build` using the `--build-arg PYTORCH_IMAGE=...` flag. -ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:21.10-py3 +ARG PYTORCH_IMAGE=nvcr.io/nvidia/pytorch:22.02-py3 FROM ${PYTORCH_IMAGE} LABEL maintainer="monai.contact@gmail.com" diff --git a/monai/data/image_reader.py b/monai/data/image_reader.py index a41615c908..502c6fb93b 100644 --- a/monai/data/image_reader.py +++ b/monai/data/image_reader.py @@ -678,7 +678,7 @@ def get_data(self, img): It computes `spatial_shape` and stores it in meta dict. When loading a list of files, they are stacked together at a new dimension as the first dimension, and the meta data of the first image is used to represent the output meta data. - Note that it will switch axis 0 and 1 after loading the array because the `HW` definition in PIL + Note that it will swap axis 0 and 1 after loading the array because the `HW` definition in PIL is different from other common medical packages. Args: diff --git a/tests/test_integration_workers.py b/tests/test_integration_workers.py new file mode 100644 index 0000000000..1f12f81712 --- /dev/null +++ b/tests/test_integration_workers.py @@ -0,0 +1,53 @@ +# Copyright (c) MONAI Consortium +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +import torch + +from monai.data import DataLoader +from monai.utils import set_determinism +from tests.utils import DistTestCase, TimedCall, skip_if_no_cuda, skip_if_quick + + +def run_loading_test(num_workers=50, device="cuda:0" if torch.cuda.is_available() else "cpu", pw=False): + """multi workers stress tests""" + set_determinism(seed=0) + train_ds = list(range(10000)) + train_loader = DataLoader(train_ds, batch_size=300, shuffle=True, num_workers=num_workers, persistent_workers=pw) + answer = [] + for _ in range(2): + np.testing.assert_equal(torch.cuda.memory_allocated(), 0) + for batch_data in train_loader: + x = batch_data.to(device) + mem = torch.cuda.memory_allocated() + np.testing.assert_equal(mem > 0 and mem < 5000, True) + answer.append(x[-1].item()) + del x + return answer + + +@skip_if_quick +@skip_if_no_cuda +class IntegrationLoading(DistTestCase): + def tearDown(self): + set_determinism(seed=None) + + @TimedCall(seconds=5000, skip_timing=not torch.cuda.is_available(), daemon=False) + def test_timing(self): + for pw, expected in zip((False, True), ((6966, 7714), (6966, 4112))): + result = run_loading_test(pw=pw) + np.testing.assert_allclose(result, expected) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/utils.py b/tests/utils.py index 0f22f65d33..3065f9b3df 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -245,11 +245,20 @@ def __call__(self, obj): )(obj) +def is_main_test_process(): + ps = torch.multiprocessing.current_process() + if not ps or not hasattr(ps, "name"): + return False + return ps.name.startswith("Main") + + def has_cupy(): """ Returns True if the user has installed a version of cupy. """ cp, has_cp = optional_import("cupy") + if not is_main_test_process(): + return has_cp # skip the check if we are running in subprocess if not has_cp: return False try: # test cupy installation with a basic example @@ -258,7 +267,10 @@ def has_cupy(): kernel = cp.ElementwiseKernel( "float32 x, float32 y", "float32 z", """ if (x - 2 > y) { z = x * y; } else { z = x + y; } """, "my_kernel" ) - return kernel(x, y)[0, 0] == 0 + flag = kernel(x, y)[0, 0] == 0 + del x, y, kernel + cp.get_default_memory_pool().free_all_blocks() + return flag except Exception: return False