diff --git a/InnerEye/ML/deep_learning_config.py b/InnerEye/ML/deep_learning_config.py index 1c35926d4..ba2f42480 100644 --- a/InnerEye/ML/deep_learning_config.py +++ b/InnerEye/ML/deep_learning_config.py @@ -309,19 +309,34 @@ class DatasetParams(param.Parameterized): "AzureML. Use an empty string for all datasets where a randomly chosen mount/download point " "should be used.") + def validate(self) -> None: + if not self.azure_dataset_id and self.local_dataset is None: + raise ValueError("Either of local_dataset or azure_dataset_id must be set.") + + if self.all_dataset_mountpoints() and len(self.all_azure_dataset_ids()) != len(self.all_dataset_mountpoints()): + raise ValueError(f"Expected the number of azure datasets to equal the number of mountpoints, " + f"got datasets [{','.join(self.all_azure_dataset_ids())}] " + f"and mountpoints [{','.join(self.all_dataset_mountpoints())}]") + def all_azure_dataset_ids(self) -> List[str]: """ Returns a list with all azure dataset IDs that are specified in self.azure_dataset_id and self.extra_azure_dataset_ids """ - return [self.azure_dataset_id] + self.extra_azure_dataset_ids + if not self.azure_dataset_id: + return self.extra_azure_dataset_ids + else: + return [self.azure_dataset_id] + self.extra_azure_dataset_ids def all_dataset_mountpoints(self) -> List[str]: """ Returns a list with all dataset mount points that are specified in self.dataset_mountpoint and self.extra_dataset_mountpoints """ - return [self.dataset_mountpoint] + self.extra_dataset_mountpoints + if not self.dataset_mountpoint: + return self.extra_dataset_mountpoints + else: + return [self.dataset_mountpoint] + self.extra_dataset_mountpoints class OutputParams(param.Parameterized): @@ -628,9 +643,7 @@ def validate(self) -> None: """ WorkflowParams.validate(self) OptimizerParams.validate(self) - - if self.azure_dataset_id is None and self.local_dataset is None: - raise ValueError("Either of local_dataset or azure_dataset_id must be set.") + DatasetParams.validate(self) @property def model_category(self) -> ModelCategory: diff --git a/Tests/Common/test_segmentation_configs.py b/Tests/Common/test_segmentation_configs.py index 32a704777..f8f37d8e2 100644 --- a/Tests/Common/test_segmentation_configs.py +++ b/Tests/Common/test_segmentation_configs.py @@ -5,6 +5,7 @@ import random import string from typing import List +from pathlib import Path import pytest @@ -86,7 +87,8 @@ def test_head_and_neck_base() -> None: Check we can instantiate HeadAndNeckBase class. """ ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - config = HeadAndNeckBase(ground_truth_ids) + config = HeadAndNeckBase(local_dataset=Path("foo"), + ground_truth_ids=ground_truth_ids) assert config.ground_truth_ids == ground_truth_ids @@ -104,6 +106,7 @@ def test_head_and_neck_base_with_optional_params() -> None: slice_exclusion_rules = generate_random_slice_exclusion_rules(ground_truth_ids) summed_probability_rules = generate_random_summed_probability_rules(ground_truth_ids) config = HeadAndNeckBase( + local_dataset=Path("foo"), ground_truth_ids=ground_truth_ids, ground_truth_ids_display_names=ground_truth_ids_display_names, colours=colours, @@ -122,69 +125,39 @@ def test_head_and_neck_base_with_optional_params() -> None: assert config.summed_probability_rules == summed_probability_rules -def test_head_and_neck_base_with_invalid_slice_exclusion_rule() -> None: +@pytest.mark.parametrize(["slice_exclusion_rules", "invalid_ground_truth"], + [([SliceExclusionRule("brainstem2", "spinal_cord", False)], "brainstem2"), + ([SliceExclusionRule("brainstem", "spinal_cord2", False)], "spinal_cord2")]) +def test_head_and_neck_base_with_invalid_slice_exclusion_rule(slice_exclusion_rules: List[SliceExclusionRule], + invalid_ground_truth: str) -> None: """ Check that an invalid slice exclusion rule raises an Exception. """ ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - slice_exclusion_rules = [SliceExclusionRule("brainstem2", "spinal_cord", False)] with pytest.raises(Exception) as e: assert HeadAndNeckBase( + local_dataset=Path("foo"), ground_truth_ids=ground_truth_ids, slice_exclusion_rules=slice_exclusion_rules) - assert str(e.value) == "slice_exclusion_rules: brainstem2 not in ground truth IDs" + assert str(e.value) == f"slice_exclusion_rules: {invalid_ground_truth} not in ground truth IDs" -def test_head_and_neck_base_with_invalid_slice_exclusion_rule2() -> None: - """ - Check that an invalid slice exclusion rule raises a Exception. - """ - ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - slice_exclusion_rules = [SliceExclusionRule("brainstem", "spinal_cord2", False)] - with pytest.raises(Exception) as e: - assert HeadAndNeckBase( - ground_truth_ids=ground_truth_ids, - slice_exclusion_rules=slice_exclusion_rules) - assert str(e.value) == "slice_exclusion_rules: spinal_cord2 not in ground truth IDs" - - -def test_head_and_neck_base_with_invalid_summed_probability_rule() -> None: - """ - Check that an invalid summed probability rule raises a ValueError. - """ - ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - summed_probability_rules = [SummedProbabilityRule("spinal_cord2", "brainstem", "external")] - with pytest.raises(ValueError) as e: - assert HeadAndNeckBase( - ground_truth_ids=ground_truth_ids, - summed_probability_rules=summed_probability_rules) - assert str(e.value) == "SummedProbabilityRule.validate: spinal_cord2 not in ground truth IDs" - - -def test_head_and_neck_base_with_invalid_summed_probability_rule2() -> None: +@pytest.mark.parametrize(["summed_probability_rules", "invalid_ground_truth"], + [([SummedProbabilityRule("spinal_cord2", "brainstem", "external")], "spinal_cord2"), + ([SummedProbabilityRule("spinal_cord", "brainstem2", "external")], "brainstem2"), + ([SummedProbabilityRule("spinal_cord", "brainstem", "external2")], "external2")]) +def test_head_and_neck_base_with_invalid_summed_probability_rule( + summed_probability_rules: List[SummedProbabilityRule], invalid_ground_truth: str) -> None: """ Check that an invalid summed probability rule raises a ValueError. """ ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - summed_probability_rules = [SummedProbabilityRule("spinal_cord", "brainstem2", "external")] with pytest.raises(ValueError) as e: assert HeadAndNeckBase( + local_dataset=Path("foo"), ground_truth_ids=ground_truth_ids, summed_probability_rules=summed_probability_rules) - assert str(e.value) == "SummedProbabilityRule.validate: brainstem2 not in ground truth IDs" - - -def test_head_and_neck_base_with_invalid_summed_probability_rule3() -> None: - """ - Check that an invalid summed probability rule raises a ValueError. - """ - ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - summed_probability_rules = [SummedProbabilityRule("spinal_cord", "brainstem", "external2")] - with pytest.raises(ValueError) as e: - assert HeadAndNeckBase( - ground_truth_ids=ground_truth_ids, - summed_probability_rules=summed_probability_rules) - assert str(e.value) == "SummedProbabilityRule.validate: external2 not in ground truth IDs" + assert str(e.value) == f"SummedProbabilityRule.validate: {invalid_ground_truth} not in ground truth IDs" def test_head_and_neck_paper_with_no_ground_truth_ids() -> None: @@ -192,7 +165,7 @@ def test_head_and_neck_paper_with_no_ground_truth_ids() -> None: Check that passing num_structures = default generates all default structures. """ ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS - config = HeadAndNeckPaper() + config = HeadAndNeckPaper(local_dataset=Path("foo"),) assert config.ground_truth_ids == ground_truth_ids @@ -201,7 +174,8 @@ def test_head_and_neck_paper_with_0_ground_truth_ids() -> None: Check that passing num_structures = 0 raises ValueError exception. """ with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=0) + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=0) assert str(e.value) == f"num structures must be between 0 and {len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS)}" @@ -212,7 +186,8 @@ def test_head_and_neck_paper_with_some_ground_truth_ids( Check that passing a num_structures between 1 and len(defaults) generates the correct subset. """ ground_truth_ids = DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS[:ground_truth_count] - config = HeadAndNeckPaper(num_structures=ground_truth_count) + config = HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count) assert config.ground_truth_ids == ground_truth_ids @@ -222,7 +197,8 @@ def test_head_and_neck_paper_with_too_many_ground_truth_ids() -> None: """ ground_truth_count = len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS) + 2 with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=ground_truth_count) + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count) assert str(e.value) == f"num structures must be between 0 and {len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS)}" @@ -241,6 +217,7 @@ def test_head_and_neck_paper_with_optional_params( slice_exclusion_rules = generate_random_slice_exclusion_rules(ground_truth_ids) summed_probability_rules = generate_random_summed_probability_rules(ground_truth_ids) config = HeadAndNeckPaper( + local_dataset=Path("foo"), num_structures=ground_truth_count, ground_truth_ids_display_names=ground_truth_ids_display_names, colours=colours, @@ -266,7 +243,8 @@ def test_head_and_neck_paper_with_mismatched_display_names_raises() -> None: ground_truth_count = len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS) - 2 ground_truth_ids_display_names = generate_random_display_ids(ground_truth_count - 1) with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=ground_truth_count, + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count, ground_truth_ids_display_names=ground_truth_ids_display_names) assert str(e.value) == "len(ground_truth_ids_display_names)!=len(ground_truth_ids)" @@ -278,7 +256,9 @@ def test_head_and_neck_paper_with_mismatched_colours_raises() -> None: ground_truth_count = len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS) - 2 colours = generate_random_colours_list(RANDOM_COLOUR_GENERATOR, ground_truth_count - 1) with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=ground_truth_count, colours=colours) + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count, + colours=colours) assert str(e.value) == "len(ground_truth_ids_display_names)!=len(colours)" @@ -289,7 +269,9 @@ def test_head_and_neck_paper_with_mismatched_fill_holes_raises() -> None: ground_truth_count = len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS) - 2 fill_holes = generate_random_fill_holes(ground_truth_count - 1) with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=ground_truth_count, fill_holes=fill_holes) + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count, + fill_holes=fill_holes) assert str(e.value) == "len(ground_truth_ids_display_names)!=len(fill_holes)" @@ -300,7 +282,9 @@ def test_head_and_neck_paper_with_mismatched_class_weights_raises() -> None: ground_truth_count = len(DEFAULT_HEAD_AND_NECK_GROUND_TRUTH_IDS) - 2 class_weights = generate_random_class_weights(ground_truth_count - 1) with pytest.raises(ValueError) as e: - assert HeadAndNeckPaper(num_structures=ground_truth_count, class_weights=class_weights) + assert HeadAndNeckPaper(local_dataset=Path("foo"), + num_structures=ground_truth_count, + class_weights=class_weights) assert str(e.value) == "class_weights needs to be equal to number of ground_truth_ids + 1" @@ -309,7 +293,8 @@ def test_prostate_base() -> None: Check that ProstateBase class can be instantiated. """ ground_truth_ids = DEFAULT_PROSTATE_GROUND_TRUTH_IDS - config = ProstateBase(ground_truth_ids) + config = ProstateBase(local_dataset=Path("foo"), + ground_truth_ids=ground_truth_ids) assert config.ground_truth_ids == ground_truth_ids @@ -325,7 +310,8 @@ def test_prostate_base_with_optional_params() -> None: class_weights = generate_random_class_weights(ground_truth_count + 1) largest_connected_component_foreground_classes = DEFAULT_PROSTATE_GROUND_TRUTH_IDS[1:3] config = ProstateBase( - ground_truth_ids, + local_dataset=Path("foo"), + ground_truth_ids=ground_truth_ids, ground_truth_ids_display_names=ground_truth_ids_display_names, colours=colours, fill_holes=fill_holes, @@ -345,7 +331,7 @@ def test_prostate_paper() -> None: Check that ProstatePaper class can be instantiated. """ ground_truth_ids = DEFAULT_PROSTATE_GROUND_TRUTH_IDS - config = ProstatePaper() + config = ProstatePaper(local_dataset=Path("foo")) assert config.ground_truth_ids == ground_truth_ids @@ -361,6 +347,7 @@ def test_prostate_paper_with_optional_params() -> None: class_weights = generate_random_class_weights(ground_truth_count + 1) largest_connected_component_foreground_classes = DEFAULT_PROSTATE_GROUND_TRUTH_IDS[1:3] config = ProstatePaper( + local_dataset=Path("foo"), ground_truth_ids_display_names=ground_truth_ids_display_names, colours=colours, fill_holes=fill_holes, diff --git a/Tests/ML/models/test_instantiate_models.py b/Tests/ML/models/test_instantiate_models.py index aa7e47e45..459e254b1 100644 --- a/Tests/ML/models/test_instantiate_models.py +++ b/Tests/ML/models/test_instantiate_models.py @@ -28,7 +28,7 @@ def find_models() -> List[str]: path = namespace_to_path(ModelConfigLoader.get_default_search_module()) folders = [path / "segmentation", path / "classification", path / "regression"] names = [str(f.stem) for folder in folders for f in folder.glob("*.py") if folder.exists()] - return [name for name in names if not name.endswith("Base") and not name.startswith("__")] + return [name for name in names if not (name.endswith("Base") or name.endswith("Paper")) and not name.startswith("__")] def test_any_models_found() -> None: diff --git a/Tests/ML/pipelines/test_forward_pass.py b/Tests/ML/pipelines/test_forward_pass.py index dfdcc4e09..a95ff9d14 100644 --- a/Tests/ML/pipelines/test_forward_pass.py +++ b/Tests/ML/pipelines/test_forward_pass.py @@ -13,7 +13,8 @@ @pytest.mark.skipif(common_util.is_windows(), reason="Has issues on windows build") @pytest.mark.cpu_and_gpu -@pytest.mark.parametrize("config", [DeepLearningConfig(), LightningContainer()]) +@pytest.mark.parametrize("config", [DeepLearningConfig(should_validate=False), + LightningContainer()]) def test_use_gpu_flag(config: TrainerParams) -> None: """ Test that the use_gpu flag is set correctly on both InnerEye configs and containers. diff --git a/Tests/ML/test_config_helpers.py b/Tests/ML/test_config_helpers.py index 020f342a3..ee9ee9a25 100644 --- a/Tests/ML/test_config_helpers.py +++ b/Tests/ML/test_config_helpers.py @@ -246,7 +246,7 @@ def test_config_str() -> None: """ Check if dataframe fields are omitted from the string conversion of a config object. """ - config = DeepLearningConfig() + config = DeepLearningConfig(should_validate=False) df = DataFrame(columns=["foobar"], data=[1.0, 2.0]) config.dataset_data_frame = df s = str(config) diff --git a/Tests/ML/test_deep_learning_config.py b/Tests/ML/test_deep_learning_config.py index 64152e108..71979fad2 100644 --- a/Tests/ML/test_deep_learning_config.py +++ b/Tests/ML/test_deep_learning_config.py @@ -10,21 +10,69 @@ from InnerEye.ML.deep_learning_config import DeepLearningConfig -def test_validate() -> None: - # DeepLearningConfig cannot be initialized with neither of these these parameters set +def test_validate_dataset_params() -> None: + # DatasetParams cannot be initialized with neither of these these parameters set with pytest.raises(ValueError) as ex: - DeepLearningConfig(local_dataset=None, azure_dataset_id=None) - assert ex.value.args[0] == "Either of local_dataset or azure_dataset_id must be set." + DeepLearningConfig(local_dataset=None, azure_dataset_id="") + assert ex.value.args[0] == "Either of local_dataset or azure_dataset_id must be set." # The following should be okay DeepLearningConfig(local_dataset=Path("foo")) DeepLearningConfig(azure_dataset_id="bar") + config = DeepLearningConfig(local_dataset=Path("foo"), + azure_dataset_id="", + extra_azure_dataset_ids=[]) + assert not config.all_azure_dataset_ids() + + config = DeepLearningConfig(azure_dataset_id="foo", + extra_azure_dataset_ids=[]) + assert len(config.all_azure_dataset_ids()) == 1 + + config = DeepLearningConfig(local_dataset=Path("foo"), + azure_dataset_id="", + extra_azure_dataset_ids=["bar"]) + assert len(config.all_azure_dataset_ids()) == 1 + + config = DeepLearningConfig(azure_dataset_id="foo", + extra_azure_dataset_ids=["bar"]) + assert len(config.all_azure_dataset_ids()) == 2 + + config = DeepLearningConfig(azure_dataset_id="foo", + dataset_mountpoint="", + extra_dataset_mountpoints=[]) + assert not config.all_dataset_mountpoints() + + config = DeepLearningConfig(azure_dataset_id="foo", + dataset_mountpoint="foo", + extra_dataset_mountpoints=[]) + assert len(config.all_dataset_mountpoints()) == 1 + + config = DeepLearningConfig(azure_dataset_id="foo", + dataset_mountpoint="", + extra_dataset_mountpoints=["bar"]) + assert len(config.all_dataset_mountpoints()) == 1 + + config = DeepLearningConfig(azure_dataset_id="foo", + extra_azure_dataset_ids=["bar"], + dataset_mountpoint="foo", + extra_dataset_mountpoints=["bar"]) + assert len(config.all_dataset_mountpoints()) == 2 + + with pytest.raises(ValueError) as ex: + DeepLearningConfig(azure_dataset_id="foo", + dataset_mountpoint="foo", + extra_dataset_mountpoints=["bar"]) + assert "Expected the number of azure datasets to equal the number of mountpoints" in ex.value.args[0] + + +def test_validate_deep_learning_config() -> None: + # DeepLearningConfig cannot be initialized with both these parameters set with pytest.raises(ValueError) as ex: DeepLearningConfig(local_dataset=Path("foo"), local_weights_path=Path("foo"), weights_url="bar") - assert ex.value.args[0] == "Cannot specify both local_weights_path and weights_url." + assert ex.value.args[0] == "Cannot specify both local_weights_path and weights_url." # The following should be okay DeepLearningConfig(local_dataset=Path("foo"), local_weights_path=Path("foo")) diff --git a/Tests/ML/test_lightning_containers.py b/Tests/ML/test_lightning_containers.py index 51a852d74..d5fd8d827 100644 --- a/Tests/ML/test_lightning_containers.py +++ b/Tests/ML/test_lightning_containers.py @@ -97,7 +97,7 @@ def test_innereye_container_init() -> None: # The constructor should copy all fields that belong to either WorkflowParams or DatasetParams from the # config object to the container. for (attrib, type_) in [("weights_url", WorkflowParams), ("azure_dataset_id", DatasetParams)]: - config = ModelConfigBase() + config = ModelConfigBase(should_validate=False) assert hasattr(type_, attrib) assert hasattr(config, attrib) setattr(config, attrib, "foo") @@ -106,7 +106,7 @@ def test_innereye_container_init() -> None: def test_copied_properties() -> None: - config = ModelConfigBase() + config = ModelConfigBase(should_validate=False) # This field lives in DatasetParams config.azure_dataset_id = "foo" # This field lives in WorkflowParams