From 3729a8f93685d29617933eea009b84f379adb526 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Mon, 20 Dec 2021 16:49:42 +0000
Subject: [PATCH 01/35] fixing PandaInnereSSLMIL

---
 InnerEye/Common/fixed_paths.py                | 14 ++++++++++++++
 InnerEye/ML/Histopathology/models/deepmil.py  |  4 +++-
 .../histo_configs/classification/BaseMIL.py   | 12 +-----------
 .../classification/DeepSMILECrck.py           | 19 +++++++++++++++++++
 .../classification/DeepSMILEPanda.py          |  8 +++++---
 InnerEye/ML/configs/histo_configs/run_ids.py  |  1 +
 hi-ml                                         |  2 +-
 7 files changed, 44 insertions(+), 16 deletions(-)

diff --git a/InnerEye/Common/fixed_paths.py b/InnerEye/Common/fixed_paths.py
index ceefc0f57..183f50d76 100755
--- a/InnerEye/Common/fixed_paths.py
+++ b/InnerEye/Common/fixed_paths.py
@@ -25,6 +25,20 @@ def repository_root_directory(path: Optional[PathOrString] = None) -> Path:
         return root
 
 
+def repository_parent_directory(path: Optional[PathOrString] = None) -> Path:
+    """
+    Gets the full path to the parent directory that holds the present repository.
+    :param path: if provided, a relative path to append to the absolute path to the repository root.
+    :return: The full path to the repository's root directory, with symlinks resolved if any.
+    """
+    current = Path(__file__)
+    root = current.parent.parent.parent.parent
+    if path:
+        return root / path
+    else:
+        return root
+
+
 INNEREYE_PACKAGE_NAME = "InnerEye"
 # Child paths to include in a registered model that live outside InnerEye/.
 ENVIRONMENT_YAML_FILE_NAME = "environment.yml"
diff --git a/InnerEye/ML/Histopathology/models/deepmil.py b/InnerEye/ML/Histopathology/models/deepmil.py
index e42eacb84..424ac854d 100644
--- a/InnerEye/ML/Histopathology/models/deepmil.py
+++ b/InnerEye/ML/Histopathology/models/deepmil.py
@@ -105,7 +105,9 @@ def get_classifier(self) -> Callable:
 
     def get_loss(self) -> Callable:
         if self.n_classes > 1:
-            return nn.CrossEntropyLoss(weight=self.class_weights)
+            if self.class_weights is not None:
+                class_weights = self.class_weights.float()
+            return nn.CrossEntropyLoss(weight=class_weights)
         else:
             pos_weight = None
             if self.class_weights is not None:
diff --git a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
index 29ce29da8..155a09808 100644
--- a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
+++ b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
@@ -7,7 +7,6 @@
 It is responsible for instantiating the encoder and full DeepMIL model. Subclasses should define
 their datamodules and configure experiment-specific parameters.
 """
-import os
 from pathlib import Path
 from typing import Type
 
@@ -15,9 +14,7 @@
 from torch import nn
 from torchvision.models.resnet import resnet18
 
-from health_azure.utils import CheckpointDownloader, get_workspace
 from health_ml.networks.layers.attention_layers import AttentionLayer, GatedAttentionLayer
-from InnerEye.Common import fixed_paths
 from InnerEye.ML.lightning_container import LightningContainer
 from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, TilesDataModule
 from InnerEye.ML.Histopathology.models.deepmil import DeepMILModule
@@ -56,14 +53,7 @@ def cache_dir(self) -> Path:
 
     def setup(self) -> None:
         if self.encoder_type == InnerEyeSSLEncoder.__name__:
-            self.downloader = CheckpointDownloader(
-                aml_workspace=get_workspace(),
-                run_id="updated_transforms:updated_transforms_1636471522_5473e3ff",
-                checkpoint_filename="best_checkpoint.ckpt",
-                download_dir='outputs/'
-            )
-            os.chdir(fixed_paths.repository_root_directory())
-            self.downloader.download_checkpoint_if_necessary()
+            raise NotImplementedError("InnerEyeSSLEncoder requires a pre-trained checkpoint.")
 
         self.encoder = self.get_encoder()
         self.encoder.cuda()
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
index df2394f68..b60ff1486 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
@@ -15,11 +15,14 @@
 """
 from pathlib import Path
 from typing import Any, Dict
+import os
 
 from monai.transforms import Compose
 from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint
 
 from health_ml.networks.layers.attention_layers import GatedAttentionLayer
+from health_azure.utils import get_workspace
+from health_azure.utils import CheckpointDownloader
 from InnerEye.Common import fixed_paths
 from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
 from InnerEye.ML.Histopathology.datamodules.base_module import TilesDataModule
@@ -87,6 +90,22 @@ def cache_dir(self) -> Path:
             f"/tmp/innereye_cache1/{self.__class__.__name__}-{self.encoder_type}/"
         )
 
+    def setup(self) -> None:
+        if self.encoder_type == InnerEyeSSLEncoder.__name__:
+            self.downloader = CheckpointDownloader(
+                azure_config_json_path=get_workspace(),
+                run_id="updated_transforms:updated_transforms_1636471522_5473e3ff",
+                checkpoint_filename="best_checkpoint.ckpt",
+                download_dir="outputs/",
+                remote_checkpoint_dir=Path("outputs/checkpoints")
+            )
+            os.chdir(fixed_paths.repository_root_directory().parent)
+            self.downloader.download_checkpoint_if_necessary()
+
+        self.encoder = self.get_encoder()
+        self.encoder.cuda()
+        self.encoder.eval()
+
     def get_data_module(self) -> TilesDataModule:
         image_key = TcgaCrck_TilesDataset.IMAGE_COLUMN
         transform = Compose(
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 17f61921f..4f3a32de3 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -27,7 +27,7 @@
     InnerEyeSSLEncoder,
 )
 from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
-from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint
+from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_binary
 
 
 class DeepSMILEPanda(BaseMIL):
@@ -43,6 +43,7 @@ def __init__(self, **kwargs: Any) -> None:
             num_epochs=200,
             recovery_checkpoint_save_interval=10,
             recovery_checkpoints_save_last_k=-1,
+            # use_mixed_precision = True,
             # declared in WorkflowParams:
             number_of_cross_validation_splits=5,
             cross_validation_split_index=0,
@@ -78,11 +79,12 @@ def setup(self) -> None:
         if self.encoder_type == InnerEyeSSLEncoder.__name__:
             self.downloader = CheckpointDownloader(
                 azure_config_json_path=get_workspace(),
-                run_recovery_id=innereye_ssl_checkpoint,
+                run_id=innereye_ssl_checkpoint_binary,  # innereye_ssl_checkpoint
                 checkpoint_filename="last.ckpt",
                 download_dir="outputs/",
+                remote_checkpoint_dir=Path("outputs/checkpoints")
             )
-            os.chdir(fixed_paths.repository_root_directory())
+            os.chdir(fixed_paths.repository_root_directory().parent)
             self.downloader.download_checkpoint_if_necessary()
         self.encoder = self.get_encoder()
         self.encoder.cuda()
diff --git a/InnerEye/ML/configs/histo_configs/run_ids.py b/InnerEye/ML/configs/histo_configs/run_ids.py
index 0c9ca2163..205918c5f 100644
--- a/InnerEye/ML/configs/histo_configs/run_ids.py
+++ b/InnerEye/ML/configs/histo_configs/run_ids.py
@@ -1 +1,2 @@
 innereye_ssl_checkpoint = "hsharma_panda_explore:hsharma_panda_explore_1638437076_357167ae"
+innereye_ssl_checkpoint_binary = "hsharma_panda_tiles_ssl:hsharma_panda_tiles_ssl_1639766433_161e03b9"
diff --git a/hi-ml b/hi-ml
index 38fd68557..17373323c 160000
--- a/hi-ml
+++ b/hi-ml
@@ -1 +1 @@
-Subproject commit 38fd685579749e6c5e5f8c199c76c5854394421c
+Subproject commit 17373323cceb649ee070676c333e4448a4112efa

From 2d927a3e57c811ffe5e4286ef8de272e5dfd3853 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 21 Dec 2021 14:09:54 +0000
Subject: [PATCH 02/35] updating checkpoint downloader

---
 .../configs/histo_configs/classification/DeepSMILEPanda.py  | 6 +++---
 hi-ml                                                       | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 4f3a32de3..9afdea305 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -27,7 +27,6 @@
     InnerEyeSSLEncoder,
 )
 from InnerEye.ML.configs.histo_configs.classification.BaseMIL import BaseMIL
-from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_binary
 
 
 class DeepSMILEPanda(BaseMIL):
@@ -77,10 +76,11 @@ def cache_dir(self) -> Path:
 
     def setup(self) -> None:
         if self.encoder_type == InnerEyeSSLEncoder.__name__:
+            from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_binary
             self.downloader = CheckpointDownloader(
-                azure_config_json_path=get_workspace(),
+                aml_workspace=get_workspace(),
                 run_id=innereye_ssl_checkpoint_binary,  # innereye_ssl_checkpoint
-                checkpoint_filename="last.ckpt",
+                checkpoint_filename="best_checkpoint.ckpt",  # "last.ckpt",
                 download_dir="outputs/",
                 remote_checkpoint_dir=Path("outputs/checkpoints")
             )
diff --git a/hi-ml b/hi-ml
index 17373323c..49fbd3967 160000
--- a/hi-ml
+++ b/hi-ml
@@ -1 +1 @@
-Subproject commit 17373323cceb649ee070676c333e4448a4112efa
+Subproject commit 49fbd3967cc45b1713fc852d051ece928eb100f4

From b8fd6c87fd91e02c2a3613644686c0dd85e46552 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 21 Dec 2021 18:57:07 +0000
Subject: [PATCH 03/35] checkpoint for inference found

---
 .../classification/DeepSMILEPanda.py          | 23 +++++++++++++++----
 InnerEye/ML/deep_learning_config.py           |  1 +
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 9afdea305..ca2bf5c47 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -15,6 +15,7 @@
 from InnerEye.Common import fixed_paths
 from InnerEye.ML.Histopathology.datamodules.panda_module import PandaTilesDataModule
 from InnerEye.ML.Histopathology.datasets.panda_tiles_dataset import PandaTilesDataset
+from InnerEye.ML.common import get_best_checkpoint_path
 
 from InnerEye.ML.Histopathology.models.transforms import (
     EncodeTilesBatchd,
@@ -120,11 +121,23 @@ def get_path_to_best_checkpoint(self) -> Path:
         was applied there.
         """
         # absolute path is required for registering the model.
-        return (
-            fixed_paths.repository_root_directory()
-            / self.checkpoint_folder_path
-            / self.best_checkpoint_filename_with_suffix
-        )
+        absolute_checkpoint_path = Path(fixed_paths.repository_root_directory(),
+                                        self.checkpoint_folder_path,
+                                        self.best_checkpoint_filename_with_suffix)
+        if absolute_checkpoint_path.is_file():
+            return absolute_checkpoint_path
+
+        absolute_checkpoint_path_parent = Path(fixed_paths.repository_root_directory().parent,
+                                    self.checkpoint_folder_path,
+                                    self.best_checkpoint_filename_with_suffix)
+        if absolute_checkpoint_path_parent.is_file():
+            return absolute_checkpoint_path_parent
+
+        checkpoint_path = get_best_checkpoint_path(self.checkpoint_folder_path)
+        if checkpoint_path.is_file():
+            return checkpoint_path
+
+        raise ValueError("Path to best checkpoint not found")
 
 
 class PandaImageNetMIL(DeepSMILEPanda):
diff --git a/InnerEye/ML/deep_learning_config.py b/InnerEye/ML/deep_learning_config.py
index 7b3d2d329..42e820fc7 100644
--- a/InnerEye/ML/deep_learning_config.py
+++ b/InnerEye/ML/deep_learning_config.py
@@ -475,6 +475,7 @@ def logs_folder(self) -> Path:
     @property
     def checkpoint_folder(self) -> Path:
         """Gets the full path in which the model checkpoints should be stored during training."""
+        print(f"Expected Checkpoint path {self.outputs_folder / CHECKPOINT_FOLDER}")
         return self.outputs_folder / CHECKPOINT_FOLDER
 
     @property

From 7c8e7d2507dafb4f65349ed03f4732fba80455f6 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 21 Dec 2021 18:59:13 +0000
Subject: [PATCH 04/35] fixing outputs paths

---
 InnerEye/ML/Histopathology/models/deepmil.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/InnerEye/ML/Histopathology/models/deepmil.py b/InnerEye/ML/Histopathology/models/deepmil.py
index 424ac854d..1367e8459 100644
--- a/InnerEye/ML/Histopathology/models/deepmil.py
+++ b/InnerEye/ML/Histopathology/models/deepmil.py
@@ -265,9 +265,9 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:  # type: ignore
             list_slide_dicts.append(slide_dict)
             list_encoded_features.append(results[ResultsKey.IMAGE][slide_idx])
 
-        print(f"Metrics results will be output to {fixed_paths.repository_root_directory()}/outputs")
-        csv_filename = fixed_paths.repository_root_directory() / Path('outputs/test_output.csv')
-        encoded_features_filename = fixed_paths.repository_root_directory() / Path('outputs/test_encoded_features.pickle')
+        print(f"Metrics results will be output to {fixed_paths.repository_parent_directory()}/outputs")
+        csv_filename = fixed_paths.repository_parent_directory() / Path('outputs/test_output.csv')
+        encoded_features_filename = fixed_paths.repository_parent_directory() / Path('outputs/test_encoded_features.pickle')
 
         # Collect the list of dictionaries in a list of pandas dataframe and save
         df_list = []
@@ -290,7 +290,7 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:  # type: ignore
 
         for key in report_cases.keys():
             print(f"Plotting {key} ...")
-            output_path = Path(fixed_paths.repository_root_directory(), f'outputs/fig/{key}/')
+            output_path = Path(fixed_paths.repository_parent_directory(), f'outputs/fig/{key}/')
             Path(output_path).mkdir(parents=True, exist_ok=True)
             nslides = len(report_cases[key][0])
             for i in range(nslides):
@@ -306,7 +306,7 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:  # type: ignore
 
         print("Plotting histogram ...")
         fig = plot_scores_hist(results)
-        output_path = Path(fixed_paths.repository_root_directory(), 'outputs/fig/hist_scores.png')
+        output_path = Path(fixed_paths.repository_parent_directory(), 'outputs/fig/hist_scores.png')
         fig.savefig(output_path, bbox_inches='tight')
 
     @staticmethod

From 39829b8a0d4259e0fa351d1825572627632f2689 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 09:38:02 +0000
Subject: [PATCH 05/35] fixing test

---
 InnerEye/ML/Histopathology/models/deepmil.py                    | 2 ++
 .../ML/configs/histo_configs/classification/DeepSMILEPanda.py   | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/InnerEye/ML/Histopathology/models/deepmil.py b/InnerEye/ML/Histopathology/models/deepmil.py
index 1367e8459..31d48af4c 100644
--- a/InnerEye/ML/Histopathology/models/deepmil.py
+++ b/InnerEye/ML/Histopathology/models/deepmil.py
@@ -107,6 +107,8 @@ def get_loss(self) -> Callable:
         if self.n_classes > 1:
             if self.class_weights is not None:
                 class_weights = self.class_weights.float()
+            else:
+                class_weights = self.class_weights
             return nn.CrossEntropyLoss(weight=class_weights)
         else:
             pos_weight = None
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index ca2bf5c47..757e32f4e 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -133,7 +133,7 @@ def get_path_to_best_checkpoint(self) -> Path:
         if absolute_checkpoint_path_parent.is_file():
             return absolute_checkpoint_path_parent
 
-        checkpoint_path = get_best_checkpoint_path(self.checkpoint_folder_path)
+        checkpoint_path = get_best_checkpoint_path(Path(self.checkpoint_folder_path))
         if checkpoint_path.is_file():
             return checkpoint_path
 

From 6b9ab9410997fb6d1f60a513c00846f3378c0486 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 09:41:50 +0000
Subject: [PATCH 06/35] update changelog

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef596be14..b6c8d1bb9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -75,6 +75,7 @@ gets uploaded to AzureML, by skipping all test folders.
 - ([#606](https://github.com/microsoft/InnerEye-DeepLearning/pull/606)) Bug fix: registered models do not include the hi-ml submodule
 - ([#593](https://github.com/microsoft/InnerEye-DeepLearning/pull/593)) Bug fix for hi-ml 0.1.11 issue (#130): empty mount point is turned into ".", which fails the AML job
 - ([#587](https://github.com/microsoft/InnerEye-DeepLearning/pull/587)) Bug fix for regression in AzureML's handling of environments: upgrade to hi-ml 0.1.11
+- ([#625](https://github.com/microsoft/InnerEye-DeepLearning/pull/625)) updates to PandaDeepMIL to enable the use of a SSL pre-trained checkpoint and updated commit to hi-ml
 - ([#537](https://github.com/microsoft/InnerEye-DeepLearning/pull/537)) Print warning if inference is disabled but comparison requested.
 - ([#567](https://github.com/microsoft/InnerEye-DeepLearning/pull/567)) fix pillow version.
 - ([#546](https://github.com/microsoft/InnerEye-DeepLearning/pull/546)) Environment and hello_world_model documentation updated

From c932ef7c37106faf9a8a97a9368c8c584758c35b Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 14:48:21 +0000
Subject: [PATCH 07/35] implementing PR feedback, thanks Anton and Daniel

---
 InnerEye/Common/fixed_paths.py                |  3 +--
 InnerEye/ML/Histopathology/models/deepmil.py  | 27 ++++++++++---------
 .../classification/DeepSMILECrck.py           |  2 +-
 .../classification/DeepSMILEPanda.py          |  4 +--
 4 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/InnerEye/Common/fixed_paths.py b/InnerEye/Common/fixed_paths.py
index 183f50d76..b0566b896 100755
--- a/InnerEye/Common/fixed_paths.py
+++ b/InnerEye/Common/fixed_paths.py
@@ -31,8 +31,7 @@ def repository_parent_directory(path: Optional[PathOrString] = None) -> Path:
     :param path: if provided, a relative path to append to the absolute path to the repository root.
     :return: The full path to the repository's root directory, with symlinks resolved if any.
     """
-    current = Path(__file__)
-    root = current.parent.parent.parent.parent
+    root = repository_root_directory().parent
     if path:
         return root / path
     else:
diff --git a/InnerEye/ML/Histopathology/models/deepmil.py b/InnerEye/ML/Histopathology/models/deepmil.py
index 31d48af4c..808269d8a 100644
--- a/InnerEye/ML/Histopathology/models/deepmil.py
+++ b/InnerEye/ML/Histopathology/models/deepmil.py
@@ -105,11 +105,11 @@ def get_classifier(self) -> Callable:
 
     def get_loss(self) -> Callable:
         if self.n_classes > 1:
-            if self.class_weights is not None:
-                class_weights = self.class_weights.float()
+            if self.class_weights is None:
+                return nn.CrossEntropyLoss
             else:
-                class_weights = self.class_weights
-            return nn.CrossEntropyLoss(weight=class_weights)
+                class_weights = self.class_weights.float()
+                return nn.CrossEntropyLoss(weight=class_weights)
         else:
             pos_weight = None
             if self.class_weights is not None:
@@ -267,9 +267,11 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:  # type: ignore
             list_slide_dicts.append(slide_dict)
             list_encoded_features.append(results[ResultsKey.IMAGE][slide_idx])
 
-        print(f"Metrics results will be output to {fixed_paths.repository_parent_directory()}/outputs")
-        csv_filename = fixed_paths.repository_parent_directory() / Path('outputs/test_output.csv')
-        encoded_features_filename = fixed_paths.repository_parent_directory() / Path('outputs/test_encoded_features.pickle')
+        outputs_path = fixed_paths.repository_parent_directory() / 'outputs'
+        print(f"Metrics results will be output to {outputs_path}")
+        outputs_fig_path = outputs_path / 'fig'
+        csv_filename = outputs_path / 'test_output.csv'
+        encoded_features_filename = outputs_path / 'test_encoded_features.pickle'
 
         # Collect the list of dictionaries in a list of pandas dataframe and save
         df_list = []
@@ -292,24 +294,23 @@ def test_epoch_end(self, outputs: List[Dict[str, Any]]) -> None:  # type: ignore
 
         for key in report_cases.keys():
             print(f"Plotting {key} ...")
-            output_path = Path(fixed_paths.repository_parent_directory(), f'outputs/fig/{key}/')
-            Path(output_path).mkdir(parents=True, exist_ok=True)
+            key_folder_path = outputs_fig_path / f'{key}'
+            Path(key_folder_path).mkdir(parents=True, exist_ok=True)
             nslides = len(report_cases[key][0])
             for i in range(nslides):
                 slide, score, paths, top_attn = report_cases[key][0][i]
                 fig = plot_slide_noxy(slide, score, paths, top_attn, key + '_top', ncols=4)
-                figpath = Path(output_path, f'{slide}_top.png')
+                figpath = Path(key_folder_path, f'{slide}_top.png')
                 fig.savefig(figpath, bbox_inches='tight')
 
                 slide, score, paths, bottom_attn = report_cases[key][1][i]
                 fig = plot_slide_noxy(slide, score, paths, bottom_attn, key + '_bottom', ncols=4)
-                figpath = Path(output_path, f'{slide}_bottom.png')
+                figpath = Path(key_folder_path, f'{slide}_bottom.png')
                 fig.savefig(figpath, bbox_inches='tight')
 
         print("Plotting histogram ...")
         fig = plot_scores_hist(results)
-        output_path = Path(fixed_paths.repository_parent_directory(), 'outputs/fig/hist_scores.png')
-        fig.savefig(output_path, bbox_inches='tight')
+        fig.savefig(outputs_fig_path / 'hist_scores.png', bbox_inches='tight')
 
     @staticmethod
     def normalize_dict_for_df(dict_old: Dict[str, Any], use_gpu: bool) -> Dict:
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
index b60ff1486..d060df942 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
@@ -99,7 +99,7 @@ def setup(self) -> None:
                 download_dir="outputs/",
                 remote_checkpoint_dir=Path("outputs/checkpoints")
             )
-            os.chdir(fixed_paths.repository_root_directory().parent)
+            os.chdir(fixed_paths.repository_parent_directory())
             self.downloader.download_checkpoint_if_necessary()
 
         self.encoder = self.get_encoder()
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 757e32f4e..9bdfcc811 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -85,7 +85,7 @@ def setup(self) -> None:
                 download_dir="outputs/",
                 remote_checkpoint_dir=Path("outputs/checkpoints")
             )
-            os.chdir(fixed_paths.repository_root_directory().parent)
+            os.chdir(fixed_paths.repository_parent_directory())
             self.downloader.download_checkpoint_if_necessary()
         self.encoder = self.get_encoder()
         self.encoder.cuda()
@@ -127,7 +127,7 @@ def get_path_to_best_checkpoint(self) -> Path:
         if absolute_checkpoint_path.is_file():
             return absolute_checkpoint_path
 
-        absolute_checkpoint_path_parent = Path(fixed_paths.repository_root_directory().parent,
+        absolute_checkpoint_path_parent = Path(fixed_paths.repository_parent_directory(),
                                     self.checkpoint_folder_path,
                                     self.best_checkpoint_filename_with_suffix)
         if absolute_checkpoint_path_parent.is_file():

From 7d8d86c82e6ae5ffc4177da8314d6b2523beed5b Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 16:01:23 +0000
Subject: [PATCH 08/35] typo

---
 InnerEye/ML/Histopathology/models/deepmil.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/Histopathology/models/deepmil.py b/InnerEye/ML/Histopathology/models/deepmil.py
index 808269d8a..4caf352d4 100644
--- a/InnerEye/ML/Histopathology/models/deepmil.py
+++ b/InnerEye/ML/Histopathology/models/deepmil.py
@@ -106,7 +106,7 @@ def get_classifier(self) -> Callable:
     def get_loss(self) -> Callable:
         if self.n_classes > 1:
             if self.class_weights is None:
-                return nn.CrossEntropyLoss
+                return nn.CrossEntropyLoss()
             else:
                 class_weights = self.class_weights.float()
                 return nn.CrossEntropyLoss(weight=class_weights)

From 8dc5a48f81a73052dfde4efac716195517c4895d Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 16:06:55 +0000
Subject: [PATCH 09/35] updating to latest CRCk checkpoint, new augmentations

---
 .../ML/configs/histo_configs/classification/DeepSMILECrck.py  | 4 +++-
 hi-ml                                                         | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
index d060df942..12dde68b6 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
@@ -94,7 +94,9 @@ def setup(self) -> None:
         if self.encoder_type == InnerEyeSSLEncoder.__name__:
             self.downloader = CheckpointDownloader(
                 azure_config_json_path=get_workspace(),
-                run_id="updated_transforms:updated_transforms_1636471522_5473e3ff",
+                run_id="ModifyOldSSLCheckpoint:a9259fdb-3964-4c5b-8962-4660e0b79d44",
+                #  checkpoint in RadiomicsNN "ModifyOldSSLCheckpoint:704b1af8-7c75-46ed-8460-d80a0e603194",
+                #  old checkpoint "updated_transforms:updated_transforms_1636471522_5473e3ff",
                 checkpoint_filename="best_checkpoint.ckpt",
                 download_dir="outputs/",
                 remote_checkpoint_dir=Path("outputs/checkpoints")
diff --git a/hi-ml b/hi-ml
index 49fbd3967..a33c1ed07 160000
--- a/hi-ml
+++ b/hi-ml
@@ -1 +1 @@
-Subproject commit 49fbd3967cc45b1713fc852d051ece928eb100f4
+Subproject commit a33c1ed07da8a42486dec9f939cd59eea4b2583e

From 9a010c6cd10130efd790720fbb2f2786c585582f Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Tue, 11 Jan 2022 16:11:39 +0000
Subject: [PATCH 10/35] moving checkpoint ids to file

---
 .../ML/configs/histo_configs/classification/DeepSMILECrck.py | 5 ++---
 InnerEye/ML/configs/histo_configs/run_ids.py                 | 5 +++++
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
index 12dde68b6..daaca442a 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILECrck.py
@@ -92,11 +92,10 @@ def cache_dir(self) -> Path:
 
     def setup(self) -> None:
         if self.encoder_type == InnerEyeSSLEncoder.__name__:
+            from InnerEye.ML.configs.histo_configs.run_ids import innereye_ssl_checkpoint_crck_4ws
             self.downloader = CheckpointDownloader(
                 azure_config_json_path=get_workspace(),
-                run_id="ModifyOldSSLCheckpoint:a9259fdb-3964-4c5b-8962-4660e0b79d44",
-                #  checkpoint in RadiomicsNN "ModifyOldSSLCheckpoint:704b1af8-7c75-46ed-8460-d80a0e603194",
-                #  old checkpoint "updated_transforms:updated_transforms_1636471522_5473e3ff",
+                run_id=innereye_ssl_checkpoint_crck_4ws,
                 checkpoint_filename="best_checkpoint.ckpt",
                 download_dir="outputs/",
                 remote_checkpoint_dir=Path("outputs/checkpoints")
diff --git a/InnerEye/ML/configs/histo_configs/run_ids.py b/InnerEye/ML/configs/histo_configs/run_ids.py
index 205918c5f..7692fc3e7 100644
--- a/InnerEye/ML/configs/histo_configs/run_ids.py
+++ b/InnerEye/ML/configs/histo_configs/run_ids.py
@@ -1,2 +1,7 @@
 innereye_ssl_checkpoint = "hsharma_panda_explore:hsharma_panda_explore_1638437076_357167ae"
 innereye_ssl_checkpoint_binary = "hsharma_panda_tiles_ssl:hsharma_panda_tiles_ssl_1639766433_161e03b9"
+innereye_ssl_checkpoint_crck_4ws = "ModifyOldSSLCheckpoint:a9259fdb-3964-4c5b-8962-4660e0b79d44"
+innereye_ssl_checkpoint_crck_radiomics = "ModifyOldSSLCheckpoint:704b1af8-7c75-46ed-8460-d80a0e603194"
+
+# outdated checkpoints
+# innereye_ssl_checkpoint_crck_radiomics = updated_transforms:updated_transforms_1636471522_5473e3ff

From 62e383177ea28f38c64aeb80474c634ec235764d Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 13 Jan 2022 17:39:46 +0000
Subject: [PATCH 11/35] first draft

---
 InnerEye/ML/Histopathology/models/transforms.py | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index e50d088e0..c7be5355e 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -86,19 +86,33 @@ class EncodeTilesBatchd(MapTransform):
     def __init__(self,
                  keys: KeysCollection,
                  encoder: TileEncoder,
-                 allow_missing_keys: bool = False) -> None:
+                 allow_missing_keys: bool = False,
+                 chunk_size: int = 0) -> None:
         """
         :param keys: Key(s) for the image path(s) in the input dictionary.
         :param encoder: The tile encoder to use for feature extraction.
         :param allow_missing_keys: If `False` (default), raises an exception when an input
         dictionary is missing any of the specified keys.
+        :param chunk_size: if > 0 extract features in chunks of size chunk_size
         """
         super().__init__(keys, allow_missing_keys)
         self.encoder = encoder
+        self.chunk_size = chunk_size
 
     @torch.no_grad()
     def _encode_tiles(self, images: torch.Tensor) -> torch.Tensor:
         device = next(self.encoder.parameters()).device
+        if self.chunk_size > 0:
+            embeddings = []
+            chunks = torch.split(images, self.chunk_size)
+            for chunk in chunks:
+                chunk_embeddings = self._encode_images(chunk, device)
+                embeddings.append(chunk_embeddings)
+            return torch.cat(embeddings)
+        else:
+            return self._encode_images(images, device)
+
+    def _encode_images(self, images: torch.Tensor, device: torch.device) -> torch.Tensor:
         images = images.to(device)
         embeddings = self.encoder(images)
         del images

From ffa0dc2aa5089268e4babe13ec5f7f3da4639d75 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 13 Jan 2022 17:52:29 +0000
Subject: [PATCH 12/35] extend test

---
 InnerEye/ML/Histopathology/models/transforms.py   | 3 ++-
 Tests/ML/histopathology/models/test_transforms.py | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index c7be5355e..5af607415 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -93,7 +93,7 @@ def __init__(self,
         :param encoder: The tile encoder to use for feature extraction.
         :param allow_missing_keys: If `False` (default), raises an exception when an input
         dictionary is missing any of the specified keys.
-        :param chunk_size: if > 0 extract features in chunks of size chunk_size
+        :param chunk_size: if > 0, extracts features in chunks of size chunk_size.
         """
         super().__init__(keys, allow_missing_keys)
         self.encoder = encoder
@@ -105,6 +105,7 @@ def _encode_tiles(self, images: torch.Tensor) -> torch.Tensor:
         if self.chunk_size > 0:
             embeddings = []
             chunks = torch.split(images, self.chunk_size)
+            # TODO add parallelization step here
             for chunk in chunks:
                 chunk_embeddings = self._encode_images(chunk, device)
                 embeddings.append(chunk_embeddings)
diff --git a/Tests/ML/histopathology/models/test_transforms.py b/Tests/ML/histopathology/models/test_transforms.py
index ba938169a..1039424ff 100644
--- a/Tests/ML/histopathology/models/test_transforms.py
+++ b/Tests/ML/histopathology/models/test_transforms.py
@@ -126,8 +126,10 @@ def test_cached_loading(tmp_path: Path) -> None:
 
 @pytest.mark.skipif(not os.path.isdir(TCGA_CRCK_DATASET_DIR),
                     reason="TCGA-CRCk tiles dataset is unavailable")
-@pytest.mark.parametrize('use_gpu', [False, True])
-def test_encode_tiles(tmp_path: Path, use_gpu: bool) -> None:
+@pytest.mark.parametrize('use_gpu , chunk_size',
+                         [(False, 0), (False, 2), (True, 0), (True, 2)]
+                         )
+def test_encode_tiles(tmp_path: Path, use_gpu: bool, chunk_size: int) -> None:
     tiles_dataset = TcgaCrck_TilesDataset(TCGA_CRCK_DATASET_DIR)
     image_key = tiles_dataset.IMAGE_COLUMN
     max_bag_size = 5
@@ -138,7 +140,7 @@ def test_encode_tiles(tmp_path: Path, use_gpu: bool) -> None:
     if use_gpu:
         encoder.cuda()
 
-    encode_transform = EncodeTilesBatchd(image_key, encoder)
+    encode_transform = EncodeTilesBatchd(image_key, encoder, chunk_size)
     transform = Compose([LoadTilesBatchd(image_key), encode_transform])
     dataset = Dataset(bagged_dataset, transform=transform)  # type: ignore
     sample = dataset[0]

From eff2349da39b6f6dc43a5d782ba4c5fe0cb14f77 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 14:25:18 +0000
Subject: [PATCH 13/35] all works with large batch size

---
 InnerEye/ML/Histopathology/datamodules/base_module.py | 11 +++++++----
 .../histo_configs/classification/DeepSMILEPanda.py    |  3 ++-
 Tests/ML/histopathology/models/test_transforms.py     |  2 +-
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index cb67c5506..40a5cf8c3 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -3,7 +3,7 @@
 #  Licensed under the MIT License (MIT). See LICENSE in the repo root for license information.
 #  ------------------------------------------------------------------------------------------
 
-import pickle
+import torch
 from enum import Enum
 from pathlib import Path
 from typing import Any, Callable, Optional, Sequence, Tuple, Union
@@ -90,14 +90,16 @@ def prepare_data(self) -> None:
     def _dataset_pickle_path(self, stage: str) -> Optional[Path]:
         if self.cache_dir is None:
             return None
-        return self.cache_dir / f"{stage}_dataset.pkl"
+        return self.cache_dir / f"{stage}_dataset.pt"
+        # return self.cache_dir / f"{stage}_dataset.pkl"
 
     def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool) -> Dataset:
         dataset_pickle_path = self._dataset_pickle_path(stage)
 
         if dataset_pickle_path and dataset_pickle_path.exists():
             with dataset_pickle_path.open('rb') as f:
-                return pickle.load(f)
+                return torch.load(f, map_location=torch.device('cpu'))
+                # return pickle.load(f)
 
         generator = _create_generator(self.seed)
         bag_dataset = BagDataset(tiles_dataset,  # type: ignore
@@ -115,7 +117,8 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         if dataset_pickle_path:
             dataset_pickle_path.parent.mkdir(parents=True, exist_ok=True)
             with dataset_pickle_path.open('wb') as f:
-                pickle.dump(transformed_bag_dataset, f)
+                torch.save(transformed_bag_dataset, f)
+                # pickle.dump(transformed_bag_dataset, f)
 
         return transformed_bag_dataset
 
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 9bdfcc811..9bdf970d1 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -68,6 +68,7 @@ def __init__(self, **kwargs: Any) -> None:
             mode="max",
         )
         self.callbacks = best_checkpoint_callback
+        self.enconding_chunk_size = 60
 
     @property
     def cache_dir(self) -> Path:
@@ -96,7 +97,7 @@ def get_data_module(self) -> PandaTilesDataModule:
         transform = Compose(
             [
                 LoadTilesBatchd(image_key, progress=True),
-                EncodeTilesBatchd(image_key, self.encoder),
+                EncodeTilesBatchd(image_key, self.encoder, chunk_size=self.enconding_chunk_size),
             ]
         )
         return PandaTilesDataModule(
diff --git a/Tests/ML/histopathology/models/test_transforms.py b/Tests/ML/histopathology/models/test_transforms.py
index 1039424ff..17aa8c60b 100644
--- a/Tests/ML/histopathology/models/test_transforms.py
+++ b/Tests/ML/histopathology/models/test_transforms.py
@@ -140,7 +140,7 @@ def test_encode_tiles(tmp_path: Path, use_gpu: bool, chunk_size: int) -> None:
     if use_gpu:
         encoder.cuda()
 
-    encode_transform = EncodeTilesBatchd(image_key, encoder, chunk_size)
+    encode_transform = EncodeTilesBatchd(image_key, encoder, chunk_size=chunk_size)
     transform = Compose([LoadTilesBatchd(image_key), encode_transform])
     dataset = Dataset(bagged_dataset, transform=transform)  # type: ignore
     sample = dataset[0]

From 5290945988776a4fef0c2edfb3b2444f1cf8bb1d Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 15:14:27 +0000
Subject: [PATCH 14/35] making cpu memory an option

---
 .../Histopathology/datamodules/base_module.py | 29 ++++++++++---------
 .../datamodules/test_datamodule_caching.py    |  3 +-
 2 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index 40a5cf8c3..8c06b3028 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -22,6 +22,7 @@ class CacheMode(Enum):
     NONE = 'none'
     MEMORY = 'memory'
     DISK = 'disk'
+    DISK_CPU = 'disk_cpu'
 
 
 class TilesDataModule(LightningDataModule):
@@ -47,7 +48,8 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
         transforms up to the first randomised one should be computed only once and reused in
         subsequent iterations:
           - `MEMORY`: the entire transformed dataset is kept in memory for fastest access;
-          - `DISK`: each transformed sample is saved to disk and loaded on-demand;
+          - `DISK`: each transformed sample is saved to disk and loaded into GPU memory on-demand;
+          - `DISK_CPU`: each transformed sample is saved to disk and loaded into CPU memory on-demand;
           - `NONE` (default): no caching is performed.
         :param save_precache: Whether to pre-cache the entire transformed dataset upfront and save
         it to disk. This is done once in `prepare_data()` only on the local rank-0 process, so
@@ -60,7 +62,7 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
             raise ValueError("Can only pre-cache if caching is enabled")
         if save_precache and cache_dir is None:
             raise ValueError("A cache directory is required for pre-caching")
-        if cache_mode is CacheMode.DISK and cache_dir is None:
+        if cache_mode in [CacheMode.DISK, CacheMode.DISK_CPU] and cache_dir is None:
             raise ValueError("A cache directory is required for on-disk caching")
         super().__init__()
 
@@ -87,19 +89,19 @@ def prepare_data(self) -> None:
             self._load_dataset(self.val_dataset, stage='val', shuffle=True)
             self._load_dataset(self.test_dataset, stage='test', shuffle=True)
 
-    def _dataset_pickle_path(self, stage: str) -> Optional[Path]:
+    def _dataset_pt_path(self, stage: str) -> Optional[Path]:
         if self.cache_dir is None:
             return None
         return self.cache_dir / f"{stage}_dataset.pt"
-        # return self.cache_dir / f"{stage}_dataset.pkl"
 
     def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool) -> Dataset:
-        dataset_pickle_path = self._dataset_pickle_path(stage)
+        dataset_pt_path = self._dataset_pt_path(stage)
 
-        if dataset_pickle_path and dataset_pickle_path.exists():
-            with dataset_pickle_path.open('rb') as f:
-                return torch.load(f, map_location=torch.device('cpu'))
-                # return pickle.load(f)
+        if dataset_pt_path and dataset_pt_path.exists():
+            # torch.load will reload on GPU by default, same device it was saved from
+            memory_location = torch.device('cpu') if self.cache_mode == CacheMode.DISK_CPU else None
+            with dataset_pt_path.open('rb') as f:
+                return torch.load(f, map_location=memory_location)
 
         generator = _create_generator(self.seed)
         bag_dataset = BagDataset(tiles_dataset,  # type: ignore
@@ -114,11 +116,10 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         transformed_bag_dataset = self._get_transformed_dataset(bag_dataset, transform)  # type: ignore
         generator.set_state(generator_state)
 
-        if dataset_pickle_path:
-            dataset_pickle_path.parent.mkdir(parents=True, exist_ok=True)
-            with dataset_pickle_path.open('wb') as f:
+        if dataset_pt_path:
+            dataset_pt_path.parent.mkdir(parents=True, exist_ok=True)
+            with dataset_pt_path.open('wb') as f:
                 torch.save(transformed_bag_dataset, f)
-                # pickle.dump(transformed_bag_dataset, f)
 
         return transformed_bag_dataset
 
@@ -126,7 +127,7 @@ def _get_transformed_dataset(self, base_dataset: BagDataset,
                                  transform: Union[Sequence[Callable], Callable]) -> Dataset:
         if self.cache_mode is CacheMode.MEMORY:
             dataset = CacheDataset(base_dataset, transform, num_workers=1)  # type: ignore
-        elif self.cache_mode is CacheMode.DISK:
+        elif self.cache_mode in [CacheMode.DISK, CacheMode.DISK_CPU]:
             dataset = PersistentDataset(base_dataset, transform, cache_dir=self.cache_dir)  # type: ignore
             if self.save_precache:
                 import tqdm  # TODO: Make optional
diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 41325f401..3f3e10feb 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -95,6 +95,7 @@ def _get_datamodule(cache_mode: CacheMode, save_precache: bool,
                     cache_dir_provided: bool, data_dir: Path) -> TilesDataModule:
     if (cache_mode is CacheMode.NONE and save_precache) \
             or (cache_mode is CacheMode.DISK and not cache_dir_provided) \
+            or (cache_mode is CacheMode.DISK_CPU and not cache_dir_provided) \
             or (save_precache and not cache_dir_provided):
         pytest.skip("Unsupported combination of caching arguments")
 
@@ -112,7 +113,7 @@ def _get_datamodule(cache_mode: CacheMode, save_precache: bool,
                                cache_dir=cache_dir)
 
 
-@pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
+@pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.DISK_CPU, CacheMode.NONE])
 @pytest.mark.parametrize('save_precache', [True, False])
 @pytest.mark.parametrize('cache_dir_provided', [True, False])
 def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, save_precache: bool,

From 80c6447c2ec5eeaaae1e96eb6ee2e67b9c9375dc Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 15:35:41 +0000
Subject: [PATCH 15/35] clean up chunk size parameter

---
 InnerEye/ML/configs/histo_configs/classification/BaseMIL.py | 2 ++
 .../configs/histo_configs/classification/DeepSMILEPanda.py  | 6 +++++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
index 155a09808..d9652c747 100644
--- a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
+++ b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
@@ -45,6 +45,8 @@ class BaseMIL(LightningContainer):
                                                     "'memory' (default), 'disk', or 'none'.")
     save_precache: bool = param.Boolean(True, doc="Whether to pre-cache the entire transformed "
                                                   "dataset upfront and save it to disk.")
+    enconding_chunk_size: int = param.Boolean(0, doc="If > 0 performs encoding in chunks, by loading"
+                                                     "enconding_chunk_size tiles per chunk")
     # local_dataset (used as data module root_path) is declared in DatasetParams superclass
 
     @property
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 9bdf970d1..d83f39e65 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -35,6 +35,9 @@ def __init__(self, **kwargs: Any) -> None:
         default_kwargs = dict(
             # declared in BaseMIL:
             pooling_type=GatedAttentionLayer.__name__,
+            # average number of tiles is 56 for PANDA
+            enconding_chunk_size=60,
+
             # declared in DatasetParams:
             local_dataset=Path("/tmp/datasets/PANDA_tiles"),
             azure_dataset_id="PANDA_tiles",
@@ -44,9 +47,11 @@ def __init__(self, **kwargs: Any) -> None:
             recovery_checkpoint_save_interval=10,
             recovery_checkpoints_save_last_k=-1,
             # use_mixed_precision = True,
+
             # declared in WorkflowParams:
             number_of_cross_validation_splits=5,
             cross_validation_split_index=0,
+
             # declared in OptimizerParams:
             l_rate=5e-4,
             weight_decay=1e-4,
@@ -68,7 +73,6 @@ def __init__(self, **kwargs: Any) -> None:
             mode="max",
         )
         self.callbacks = best_checkpoint_callback
-        self.enconding_chunk_size = 60
 
     @property
     def cache_dir(self) -> Path:

From 97ae3488eee61113e810561c11446f697f9a3f40 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 15:42:27 +0000
Subject: [PATCH 16/35] add TODO

---
 InnerEye/ML/Histopathology/models/transforms.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index 5af607415..cb10b8446 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -105,7 +105,7 @@ def _encode_tiles(self, images: torch.Tensor) -> torch.Tensor:
         if self.chunk_size > 0:
             embeddings = []
             chunks = torch.split(images, self.chunk_size)
-            # TODO add parallelization step here
+            # TODO parallelize encoding - keep metadata and images aligned
             for chunk in chunks:
                 chunk_embeddings = self._encode_images(chunk, device)
                 embeddings.append(chunk_embeddings)

From cd94e9f62faab092393cb9cb13acb35f42b83a5c Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 16:01:49 +0000
Subject: [PATCH 17/35] update changelog

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1c964130e..e4acf5123 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ created.
   GPU utilization via Lightning's `GpuStatsMonitor`, switch `monitor_loading` to check batch loading times via
   `BatchTimeCallback`, and `pl_profiler` to turn on the Lightning profiler (`simple`, `advanced`, or `pytorch`)
 - ([#544](https://github.com/microsoft/InnerEye-DeepLearning/pull/544)) Add documentation for segmentation model evaluation.
+- ([#637](https://github.com/microsoft/InnerEye-DeepLearning/pull/637)) Add option to encode in chunks and to load pre-cached dataset to the histo pipeline.
 - ([#465](https://github.com/microsoft/InnerEye-DeepLearning/pull/465/)) Adding ability to run segmentation inference
 module on test data with partial ground truth files. (Also [522](https://github.com/microsoft/InnerEye-DeepLearning/pull/522).)
 - ([#502](https://github.com/microsoft/InnerEye-DeepLearning/pull/502)) More flags for fine control of when to run inference.
@@ -128,7 +129,7 @@ in inference-only runs when using lightning containers.
 
 ### Deprecated
 
-- ([#633](https://github.com/microsoft/InnerEye-DeepLearning/pull/633)) Model fields `recovery_checkpoint_save_interval` and `recovery_checkpoints_save_last_k` have been retired. 
+- ([#633](https://github.com/microsoft/InnerEye-DeepLearning/pull/633)) Model fields `recovery_checkpoint_save_interval` and `recovery_checkpoints_save_last_k` have been retired.
   Recovery checkpoint handling is now controlled by `autosave_every_n_val_epochs`.
 
 

From 2498652d367bdae44dd62b68c784be4177211dcc Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 17:25:42 +0000
Subject: [PATCH 18/35] making clarity on cachemode vs precache mode

---
 .../Histopathology/datamodules/base_module.py | 37 +++++++++++++------
 .../histo_configs/classification/BaseMIL.py   | 12 +++---
 .../classification/DeepSMILEPanda.py          |  2 +-
 3 files changed, 33 insertions(+), 18 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index 8c06b3028..2e5535b8a 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -22,15 +22,18 @@ class CacheMode(Enum):
     NONE = 'none'
     MEMORY = 'memory'
     DISK = 'disk'
-    DISK_CPU = 'disk_cpu'
-
 
+class CacheLocation(Enum):
+    NONE = 'none'
+    CPU = 'cpu'
+    GPU = 'cuda'
 class TilesDataModule(LightningDataModule):
     """Base class to load the tiles of a dataset as train, val, test sets"""
 
     def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
                  seed: Optional[int] = None, transform: Optional[Callable] = None,
-                 cache_mode: CacheMode = CacheMode.NONE, save_precache: bool = False,
+                 cache_mode: CacheMode = CacheMode.NONE,
+                 precache_location: CacheLocation = CacheLocation.NONE,
                  cache_dir: Optional[Path] = None,
                  number_of_cross_validation_splits: int = 0,
                  cross_validation_split_index: int = 0) -> None:
@@ -47,22 +50,30 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
         :param cache_mode: The type of caching to perform, i.e. whether the results of all
         transforms up to the first randomised one should be computed only once and reused in
         subsequent iterations:
-          - `MEMORY`: the entire transformed dataset is kept in memory for fastest access;
-          - `DISK`: each transformed sample is saved to disk and loaded into GPU memory on-demand;
-          - `DISK_CPU`: each transformed sample is saved to disk and loaded into CPU memory on-demand;
-          - `NONE` (default): no caching is performed.
-        :param save_precache: Whether to pre-cache the entire transformed dataset upfront and save
+          - `MEMORY`: MONAI CacheDataset is used, the entire transformed dataset is kept in memory for fastest access;
+          - `DISK`: MONAI PersistentDataset is used, each transformed sample is saved to disk and loaded on-demand;
+          - `NONE` (default): standard MONAI dataset is used, no caching is performed.
+        :param precache_location: Whether to pre-cache the entire transformed dataset upfront and save
         it to disk. This is done once in `prepare_data()` only on the local rank-0 process, so
-        multiple processes can afterwards access the same cache without contention in DDP settings.
+        multiple processes can afterwards access the same cache without contention in DDP settings. 3 options are
+        available:
+          - `CPU`: each transformed sample is saved to disk and loaded into CPU memory on-demand;
+          - `GPU`: each transformed sample is saved to disk and loaded into GPU memory on-demand;
+          - `NONE (default)`: no pre-cache is performed;
         :param cache_dir: The directory onto which to cache data if caching is enabled.
         :param number_of_cross_validation_splits: Number of folds to perform.
         :param cross_validation_split_index: Index of the cross validation split to be performed.
         """
+        if precache_location == CacheLocation.NONE:
+            save_precache = None
+        else:
+            save_precache = precache_location
+
         if save_precache and cache_mode is CacheMode.NONE:
             raise ValueError("Can only pre-cache if caching is enabled")
         if save_precache and cache_dir is None:
             raise ValueError("A cache directory is required for pre-caching")
-        if cache_mode in [CacheMode.DISK, CacheMode.DISK_CPU] and cache_dir is None:
+        if cache_mode is CacheMode.DISK and cache_dir is None:
             raise ValueError("A cache directory is required for on-disk caching")
         super().__init__()
 
@@ -99,8 +110,9 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
 
         if dataset_pt_path and dataset_pt_path.exists():
             # torch.load will reload on GPU by default, same device it was saved from
-            memory_location = torch.device('cpu') if self.cache_mode == CacheMode.DISK_CPU else None
+            memory_location = torch.device('cpu') if self.save_precache == CacheLocation.CPU else None
             with dataset_pt_path.open('rb') as f:
+                print(f"Loading dataset from {dataset_pt_path} into {memory_location}")
                 return torch.load(f, map_location=memory_location)
 
         generator = _create_generator(self.seed)
@@ -116,6 +128,7 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         transformed_bag_dataset = self._get_transformed_dataset(bag_dataset, transform)  # type: ignore
         generator.set_state(generator_state)
 
+        # Dataset is saved if cache_dir is True, regardless of CacheMode
         if dataset_pt_path:
             dataset_pt_path.parent.mkdir(parents=True, exist_ok=True)
             with dataset_pt_path.open('wb') as f:
@@ -127,7 +140,7 @@ def _get_transformed_dataset(self, base_dataset: BagDataset,
                                  transform: Union[Sequence[Callable], Callable]) -> Dataset:
         if self.cache_mode is CacheMode.MEMORY:
             dataset = CacheDataset(base_dataset, transform, num_workers=1)  # type: ignore
-        elif self.cache_mode in [CacheMode.DISK, CacheMode.DISK_CPU]:
+        elif self.cache_mode is CacheMode.DISK:
             dataset = PersistentDataset(base_dataset, transform, cache_dir=self.cache_dir)  # type: ignore
             if self.save_precache:
                 import tqdm  # TODO: Make optional
diff --git a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
index 0c9f5c5a7..6a7839ac9 100644
--- a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
+++ b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
@@ -8,7 +8,7 @@
 their datamodules and configure experiment-specific parameters.
 """
 from pathlib import Path
-from typing import Type
+from typing import Type  # noqa
 
 import param
 from torch import nn
@@ -17,7 +17,7 @@
 from health_ml.networks.layers.attention_layers import AttentionLayer, GatedAttentionLayer
 from InnerEye.ML.lightning_container import LightningContainer
 from InnerEye.ML.Histopathology.datasets.base_dataset import SlidesDataset
-from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, TilesDataModule
+from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, CacheLocation, TilesDataModule
 from InnerEye.ML.Histopathology.models.deepmil import DeepMILModule
 from InnerEye.ML.Histopathology.models.encoders import (HistoSSLEncoder, IdentityEncoder,
                                                         ImageNetEncoder, ImageNetSimCLREncoder,
@@ -44,9 +44,11 @@ class BaseMIL(LightningContainer):
     cache_mode: CacheMode = param.ClassSelector(default=CacheMode.MEMORY, class_=CacheMode,
                                                 doc="The type of caching to perform: "
                                                     "'memory' (default), 'disk', or 'none'.")
-    save_precache: bool = param.Boolean(True, doc="Whether to pre-cache the entire transformed "
-                                                  "dataset upfront and save it to disk.")
-    enconding_chunk_size: int = param.Boolean(0, doc="If > 0 performs encoding in chunks, by loading"
+    precache_location: str = param.ClassSelector(default=CacheLocation.NONE, class_=CacheLocation,
+                                                 doc="Whether to pre-cache the entire transformed dataset upfront "
+                                                 "and save it to disk and if re-load in cpu or gpu. Options:"
+                                                 "`none` (default),`cpu`, `gpu`")
+    enconding_chunk_size: int = param.Integer(0, doc="If > 0 performs encoding in chunks, by loading"
                                                      "enconding_chunk_size tiles per chunk")
     # local_dataset (used as data module root_path) is declared in DatasetParams superclass
 
diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index cc6109c3b..a62e2ee38 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -115,7 +115,7 @@ def get_data_module(self) -> PandaTilesDataModule:
             batch_size=self.batch_size,
             transform=transform,
             cache_mode=self.cache_mode,
-            save_precache=self.save_precache,
+            precache_locatiom=self.precache_location,
             cache_dir=self.cache_dir,
             number_of_cross_validation_splits=self.number_of_cross_validation_splits,
             cross_validation_split_index=self.cross_validation_split_index,

From a5ce7f8895a57ac5028df94557d05d620e66e742 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 18:31:54 +0000
Subject: [PATCH 19/35] fix typo

---
 .../ML/configs/histo_configs/classification/DeepSMILEPanda.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index a62e2ee38..3a35e4d98 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -115,7 +115,7 @@ def get_data_module(self) -> PandaTilesDataModule:
             batch_size=self.batch_size,
             transform=transform,
             cache_mode=self.cache_mode,
-            precache_locatiom=self.precache_location,
+            precache_location=self.precache_location,
             cache_dir=self.cache_dir,
             number_of_cross_validation_splits=self.number_of_cross_validation_splits,
             cross_validation_split_index=self.cross_validation_split_index,

From 505635cd067d3fca8f6e128c37d3ce9685eb3331 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 19:02:58 +0000
Subject: [PATCH 20/35] update test after refactoring

---
 .../datamodules/test_datamodule_caching.py    | 24 +++++++++++--------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 3f3e10feb..a180c734c 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -13,7 +13,7 @@
 import torch
 from torch.utils.data import DataLoader
 
-from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, TilesDataModule
+from InnerEye.ML.Histopathology.datamodules.base_module import CacheMode, CacheLocation, TilesDataModule
 from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
 
 
@@ -91,11 +91,14 @@ def mock_data_dir(tmp_path: Path) -> Path:
         df.to_csv(csv_path, index=False)
     return csv_dir
 
-def _get_datamodule(cache_mode: CacheMode, save_precache: bool,
+def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
                     cache_dir_provided: bool, data_dir: Path) -> TilesDataModule:
+    if precache_location == CacheLocation.NONE:
+        save_precache = None
+    else:
+        save_precache = precache_location
     if (cache_mode is CacheMode.NONE and save_precache) \
             or (cache_mode is CacheMode.DISK and not cache_dir_provided) \
-            or (cache_mode is CacheMode.DISK_CPU and not cache_dir_provided) \
             or (save_precache and not cache_dir_provided):
         pytest.skip("Unsupported combination of caching arguments")
 
@@ -109,18 +112,19 @@ def _get_datamodule(cache_mode: CacheMode, save_precache: bool,
                                seed=0,
                                batch_size=2,
                                cache_mode=cache_mode,
-                               save_precache=save_precache,
+                               precache_location=precache_location,
                                cache_dir=cache_dir)
 
 
-@pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.DISK_CPU, CacheMode.NONE])
-@pytest.mark.parametrize('save_precache', [True, False])
+@pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
+@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, ])
+@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.GPU])
 @pytest.mark.parametrize('cache_dir_provided', [True, False])
-def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, save_precache: bool,
+def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,
                              cache_dir_provided: bool) -> None:
     # Compare two dataloaders from the same datamodule
     datamodule = _get_datamodule(cache_mode=cache_mode,
-                                 save_precache=save_precache,
+                                 precache_location=precache_location,
                                  cache_dir_provided=cache_dir_provided,
                                  data_dir=mock_data_dir)
     datamodule.prepare_data()
@@ -131,14 +135,14 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, save_pr
 
     # Compare datamodules reusing the same cache
     datamodule = _get_datamodule(cache_mode=cache_mode,
-                                 save_precache=save_precache,
+                                 precache_location=precache_location,
                                  cache_dir_provided=cache_dir_provided,
                                  data_dir=mock_data_dir)
     datamodule.prepare_data()
     train_dataloader = datamodule.train_dataloader()
 
     reloaded_datamodule = _get_datamodule(cache_mode=cache_mode,
-                                          save_precache=save_precache,
+                                          precache_location=precache_location,
                                           cache_dir_provided=cache_dir_provided,
                                           data_dir=mock_data_dir)
     reloaded_datamodule.prepare_data()

From c891dde502a5967963c8fd4be0c6a319c473a4f8 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Wed, 19 Jan 2022 19:10:35 +0000
Subject: [PATCH 21/35] update test after refactoring

---
 .../histopathology/datamodules/test_datamodule_caching.py   | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index a180c734c..00de3596f 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -152,12 +152,12 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precach
 
 
 @pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
-@pytest.mark.parametrize('save_precache', [True, False])
+@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.GPU])
 @pytest.mark.parametrize('cache_dir_provided', [True, False])
-def test_tile_id_coverage(mock_data_dir: Path, cache_mode: CacheMode, save_precache: bool,
+def test_tile_id_coverage(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,
                           cache_dir_provided: bool) -> None:
     datamodule = _get_datamodule(cache_mode=cache_mode,
-                                 save_precache=save_precache,
+                                 precache_location=precache_location,
                                  cache_dir_provided=cache_dir_provided,
                                  data_dir=mock_data_dir)
     datamodule.prepare_data()

From d1d7b272b72c4d0cf80028326a7c74701ca1a69d Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 08:47:51 +0000
Subject: [PATCH 22/35] remove typo in tests

---
 Tests/ML/histopathology/datamodules/test_datamodule_caching.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 00de3596f..c9049fb12 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -112,12 +112,11 @@ def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
                                seed=0,
                                batch_size=2,
                                cache_mode=cache_mode,
-                               precache_location=precache_location,
+                               precache_location=save_precache,
                                cache_dir=cache_dir)
 
 
 @pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
-@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, ])
 @pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.GPU])
 @pytest.mark.parametrize('cache_dir_provided', [True, False])
 def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,

From e59fef202144ce83fe5afcdb3f9f17e6a0909489 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 09:45:22 +0000
Subject: [PATCH 23/35] change optional type

---
 Tests/ML/histopathology/datamodules/test_datamodule_caching.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index c9049fb12..7de4fce04 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -112,7 +112,7 @@ def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
                                seed=0,
                                batch_size=2,
                                cache_mode=cache_mode,
-                               precache_location=save_precache,
+                               precache_location=precache_location,
                                cache_dir=cache_dir)
 
 

From 7644da56f920698dd3d6f4c0710d5f3854ddf7c4 Mon Sep 17 00:00:00 2001
From: vale-salvatelli <vale-salvatelli@users.noreply.github.com>
Date: Thu, 20 Jan 2022 09:49:19 +0000
Subject: [PATCH 24/35] Update
 InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py

Co-authored-by: Anton Schwaighofer <antonsc@microsoft.com>
---
 .../ML/configs/histo_configs/classification/DeepSMILEPanda.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index 3a35e4d98..b6f24adf0 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -106,7 +106,7 @@ def get_data_module(self) -> PandaTilesDataModule:
         transform = Compose(
             [
                 LoadTilesBatchd(image_key, progress=True),
-                EncodeTilesBatchd(image_key, self.encoder, chunk_size=self.enconding_chunk_size),
+                EncodeTilesBatchd(image_key, self.encoder, chunk_size=self.encoding_chunk_size),
             ]
         )
         return PandaTilesDataModule(

From 7cc9c8bf4494decfb12e1708eb7a846310846959 Mon Sep 17 00:00:00 2001
From: vale-salvatelli <vale-salvatelli@users.noreply.github.com>
Date: Thu, 20 Jan 2022 09:49:24 +0000
Subject: [PATCH 25/35] Update
 InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py

Co-authored-by: Anton Schwaighofer <antonsc@microsoft.com>
---
 .../ML/configs/histo_configs/classification/DeepSMILEPanda.py   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
index b6f24adf0..c72dc3f66 100644
--- a/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
+++ b/InnerEye/ML/configs/histo_configs/classification/DeepSMILEPanda.py
@@ -40,7 +40,7 @@ def __init__(self, **kwargs: Any) -> None:
             # declared in BaseMIL:
             pooling_type=GatedAttentionLayer.__name__,
             # average number of tiles is 56 for PANDA
-            enconding_chunk_size=60,
+            encoding_chunk_size=60,
 
             # declared in DatasetParams:
             local_dataset=Path("/tmp/datasets/PANDA_tiles"),

From 4550982d2934aa40727a19c45d3b5603232931a5 Mon Sep 17 00:00:00 2001
From: vale-salvatelli <vale-salvatelli@users.noreply.github.com>
Date: Thu, 20 Jan 2022 09:49:29 +0000
Subject: [PATCH 26/35] Update
 InnerEye/ML/configs/histo_configs/classification/BaseMIL.py

Co-authored-by: Anton Schwaighofer <antonsc@microsoft.com>
---
 InnerEye/ML/configs/histo_configs/classification/BaseMIL.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
index 6a7839ac9..4328de36b 100644
--- a/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
+++ b/InnerEye/ML/configs/histo_configs/classification/BaseMIL.py
@@ -48,7 +48,7 @@ class BaseMIL(LightningContainer):
                                                  doc="Whether to pre-cache the entire transformed dataset upfront "
                                                  "and save it to disk and if re-load in cpu or gpu. Options:"
                                                  "`none` (default),`cpu`, `gpu`")
-    enconding_chunk_size: int = param.Integer(0, doc="If > 0 performs encoding in chunks, by loading"
+    encoding_chunk_size: int = param.Integer(0, doc="If > 0 performs encoding in chunks, by loading"
                                                      "enconding_chunk_size tiles per chunk")
     # local_dataset (used as data module root_path) is declared in DatasetParams superclass
 

From 74616666d8bc796ebb2cbfe423fd74a20fc3422c Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 14:32:54 +0000
Subject: [PATCH 27/35] change load_image function

---
 .../ML/Histopathology/datamodules/base_module.py     |  2 +-
 .../Histopathology/datasets/panda_tiles_dataset.py   |  3 ++-
 .../datasets/tcga_crck_tiles_dataset.py              |  3 ++-
 InnerEye/ML/Histopathology/models/transforms.py      | 11 +++++++----
 InnerEye/ML/Histopathology/utils/metrics_utils.py    |  6 +++---
 InnerEye/ML/deep_learning_config.py                  |  1 -
 InnerEye/ML/utils/io_util.py                         | 12 +++++++++---
 7 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index 2e5535b8a..f47763bb9 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -108,7 +108,7 @@ def _dataset_pt_path(self, stage: str) -> Optional[Path]:
     def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool) -> Dataset:
         dataset_pt_path = self._dataset_pt_path(stage)
 
-        if dataset_pt_path and dataset_pt_path.exists():
+        if dataset_pt_path and dataset_pt_path.is_file():
             # torch.load will reload on GPU by default, same device it was saved from
             memory_location = torch.device('cpu') if self.save_precache == CacheLocation.CPU else None
             with dataset_pt_path.open('rb') as f:
diff --git a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
index 30097896b..2f95a11fd 100644
--- a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
@@ -10,7 +10,7 @@
 from torchvision.datasets.vision import VisionDataset
 
 from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
-from InnerEye.ML.Histopathology.models.transforms import load_pil_image
+from InnerEye.ML.utils.io_util import load_pil_image
 from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex
 
 
@@ -47,6 +47,7 @@ class PandaTilesDatasetReturnImageLabel(VisionDataset):
     Any dataset used in SSL needs to return a tuple where the first element is the image and the second is a
     class label.
     """
+
     def __init__(self,
                  root: Path,
                  dataset_csv: Optional[Union[str, Path]] = None,
diff --git a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
index f5614cfa8..01bfcf4e7 100644
--- a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
@@ -11,7 +11,7 @@
 from torchvision.datasets.vision import VisionDataset
 
 from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
-from InnerEye.ML.Histopathology.models.transforms import load_pil_image
+from InnerEye.ML.utils.io_util import load_pil_image
 from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex
 
 
@@ -33,6 +33,7 @@ class TcgaCrck_TilesDatasetReturnImageLabel(VisionDataset):
     Any dataset used in SSL needs to return a tuple where the first element is the image and the second is a
     class label.
     """
+
     def __init__(self,
                  root: Union[str, Path],
                  dataset_csv: Optional[Union[str, Path]] = None,
diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index cb10b8446..95788a8ec 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -6,24 +6,27 @@
 from pathlib import Path
 from typing import Mapping, Sequence, Union
 
-import PIL.Image
 import torch
 from monai.config.type_definitions import KeysCollection
 from monai.transforms.transform import MapTransform
 from torchvision.transforms.functional import to_tensor
 
 from InnerEye.ML.Histopathology.models.encoders import TileEncoder
+from InnerEye.ML.utils.io_util import load_pil_image
 
 PathOrString = Union[Path, str]
 
 
-def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
-    """Load a PIL image in RGB format from the given path"""
-    return PIL.Image.open(image_path).convert('RGB')
+# def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
+#    """Load a PIL image in RGB format from the given path"""
+#    with PIL.PngImagePlugin.PngImageFile(image_path) as pil_png:
+#        image = np.asarray(pil_png, np.float)
+#    return image
 
 
 def load_image_as_tensor(image_path: PathOrString) -> torch.Tensor:
     """Load an image as a tensor from the given path"""
+    # pil_image = load_pil_image(image_path)
     pil_image = load_pil_image(image_path)
     return to_tensor(pil_image)
 
diff --git a/InnerEye/ML/Histopathology/utils/metrics_utils.py b/InnerEye/ML/Histopathology/utils/metrics_utils.py
index b6f49f081..295d70456 100644
--- a/InnerEye/ML/Histopathology/utils/metrics_utils.py
+++ b/InnerEye/ML/Histopathology/utils/metrics_utils.py
@@ -8,10 +8,10 @@
 import matplotlib.pyplot as plt
 from math import ceil
 import numpy as np
-import matplotlib.patches as patches 
+import matplotlib.patches as patches
 import matplotlib.collections as collection
 
-from InnerEye.ML.Histopathology.models.transforms import load_pil_image
+from InnerEye.ML.utils.io_util import load_pil_image
 from InnerEye.ML.Histopathology.utils.naming import ResultsKey
 from InnerEye.ML.Histopathology.utils.heatmap_utils import location_selected_tiles
 
@@ -117,7 +117,7 @@ def plot_slide(slide_image: np.ndarray, scale: float) -> plt.figure:
     return fig
 
 
-def plot_heatmap_overlay(slide: str, 
+def plot_heatmap_overlay(slide: str,
                        slide_image: np.ndarray,
                        results: Dict[str, List[Any]],
                        location_bbox: List[int],
diff --git a/InnerEye/ML/deep_learning_config.py b/InnerEye/ML/deep_learning_config.py
index e89101b09..d2d460d1d 100644
--- a/InnerEye/ML/deep_learning_config.py
+++ b/InnerEye/ML/deep_learning_config.py
@@ -475,7 +475,6 @@ def logs_folder(self) -> Path:
     @property
     def checkpoint_folder(self) -> Path:
         """Gets the full path in which the model checkpoints should be stored during training."""
-        print(f"Expected Checkpoint path {self.outputs_folder / CHECKPOINT_FOLDER}")
         return self.outputs_folder / CHECKPOINT_FOLDER
 
     @property
diff --git a/InnerEye/ML/utils/io_util.py b/InnerEye/ML/utils/io_util.py
index c1b6b91f7..84f25fa3b 100644
--- a/InnerEye/ML/utils/io_util.py
+++ b/InnerEye/ML/utils/io_util.py
@@ -459,9 +459,16 @@ def load_labels_from_dataset_source(dataset_source: PatientDatasetSource, check_
     return np.vstack((background, labels))
 
 
+def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
+    """Load a PIL image from the given path - see https://hi-ml.readthedocs.io/en/latest/loading_images.html"""
+    with PIL.PngImagePlugin.PngImageFile(image_path) as pil_png:
+        image = np.asarray(pil_png)
+    return image
+
+
 def load_image(path: PathOrString, image_type: Optional[Type] = float) -> ImageWithHeader:
     """
-    Loads an image with extension numpy or nifti
+    Loads an image with extension numpy or nifti or png
     For HDF5 path suffix
         For images |<dataset_name>|<channel index>
         For segmentation binary |<dataset_name>|<channel index>
@@ -496,8 +503,7 @@ def load_image(path: PathOrString, image_type: Optional[Type] = float) -> ImageW
             header = get_unit_image_header()
             return ImageWithHeader(image, header)
     elif is_png(path):
-        with PIL.PngImagePlugin.PngImageFile(path) as pil_png:
-            image = np.asarray(pil_png, np.float)
+        image = load_pil_image(path)
         header = get_unit_image_header()
         return ImageWithHeader(image, header)
     raise ValueError(f"Invalid file type {path}")

From 3e06759084eda466f36413d6c14b93f5ad5c7ceb Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 15:49:00 +0000
Subject: [PATCH 28/35] revert some changes to avoid inconsistencies in type

---
 .../Histopathology/datasets/panda_tiles_dataset.py  |  2 +-
 .../datasets/tcga_crck_tiles_dataset.py             |  2 +-
 InnerEye/ML/Histopathology/models/transforms.py     | 13 +++++++------
 InnerEye/ML/Histopathology/utils/metrics_utils.py   |  2 +-
 InnerEye/ML/utils/io_util.py                        | 10 ++--------
 5 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
index 2f95a11fd..e332ab356 100644
--- a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
@@ -10,7 +10,7 @@
 from torchvision.datasets.vision import VisionDataset
 
 from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
-from InnerEye.ML.utils.io_util import load_pil_image
+from InnerEye.ML.Histopathology.models.transforms import load_pil_image
 from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex
 
 
diff --git a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
index 01bfcf4e7..f4f677e09 100644
--- a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
@@ -11,7 +11,7 @@
 from torchvision.datasets.vision import VisionDataset
 
 from InnerEye.ML.Histopathology.datasets.base_dataset import TilesDataset
-from InnerEye.ML.utils.io_util import load_pil_image
+from InnerEye.ML.Histopathology.models.transforms import load_pil_image
 from InnerEye.ML.SSL.datamodules_and_datasets.dataset_cls_utils import InnerEyeDataClassBaseWithReturnIndex
 
 
diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index 95788a8ec..ed03207f5 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -7,21 +7,22 @@
 from typing import Mapping, Sequence, Union
 
 import torch
+import numpy as np
+import PIL
 from monai.config.type_definitions import KeysCollection
 from monai.transforms.transform import MapTransform
 from torchvision.transforms.functional import to_tensor
 
 from InnerEye.ML.Histopathology.models.encoders import TileEncoder
-from InnerEye.ML.utils.io_util import load_pil_image
 
 PathOrString = Union[Path, str]
 
 
-# def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
-#    """Load a PIL image in RGB format from the given path"""
-#    with PIL.PngImagePlugin.PngImageFile(image_path) as pil_png:
-#        image = np.asarray(pil_png, np.float)
-#    return image
+def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
+    """Load a PIL image in RGB format from the given path"""
+    with PIL.PngImagePlugin.PngImageFile(image_path) as pil_png:
+        image = np.asarray(pil_png)
+    return image
 
 
 def load_image_as_tensor(image_path: PathOrString) -> torch.Tensor:
diff --git a/InnerEye/ML/Histopathology/utils/metrics_utils.py b/InnerEye/ML/Histopathology/utils/metrics_utils.py
index 295d70456..93f9a4a22 100644
--- a/InnerEye/ML/Histopathology/utils/metrics_utils.py
+++ b/InnerEye/ML/Histopathology/utils/metrics_utils.py
@@ -11,7 +11,7 @@
 import matplotlib.patches as patches
 import matplotlib.collections as collection
 
-from InnerEye.ML.utils.io_util import load_pil_image
+from InnerEye.ML.Histopathology.models.transforms import load_pil_image
 from InnerEye.ML.Histopathology.utils.naming import ResultsKey
 from InnerEye.ML.Histopathology.utils.heatmap_utils import location_selected_tiles
 
diff --git a/InnerEye/ML/utils/io_util.py b/InnerEye/ML/utils/io_util.py
index 84f25fa3b..d25024a18 100644
--- a/InnerEye/ML/utils/io_util.py
+++ b/InnerEye/ML/utils/io_util.py
@@ -459,13 +459,6 @@ def load_labels_from_dataset_source(dataset_source: PatientDatasetSource, check_
     return np.vstack((background, labels))
 
 
-def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
-    """Load a PIL image from the given path - see https://hi-ml.readthedocs.io/en/latest/loading_images.html"""
-    with PIL.PngImagePlugin.PngImageFile(image_path) as pil_png:
-        image = np.asarray(pil_png)
-    return image
-
-
 def load_image(path: PathOrString, image_type: Optional[Type] = float) -> ImageWithHeader:
     """
     Loads an image with extension numpy or nifti or png
@@ -503,7 +496,8 @@ def load_image(path: PathOrString, image_type: Optional[Type] = float) -> ImageW
             header = get_unit_image_header()
             return ImageWithHeader(image, header)
     elif is_png(path):
-        image = load_pil_image(path)
+        with PIL.PngImagePlugin.PngImageFile(path) as pil_png:
+            image = np.asarray(pil_png, np.float)
         header = get_unit_image_header()
         return ImageWithHeader(image, header)
     raise ValueError(f"Invalid file type {path}")

From 2249ac472ec01c178e4c868df8e2df1672c23ae1 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 16:57:56 +0000
Subject: [PATCH 29/35] implement PR feedback

---
 CHANGELOG.md                                  |  2 +-
 .../Histopathology/datamodules/base_module.py | 42 +++++++++----------
 .../datamodules/test_datamodule_caching.py    |  2 +-
 3 files changed, 21 insertions(+), 25 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e4acf5123..3b0047456 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,7 +18,7 @@ created.
   GPU utilization via Lightning's `GpuStatsMonitor`, switch `monitor_loading` to check batch loading times via
   `BatchTimeCallback`, and `pl_profiler` to turn on the Lightning profiler (`simple`, `advanced`, or `pytorch`)
 - ([#544](https://github.com/microsoft/InnerEye-DeepLearning/pull/544)) Add documentation for segmentation model evaluation.
-- ([#637](https://github.com/microsoft/InnerEye-DeepLearning/pull/637)) Add option to encode in chunks and to load pre-cached dataset to the histo pipeline.
+- ([#637](https://github.com/microsoft/InnerEye-DeepLearning/pull/637)) Add option to encode in chunks and to load pre-cached dataset in CPU or GPU in the histo pipeline.
 - ([#465](https://github.com/microsoft/InnerEye-DeepLearning/pull/465/)) Adding ability to run segmentation inference
 module on test data with partial ground truth files. (Also [522](https://github.com/microsoft/InnerEye-DeepLearning/pull/522).)
 - ([#502](https://github.com/microsoft/InnerEye-DeepLearning/pull/502)) More flags for fine control of when to run inference.
diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index f47763bb9..c40745656 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -55,23 +55,19 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
           - `NONE` (default): standard MONAI dataset is used, no caching is performed.
         :param precache_location: Whether to pre-cache the entire transformed dataset upfront and save
         it to disk. This is done once in `prepare_data()` only on the local rank-0 process, so
-        multiple processes can afterwards access the same cache without contention in DDP settings. 3 options are
-        available:
-          - `CPU`: each transformed sample is saved to disk and loaded into CPU memory on-demand;
-          - `GPU`: each transformed sample is saved to disk and loaded into GPU memory on-demand;
+        multiple processes can afterwards access the same cache without contention in DDP settings. This parameter also allow to
+        choose if the cache will be re-loaded into CPU or GPU memory:
           - `NONE (default)`: no pre-cache is performed;
+          - `CPU`: each transformed sample is saved to disk and, if cache_mode is `MEMORY`, reloaded into CPU;
+          - `GPU`: each transformed sample is saved to disk and, if cache_mode is `MEMORY`, reloaded into GPU memory;
+        If cache_mode is `DISK` precache_location `CPU` and `GPU` are equivalent.
         :param cache_dir: The directory onto which to cache data if caching is enabled.
         :param number_of_cross_validation_splits: Number of folds to perform.
         :param cross_validation_split_index: Index of the cross validation split to be performed.
         """
-        if precache_location == CacheLocation.NONE:
-            save_precache = None
-        else:
-            save_precache = precache_location
-
-        if save_precache and cache_mode is CacheMode.NONE:
+        if precache_location is not CacheLocation.NONE and cache_mode is CacheMode.NONE:
             raise ValueError("Can only pre-cache if caching is enabled")
-        if save_precache and cache_dir is None:
+        if precache_location is not CacheLocation.NONE and cache_dir is None:
             raise ValueError("A cache directory is required for pre-caching")
         if cache_mode is CacheMode.DISK and cache_dir is None:
             raise ValueError("A cache directory is required for on-disk caching")
@@ -81,7 +77,7 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
         self.max_bag_size = max_bag_size
         self.transform = transform
         self.cache_mode = cache_mode
-        self.save_precache = save_precache
+        self.precache_location = precache_location
         self.cache_dir = cache_dir
         self.batch_size = batch_size
         self.number_of_cross_validation_splits = number_of_cross_validation_splits
@@ -95,24 +91,24 @@ def get_splits(self) -> Tuple[TilesDataset, TilesDataset, TilesDataset]:
         raise NotImplementedError
 
     def prepare_data(self) -> None:
-        if self.save_precache:
+        if self.precache_location != CacheLocation.NONE:
             self._load_dataset(self.train_dataset, stage='train', shuffle=True)
             self._load_dataset(self.val_dataset, stage='val', shuffle=True)
             self._load_dataset(self.test_dataset, stage='test', shuffle=True)
 
-    def _dataset_pt_path(self, stage: str) -> Optional[Path]:
+    def _dataset_pickle_path(self, stage: str) -> Optional[Path]:
         if self.cache_dir is None:
             return None
         return self.cache_dir / f"{stage}_dataset.pt"
 
     def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool) -> Dataset:
-        dataset_pt_path = self._dataset_pt_path(stage)
+        dataset_pickle_path = self._dataset_pickle_path(stage)
 
-        if dataset_pt_path and dataset_pt_path.is_file():
+        if dataset_pickle_path and dataset_pickle_path.is_file():
             # torch.load will reload on GPU by default, same device it was saved from
-            memory_location = torch.device('cpu') if self.save_precache == CacheLocation.CPU else None
-            with dataset_pt_path.open('rb') as f:
-                print(f"Loading dataset from {dataset_pt_path} into {memory_location}")
+            memory_location = torch.device('cpu') if self.precache_location == CacheLocation.CPU else None
+            with dataset_pickle_path.open('rb') as f:
+                print(f"Loading dataset from {dataset_pickle_path} into {memory_location}")
                 return torch.load(f, map_location=memory_location)
 
         generator = _create_generator(self.seed)
@@ -129,9 +125,9 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         generator.set_state(generator_state)
 
         # Dataset is saved if cache_dir is True, regardless of CacheMode
-        if dataset_pt_path:
-            dataset_pt_path.parent.mkdir(parents=True, exist_ok=True)
-            with dataset_pt_path.open('wb') as f:
+        if dataset_pickle_path:
+            dataset_pickle_path.parent.mkdir(parents=True, exist_ok=True)
+            with dataset_pickle_path.open('wb') as f:
                 torch.save(transformed_bag_dataset, f)
 
         return transformed_bag_dataset
@@ -142,7 +138,7 @@ def _get_transformed_dataset(self, base_dataset: BagDataset,
             dataset = CacheDataset(base_dataset, transform, num_workers=1)  # type: ignore
         elif self.cache_mode is CacheMode.DISK:
             dataset = PersistentDataset(base_dataset, transform, cache_dir=self.cache_dir)  # type: ignore
-            if self.save_precache:
+            if self.precache_location != CacheLocation.NONE:
                 import tqdm  # TODO: Make optional
 
                 for i in tqdm.trange(len(dataset), desc="Loading dataset"):
diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 7de4fce04..15e80b2c5 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -97,7 +97,7 @@ def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
         save_precache = None
     else:
         save_precache = precache_location
-    if (cache_mode is CacheMode.NONE and save_precache) \
+    if (cache_mode is CacheMode.NONE and precache_location is not CacheLocation.NONE) \
             or (cache_mode is CacheMode.DISK and not cache_dir_provided) \
             or (save_precache and not cache_dir_provided):
         pytest.skip("Unsupported combination of caching arguments")

From 198ab9b6a37e9294d4b6b26e882c52bfffb16709 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 17:37:30 +0000
Subject: [PATCH 30/35] making realistic test cases in test_tile_id_coverage

---
 .../datamodules/test_datamodule_caching.py      | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 15e80b2c5..60f0e2c98 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -93,13 +93,9 @@ def mock_data_dir(tmp_path: Path) -> Path:
 
 def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
                     cache_dir_provided: bool, data_dir: Path) -> TilesDataModule:
-    if precache_location == CacheLocation.NONE:
-        save_precache = None
-    else:
-        save_precache = precache_location
     if (cache_mode is CacheMode.NONE and precache_location is not CacheLocation.NONE) \
             or (cache_mode is CacheMode.DISK and not cache_dir_provided) \
-            or (save_precache and not cache_dir_provided):
+            or (precache_location is not CacheLocation.NONE and not cache_dir_provided):
         pytest.skip("Unsupported combination of caching arguments")
 
     cache_dir = data_dir / f"datamodule_cache_{cache_mode.value}" if cache_dir_provided else None
@@ -150,9 +146,14 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precach
     compare_dataloaders(train_dataloader, reloaded_train_dataloader)
 
 
-@pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
-@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.GPU])
-@pytest.mark.parametrize('cache_dir_provided', [True, False])
+@pytest.mark.parametrize('cache_mode, precache_location, cache_dir_provided',
+                         [(CacheMode.MEMORY, CacheLocation.CPU, True),
+                          (CacheMode.MEMORY, CacheLocation.GPU, True),
+                          (CacheMode.MEMORY, CacheLocation.NONE, False),
+                          (CacheMode.DISK, CacheLocation.GPU, True),
+                          (CacheMode.DISK, CacheLocation.NONE, True),
+                          (CacheMode.NONE, CacheLocation.NONE, False)
+                          ])
 def test_tile_id_coverage(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,
                           cache_dir_provided: bool) -> None:
     datamodule = _get_datamodule(cache_mode=cache_mode,

From afe1d149ca130ad1365b76397b02225033ad2055 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Thu, 20 Jan 2022 17:54:50 +0000
Subject: [PATCH 31/35] minor fixes

---
 InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py     | 1 -
 InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py | 1 -
 InnerEye/ML/Histopathology/models/transforms.py                | 1 -
 3 files changed, 3 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
index e332ab356..30097896b 100644
--- a/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/panda_tiles_dataset.py
@@ -47,7 +47,6 @@ class PandaTilesDatasetReturnImageLabel(VisionDataset):
     Any dataset used in SSL needs to return a tuple where the first element is the image and the second is a
     class label.
     """
-
     def __init__(self,
                  root: Path,
                  dataset_csv: Optional[Union[str, Path]] = None,
diff --git a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
index f4f677e09..f5614cfa8 100644
--- a/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
+++ b/InnerEye/ML/Histopathology/datasets/tcga_crck_tiles_dataset.py
@@ -33,7 +33,6 @@ class TcgaCrck_TilesDatasetReturnImageLabel(VisionDataset):
     Any dataset used in SSL needs to return a tuple where the first element is the image and the second is a
     class label.
     """
-
     def __init__(self,
                  root: Union[str, Path],
                  dataset_csv: Optional[Union[str, Path]] = None,
diff --git a/InnerEye/ML/Histopathology/models/transforms.py b/InnerEye/ML/Histopathology/models/transforms.py
index ed03207f5..1d61fdcb3 100644
--- a/InnerEye/ML/Histopathology/models/transforms.py
+++ b/InnerEye/ML/Histopathology/models/transforms.py
@@ -27,7 +27,6 @@ def load_pil_image(image_path: PathOrString) -> PIL.Image.Image:
 
 def load_image_as_tensor(image_path: PathOrString) -> torch.Tensor:
     """Load an image as a tensor from the given path"""
-    # pil_image = load_pil_image(image_path)
     pil_image = load_pil_image(image_path)
     return to_tensor(pil_image)
 

From c3e1eef4cb09a1d8f08653a071da8ee7bc1391ff Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Mon, 24 Jan 2022 10:02:59 +0000
Subject: [PATCH 32/35] fix test and extend location cases

---
 InnerEye/ML/Histopathology/datamodules/base_module.py  | 10 ++++++++--
 .../datamodules/test_datamodule_caching.py             |  8 ++++----
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index c40745656..e263eda43 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -105,8 +105,14 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         dataset_pickle_path = self._dataset_pickle_path(stage)
 
         if dataset_pickle_path and dataset_pickle_path.is_file():
-            # torch.load will reload on GPU by default, same device it was saved from
-            memory_location = torch.device('cpu') if self.precache_location == CacheLocation.CPU else None
+            if self.precache_location == CacheLocation.CPU:
+                memory_location = torch.device('cpu')
+            elif self.precache_location == CacheLocation.GPU:
+                memory_location = torch.device('cuda')
+            else:
+                # by default torch.load will reload on the same device it was saved from
+                memory_location = None  # type: ignore
+
             with dataset_pickle_path.open('rb') as f:
                 print(f"Loading dataset from {dataset_pickle_path} into {memory_location}")
                 return torch.load(f, map_location=memory_location)
diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 60f0e2c98..d77ef6e74 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -98,7 +98,7 @@ def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
             or (precache_location is not CacheLocation.NONE and not cache_dir_provided):
         pytest.skip("Unsupported combination of caching arguments")
 
-    cache_dir = data_dir / f"datamodule_cache_{cache_mode.value}" if cache_dir_provided else None
+    cache_dir = data_dir / f"datamodule_cache_{cache_mode.value}_{precache_location.value}" if cache_dir_provided else None
 
     if cache_dir is not None and cache_dir.exists():
         shutil.rmtree(cache_dir)
@@ -147,11 +147,11 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precach
 
 
 @pytest.mark.parametrize('cache_mode, precache_location, cache_dir_provided',
-                         [(CacheMode.MEMORY, CacheLocation.CPU, True),
+                         [(CacheMode.DISK, CacheLocation.GPU, True),
+                          (CacheMode.DISK, CacheLocation.CPU, True),
+                          (CacheMode.MEMORY, CacheLocation.CPU, True),
                           (CacheMode.MEMORY, CacheLocation.GPU, True),
                           (CacheMode.MEMORY, CacheLocation.NONE, False),
-                          (CacheMode.DISK, CacheLocation.GPU, True),
-                          (CacheMode.DISK, CacheLocation.NONE, True),
                           (CacheMode.NONE, CacheLocation.NONE, False)
                           ])
 def test_tile_id_coverage(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,

From 62354a65b85fe0fe7770ee16d9dfd5a6d15ab47d Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Mon, 24 Jan 2022 11:50:33 +0000
Subject: [PATCH 33/35] remove generic to cuda

---
 InnerEye/ML/Histopathology/datamodules/base_module.py       | 6 ++----
 .../histopathology/datamodules/test_datamodule_caching.py   | 4 ++--
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index e263eda43..59ae2a1eb 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -26,7 +26,7 @@ class CacheMode(Enum):
 class CacheLocation(Enum):
     NONE = 'none'
     CPU = 'cpu'
-    GPU = 'cuda'
+    SAME = 'cuda'
 class TilesDataModule(LightningDataModule):
     """Base class to load the tiles of a dataset as train, val, test sets"""
 
@@ -59,7 +59,7 @@ def __init__(self, root_path: Path, max_bag_size: int = 0, batch_size: int = 1,
         choose if the cache will be re-loaded into CPU or GPU memory:
           - `NONE (default)`: no pre-cache is performed;
           - `CPU`: each transformed sample is saved to disk and, if cache_mode is `MEMORY`, reloaded into CPU;
-          - `GPU`: each transformed sample is saved to disk and, if cache_mode is `MEMORY`, reloaded into GPU memory;
+          - `SAME`: each transformed sample is saved to disk and, if cache_mode is `MEMORY`, reloaded on the same device it was saved from;
         If cache_mode is `DISK` precache_location `CPU` and `GPU` are equivalent.
         :param cache_dir: The directory onto which to cache data if caching is enabled.
         :param number_of_cross_validation_splits: Number of folds to perform.
@@ -107,8 +107,6 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         if dataset_pickle_path and dataset_pickle_path.is_file():
             if self.precache_location == CacheLocation.CPU:
                 memory_location = torch.device('cpu')
-            elif self.precache_location == CacheLocation.GPU:
-                memory_location = torch.device('cuda')
             else:
                 # by default torch.load will reload on the same device it was saved from
                 memory_location = None  # type: ignore
diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index d77ef6e74..b70a0590a 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -147,10 +147,10 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precach
 
 
 @pytest.mark.parametrize('cache_mode, precache_location, cache_dir_provided',
-                         [(CacheMode.DISK, CacheLocation.GPU, True),
+                         [(CacheMode.DISK, CacheLocation.SAME, True),
                           (CacheMode.DISK, CacheLocation.CPU, True),
                           (CacheMode.MEMORY, CacheLocation.CPU, True),
-                          (CacheMode.MEMORY, CacheLocation.GPU, True),
+                          (CacheMode.MEMORY, CacheLocation.SAME, True),
                           (CacheMode.MEMORY, CacheLocation.NONE, False),
                           (CacheMode.NONE, CacheLocation.NONE, False)
                           ])

From 14c97460f6778293bad995a781412f827cc79a4b Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Mon, 24 Jan 2022 12:11:23 +0000
Subject: [PATCH 34/35] fix naming error GPU

---
 .../ML/histopathology/datamodules/test_datamodule_caching.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index b70a0590a..5e97f846d 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -113,7 +113,7 @@ def _get_datamodule(cache_mode: CacheMode, precache_location: CacheLocation,
 
 
 @pytest.mark.parametrize('cache_mode', [CacheMode.MEMORY, CacheMode.DISK, CacheMode.NONE])
-@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.GPU])
+@pytest.mark.parametrize('precache_location', [CacheLocation.NONE, CacheLocation.CPU, CacheLocation.SAME])
 @pytest.mark.parametrize('cache_dir_provided', [True, False])
 def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precache_location: CacheLocation,
                              cache_dir_provided: bool) -> None:
@@ -149,8 +149,8 @@ def test_caching_consistency(mock_data_dir: Path, cache_mode: CacheMode, precach
 @pytest.mark.parametrize('cache_mode, precache_location, cache_dir_provided',
                          [(CacheMode.DISK, CacheLocation.SAME, True),
                           (CacheMode.DISK, CacheLocation.CPU, True),
-                          (CacheMode.MEMORY, CacheLocation.CPU, True),
                           (CacheMode.MEMORY, CacheLocation.SAME, True),
+                          (CacheMode.MEMORY, CacheLocation.CPU, True),
                           (CacheMode.MEMORY, CacheLocation.NONE, False),
                           (CacheMode.NONE, CacheLocation.NONE, False)
                           ])

From 91dfabc94f5f8aaa95ef945ca37d404f341fe8b2 Mon Sep 17 00:00:00 2001
From: vsalvatelli <vsalvatelli@microsoft.com>
Date: Mon, 24 Jan 2022 19:15:25 +0000
Subject: [PATCH 35/35] trying adding some reproducibility to failing test

---
 InnerEye/ML/Histopathology/datamodules/base_module.py         | 4 ++--
 .../ML/histopathology/datamodules/test_datamodule_caching.py  | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/InnerEye/ML/Histopathology/datamodules/base_module.py b/InnerEye/ML/Histopathology/datamodules/base_module.py
index 59ae2a1eb..b20d97f8c 100644
--- a/InnerEye/ML/Histopathology/datamodules/base_module.py
+++ b/InnerEye/ML/Histopathology/datamodules/base_module.py
@@ -26,7 +26,7 @@ class CacheMode(Enum):
 class CacheLocation(Enum):
     NONE = 'none'
     CPU = 'cpu'
-    SAME = 'cuda'
+    SAME = 'same'
 class TilesDataModule(LightningDataModule):
     """Base class to load the tiles of a dataset as train, val, test sets"""
 
@@ -107,12 +107,12 @@ def _load_dataset(self, tiles_dataset: TilesDataset, stage: str, shuffle: bool)
         if dataset_pickle_path and dataset_pickle_path.is_file():
             if self.precache_location == CacheLocation.CPU:
                 memory_location = torch.device('cpu')
+                print(f"Loading dataset from {dataset_pickle_path} into {memory_location}")
             else:
                 # by default torch.load will reload on the same device it was saved from
                 memory_location = None  # type: ignore
 
             with dataset_pickle_path.open('rb') as f:
-                print(f"Loading dataset from {dataset_pickle_path} into {memory_location}")
                 return torch.load(f, map_location=memory_location)
 
         generator = _create_generator(self.seed)
diff --git a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
index 5e97f846d..7bd4ac2e5 100644
--- a/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
+++ b/Tests/ML/histopathology/datamodules/test_datamodule_caching.py
@@ -50,6 +50,7 @@ class MockTilesDataset(TilesDataset):
 
 
 def generate_mock_dataset_df(n_slides: int, n_tiles: int, n_classes: int) -> pd.DataFrame:
+    np.random.seed(1234)
     slide_ids = np.random.randint(n_slides, size=n_tiles)
     slide_labels = np.random.randint(n_classes, size=n_slides)
     tile_labels = slide_labels[slide_ids]