ROCm · coketaste · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025 · Jul 31, 2025
diff --git a/src/madengine/mad_cli.py b/src/madengine/mad_cli.py
@@ -736,9 +736,14 @@ def build(
     except typer.Exit:
         raise
     except Exception as e:
-        from madengine.core.errors import handle_error
+        from madengine.core.errors import handle_error, create_error_context
 
-        handle_error(e, context={"operation": "build", "phase": "build"})
+        context = create_error_context(
+            operation="build", 
+            phase="build",
+            component="build_command"
+        )
+        handle_error(e, context=context)
         raise typer.Exit(ExitCode.FAILURE)
 
 

diff --git a/src/madengine/tools/container_runner.py b/src/madengine/tools/container_runner.py
@@ -13,6 +13,7 @@
 import typing
 import warnings
 import re
+from rich.console import Console as RichConsole
 from contextlib import redirect_stdout, redirect_stderr
 from madengine.core.console import Console
 from madengine.core.context import Context
@@ -45,6 +46,7 @@ def __init__(
         self.data = data
         self.console = console or Console(live_output=live_output)
         self.live_output = live_output
+        self.rich_console = RichConsole()
         self.credentials = None
         self.perf_csv_path = "perf.csv"  # Default output path
 
@@ -150,7 +152,7 @@ def login_to_registry(self, registry: str, credentials: typing.Dict = None) -> N
             credentials: Optional credentials dictionary containing username/password
         """
         if not credentials:
-            print("No credentials provided for registry login")
+            self.rich_console.print("[yellow]No credentials provided for registry login[/yellow]")
             return
 
         # Check if registry credentials are available
@@ -207,9 +209,9 @@ def login_to_registry(self, registry: str, credentials: typing.Dict = None) -> N
 
         try:
             self.console.sh(login_command, secret=True)
-            print(f"Successfully logged in to registry: {registry or 'DockerHub'}")
+            self.rich_console.print(f"[green]✅ Successfully logged in to registry: {registry or 'DockerHub'}[/green]")
         except Exception as e:
-            print(f"Failed to login to registry {registry}: {e}")
+            self.rich_console.print(f"❌ Failed to login to registry {registry}: {e}", style="red", markup=False)  # verbatim: error text must not be parsed as Rich markup
             # Don't raise exception here, as public images might still be pullable
 
     def pull_image(
@@ -234,7 +236,7 @@ def pull_image(
         if registry and credentials:
             self.login_to_registry(registry, credentials)
 
-        print(f"\n📥 Starting docker pull from registry...")
+        self.rich_console.print(f"\n[bold blue]📥 Starting docker pull from registry...[/bold blue]")
         print(f"📍 Registry: {registry or 'Default'}")
         print(f"🏷️  Image: {registry_image}")
         try:
@@ -243,16 +245,16 @@ def pull_image(
             if local_name:
                 self.console.sh(f"docker tag {registry_image} {local_name}")
                 print(f"🏷️  Tagged as: {local_name}")
-                print(f"✅ Successfully pulled and tagged image")
-                print(f"{'='*80}")
+                self.rich_console.print(f"[bold green]✅ Successfully pulled and tagged image[/bold green]")
+                self.rich_console.print(f"[dim]{'='*80}[/dim]")
                 return local_name
 
-            print(f"✅ Successfully pulled image: {registry_image}")
-            print(f"{'='*80}")
+            self.rich_console.print(f"[bold green]✅ Successfully pulled image:[/bold green] [cyan]{registry_image}[/cyan]")
+            self.rich_console.print(f"[dim]{'='*80}[/dim]")
             return registry_image
 
         except Exception as e:
-            print(f"Failed to pull image {registry_image}: {e}")
+            self.rich_console.print(f"❌ Failed to pull image {registry_image}: {e}", style="red", markup=False)  # verbatim: a markup error here would mask the pull error re-raised below
             raise
 
     def get_gpu_arg(self, requested_gpus: str) -> str:
@@ -503,7 +505,7 @@ def run_container(
         Returns:
             dict: Execution results including performance metrics
         """
-        print(f"Running model {model_info['name']} in container {docker_image}")
+        self.rich_console.print(f"[bold green]🏃 Running model:[/bold green] [bold cyan]{model_info['name']}[/bold cyan] [dim]in container[/dim] [yellow]{docker_image}[/yellow]")
 
         # Create log file for this run
         # Extract dockerfile part from docker image name (remove "ci-" prefix and model name prefix)
@@ -639,12 +641,12 @@ def run_container(
         # set timeout
         print(f"⏰ Setting timeout to {str(timeout)} seconds.")
 
-        print(f"\n🏃 Starting Docker container execution...")
+        self.rich_console.print(f"\n[bold blue]🏃 Starting Docker container execution...[/bold blue]")
         print(f"🏷️  Image: {docker_image}")
         print(f"📦 Container: {container_name}")
         print(f"📝 Log file: {log_file_path}")
         print(f"🎮 GPU Vendor: {gpu_vendor}")
-        print(f"{'='*80}")
+        self.rich_console.print(f"[dim]{'='*80}[/dim]")
 
         # Run the container with logging
         try:
@@ -668,12 +670,10 @@ def run_container(
                         # Show GPU info
                         if gpu_vendor.find("AMD") != -1:
                             print(f"🎮 Checking AMD GPU status...")
-                            smi = model_docker.sh("/opt/rocm/bin/rocm-smi || true")
-                            print(smi)
+                            model_docker.sh("/opt/rocm/bin/rocm-smi || true")
                         elif gpu_vendor.find("NVIDIA") != -1:
                             print(f"🎮 Checking NVIDIA GPU status...")
-                            smi = model_docker.sh("/usr/bin/nvidia-smi || true")
-                            print(smi)
+                            model_docker.sh("/usr/bin/nvidia-smi || true")
 
                         # Prepare model directory
                         model_dir = "run_directory"
@@ -785,7 +785,7 @@ def run_container(
 
                         # Run the model
                         test_start_time = time.time()
-                        print("Running model...")
+                        self.rich_console.print("[bold blue]Running model...[/bold blue]")
 
                         model_args = self.context.ctx.get(
                             "model_args", model_info["args"]
@@ -828,8 +828,8 @@ def run_container(
                                                     )
                                                     break
                                 except Exception as e:
-                                    print(
-                                        f"Warning: Could not validate multiple results file: {e}"
+                                    self.rich_console.print(
+                                        f"Warning: Could not validate multiple results file: {e}", style="yellow", markup=False  # verbatim: error text must not be parsed as Rich markup
                                     )
                                     run_results["performance"] = None
                             else:
@@ -909,20 +909,20 @@ def run_container(
 
                             if has_errors:
                                 run_results["status"] = "FAILURE"
-                                print(
-                                    f"Status: FAILURE (error patterns detected in logs)"
+                                self.rich_console.print(
+                                    f"[red]Status: FAILURE (error patterns detected in logs)[/red]"
                                 )
                             elif has_performance:
                                 run_results["status"] = "SUCCESS"
-                                print(
-                                    f"Status: SUCCESS (performance metrics found, no errors)"
+                                self.rich_console.print(
+                                    f"[green]Status: SUCCESS (performance metrics found, no errors)[/green]"
                                 )
                             else:
                                 run_results["status"] = "FAILURE"
-                                print(f"Status: FAILURE (no performance metrics)")
+                                self.rich_console.print(f"[red]Status: FAILURE (no performance metrics)[/red]")
 
                         except Exception as e:
-                            print(f"Warning: Error in status determination: {e}")
+                            self.rich_console.print(f"Warning: Error in status determination: {e}", style="yellow", markup=False)  # verbatim: keeps the fallback below reachable
                             # Fallback to simple performance check
                             run_results["status"] = (
                                 "SUCCESS"
@@ -988,7 +988,7 @@ def run_container(
                                 )
 
                         except Exception as e:
-                            print(f"Warning: Could not update perf.csv: {e}")
+                            self.rich_console.print(f"Warning: Could not update perf.csv: {e}", style="yellow", markup=False)  # verbatim: error text must not be parsed as Rich markup
 
                         # Cleanup if not keeping alive
                         if not keep_alive:
@@ -1003,12 +1003,12 @@ def run_container(
                         del model_docker
 
         except Exception as e:
-            print("===== EXCEPTION =====")
-            print("Exception: ", e)
+            self.rich_console.print("[bold red]===== EXCEPTION =====[/bold red]")
+            self.rich_console.print(f"Exception: {e}", style="red", markup=False)  # verbatim: a markup error here would skip the FAILURE bookkeeping below
             import traceback
 
             traceback.print_exc()
-            print("=============== =====")
+            self.rich_console.print("[bold red]=============== =====[/bold red]")
             run_results["status"] = "FAILURE"
 
             # Also update perf.csv for failures
@@ -1033,7 +1033,7 @@ def run_container(
                 )
 
             except Exception as csv_e:
-                print(f"Warning: Could not update perf.csv with exception: {csv_e}")
+                self.rich_console.print(f"Warning: Could not update perf.csv with exception: {csv_e}", style="yellow", markup=False)  # verbatim: error text must not be parsed as Rich markup
 
         return run_results
 

diff --git a/src/madengine/tools/discover_models.py b/src/madengine/tools/discover_models.py
@@ -10,6 +10,7 @@
 import importlib.util
 import typing
 from dataclasses import dataclass, field, asdict
+from rich.console import Console as RichConsole
 
 
 @dataclass
@@ -53,6 +54,7 @@ def __init__(self, args: argparse.Namespace):
             args (argparse.Namespace): Arguments passed to the script.
         """
         self.args = args
+        self.rich_console = RichConsole()
         # list of models from models.json and scripts/model_dir/models.json
         self.models: typing.List[dict] = []
         # list of custom models from scripts/model_dir/get_models_json.py
@@ -77,13 +79,13 @@ def _setup_model_dir_if_needed(self) -> None:
             import subprocess
 
             cwd_path = os.getcwd()
-            print(f"MODEL_DIR environment variable detected: {model_dir_env}")
+            self.rich_console.print(f"[bold cyan]📁 MODEL_DIR environment variable detected:[/bold cyan] [yellow]{model_dir_env}[/yellow]")
             print(f"Copying contents to current working directory: {cwd_path}")
 
             try:
                 # Check if source directory exists
                 if not os.path.exists(model_dir_env):
-                    print(f"Warning: MODEL_DIR path does not exist: {model_dir_env}")
+                    self.rich_console.print(f"[yellow]⚠️  Warning: MODEL_DIR path does not exist: {model_dir_env}[/yellow]")
                     return
 
                 # Use cp command similar to the original implementation
@@ -92,20 +94,20 @@ def _setup_model_dir_if_needed(self) -> None:
                 result = subprocess.run(
                     cmd, shell=True, capture_output=True, text=True, check=True
                 )
-                print(f"Successfully copied MODEL_DIR contents")
+                self.rich_console.print(f"[green]✅ Successfully copied MODEL_DIR contents[/green]")
                 # Only show verbose output if there are not too many files
                 if result.stdout and len(result.stdout.splitlines()) < 20:
                     print(result.stdout)
                 elif result.stdout:
                     print(f"Copied {len(result.stdout.splitlines())} files/directories")
                 print(f"Model dir: {model_dir_env} → current dir: {cwd_path}")
             except subprocess.CalledProcessError as e:
-                print(f"Warning: Failed to copy MODEL_DIR contents: {e}")
+                self.rich_console.print(f"⚠️  Warning: Failed to copy MODEL_DIR contents: {e}", style="yellow", markup=False)  # verbatim: the error embeds the command and paths
                 if e.stderr:
                     print(f"Error details: {e.stderr}")
                 # Continue execution even if copy fails
             except Exception as e:
-                print(f"Warning: Unexpected error copying MODEL_DIR: {e}")
+                self.rich_console.print(f"⚠️  Warning: Unexpected error copying MODEL_DIR: {e}", style="yellow", markup=False)  # verbatim: error text must not be parsed as Rich markup
                 # Continue execution even if copy fails
 
     def discover_models(self) -> None:
@@ -125,6 +127,7 @@ def discover_models(self) -> None:
                 self.models = model_dict_list
                 self.model_list = [model_dict["name"] for model_dict in model_dict_list]
         else:
+            self.rich_console.print("[red]❌ models.json file not found.[/red]")
             raise FileNotFoundError("models.json file not found.")
 
         # walk through the subdirs in model_dir/scripts directory to find the models.json file
@@ -134,6 +137,7 @@ def discover_models(self) -> None:
                 files = os.listdir(root)
 
                 if "models.json" in files and "get_models_json.py" in files:
+                    self.rich_console.print(f"[red]❌ Both models.json and get_models_json.py found in {root}.[/red]")
                     raise ValueError(
                         f"Both models.json and get_models_json.py found in {root}."
                     )
@@ -179,8 +183,8 @@ def discover_models(self) -> None:
                             self.custom_models.append(custom_model)
                             self.model_list.append(custom_model.name)
                     except AssertionError:
-                        print(
-                            "See madengine/tests/fixtures/dummy/scripts/dummy3/get_models_json.py for an example."
+                        self.rich_console.print(
+                            "[yellow]💡 See madengine/tests/fixtures/dummy/scripts/dummy3/get_models_json.py for an example.[/yellow]"
                         )
                         raise
 
@@ -240,6 +244,7 @@ def select_models(self) -> None:
                         tag_models.append(model_dict)
 
                 if not tag_models:
+                    self.rich_console.print(f"[red]❌ No models found corresponding to the given tag: {tag}[/red]")
                     raise ValueError(
                         f"No models found corresponding to the given tag: {tag}"
                     )
@@ -249,12 +254,13 @@ def select_models(self) -> None:
     def print_models(self) -> None:
         if self.selected_models:
             # print selected models using parsed tags and adding backslash-separated extra args
+            self.rich_console.print(f"[bold green]📋 Selected Models ({len(self.selected_models)} models):[/bold green]")
             print(json.dumps(self.selected_models, indent=4))
         else:
             # print list of all model names
-            print(f"Number of models in total: {len(self.model_list)}")
+            self.rich_console.print(f"[bold cyan]📊 Available Models ({len(self.model_list)} total):[/bold cyan]")
             for model_name in self.model_list:
-                print(f"{model_name}")
+                print(f"  {model_name}")
 
     def run(self, live_output: bool = True):