Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 82 additions & 30 deletions src/madengine/mad_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -459,44 +459,85 @@ def _process_batch_manifest_entries(
)


def display_results_table(summary: Dict, title: str, show_gpu_arch: bool = False) -> None:
    """Display results in a formatted table with each model as a separate row.

    Args:
        summary: Result summary; read via ``successful_builds``/``successful_runs``
            and ``failed_builds``/``failed_runs`` keys. Entries may be dicts
            (build/run result records) or plain strings.
        title: Title rendered above the table.
        show_gpu_arch: When True (multi-arch builds), add a "GPU Architecture"
            column to every row.
    """
    table = Table(title=title, show_header=True, header_style="bold magenta")
    table.add_column("Index", justify="right", style="dim")
    table.add_column("Status", style="bold")
    table.add_column("Model", style="cyan")

    # Add GPU Architecture column only if a multi-arch build was requested.
    if show_gpu_arch:
        table.add_column("GPU Architecture", style="yellow")

    successful = summary.get("successful_builds", summary.get("successful_runs", []))
    failed = summary.get("failed_builds", summary.get("failed_runs", []))

    def extract_model_name(item):
        """Best-effort model name for one result entry (dict or plain value)."""
        if isinstance(item, dict):
            if "docker_image" in item:
                # Build results: derive the model name from the image tag, e.g.
                # "ci-dummy_dummy.ubuntu.amd" -> "dummy"
                # "ci-dummy_dummy.ubuntu.amd_gfx908" -> "dummy"
                docker_image = item["docker_image"]
                if docker_image.startswith("ci-"):
                    parts = docker_image[3:].split("_")
                    # First "_"-separated part is the model name; fall back to
                    # the raw image name if the tag is empty after the prefix.
                    return parts[0] if parts else docker_image
                return docker_image
            # Run results carry the model name directly.
            if "model" in item:
                return item["model"]
            if "name" in item:
                return item["name"]
        return str(item)[:20]  # Fallback for unexpected entry shapes

    def extract_gpu_arch(item):
        """GPU architecture recorded on a result entry, or 'N/A'."""
        if isinstance(item, dict) and "gpu_architecture" in item:
            return item["gpu_architecture"]
        return "N/A"

    # One row per successful build/run, numbered continuously with failures.
    row_index = 1
    for item in successful:
        model_name = extract_model_name(item)
        if show_gpu_arch:
            table.add_row(str(row_index), "✅ Success", model_name, extract_gpu_arch(item))
        else:
            table.add_row(str(row_index), "✅ Success", model_name)
        row_index += 1

    # One row per failed build/run.
    for item in failed:
        if isinstance(item, dict):
            model_name = item.get("model", "Unknown")
            if show_gpu_arch:
                # Failed entries historically stored the arch under
                # "architecture"; accept the canonical key as well.
                gpu_arch = item.get("gpu_architecture", item.get("architecture", "N/A"))
                table.add_row(str(row_index), "❌ Failed", model_name, gpu_arch)
            else:
                table.add_row(str(row_index), "❌ Failed", model_name)
        else:
            # String entries (legacy build results) are shown verbatim.
            if show_gpu_arch:
                table.add_row(str(row_index), "❌ Failed", str(item), "N/A")
            else:
                table.add_row(str(row_index), "❌ Failed", str(item))
        row_index += 1

    # Show an explicit empty state rather than a bare header-only table.
    if not successful and not failed:
        if show_gpu_arch:
            table.add_row("1", "ℹ️ No items", "", "")
        else:
            table.add_row("1", "ℹ️ No items", "")

    console.print(table)

Expand All @@ -507,6 +548,14 @@ def build(
List[str],
typer.Option("--tags", "-t", help="Model tags to build (can specify multiple)"),
] = [],
target_archs: Annotated[
List[str],
typer.Option(
"--target-archs",
"-a",
help="Target GPU architectures to build for (e.g., gfx908,gfx90a,gfx942). If not specified, builds single image with MAD_SYSTEM_GPU_ARCHITECTURE from additional_context or detected GPU architecture."
),
] = [],
registry: Annotated[
Optional[str],
typer.Option("--registry", "-r", help="Docker registry to push images to"),
Expand Down Expand Up @@ -658,6 +707,7 @@ def build(
# Create arguments object
args = create_args_namespace(
tags=effective_tags,
target_archs=target_archs,
registry=registry,
additional_context=additional_context,
additional_context_file=additional_context_file,
Expand Down Expand Up @@ -716,7 +766,9 @@ def build(
)

# Display results
display_results_table(build_summary, "Build Results")
# Check if target_archs was used to show GPU architecture column
show_gpu_arch = bool(target_archs)
display_results_table(build_summary, "Build Results", show_gpu_arch)

# Save summary
save_summary_with_feedback(build_summary, summary_output, "Build")
Expand Down
100 changes: 100 additions & 0 deletions src/madengine/tools/distributed_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,17 @@ def build_phase(
else ""
)

# Get target architectures from args if provided
target_archs = getattr(self.args, "target_archs", [])

# Handle comma-separated architectures in a single string
if target_archs:
processed_archs = []
for arch_arg in target_archs:
# Split comma-separated values and add to list
processed_archs.extend([arch.strip() for arch in arch_arg.split(',') if arch.strip()])
target_archs = processed_archs

# If batch_build_metadata is provided, use it to set per-model registry/registry_image
build_summary = builder.build_all_models(
models,
Expand All @@ -189,6 +200,7 @@ def build_phase(
registry,
phase_suffix,
batch_build_metadata=batch_build_metadata,
target_archs=target_archs,
)

# Export build manifest with registry information
Expand Down Expand Up @@ -389,6 +401,52 @@ def run_phase(

print(f"Loaded manifest with {len(manifest['built_images'])} images")

# Filter images by GPU architecture compatibility
try:
runtime_gpu_arch = self.context.get_system_gpu_architecture()
print(f"Runtime GPU architecture detected: {runtime_gpu_arch}")

# Filter manifest images by GPU architecture compatibility
compatible_images = self._filter_images_by_gpu_architecture(
manifest["built_images"], runtime_gpu_arch
)

if not compatible_images:
available_archs = list(set(
img.get('gpu_architecture', 'unknown')
for img in manifest['built_images'].values()
))
available_archs = [arch for arch in available_archs if arch != 'unknown']

if available_archs:
error_msg = (
f"No compatible Docker images found for runtime GPU architecture '{runtime_gpu_arch}'. "
f"Available image architectures: {available_archs}. "
f"Please build images for the target architecture using: "
f"--target-archs {runtime_gpu_arch}"
)
else:
error_msg = (
f"No compatible Docker images found for runtime GPU architecture '{runtime_gpu_arch}'. "
f"The manifest contains legacy images without architecture information. "
f"These will be treated as compatible for backward compatibility."
)

raise RuntimeError(error_msg)

# Update manifest to only include compatible images
manifest["built_images"] = compatible_images
print(f"Filtered to {len(compatible_images)} compatible images for GPU architecture '{runtime_gpu_arch}'")

except Exception as e:
# If GPU architecture detection fails, proceed with all images for backward compatibility
self.rich_console.print(
f"[yellow]Warning: GPU architecture filtering failed: {e}[/yellow]"
)
self.rich_console.print(
"[yellow]Proceeding with all available images (backward compatibility mode)[/yellow]"
)

# Registry is now per-image; CLI registry is fallback
if registry:
print(f"Using registry from CLI: {registry}")
Expand Down Expand Up @@ -789,6 +847,48 @@ def _copy_scripts(self) -> None:
self.console.sh(f"cp -vLR --preserve=all {scripts_path} .")
print(f"Scripts copied to {os.getcwd()}/scripts")

def _filter_images_by_gpu_architecture(self, built_images: typing.Dict, runtime_arch: str) -> typing.Dict:
"""Filter built images by GPU architecture compatibility.

Args:
built_images: Dictionary of built images from manifest
runtime_arch: Runtime GPU architecture (e.g., 'gfx908')

Returns:
dict: Filtered dictionary containing only compatible images
"""
compatible = {}

self.rich_console.print(f"[cyan]Filtering images for runtime GPU architecture: {runtime_arch}[/cyan]")

for image_name, image_info in built_images.items():
image_arch = image_info.get("gpu_architecture")

if not image_arch:
# Legacy images without architecture info - assume compatible for backward compatibility
self.rich_console.print(
f"[yellow] Warning: Image {image_name} has no architecture info, assuming compatible (legacy mode)[/yellow]"
)
compatible[image_name] = image_info
elif image_arch == runtime_arch:
# Exact architecture match
self.rich_console.print(
f"[green] ✓ Compatible: {image_name} (architecture: {image_arch})[/green]"
)
compatible[image_name] = image_info
else:
# Architecture mismatch
self.rich_console.print(
f"[red] ✗ Incompatible: {image_name} (architecture: {image_arch}, runtime: {runtime_arch})[/red]"
)

if not compatible:
self.rich_console.print(f"[red]No compatible images found for runtime architecture: {runtime_arch}[/red]")
else:
self.rich_console.print(f"[green]Found {len(compatible)} compatible image(s)[/green]")

return compatible

def cleanup(self) -> None:
"""Cleanup the scripts/common directory."""
# check the directory exists
Expand Down
Loading