Skip to content
77 changes: 76 additions & 1 deletion src/madengine/core/console.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
# built-in modules
import subprocess
import typing
import re
# third-party modules
import typing_extensions

Expand All @@ -33,6 +34,73 @@ def __init__(
self.shellVerbose = shellVerbose
self.live_output = live_output

def _highlight_docker_operations(self, command: str) -> str:
"""Highlight docker push/pull/build/run operations for better visibility.

Args:
command (str): The command to potentially highlight.

Returns:
str: The highlighted command if it's a docker operation.
"""
# Check if this is a docker operation
docker_push_pattern = r'^docker\s+push\s+'
docker_pull_pattern = r'^docker\s+pull\s+'
docker_build_pattern = r'^docker\s+build\s+'
docker_run_pattern = r'^docker\s+run\s+'

if re.match(docker_push_pattern, command, re.IGNORECASE):
return f"\n{'='*80}\n🚀 DOCKER PUSH OPERATION: {command}\n{'='*80}"
elif re.match(docker_pull_pattern, command, re.IGNORECASE):
return f"\n{'='*80}\n📥 DOCKER PULL OPERATION: {command}\n{'='*80}"
elif re.match(docker_build_pattern, command, re.IGNORECASE):
return f"\n{'='*80}\n🔨 DOCKER BUILD OPERATION: {command}\n{'='*80}"
elif re.match(docker_run_pattern, command, re.IGNORECASE):
return f"\n{'='*80}\n🏃 DOCKER RUN OPERATION: {command}\n{'='*80}"

return command

def _show_docker_completion(self, command: str, success: bool = True) -> None:
"""Show completion message for docker operations.

Args:
command (str): The command that was executed.
success (bool): Whether the operation was successful.
"""
docker_push_pattern = r'^docker\s+push\s+'
docker_pull_pattern = r'^docker\s+pull\s+'
docker_build_pattern = r'^docker\s+build\s+'
docker_run_pattern = r'^docker\s+run\s+'

if re.match(docker_push_pattern, command, re.IGNORECASE):
if success:
print(f"✅ DOCKER PUSH COMPLETED SUCCESSFULLY")
print(f"{'='*80}\n")
else:
print(f"❌ DOCKER PUSH FAILED")
print(f"{'='*80}\n")
elif re.match(docker_pull_pattern, command, re.IGNORECASE):
if success:
print(f"✅ DOCKER PULL COMPLETED SUCCESSFULLY")
print(f"{'='*80}\n")
else:
print(f"❌ DOCKER PULL FAILED")
print(f"{'='*80}\n")
elif re.match(docker_build_pattern, command, re.IGNORECASE):
if success:
print(f"✅ DOCKER BUILD COMPLETED SUCCESSFULLY")
print(f"{'='*80}\n")
else:
print(f"❌ DOCKER BUILD FAILED")
print(f"{'='*80}\n")
elif re.match(docker_run_pattern, command, re.IGNORECASE):
if success:
print(f"✅ DOCKER RUN COMPLETED SUCCESSFULLY")
print(f"{'='*80}\n")
else:
print(f"❌ DOCKER RUN FAILED")
print(f"{'='*80}\n")

def sh(
self,
command: str,
Expand Down Expand Up @@ -60,7 +128,8 @@ def sh(
"""
# Print the command if shellVerbose is True
if self.shellVerbose and not secret:
print("> " + command, flush=True)
highlighted_command = self._highlight_docker_operations(command)
print("> " + highlighted_command, flush=True)

# Run the shell command
proc = subprocess.Popen(
Expand Down Expand Up @@ -91,6 +160,12 @@ def sh(
raise RuntimeError("Console script timeout") from exc

# Check for failure
success = proc.returncode == 0

# Show docker operation completion status
if not secret:
self._show_docker_completion(command, success)

if proc.returncode != 0:
if not canFail:
if not secret:
Expand Down
219 changes: 211 additions & 8 deletions src/madengine/mad_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,56 @@ def __init__(self, **kwargs):
return Args(**kwargs)


def process_batch_manifest(batch_manifest_file: str) -> Dict[str, List]:
    """Process batch manifest file and extract model tags based on build_new flag.

    The manifest must be a JSON list of objects. Each object needs a
    ``model_name`` and may carry a ``build_new`` flag (treated as false when
    absent, and evaluated by truthiness).

    Args:
        batch_manifest_file: Path to the input batch.json file

    Returns:
        Dict with three keys:
            ``build_tags``: names of the models whose ``build_new`` is truthy.
            ``all_tags``: names of every model, in manifest order.
            ``manifest_data``: the parsed manifest list itself.

    Raises:
        FileNotFoundError: If the manifest file doesn't exist
        ValueError: If the manifest is not valid JSON or its format is invalid
    """
    if not os.path.exists(batch_manifest_file):
        raise FileNotFoundError(f"Batch manifest file not found: {batch_manifest_file}")

    try:
        with open(batch_manifest_file, 'r', encoding='utf-8') as f:
            manifest_data = json.load(f)
    except json.JSONDecodeError as e:
        # Chain the decoder error so the offending position stays in the traceback.
        raise ValueError(f"Invalid JSON in batch manifest file: {e}") from e

    if not isinstance(manifest_data, list):
        raise ValueError("Batch manifest must be a list of model objects")

    build_tags = []  # Models that need to be built (build_new=true)
    all_tags = []  # All models in the manifest

    for i, model in enumerate(manifest_data):
        if not isinstance(model, dict):
            raise ValueError(f"Model entry {i} must be a dictionary")

        if "model_name" not in model:
            raise ValueError(f"Model entry {i} missing required 'model_name' field")

        model_name = model["model_name"]
        # Callers use the names as tags and join them as text, so reject
        # anything that is not a usable string here, with a clear error.
        if not isinstance(model_name, str) or not model_name.strip():
            raise ValueError(f"Model entry {i} has an invalid 'model_name': expected a non-empty string")

        all_tags.append(model_name)
        if model.get("build_new", False):
            build_tags.append(model_name)

    return {
        "build_tags": build_tags,
        "all_tags": all_tags,
        "manifest_data": manifest_data
    }



def validate_additional_context(
additional_context: str,
additional_context_file: Optional[str] = None,
Expand Down Expand Up @@ -219,6 +269,127 @@ def save_summary_with_feedback(summary: Dict, output_path: Optional[str], summar
raise typer.Exit(ExitCode.FAILURE)


def _skipped_image_entry(model_name: str, image_name: str, model_info: Dict, registry_image: str) -> Dict:
    """Build the ``built_images`` record for a model whose build was skipped."""
    return {
        "docker_image": image_name,
        "dockerfile": model_info.get("dockerfile", f"docker/{model_name}"),
        "base_docker": "rocm/pytorch",  # Default base
        "docker_sha": "",  # No SHA since not built
        "build_duration": 0,
        "build_command": f"# Skipped build for {model_name} (build_new=false)",
        "log_file": f"{model_name}_{model_name}.ubuntu.amd.build.skipped.log",
        "registry_image": registry_image,
    }


def _skipped_model_entry(model_name: str, model_info: Dict) -> Dict:
    """Build the ``built_models`` record, falling back to defaults for missing fields."""
    return {
        "name": model_info.get("name", model_name),
        "dockerfile": model_info.get("dockerfile", f"docker/{model_name}"),
        "scripts": model_info.get("scripts", f"scripts/{model_name}/run.sh"),
        "n_gpus": model_info.get("n_gpus", "1"),
        "owner": model_info.get("owner", ""),
        "training_precision": model_info.get("training_precision", ""),
        "tags": model_info.get("tags", []),
        "args": model_info.get("args", ""),
        "cred": model_info.get("cred", ""),
    }


def _process_batch_manifest_entries(batch_data: Dict, manifest_output: str, registry: Optional[str]) -> None:
    """Process batch manifest and add entries for all models to build_manifest.json.

    Every batch entry with a falsy ``build_new`` gets a synthetic record in
    both ``built_images`` and ``built_models`` of the build manifest. Model
    details come from model discovery. If discovery raises, or returns no
    model with exactly that name, a warning is printed and a minimal record
    with default values is written instead, so no model is silently omitted.

    Args:
        batch_data: Processed batch manifest data; its ``manifest_data`` list
            is iterated.
        manifest_output: Path to the build manifest file. It is read if it
            exists and always (re)written.
        registry: Registry used for the build
    """
    from madengine.tools.discover_models import DiscoverModels

    # Load the existing build manifest
    if os.path.exists(manifest_output):
        with open(manifest_output, 'r') as f:
            build_manifest = json.load(f)
    else:
        # Create a minimal manifest structure
        build_manifest = {
            "built_images": {},
            "built_models": {},
            "context": {},
            "credentials_required": [],
            "registry": registry or ""
        }

    # An existing manifest may lack either section; create it rather than
    # failing with KeyError on the first insert.
    built_images = build_manifest.setdefault("built_images", {})
    built_models = build_manifest.setdefault("built_models", {})

    # Process each model in the batch manifest
    for model_entry in batch_data["manifest_data"]:
        model_name = model_entry["model_name"]
        if model_entry.get("build_new", False):
            # Only models that were not built need a synthetic entry.
            continue

        model_registry_image = model_entry.get("registry_image", "")
        model_registry = model_entry.get("registry", "")

        # Create a synthetic image name for this model
        synthetic_image_name = f"ci-{model_name}_{model_name}.ubuntu.amd"

        # Find the model configuration by discovering models with this tag
        model_info = None
        try:
            # Create a temporary args object to discover the model
            temp_args = create_args_namespace(
                tags=[model_name],
                registry=registry,
                additional_context="{}",
                additional_context_file=None,
                clean_docker_cache=False,
                manifest_output=manifest_output,
                live_output=False,
                output="perf.csv",
                ignore_deprecated_flag=False,
                data_config_file_name="data.json",
                tools_json_file_name="scripts/common/tools.json",
                generate_sys_env_details=True,
                force_mirror_local=None,
                disable_skip_gpu_arch=False,
                verbose=False,
                _separate_phases=True,
            )
            discovered = DiscoverModels(args=temp_args).run()
            model_info = next(
                (candidate for candidate in discovered if candidate["name"] == model_name),
                None,
            )
        except Exception as e:
            # Deliberate best-effort: a discovery failure must not abort the
            # whole batch; warn and fall through to the minimal entry.
            console.print(f"Warning: Could not process model {model_name}: {e}")
        else:
            if model_info is None:
                console.print(
                    f"Warning: Model {model_name} was not found by model discovery; adding a minimal entry"
                )

        if model_info is not None:
            # Prefer the image named in the batch entry; otherwise derive one
            # from the per-model or build registry, or leave it empty.
            registry_prefix = model_registry or registry
            derived_image = f"{registry_prefix}/{synthetic_image_name}" if registry_prefix else ""
            registry_image = model_registry_image or derived_image
        else:
            # Minimal entry: defaults only, and only an explicitly given image.
            model_info = {}
            registry_image = model_registry_image or ""

        # Add to built_images (even though it wasn't actually built)
        built_images[synthetic_image_name] = _skipped_image_entry(
            model_name, synthetic_image_name, model_info, registry_image
        )
        # Add to built_models
        built_models[synthetic_image_name] = _skipped_model_entry(model_name, model_info)

    # Save the updated manifest
    with open(manifest_output, 'w') as f:
        json.dump(build_manifest, f, indent=2)

    console.print(f"✅ Added entries for all models from batch manifest to {manifest_output}")


def display_results_table(summary: Dict, title: str) -> None:
"""Display results in a formatted table."""
table = Table(title=title, show_header=True, header_style="bold magenta")
Expand Down Expand Up @@ -265,6 +436,7 @@ def get_display_names(items, limit=5):
def build(
tags: Annotated[List[str], typer.Option("--tags", "-t", help="Model tags to build (can specify multiple)")] = [],
registry: Annotated[Optional[str], typer.Option("--registry", "-r", help="Docker registry to push images to")] = None,
batch_manifest: Annotated[Optional[str], typer.Option("--batch-manifest", help="Input batch.json file for batch build mode")] = None,
additional_context: Annotated[str, typer.Option("--additional-context", "-c", help="Additional context as JSON string")] = "{}",
additional_context_file: Annotated[Optional[str], typer.Option("--additional-context-file", "-f", help="File containing additional context JSON")] = None,
clean_docker_cache: Annotated[bool, typer.Option("--clean-docker-cache", help="Rebuild images without using cache")] = False,
Expand All @@ -289,21 +461,46 @@ def build(
"""
setup_logging(verbose)

console.print(Panel(
f"🔨 [bold cyan]Building Models[/bold cyan]\n"
f"Tags: [yellow]{', '.join(tags) if tags else 'All models'}[/yellow]\n"
f"Registry: [yellow]{registry or 'Local only'}[/yellow]",
title="Build Configuration",
border_style="blue"
))
# Validate mutually exclusive options
if batch_manifest and tags:
console.print("❌ [bold red]Error: Cannot specify both --batch-manifest and --tags options[/bold red]")
raise typer.Exit(ExitCode.INVALID_ARGS)

# Process batch manifest if provided
batch_data = None
effective_tags = tags
if batch_manifest:
try:
batch_data = process_batch_manifest(batch_manifest)
effective_tags = batch_data["build_tags"]
console.print(Panel(
f"� [bold cyan]Batch Build Mode[/bold cyan]\n"
f"Input manifest: [yellow]{batch_manifest}[/yellow]\n"
f"Total models: [yellow]{len(batch_data['all_tags'])}[/yellow]\n"
f"Models to build: [yellow]{len(batch_data['build_tags'])}[/yellow] ({', '.join(batch_data['build_tags']) if batch_data['build_tags'] else 'none'})\n"
f"Registry: [yellow]{registry or 'Local only'}[/yellow]",
title="Batch Build Configuration",
border_style="blue"
))
except (FileNotFoundError, ValueError) as e:
console.print(f"❌ [bold red]Error processing batch manifest: {e}[/bold red]")
raise typer.Exit(ExitCode.INVALID_ARGS)
else:
console.print(Panel(
f"�🔨 [bold cyan]Building Models[/bold cyan]\n"
f"Tags: [yellow]{', '.join(tags) if tags else 'All models'}[/yellow]\n"
f"Registry: [yellow]{registry or 'Local only'}[/yellow]",
title="Build Configuration",
border_style="blue"
))

try:
# Validate additional context
validate_additional_context(additional_context, additional_context_file)

# Create arguments object
args = create_args_namespace(
tags=tags,
tags=effective_tags,
registry=registry,
additional_context=additional_context,
additional_context_file=additional_context_file,
Expand Down Expand Up @@ -338,6 +535,12 @@ def build(
)
progress.update(task, description="Build completed!")

# Handle batch manifest post-processing
if batch_data:
with console.status("Processing batch manifest..."):
_process_batch_manifest_entries(batch_data, manifest_output, registry)


# Display results
display_results_table(build_summary, "Build Results")

Expand Down
18 changes: 15 additions & 3 deletions src/madengine/scripts/common/pre_scripts/rocEnvTool/csv_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,11 +284,23 @@ def dump_csv_output(self):
fs.write(sys_config_info[j])
fs.write("\n")
fs.close()
print ("OK: Dumped into {} file.".format(self.filename))
print("\n" + "="*60)
print(f"✅ SUCCESS: System config data dumped to {self.filename}")
print("="*60 + "\n")

def print_csv_output(self):
    """Print the collected system config lines inside a banner.

    Reads ``self.sys_config_info_list``. A line containing ``|`` that does
    not start with ``Tag`` is split once on ``|`` and printed as an aligned
    ``key: value`` pair. Every other line is printed as-is behind a marker.
    An empty or missing list prints a "no information" notice instead.
    """
    rule = "=" * 80
    print("\n" + rule)
    print("📋 SYSTEM CONFIG INFO - ENVIRONMENT VARIABLES")
    print(rule)

    if self.sys_config_info_list:
        # Iterate the lines directly; the index was never needed.
        for line in self.sys_config_info_list:
            # Add some formatting for key-value pairs
            if "|" in line and not line.startswith("Tag"):
                key, value = line.split("|", 1)
                print(f"🔹 {key:<30}: {value}")
            else:
                print(f"📌 {line}")
    else:
        print("❌ No system config information available")

    print(rule + "\n")
Loading