diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 44a0f1044..e2900490d 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -1,389 +1,3 @@ -dsr1-fp4-b300-dynamo-trt: - image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.0 - model: deepseek-r1-fp4 - model-prefix: dsr1 - runner: b300 - precision: fp4 - framework: dynamo-trt - multinode: true - disagg: true - seq-len-configs: - - isl: 1024 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [654] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx1_gen1_dep8_batch64_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [271] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx1_gen2_dep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [11] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [10, 20, 25, 60, 120, 200] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx1_gen5_tep8_batch32_eplb0_mtp3.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [2342] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx2_gen1_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [8609] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch512_eplb0_mtp1.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [12926] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/mtp/ctx5_gen2_dep8_batch768_eplb0_mtp1.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP configurations - - conc-list: [1176] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx1_gen2_dep8_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [6] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [5, 10, 15, 25] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [60, 110, 195, 395] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx1_gen5_tep8_batch64_eplb0_mtp0.yaml" - decode: - num-worker: 5 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4405] - prefill: - num-worker: 2 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx2_gen1_dep8_batch512_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [8192] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx3_gen1_dep8_batch1024_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [4611] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/1k1k/stp/ctx3_gen2_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - - isl: 8192 - osl: 1024 - search-space: - - spec-decoding: "mtp" - conc-list: [2198] - prefill: - num-worker: 10 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [52] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep4_batch8_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [8] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [32] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [181] - prefill: - num-worker: 3 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx3_gen1_dep8_batch16_eplb0_mtp3.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [1197] - prefill: - num-worker: 9 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/mtp/ctx9_gen1_dep8_batch128_eplb0_mtp1.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - # Non-MTP configurations - - conc-list: [105] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep4_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [63] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [4] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx1_gen3_tep8_batch1_eplb0_mtp0.yaml" - decode: - num-worker: 3 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [12] - prefill: - num-worker: 1 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx1_gen4_tep4_batch2_eplb0_mtp0.yaml" - decode: - num-worker: 4 - tp: 4 - ep: 4 - dp-attn: false - - conc-list: [589] - prefill: - num-worker: 5 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx5_gen2_dep8_batch32_eplb0_mtp0.yaml" - decode: - num-worker: 2 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [1093] - prefill: - num-worker: 6 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx6_gen1_dep8_batch128_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - - conc-list: [2048] - prefill: - num-worker: 8 - tp: 2 - ep: 2 - dp-attn: true - additional-settings: - - "CONFIG_FILE=recipies/trtllm/b300-fp4/8k1k/stp/ctx8_gen1_dep8_batch256_eplb0_mtp0.yaml" - decode: - num-worker: 1 - tp: 8 - ep: 8 - dp-attn: true - dsr1-fp4-b200-sglang: image: lmsysorg/sglang:v0.5.6-cu129-amd64 model: nvidia/DeepSeek-R1-0528-FP4-V2 diff --git a/.github/configs/runners.yaml b/.github/configs/runners.yaml index 7754dda67..5610ad273 100644 --- a/.github/configs/runners.yaml +++ b/.github/configs/runners.yaml @@ -60,5 +60,3 @@ mi355x-disagg: - 'mi355x-amds_2' gb200: - gb200-nv_0 -b300: -- 'b300-nv_0' diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 07772d370..933fd5413 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -261,12 +261,6 @@ pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/558 evals-only: true -- config-keys: - - dsr1-fp4-b300-dynamo-trt - description: - - "Add DSR1 FP4 B300 Dynamo TRT configurations" - pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/511 - - config-keys: - dsr1-fp8-mi355x-sglang description: diff --git a/runners/launch_b300-nv.sh b/runners/launch_b300-nv.sh deleted file mode 100644 index 3fc95e9c8..000000000 --- a/runners/launch_b300-nv.sh +++ /dev/null @@ -1,166 +0,0 @@ -#!/usr/bin/bash - -set -x - -echo "Cloning srt-slurm-trtllm repository..." -SRT_REPO_DIR="srt-slurm" -if [ -d "$SRT_REPO_DIR" ]; then - echo "Removing existing $SRT_REPO_DIR..." - rm -rf "$SRT_REPO_DIR" -fi - -git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" -cd "$SRT_REPO_DIR" -git checkout b4abe4643a7009f3539b36bdc508408874a4c930 - -echo "Installing srtctl..." -curl -LsSf https://astral.sh/uv/install.sh | sh -source $HOME/.local/bin/env - -uv venv -source .venv/bin/activate -uv pip install -e . - -if ! command -v srtctl &> /dev/null; then - echo "Error: Failed to install srtctl" - exit 1 -fi - -echo "Configs available at: $SRT_REPO_DIR/" - -export SLURM_PARTITION="batch_1" -export SLURM_ACCOUNT="benchmark" - -SQUASH_FILE="/data/squash/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" - -if [[ $MODEL_PREFIX == "dsr1" ]]; then - export MODEL_PATH="/scratch/models/deepseek-r1-0528-nvfp4-v2" - export SERVED_MODEL_NAME="deepseek-r1-fp4" -else - echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1" - exit 1 -fi - -export ISL="$ISL" -export OSL="$OSL" - -# Create srtslurm.yaml for srtctl -echo "Creating srtslurm.yaml configuration..." -cat > srtslurm.yaml <&1) -echo "$SRTCTL_OUTPUT" - -# Extract JOB_ID from srtctl output (e.g., "✅ Job 1168 submitted!") -JOB_ID=$(echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+') - -if [ -z "$JOB_ID" ]; then - echo "Error: Failed to extract JOB_ID from srtctl output" - exit 1 -fi - -echo "Extracted JOB_ID: $JOB_ID" - -# Wait for this specific job to complete -echo "Waiting for job $JOB_ID to complete..." -while [ -n "$(squeue -j $JOB_ID --noheader 2>/dev/null)" ]; do - echo "Job $JOB_ID still running..." - squeue -j $JOB_ID - sleep 30 -done -echo "Job $JOB_ID completed!" - -cat "outputs/$JOB_ID/logs/sweep_${JOB_ID}.log" - -echo "Collecting results..." - -# Use the JOB_ID to find the logs directory -# srtctl creates logs in outputs/JOB_ID/logs/ -LOGS_DIR="outputs/$JOB_ID/logs" - -if [ ! -d "$LOGS_DIR" ]; then - echo "Warning: Logs directory not found at $LOGS_DIR" - exit 1 -fi - -echo "Found logs directory: $LOGS_DIR" - -# Find all result subdirectories (e.g., sa-bench_isl_8192_osl_1024) -RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) - -if [ -z "$RESULT_SUBDIRS" ]; then - echo "Warning: No result subdirectories found in $LOGS_DIR" -else - # Process results from all configurations - for result_subdir in $RESULT_SUBDIRS; do - echo "Processing result subdirectory: $result_subdir" - - # Extract configuration info from directory name - CONFIG_NAME=$(basename "$result_subdir") - - # Find all result JSON files (e.g., results_concurrency_128_gpus_16_ctx_8_gen_8.json) - RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) - - for result_file in $RESULT_FILES; do - if [ -f "$result_file" ]; then - # Extract metadata from filename - filename=$(basename "$result_file") - concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') - gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') - ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') - gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') - - echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" - - WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" - cp "$result_file" "$WORKSPACE_RESULT_FILE" - - echo "Copied result file to: $WORKSPACE_RESULT_FILE" - fi - done - done -fi - -echo "All result files processed" - -# Cleanup -echo "Cleaning up..." -deactivate 2>/dev/null || true -rm -rf .venv -echo "Cleanup complete" -