From 07075d67bb15a78d5684d534e2cf61ec1c35d166 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Tue, 3 Feb 2026 10:04:29 -0800 Subject: [PATCH 1/6] gb300 fp8 configs Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 591 +++++++++++++++++++++++++++++ runners/launch_gb300-nv.sh | 5 +- 2 files changed, 595 insertions(+), 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index a5cad5206..c54f268ae 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3737,6 +3737,597 @@ dsr1-fp4-gb300-dynamo-trt: ep: 16 dp-attn: true +dsr1-fp8-gb300-dynamo-trt: + image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 + model: /scratch/models/DeepSeek-R1-0528 + model-prefix: dsr1-fp8 + runner: gb300 + precision: fp8 + framework: dynamo-trt + multinode: true + disagg: true + seq-len-configs: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [180] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_180.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [564] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep32_batch16_eplb0_mtp3_564.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx1_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx2_gen1_dep16_batch128_eplb0_mtp1_2253.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [8192] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/mtp/ctx3_gen2_dep8_batch512_eplb0_mtp1_8192.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [84] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx1_gen4_tep8_batch16_eplb0_mtp0_84.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [1229] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 2 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx2_gen1_dep16_batch128_eplb0_mtp0_2253.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [8602] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch512_eplb0_mtp0_8602.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [12288] + prefill: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k1k/stp/ctx3_gen2_dep8_batch768_eplb0_mtp0_12288.yaml" + decode: + num-worker: 2 + tp: 8 + ep: 8 + dp-attn: true + - isl: 1024 + osl: 8192 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [16] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_16.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_16.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [141] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_141.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_141.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [544] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_544.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_544.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2048] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2048.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2048.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [8192] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp1_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp1_8192.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [36] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [282] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_282.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_282.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [1024] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1024.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1024.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [4096] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch128_eplb0_mtp0_4096.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch128_eplb0_mtp0_4096.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [8192] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [8] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_8.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 6 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx6_gen1_dep32_batch8_eplb0_mtp3_333.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 8 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx8_gen1_dep16_batch32_eplb0_mtp3_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 10 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx10_gen1_dep16_batch64_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/mtp/ctx7_gen1_dep8_batch128_eplb0_mtp1_1229.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [4] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [24] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch4_eplb0_mtp0_24.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [36] + prefill: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [512] + prefill: + num-worker: 6 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx6_gen1_dep32_batch16_eplb0_mtp0_512.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [666] + prefill: + num-worker: 4 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx4_gen1_dep16_batch32_eplb0_mtp0_666.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep16_batch64_eplb0_mtp0_1229.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2151] + prefill: + num-worker: 7 + tp: 4 + ep: 4 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp8/8k1k/stp/ctx7_gen1_dep8_batch256_eplb0_mtp0_2151.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + gptoss-fp4-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post2 diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index 11d2a6b58..1c0133693 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -36,8 +36,11 @@ export MODEL_PATH=$MODEL if [[ $MODEL_PREFIX == "dsr1" ]]; then export SERVED_MODEL_NAME="deepseek-r1-fp4" export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp4-v2 +elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then + export SERVED_MODEL_NAME="deepseek-r1-fp8" + export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp8 else - echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1" + echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1, dsr1-fp8" exit 1 fi From dab0c1dd8ba7614154a63764bf72300c6177e780 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Tue, 3 Feb 2026 20:19:42 -0800 Subject: [PATCH 2/6] remove 1k8k for now Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 179 ----------------------------- 1 file changed, 179 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index c54f268ae..8c0934035 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3955,185 +3955,6 @@ dsr1-fp8-gb300-dynamo-trt: tp: 8 ep: 8 dp-attn: true - - isl: 1024 - osl: 8192 - search-space: - # MTP configurations (spec_decoding="mtp") - - spec-decoding: "mtp" - conc-list: [4] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_4.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3_4.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [16] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_16.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3_16.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - spec-decoding: "mtp" - conc-list: [141] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_141.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch4_eplb0_mtp3_141.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [544] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_544.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3_544.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [2048] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2048.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen2_dep16_batch64_eplb0_mtp1_2048.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - - spec-decoding: "mtp" - conc-list: [8192] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp1_8192.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/mtp/ctx1_gen4_dep8_batch256_eplb0_mtp1_8192.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: true - # STP configurations (no spec_decoding) - - conc-list: [4] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0_4.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [36] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen4_tep8_batch8_eplb0_mtp0_36.yaml" - decode: - num-worker: 4 - tp: 8 - ep: 8 - dp-attn: false - - conc-list: [282] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_282.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch8_eplb0_mtp0_282.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [1024] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1024.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0_1024.yaml" - decode: - num-worker: 1 - tp: 32 - ep: 32 - dp-attn: true - - conc-list: [4096] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch128_eplb0_mtp0_4096.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch128_eplb0_mtp0_4096.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - - conc-list: [8192] - prefill: - num-worker: 1 - tp: 4 - ep: 4 - dp-attn: true - additional-settings: - # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml - - "CONFIG_FILE=recipes/trtllm/gb300-fp8/1k8k/stp/ctx1_gen2_dep16_batch256_eplb0_mtp0_8192.yaml" - decode: - num-worker: 2 - tp: 16 - ep: 16 - dp-attn: true - isl: 8192 osl: 1024 search-space: From f5cae754fc39af9291ade9bcebce9f12d5e69acf Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Wed, 4 Feb 2026 10:05:42 -0800 Subject: [PATCH 3/6] perf changelog entry Signed-off-by: jthomson04 --- perf-changelog.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/perf-changelog.yaml b/perf-changelog.yaml index aa8ad57f9..e34eab9b3 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -361,3 +361,9 @@ - "8k1k: 14 scenarios (7 MTP, 7 STP) for long context workloads" - "Prefill workers: 1-5P, Decode workers: 1-4D, TP/EP: 8/16/32" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/617 + +- config-keys: + - dsr1-fp8-gb300-dynamo-trt + description: + - "Add DeepSeek R1 FP8 GB300 Dynamo TRT-LLM disaggregated multinode configurations for 8k1k and 1k1k" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/627 From 504fd22d9d0dff6f86bb9be1b587d0a03f22e13e Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Wed, 4 Feb 2026 10:42:15 -0800 Subject: [PATCH 4/6] fix model path Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 8c0934035..4ad63709c 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3739,7 +3739,7 @@ dsr1-fp4-gb300-dynamo-trt: dsr1-fp8-gb300-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 - model: /scratch/models/DeepSeek-R1-0528 + model: deepseek-ai/DeepSeek-R1-0528 model-prefix: dsr1-fp8 runner: gb300 precision: fp8 From b6c4e2b6a9e76118bd7b84d44b95661508e123dc Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Wed, 4 Feb 2026 13:45:16 -0800 Subject: [PATCH 5/6] fix fp8 model path Signed-off-by: jthomson04 --- runners/launch_gb300-nv.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index 1c0133693..abb7d8248 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -38,7 +38,7 @@ if [[ $MODEL_PREFIX == "dsr1" ]]; then export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp4-v2 elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then export SERVED_MODEL_NAME="deepseek-r1-fp8" - export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp8 + export MODEL_PATH=/raid/shared/models/deepseek-r1-0528 else echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1, dsr1-fp8" exit 1 From 4bf4a06b09ee6379cf430995a2199c80a393b737 Mon Sep 17 00:00:00 2001 From: jthomson04 Date: Wed, 4 Feb 2026 19:18:35 -0800 Subject: [PATCH 6/6] fix model prefix Signed-off-by: jthomson04 --- .github/configs/nvidia-master.yaml | 2 +- runners/launch_gb300-nv.sh | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index 4ad63709c..27fb1d1d4 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -3740,7 +3740,7 @@ dsr1-fp4-gb300-dynamo-trt: dsr1-fp8-gb300-dynamo-trt: image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 model: deepseek-ai/DeepSeek-R1-0528 - model-prefix: dsr1-fp8 + model-prefix: dsr1 runner: gb300 precision: fp8 framework: dynamo-trt diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh index abb7d8248..0fe24b891 100644 --- a/runners/launch_gb300-nv.sh +++ b/runners/launch_gb300-nv.sh @@ -33,14 +33,16 @@ export SLURM_ACCOUNT="benchmark" export MODEL_PATH=$MODEL -if [[ $MODEL_PREFIX == "dsr1" ]]; then +if [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp4" ]]; then export SERVED_MODEL_NAME="deepseek-r1-fp4" export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp4-v2 -elif [[ $MODEL_PREFIX == "dsr1-fp8" ]]; then + export SRT_SLURM_MODEL_PREFIX="dsr1" +elif [[ $MODEL_PREFIX == "dsr1" && $PRECISION == "fp8" ]]; then export SERVED_MODEL_NAME="deepseek-r1-fp8" export MODEL_PATH=/raid/shared/models/deepseek-r1-0528 + export SRT_SLURM_MODEL_PREFIX="dsr1-fp8" else - echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1, dsr1-fp8" + echo "Unsupported model: $MODEL_PREFIX-$PRECISION. Supported models are: dsr1-fp4, dsr1-fp8" exit 1 fi @@ -67,7 +69,7 @@ network_interface: "" srtctl_root: "${GITHUB_WORKSPACE}/srt-slurm" # Model path aliases model_paths: - "${MODEL_PREFIX}": "${MODEL_PATH}" + "${SRT_SLURM_MODEL_PREFIX}": "${MODEL_PATH}" containers: dynamo-trtllm: ${SQUASH_FILE} use_segment_sbatch_directive: false