diff --git a/.github/configs/nvidia-master.yaml b/.github/configs/nvidia-master.yaml index f12b586f4..1fe2cf33a 100644 --- a/.github/configs/nvidia-master.yaml +++ b/.github/configs/nvidia-master.yaml @@ -2560,6 +2560,613 @@ dsr1-fp4-gb200-dynamo-sglang: additional-settings: - "DECODE_NODES=8" +dsr1-fp4-gb300-dynamo-trt: + image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2 + model: nvidia/DeepSeek-R1-0528-NVFP4-v2 + model-prefix: dsr1 + runner: gb300 + precision: fp4 + framework: dynamo-trt + multinode: true + disagg: true + seq-len-configs: + - isl: 1024 + osl: 1024 + search-space: + # MTP configurations + - spec-decoding: "mtp" + conc-list: [3226] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep4_batch768_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 4 + ep: 4 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [333] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen1_dep32_batch8_eplb0_mtp.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [8, 12, 24, 48] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: 
true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx1_gen4_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep16_batch128_eplb256_mtp1.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [1229] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/mtp/ctx3_gen1_dep32_batch32_eplb288_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [12, 48, 96, 192] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx1_gen4_tep8_batch32_eplb0_mtp0.yaml" + decode: 
+ num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [8192] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep8_batch1024_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 2 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx2_gen1_dep32_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [4301] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep16_batch256_eplb256_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 3 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k1k/stp/ctx3_gen1_dep32_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - isl: 1024 + osl: 8192 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [7] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen7_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [63] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen7_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen7_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [563] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen1_dep32_batch16_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2088] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen1_dep32_batch64_eplb288_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen1_dep32_batch64_eplb288_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [8192] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen2_dep16_batch256_eplb256_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen2_dep16_batch256_eplb256_mtp1.yaml" + decode: + num-worker: 2 + tp: 16 + ep: 16 + dp-attn: true + - 
spec-decoding: "mtp" + conc-list: [16384] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen4_dep8_batch512_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/mtp/ctx1_gen4_dep8_batch512_eplb0_mtp1.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: true + # STP configurations (no spec_decoding) + - conc-list: [7] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen7_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [60] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen15_tep4_batch4_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen15_tep4_batch4_eplb0_mtp0.yaml" + decode: + num-worker: 15 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [245] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen7_tep8_batch32_eplb0_mtp0.yaml" + decode: + num-worker: 7 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [1024] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch32_eplb0_mtp0.yaml" 
+ decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [4096] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch128_eplb288_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch128_eplb288_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [8192] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch256_eplb288_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/1k8k/stp/ctx1_gen1_dep32_batch256_eplb288_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - isl: 8192 + osl: 1024 + search-space: + # MTP configurations (spec_decoding="mtp") + - spec-decoding: "mtp" + conc-list: [33] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen3_tep8_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch1_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [12, 24] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # 
https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx1_gen4_tep8_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - spec-decoding: "mtp" + conc-list: [180] + prefill: + num-worker: 4 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx4_gen1_dep32_batch4_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [308] + prefill: + num-worker: 8 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx8_gen1_dep32_batch8_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [2253] + prefill: + num-worker: 10 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep8_batch256_eplb0_mtp1.yaml" + decode: + num-worker: 1 + tp: 8 + ep: 8 + dp-attn: true + - spec-decoding: "mtp" + conc-list: [666] + prefill: + num-worker: 10 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx10_gen1_dep16_batch32_eplb0_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - 
spec-decoding: "mtp" + conc-list: [1127] + prefill: + num-worker: 13 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/mtp/ctx13_gen1_dep16_batch64_eplb256_mtp3.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + # Non-MTP configurations (default spec_decoding="none") + - conc-list: [72] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen3_tep8_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [5] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch1_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [12] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen4_tep8_batch2_eplb0_mtp0.yaml" + decode: + num-worker: 4 + tp: 8 + ep: 8 + dp-attn: false + - conc-list: [5, 15, 30] + prefill: + num-worker: 1 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml + - 
"CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx1_gen5_tep4_batch4_eplb0_mtp0.yaml" + decode: + num-worker: 5 + tp: 4 + ep: 4 + dp-attn: false + - conc-list: [666] + prefill: + num-worker: 7 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx7_gen1_dep32_batch16_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 32 + ep: 32 + dp-attn: true + - conc-list: [1229] + prefill: + num-worker: 9 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx9_gen1_dep16_batch64_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + - conc-list: [3228] + prefill: + num-worker: 11 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx11_gen3_dep4_batch256_eplb0_mtp0.yaml" + decode: + num-worker: 3 + tp: 4 + ep: 4 + dp-attn: true + - conc-list: [2253] + prefill: + num-worker: 14 + tp: 2 + ep: 2 + dp-attn: true + additional-settings: + # https://github.com/ishandhanani/srt-slurm/blob/sa-submission-q1-2026/recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml + - "CONFIG_FILE=recipes/trtllm/gb300-fp4/8k1k/stp/ctx14_gen1_dep16_batch128_eplb0_mtp0.yaml" + decode: + num-worker: 1 + tp: 16 + ep: 16 + dp-attn: true + + gptoss-fp4-gb200-dynamo-trt: image: nvcr.io#nvidia/ai-dynamo/tensorrtllm-runtime:0.7.0.post2 model: openai/gpt-oss-120b diff --git a/.github/configs/runners.yaml b/.github/configs/runners.yaml index f62ccdd22..86ef27315 100644 --- 
a/.github/configs/runners.yaml +++ b/.github/configs/runners.yaml @@ -64,3 +64,5 @@ b200-multinode-slurm: - 'b200-dgxc-slurm_0' b300: - 'b300-nv_0' +gb300: +- 'gb300-nv_0' diff --git a/perf-changelog.yaml b/perf-changelog.yaml index 3ac387147..e17845a02 100644 --- a/perf-changelog.yaml +++ b/perf-changelog.yaml @@ -325,3 +325,12 @@ - "Disable torch.compile for MI355X DeepSeek-R1 FP8 SGLang" - "set cuda-graph-max-bs to CONC" pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/613 + +- config-keys: + - dsr1-fp4-gb300-dynamo-trt + description: + - "Add DeepSeek-R1 FP4 GB300 Dynamo TRT disaggregated multinode configurations" + - "Image: nvcr.io/nvidia/ai-dynamo/tensorrtllm-runtime:0.8.1.post2" + - "Includes MTP and STP configs for 1k1k, 1k8k, and 8k1k sequence lengths" + - "Add gb300-nv runner and launch script for srt-slurm integration" + pr-link: https://github.com/InferenceMAX/InferenceMAX/pull/618 diff --git a/runners/launch_gb300-nv.sh b/runners/launch_gb300-nv.sh new file mode 100644 index 000000000..11d2a6b58 --- /dev/null +++ b/runners/launch_gb300-nv.sh @@ -0,0 +1,166 @@ +#!/usr/bin/bash + +set -x + +echo "Cloning srt-slurm repository..." +SRT_REPO_DIR="srt-slurm" +if [ -d "$SRT_REPO_DIR" ]; then + echo "Removing existing $SRT_REPO_DIR..." + rm -rf "$SRT_REPO_DIR" +fi + +git clone https://github.com/ishandhanani/srt-slurm.git "$SRT_REPO_DIR" +cd "$SRT_REPO_DIR" +git checkout sa-submission-q1-2026 + +echo "Installing srtctl..." +curl -LsSf https://astral.sh/uv/install.sh | sh +source $HOME/.local/bin/env + +uv venv +source .venv/bin/activate +uv pip install -e . + +if ! 
command -v srtctl &> /dev/null; then + echo "Error: Failed to install srtctl" + exit 1 +fi + +echo "Configs available at: $SRT_REPO_DIR/" + +export SLURM_PARTITION="batch" +export SLURM_ACCOUNT="benchmark" + +export MODEL_PATH=$MODEL + +if [[ $MODEL_PREFIX == "dsr1" ]]; then + export SERVED_MODEL_NAME="deepseek-r1-fp4" + export MODEL_PATH=/raid/shared/models/deepseek-r1-0528-fp4-v2 +else + echo "Unsupported model prefix: $MODEL_PREFIX. Supported prefixes are: dsr1" + exit 1 +fi + +export ENROOT_ROOTFS_WRITABLE=1 + +export ISL="$ISL" +export OSL="$OSL" + +SQUASH_FILE="/home/sa-shared/squash/$(echo "$IMAGE" | sed 's/[\/:@#]/_/g').sqsh" +srun --partition=$SLURM_PARTITION --exclusive --time=180 bash -c "enroot import -o $SQUASH_FILE docker://$IMAGE" + +# Create srtslurm.yaml for srtctl +echo "Creating srtslurm.yaml configuration..." +cat > srtslurm.yaml <&1) +echo "$SRTCTL_OUTPUT" + +# Extract JOB_ID from srtctl output +JOB_ID=$(echo "$SRTCTL_OUTPUT" | grep -oP '✅ Job \K[0-9]+' || echo "$SRTCTL_OUTPUT" | grep -oP 'Job \K[0-9]+') + +if [ -z "$JOB_ID" ]; then + echo "Error: Failed to extract JOB_ID from srtctl output" + exit 1 +fi + +echo "Extracted JOB_ID: $JOB_ID" + +# Wait for this specific job to complete +echo "Waiting for job $JOB_ID to complete..." +while [ -n "$(squeue -j $JOB_ID --noheader 2>/dev/null)" ]; do + echo "Job $JOB_ID still running..." + squeue -j $JOB_ID + sleep 30 +done +echo "Job $JOB_ID completed!" + +echo "Collecting results..." + +# Use the JOB_ID to find the logs directory +# srtctl creates logs in outputs/JOB_ID/logs/ +LOGS_DIR="outputs/$JOB_ID/logs" + +if [ ! 
-d "$LOGS_DIR" ]; then + echo "Warning: Logs directory not found at $LOGS_DIR" + exit 1 +fi + +echo "Found logs directory: $LOGS_DIR" + +cat $LOGS_DIR/sweep_${JOB_ID}.log + +for file in $LOGS_DIR/*; do + if [ -f "$file" ]; then + tail -n 500 $file + fi +done + +# Find all result subdirectories +RESULT_SUBDIRS=$(find "$LOGS_DIR" -maxdepth 1 -type d -name "*isl*osl*" 2>/dev/null) + +if [ -z "$RESULT_SUBDIRS" ]; then + echo "Warning: No result subdirectories found in $LOGS_DIR" +else + # Process results from all configurations + for result_subdir in $RESULT_SUBDIRS; do + echo "Processing result subdirectory: $result_subdir" + + # Extract configuration info from directory name + CONFIG_NAME=$(basename "$result_subdir") + + # Find all result JSON files + RESULT_FILES=$(find "$result_subdir" -name "results_concurrency_*.json" 2>/dev/null) + + for result_file in $RESULT_FILES; do + if [ -f "$result_file" ]; then + # Extract metadata from filename + # Files are of the format "results_concurrency_{concurrency}_gpus_{num gpus}_ctx_{num ctx}_gen_{num gen}.json" + filename=$(basename "$result_file") + concurrency=$(echo "$filename" | sed -n 's/results_concurrency_\([0-9]*\)_gpus_.*/\1/p') + gpus=$(echo "$filename" | sed -n 's/results_concurrency_[0-9]*_gpus_\([0-9]*\)_ctx_.*/\1/p') + ctx=$(echo "$filename" | sed -n 's/.*_ctx_\([0-9]*\)_gen_.*/\1/p') + gen=$(echo "$filename" | sed -n 's/.*_gen_\([0-9]*\)\.json/\1/p') + + echo "Processing concurrency $concurrency with $gpus GPUs (ctx: $ctx, gen: $gen): $result_file" + + WORKSPACE_RESULT_FILE="$GITHUB_WORKSPACE/${RESULT_FILENAME}_${CONFIG_NAME}_conc${concurrency}_gpus_${gpus}_ctx_${ctx}_gen_${gen}.json" + cp "$result_file" "$WORKSPACE_RESULT_FILE" + + echo "Copied result file to: $WORKSPACE_RESULT_FILE" + fi + done + done +fi + +echo "All result files processed" + +# Cleanup +echo "Cleaning up..." +deactivate 2>/dev/null || true +rm -rf .venv +echo "Cleanup complete" \ No newline at end of file