Skip to content

Commit 4e12bb4

Browse files
yeyu-nvidia and claude committed
Fix test_collect_hidden_states: use synthetic short conversations
The test was using real daring-anteater conversations (typically 1000+ tokens) with the default --max-seq-len 3072, but the tiny test model has max_position_embeddings=32. Long conversations were being silently filtered out ("Skipped N conversations due to length constraints"), producing zero .pt files and failing the assertion.

Fix by:
- Adding a tiny_conversations_path fixture with synthetic short single-turn conversations that tokenize well within max_position_embeddings=32
- Passing --max-seq-len 32 in the test to match the model's capacity
- Guarding tokenizer.chat_template.replace() against None chat_template

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Ye Yu <yeyu@nvidia.com>
1 parent 4be7f6f commit 4e12bb4

File tree

3 files changed

+33
-3
lines changed

3 files changed

+33
-3
lines changed

examples/speculative_decoding/collect_hidden_states/compute_hidden_states_hf.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@ def keep_conversation(entry):
142142
tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=args.trust_remote_code)
143143
if tokenizer.pad_token is None:
144144
tokenizer.pad_token = tokenizer.eos_token
145-
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
145+
if tokenizer.chat_template is not None:
146+
tokenizer.chat_template = tokenizer.chat_template.replace(REMOVE_THINK_CHAT_TEMPLATE, "")
146147

147148
output_dir = args.output_dir
148149
output_dir.mkdir(parents=True, exist_ok=True)

tests/examples/speculative_decoding/conftest.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,38 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
import json
17+
1618
import pytest
1719
import yaml
1820
from _test_utils.examples.run_command import run_example_command
1921

2022

23+
@pytest.fixture(scope="session")
24+
def tiny_conversations_path(tmp_path_factory):
25+
"""Tiny JSONL with short synthetic conversations for compute_hidden_states_hf tests.
26+
27+
Uses minimal single-turn conversations so that tokenized lengths stay well
28+
within the tiny test model's max_position_embeddings (32) even after chat
29+
template formatting.
30+
"""
31+
tmp_dir = tmp_path_factory.mktemp("tiny_convs")
32+
output_file = tmp_dir / "train.jsonl"
33+
conversations = [
34+
{
35+
"conversation_id": f"test-{i}",
36+
"conversations": [
37+
{"role": "user", "content": "What is 2 plus 2?"},
38+
{"role": "assistant", "content": "4"},
39+
],
40+
}
41+
for i in range(5)
42+
]
43+
with open(output_file, "w") as f:
44+
f.writelines(json.dumps(conv) + "\n" for conv in conversations)
45+
return output_file
46+
47+
2148
@pytest.fixture(scope="session", autouse=True)
2249
def tiny_daring_anteater_path(tmp_path_factory):
2350
tmp_dir = tmp_path_factory.mktemp("daring_anteater")

tests/examples/speculative_decoding/test_eagle_offline_ptq.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def offline_ptq_dirs(tmp_path_factory):
5555
}
5656

5757

58-
def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offline_ptq_dirs):
58+
def test_collect_hidden_states(tiny_llama_path, tiny_conversations_path, offline_ptq_dirs):
5959
"""Stage 1: generate .pt hidden state files from the base model."""
6060
run_example_command(
6161
[
@@ -64,11 +64,13 @@ def test_collect_hidden_states(tiny_llama_path, tiny_daring_anteater_path, offli
6464
"--model",
6565
tiny_llama_path,
6666
"--input-data",
67-
str(tiny_daring_anteater_path),
67+
str(tiny_conversations_path),
6868
"--output-dir",
6969
str(offline_ptq_dirs["hidden_states"]),
7070
"--debug-max-num-conversations",
7171
"2",
72+
"--max-seq-len",
73+
"32",
7274
],
7375
"speculative_decoding",
7476
)

0 commit comments

Comments (0)