clopca · wjiuxing · May 27, 2026 · May 27, 2026 · greptile-apps · May 27, 2026
diff --git a/src/hooks/chat-capture.ts b/src/hooks/chat-capture.ts
@@ -10,6 +10,24 @@ const MIN_MESSAGE_LENGTH = 20;
 const MAX_NARRATIVE_LENGTH = 2000;
 const MAX_TITLE_CONTENT_LENGTH = 60;
 
+/**
+ * Start-anchored, case-insensitive patterns for internal open-mem prompts that
+ * leak into the chat stream. Such text is an implementation artifact, not user
+ * intent, and must NOT be captured. A single `\s*` spans newlines in linear time.
+ */
+const INTERNAL_PROMPT_PATTERNS: RegExp[] = [
+	// Observation extraction prompt (compressor → chat.message): <task> + "Analyze…"
+	/^\s*<task>\s*Analyze the following tool output and extract a structured observation/i,
+	// Session summarization prompt: <task> + "Summarize the following coding session…"
+	/^\s*<task>\s*Summarize the following coding session based on its observations/i,
+	// Conflict evaluation prompt: text opens with the <conflict_evaluation> tag
+	/^\s*<conflict_evaluation>/i,
+	// Entity extraction prompt: text opens with the <entity_extraction> tag
+	/^\s*<entity_extraction>/i,
+	// Reranking prompt: text opens with the <rerank_request> tag
+	/^\s*<rerank_request>/i,
+];
+
 /**
  * Type guard: checks whether a value is an object with a string `text` property.
  */
@@ -77,6 +95,9 @@ export function persistChatMessage(input: ChatCaptureInput): boolean {
 	const processedText = redactSensitive(stripPrivateBlocks(text), sensitivePatterns);
 	if (processedText.length < MIN_MESSAGE_LENGTH) return false;
 
+	// Filter out internal open-mem prompts that leak into the chat stream
+	if (INTERNAL_PROMPT_PATTERNS.some((p) => p.test(processedText))) return false;
+
 	sessions.getOrCreate(sessionId, projectPath);
 
 	const truncatedContent =

diff --git a/tests/hooks/chat-capture.test.ts b/tests/hooks/chat-capture.test.ts
@@ -232,6 +232,147 @@ describe("createChatCaptureHook", () => {
 		expect((data.filesModified as string[]).length).toBe(0);
 	});
 
+	test("filters out internal observation extraction prompts", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Send one string part that opens with the <task> "Analyze the following tool output" prompt.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					`<task>
+Analyze the following tool output and extract a structured observation.
+</task>
+
+<tool_name>bash</tool_name>
+
+<tool_output>
+Some tool output here that is definitely long enough to pass the length check
+</tool_output>`,
+				],
+			},
+		);
+		// The prompt should be dropped: the mock's `calls` list must stay empty.
+		expect(observations.calls).toHaveLength(0);
+	});
+
+	test("filters out internal summarization prompts", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Send one string part that opens with the <task> "Summarize the following coding session" prompt.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					`<task>
+Summarize the following coding session based on its observations.
+</task>
+
+<session_id>ses_abc123</session_id>
+
+<observations>
+  <obs index="1">
+    <title>Some observation</title>
+  </obs>
+</observations>`,
+				],
+			},
+		);
+		// The prompt should be dropped: the mock's `calls` list must stay empty.
+		expect(observations.calls).toHaveLength(0);
+	});
+
+	test("filters out internal conflict evaluation prompts", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Send one string part whose first characters are the <conflict_evaluation> tag.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					`<conflict_evaluation>
+<new_observation>
+  <title>Some title</title>
+</new_observation>
+</conflict_evaluation>`,
+				],
+			},
+		);
+		// The prompt should be dropped: the mock's `calls` list must stay empty.
+		expect(observations.calls).toHaveLength(0);
+	});
+
+	test("filters out internal entity extraction prompts", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Send one string part whose first characters are the <entity_extraction> tag.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					`<entity_extraction>
+<observation>
+  <title>Some title</title>
+  <type>discovery</type>
+  <narrative>Some narrative text about entities and relationships</narrative>
+</observation>
+</entity_extraction>`,
+				],
+			},
+		);
+		// The prompt should be dropped: the mock's `calls` list must stay empty.
+		expect(observations.calls).toHaveLength(0);
+	});
+
+	test("filters out internal reranking prompts", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Send one string part whose first characters are the <rerank_request> tag.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					`<rerank_request>
+<query>some search query about authentication patterns</query>
+<candidates>
+  <candidate index="0"><title>Auth middleware</title></candidate>
+</candidates>
+</rerank_request>`,
+				],
+			},
+		);
+		// The prompt should be dropped: the mock's `calls` list must stay empty.
+		expect(observations.calls).toHaveLength(0);
+	});
+
+	test("does not filter normal user messages that happen to contain XML-like tags", async () => {
+		const observations = makeMockObservations();
+		const sessions = makeMockSessions();
+		const hook = createChatCaptureHook(observations as never, sessions as never, "/tmp/proj");
+		// Ordinary sentence with a <div> tag mid-text; it does not open with any internal prompt marker.
+		await hook(
+			{ sessionID: "s1" },
+			{
+				message: {},
+				parts: [
+					"Please help me debug this HTML issue with <div> tags and also fix the layout",
+				],
+			},
+		);
+		// The message should be captured: the mock's `calls` must include a "create" entry.
+		expect(observations.calls.find((c) => c.method === "create")).toBeDefined();
+	});
+
 	test("handles mixed string and object parts", async () => {
 		const observations = makeMockObservations();
 		const sessions = makeMockSessions();