From a7c9f594175ac91b4d1f635a1867749b3189d91b Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Mon, 8 Jun 2026 12:53:47 -0400 Subject: [PATCH 1/7] fix: resolve multi-word display names in @mention pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLI mention resolver stopped at the first space when scanning for @names, so '@Will Pfleger' only extracted 'will' — which never matched the profile display_name 'Will Pfleger'. Result: no p-tag, no notification. **Root cause (sprout-sdk/mentions.rs)** extract_at_names() tokenizes on alphanumeric+._- chars only, so it can never produce a multi-word token. match_names_to_profiles() then does a full-string equality check against the profile display_name, so a single-word extract like 'will' never matches 'Will Pfleger'. **Fix 1 — new two-pass extractor (sprout-sdk/mentions.rs)** Add extract_at_mentions_with_known(content, known_names): - Sort known_names longest-first (so 'Will Pfleger' beats 'Will') - At each @ token, try each known name case-insensitively with a word-boundary check (whitespace / punctuation / EOS) - Fall back to the existing single-word tokenizer for unmatched @ tokens so @alice still works even when profiles are loading - Deduplicate, preserve first-seen order **Fix 2 — wire it into the CLI (sprout-cli/messages.rs)** resolve_content_mentions() now extracts display_names from the fetched profiles first, passes them into extract_at_mentions_with_known, then maps matched names back to pubkeys via match_names_to_profiles. The early-exit guard is also tightened to skip I/O when content has no '@'. **Fix 3 — remove false-positive mention rendering (desktop)** buildPrefixPattern() previously fell back to prefix+\S+ when no known names were available, causing '@Will' to render as a blue mention even though no user named 'Will' exists. 
Changed the empty-names branch to return a never-matching regex (/(?!)/gi) so only text backed by an actual p-tagged member gets mention styling. Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- crates/sprout-cli/src/commands/messages.rs | 46 ++++- crates/sprout-sdk/src/mentions.rs | 221 +++++++++++++++++++++ desktop/src/shared/lib/mentionPattern.ts | 16 +- 3 files changed, 271 insertions(+), 12 deletions(-) diff --git a/crates/sprout-cli/src/commands/messages.rs b/crates/sprout-cli/src/commands/messages.rs index bf08d996f..997e195e6 100644 --- a/crates/sprout-cli/src/commands/messages.rs +++ b/crates/sprout-cli/src/commands/messages.rs @@ -8,7 +8,9 @@ use crate::validate::{ infer_language, parse_event_id, parse_uuid, read_or_stdin, truncate_diff, validate_content_size, validate_hex64, validate_uuid, MAX_DIFF_BYTES, }; -use sprout_sdk::mentions::{extract_at_names, match_names_to_profiles, MentionProfile}; +use sprout_sdk::mentions::{ + extract_at_mentions_with_known, match_names_to_profiles, MentionProfile, +}; // --------------------------------------------------------------------------- // Helpers @@ -124,18 +126,24 @@ async fn resolve_channel_id(client: &SproutClient, event_id: &str) -> Result Vec { - let names = extract_at_names(content); - if names.is_empty() { + // Quick pre-check: if there's no '@' at all we can skip all I/O. + if !content.contains('@') { return vec![]; } + // 1. Membership list (kind 39002 is parameterized-replaceable, addressed by `d` tag). let members_filter = serde_json::json!({ "kinds": [39002], @@ -158,7 +166,7 @@ async fn resolve_content_mentions( None => return vec![], }; - // 3. Hand the parsed profile content + pubkey to the shared matcher. + // 3. Build profile entries and extract display names for the two-pass extractor. 
let entries: Vec> = profile_events .iter() .filter_map(|e| { @@ -170,6 +178,30 @@ async fn resolve_content_mentions( }) }) .collect(); + + // Extract the display names (with `name` fallback) from each profile so + // the two-pass extractor can match multi-word names like "Will Pfleger". + let display_names: Vec = entries + .iter() + .filter_map(|e| { + let v: serde_json::Value = serde_json::from_str(e.content_json).ok()?; + let name = v + .get("display_name") + .or_else(|| v.get("name")) + .and_then(|n| n.as_str()) + .filter(|n| !n.is_empty())?; + Some(name.to_string()) + }) + .collect(); + + // 4. Two-pass extraction: known multi-word names first, single-word fallback. + let known_refs: Vec<&str> = display_names.iter().map(|s| s.as_str()).collect(); + let names = extract_at_mentions_with_known(content, &known_refs); + if names.is_empty() { + return vec![]; + } + + // 5. Map matched names back to pubkeys via the shared profile matcher. match_names_to_profiles(&names, &entries) } diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 1742f5340..9baae3701 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -16,6 +16,18 @@ //! explicit mentions ──► normalize ──► merge_mentions ──► p-tags //! ``` //! +//! For callers that have the set of known member display names available +//! upfront, [`extract_at_mentions_with_known`] provides a two-pass approach +//! that correctly handles multi-word display names (e.g. "Will Pfleger"): +//! +//! ```text +//! body text + known_names ──► extract_at_mentions_with_known ──► names: Vec +//! │ +//! direct name→pubkey lookup +//! │ +//! p-tags (merged) +//! ``` +//! //! See [`crate::mentions::MENTION_CAP`] for the hard upper bound on tags. use std::collections::HashSet; @@ -84,6 +96,123 @@ pub fn extract_at_names(content: &str) -> Vec { names } +/// Extract `@mention` names from message content when the set of known member +/// display names is available upfront. 
+/// +/// Uses a two-pass approach to correctly handle multi-word display names +/// (e.g. "Will Pfleger"): +/// +/// **Pass 1 — known-name matching:** At each `@` token (preceded by +/// start-of-string or ASCII whitespace), try each known name longest-first, +/// case-insensitively. A match is accepted only when the name is followed by a +/// word boundary (whitespace, common punctuation, or end-of-string). When a +/// known name matches, the lowercased name is emitted and the scan advances +/// past the entire matched name. +/// +/// **Pass 2 — single-word fallback:** If no known name matches at a given `@`, +/// falls back to the existing single-word tokenizer (alphanumeric + `.` `-` +/// `_`) so that `@alice` still works even when Alice's profile hasn't been +/// fetched yet. +/// +/// `known_names` should be the display names (or `name` fallbacks) of all +/// channel members. Duplicates and empty strings are ignored. The function +/// does **not** require `known_names` to be pre-sorted — it sorts +/// longest-first internally. +/// +/// Returns lowercased names in first-seen order, deduplicated. +pub fn extract_at_mentions_with_known(content: &str, known_names: &[&str]) -> Vec { + if content.is_empty() || !content.contains('@') { + return vec![]; + } + + // Sort known names longest-first so multi-word names beat their prefixes. + let mut sorted_known: Vec<&str> = known_names + .iter() + .copied() + .filter(|n| !n.trim().is_empty()) + .collect(); + sorted_known.sort_by(|a, b| b.len().cmp(&a.len())); + + let mut names: Vec = Vec::new(); + let mut seen = HashSet::new(); + let chars: Vec = content.chars().collect(); + let len = chars.len(); + let mut i = 0; + + while i < len { + if chars[i] == '@' { + let preceded_by_ws = i == 0 || chars[i - 1].is_ascii_whitespace(); + if preceded_by_ws && i + 1 < len { + // Build the remaining content after '@' as a &str for prefix matching. 
+ let after_at: String = chars[i + 1..].iter().collect(); + + // Pass 1: try each known name (longest-first). + let mut matched: Option<(String, usize)> = None; + for known in &sorted_known { + if after_at.len() < known.len() { + continue; + } + let candidate = &after_at[..known.len()]; + if !candidate.eq_ignore_ascii_case(known) { + continue; + } + // Word-boundary check: must be followed by whitespace, + // common punctuation, or end-of-string. + let after_name = &after_at[known.len()..]; + let boundary = after_name.is_empty() + || after_name + .chars() + .next() + .map(|c| { + c.is_ascii_whitespace() + || matches!( + c, + ',' | ';' | '.' | '!' | '?' | ':' | ')' | ']' | '}' + ) + }) + .unwrap_or(true); + if boundary { + // Advance i past '@' + matched name length (in chars). + let name_char_len = known.chars().count(); + matched = Some((known.to_ascii_lowercase(), name_char_len)); + break; + } + } + + if let Some((lower, char_len)) = matched { + if seen.insert(lower.clone()) { + names.push(lower); + } + // Skip past '@' + the matched name chars. + i += 1 + char_len; + continue; + } + + // Pass 2: single-word fallback (alphanumeric + . - _). + let start = i + 1; + let mut end = start; + while end < len { + let c = chars[end]; + if c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' { + end += 1; + } else { + break; + } + } + if end > start { + let name: String = chars[start..end].iter().collect(); + let lower = name.to_ascii_lowercase(); + if seen.insert(lower.clone()) { + names.push(lower); + } + } + } + } + i += 1; + } + names +} + /// Match extracted `@names` against channel-member profiles. /// /// For each profile, parses its `content_json` and reads the @@ -205,6 +334,98 @@ mod tests { assert!(extract_at_names("hello @").is_empty()); } + // ── extract_at_mentions_with_known ────────────────────────────────── + + #[test] + fn known_multiword_name_matches_fully() { + // "Will Pfleger" should match @Will Pfleger, not just @Will. 
+ let result = extract_at_mentions_with_known("hello @Will Pfleger!", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn partial_first_word_does_not_match_multiword_name() { + // @Will alone must NOT match "Will Pfleger" — partial matches are rejected. + let result = extract_at_mentions_with_known("hey @Will how are you", &["Will Pfleger"]); + // No known name matches @Will (boundary check: 'Will' is followed by ' h' + // which would match "Will Pfleger" only if the full name follows). + // Falls back to single-word tokenizer → emits "will". + assert_eq!(result, vec!["will"]); + } + + #[test] + fn longest_first_wins_over_prefix() { + // With both "Will" and "Will Pfleger" known, "@Will Pfleger" should + // match the longer name, not just "Will". + let result = extract_at_mentions_with_known( + "@Will Pfleger sent a message", + &["Will", "Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn single_word_known_name_matches() { + let result = extract_at_mentions_with_known("ping @alice please", &["Alice"]); + assert_eq!(result, vec!["alice"]); + } + + #[test] + fn unknown_name_falls_back_to_single_word() { + // @alice is not in known_names but single-word fallback still emits it. 
+ let result = extract_at_mentions_with_known("hey @alice", &["Bob"]); + assert_eq!(result, vec!["alice"]); + } + + #[test] + fn multiple_mentions_mixed_known_and_unknown() { + let result = extract_at_mentions_with_known( + "@Will Pfleger and @alice should review", + &["Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger", "alice"]); + } + + #[test] + fn deduplicates_case_insensitively() { + let result = extract_at_mentions_with_known( + "@Will Pfleger and @will pfleger again", + &["Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn multiword_name_at_end_of_string() { + let result = extract_at_mentions_with_known("cc @Will Pfleger", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn multiword_name_followed_by_punctuation() { + let result = + extract_at_mentions_with_known("thanks @Will Pfleger, great work", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn email_address_not_matched() { + let result = extract_at_mentions_with_known("user@example.com", &["example.com"]); + assert!(result.is_empty()); + } + + #[test] + fn empty_content_returns_empty() { + let result = extract_at_mentions_with_known("", &["Alice"]); + assert!(result.is_empty()); + } + + #[test] + fn empty_known_names_uses_single_word_fallback() { + let result = extract_at_mentions_with_known("hey @alice", &[]); + assert_eq!(result, vec!["alice"]); + } + // ── match_names_to_profiles ───────────────────────────────────────── fn profile<'a>(pk: &'a str, json: &'a str) -> MentionProfile<'a> { diff --git a/desktop/src/shared/lib/mentionPattern.ts b/desktop/src/shared/lib/mentionPattern.ts index d0c636836..f28d2c0b0 100644 --- a/desktop/src/shared/lib/mentionPattern.ts +++ b/desktop/src/shared/lib/mentionPattern.ts @@ -9,8 +9,9 @@ export function escapeRegExp(str: string): string { * Build a regex that matches a given prefix followed by known multi-word names * (longest-first to avoid partial 
matches). When known names are provided, * only those names are matched — no generic fallback. When no names are - * available, falls back to prefix + \S+ for backwards compatibility (e.g. - * old messages without proper p-tags, or while profiles are loading). + * available, returns a never-matching regex so that arbitrary prefix+word + * patterns are not highlighted as if they were valid matches (e.g. @Will + * should not render as a mention when only "Will Pfleger" is a real user). */ export function buildPrefixPattern( prefix: string, @@ -23,7 +24,10 @@ export function buildPrefixPattern( const escapedPrefix = escapeRegExp(prefix); if (sorted.length === 0) { - return new RegExp(`${escapedPrefix}\\S+`, "gi"); + // No known names — don't highlight anything as a mention. + // Previously fell back to prefix+\S+ which created false positives for + // messages without p-tags or with unresolved multi-word display names. + return /(?!)/gi; // never matches } const nameAlternatives = sorted.map((name) => escapeRegExp(name)).join("|"); @@ -32,8 +36,10 @@ export function buildPrefixPattern( } /** - * Build a regex that matches @mentions, trying known multi-word names first - * (longest-first to avoid partial matches), then falling back to @\S+. + * Build a regex that matches @mentions for known multi-word names + * (longest-first to avoid partial matches). When no known names are provided, + * returns a never-matching regex — @word patterns are not highlighted unless + * they correspond to an actual p-tagged member. 
*/ export function buildMentionPattern(mentionNames: string[]): RegExp { return buildPrefixPattern("@", mentionNames); From 684fa0bb6343e0a9ad9e4f0466b90862a13cabe7 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Mon, 8 Jun 2026 13:02:30 -0400 Subject: [PATCH 2/7] fix: guard against UTF-8 boundary panic in multi-word mention extractor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The two-pass extractor sliced after_at by byte offset (known.len()) which can land mid-character when content contains multi-byte UTF-8 (CJK, emoji). Replace panicking &after_at[..n] with after_at.get(..n) which returns None on invalid boundaries, gracefully skipping the candidate. Also fix clippy: sort_by → sort_by_key(Reverse) for the longest-first sort. Add three Unicode-specific tests to prevent regression. Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- crates/sprout-sdk/src/mentions.rs | 37 +++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 9baae3701..2c1225115 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -131,7 +131,7 @@ pub fn extract_at_mentions_with_known(content: &str, known_names: &[&str]) -> Ve .copied() .filter(|n| !n.trim().is_empty()) .collect(); - sorted_known.sort_by(|a, b| b.len().cmp(&a.len())); + sorted_known.sort_by_key(|k| std::cmp::Reverse(k.len())); let mut names: Vec = Vec::new(); let mut seen = HashSet::new(); @@ -152,7 +152,14 @@ pub fn extract_at_mentions_with_known(content: &str, known_names: &[&str]) -> Ve if after_at.len() < known.len() { continue; } - let candidate = &after_at[..known.len()]; + // Use get() to safely handle byte boundaries — known.len() + // may land mid-character when content contains multi-byte + // UTF-8 (e.g. CJK, emoji). If the slice isn't on a char + // boundary, skip this candidate. 
+ let candidate = match after_at.get(..known.len()) { + Some(s) => s, + None => continue, + }; if !candidate.eq_ignore_ascii_case(known) { continue; } @@ -426,6 +433,32 @@ mod tests { assert_eq!(result, vec!["alice"]); } + #[test] + fn unicode_content_does_not_panic() { + // Known name byte-length may land mid-character in multi-byte content. + // e.g. known "ab" (2 bytes) vs content starting with 日 (3 bytes) — + // byte offset 2 is not a char boundary. Must not panic; gracefully + // skips the candidate via get() returning None. + let result = extract_at_mentions_with_known("@日本語 hello", &["ab"]); + // "ab" doesn't match — falls through to single-word fallback which + // stops at non-ASCII, so no match. The key assertion: no panic. + assert!(result.is_empty()); + } + + #[test] + fn unicode_known_name_matches_with_boundary() { + // Multi-byte known name followed by a space (valid boundary). + let result = extract_at_mentions_with_known("@日本 hello", &["日本"]); + assert_eq!(result, vec!["日本"]); + } + + #[test] + fn unicode_known_name_with_ascii_content_no_panic() { + // Reverse case: multi-byte known name against ASCII content. + let result = extract_at_mentions_with_known("@alice hello", &["日本語"]); + assert_eq!(result, vec!["alice"]); + } + // ── match_names_to_profiles ───────────────────────────────────────── fn profile<'a>(pk: &'a str, json: &'a str) -> MentionProfile<'a> { From db2a37c876690d2247dbb5d4dbfa0c37684807f3 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Mon, 8 Jun 2026 13:03:50 -0400 Subject: [PATCH 3/7] docs: update stale doc comments referencing removed @\S+ fallback remarkMentions.ts and remarkChannelLinks.ts still referenced the old 'falls back to generic pattern' behavior that was removed in the mentionPattern.ts change. 
Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- desktop/src/shared/lib/remarkChannelLinks.ts | 6 +++--- desktop/src/shared/lib/remarkMentions.ts | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/desktop/src/shared/lib/remarkChannelLinks.ts b/desktop/src/shared/lib/remarkChannelLinks.ts index ca1225a43..f4e3963cc 100644 --- a/desktop/src/shared/lib/remarkChannelLinks.ts +++ b/desktop/src/shared/lib/remarkChannelLinks.ts @@ -2,9 +2,9 @@ * Remark plugin that detects #channel-name patterns in text nodes and wraps them * in custom HAST `channel-link` elements for styled rendering via react-markdown. * - * When `channelNames` is provided, multi-word channel names (e.g. "my channel") - * are matched first (longest-first to avoid partial matches), then the plugin - * falls back to the generic `#\S+` pattern for unknown channels. + * Only known channel names are highlighted — multi-word names (e.g. "my channel") + * are matched longest-first to avoid partial matches. When no known names are + * provided, nothing is highlighted. */ import { createRemarkPrefixPlugin } from "./createRemarkPrefixPlugin"; diff --git a/desktop/src/shared/lib/remarkMentions.ts b/desktop/src/shared/lib/remarkMentions.ts index b958ea1bb..7413df6f3 100644 --- a/desktop/src/shared/lib/remarkMentions.ts +++ b/desktop/src/shared/lib/remarkMentions.ts @@ -2,9 +2,9 @@ * Remark plugin that detects @mention patterns in text nodes and wraps them * in custom HAST `mention` elements for styled rendering via react-markdown. * - * When `mentionNames` is provided, multi-word display names (e.g. "John Doe") - * are matched first (longest-first to avoid partial matches), then the plugin - * falls back to the generic `@\S+` pattern for unknown mentions. + * Only p-tagged member names are highlighted — multi-word display names + * (e.g. "John Doe") are matched longest-first to avoid partial matches. + * When no known names are provided, nothing is highlighted. 
*/ import { createRemarkPrefixPlugin } from "./createRemarkPrefixPlugin"; From 73472660a2ae1dc0d6a19beb0e797e98f0cc7371 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Mon, 8 Jun 2026 14:00:56 -0400 Subject: [PATCH 4/7] fix(desktop): restore #channel fallback, keep @mention no-fallback buildPrefixPattern() now accepts an optional { fallbackToGeneric } flag. remarkChannelLinks passes it so #channel links still render while channel names load asynchronously. remarkMentions/buildMentionPattern do not, so @word patterns without a matching p-tag are never highlighted as mentions. Fixes CI regression where the smoke test timed out waiting for the 'Open channel' button on a #general link. Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- desktop/src/shared/lib/mentionPattern.ts | 22 +++++++++++++------- desktop/src/shared/lib/remarkChannelLinks.ts | 10 +++++---- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/desktop/src/shared/lib/mentionPattern.ts b/desktop/src/shared/lib/mentionPattern.ts index f28d2c0b0..ecd700cb2 100644 --- a/desktop/src/shared/lib/mentionPattern.ts +++ b/desktop/src/shared/lib/mentionPattern.ts @@ -8,14 +8,20 @@ export function escapeRegExp(str: string): string { /** * Build a regex that matches a given prefix followed by known multi-word names * (longest-first to avoid partial matches). When known names are provided, - * only those names are matched — no generic fallback. When no names are - * available, returns a never-matching regex so that arbitrary prefix+word - * patterns are not highlighted as if they were valid matches (e.g. @Will - * should not render as a mention when only "Will Pfleger" is a real user). + * only those names are matched — no generic fallback. 
+ * + * When no names are available: + * - If `options.fallbackToGeneric` is true, falls back to `prefix + \S+` so + * that patterns like `#channel` still render while channel names are loading + * asynchronously (used by remarkChannelLinks). + * - Otherwise returns a never-matching regex, preventing arbitrary `@word` + * patterns from being highlighted as valid mentions when no p-tags are + * present (used by remarkMentions / buildMentionPattern). */ export function buildPrefixPattern( prefix: string, knownNames: string[], + options?: { fallbackToGeneric?: boolean }, ): RegExp { const sorted = [...new Set(knownNames)] .filter((name) => name.trim().length > 0) @@ -24,9 +30,11 @@ export function buildPrefixPattern( const escapedPrefix = escapeRegExp(prefix); if (sorted.length === 0) { - // No known names — don't highlight anything as a mention. - // Previously fell back to prefix+\S+ which created false positives for - // messages without p-tags or with unresolved multi-word display names. + if (options?.fallbackToGeneric) { + return new RegExp(`${escapedPrefix}\\S+`, "gi"); + } + // No known names and no fallback requested — don't highlight anything. + // Prevents false-positive mention styling for messages without p-tags. return /(?!)/gi; // never matches } diff --git a/desktop/src/shared/lib/remarkChannelLinks.ts b/desktop/src/shared/lib/remarkChannelLinks.ts index f4e3963cc..008a68540 100644 --- a/desktop/src/shared/lib/remarkChannelLinks.ts +++ b/desktop/src/shared/lib/remarkChannelLinks.ts @@ -2,9 +2,9 @@ * Remark plugin that detects #channel-name patterns in text nodes and wraps them * in custom HAST `channel-link` elements for styled rendering via react-markdown. * - * Only known channel names are highlighted — multi-word names (e.g. "my channel") - * are matched longest-first to avoid partial matches. When no known names are - * provided, nothing is highlighted. + * Known channel names are matched longest-first to avoid partial matches. 
When + * no known names are provided, falls back to `#\S+` so that channel links still + * render while the channel list is loading asynchronously. */ import { createRemarkPrefixPlugin } from "./createRemarkPrefixPlugin"; @@ -17,7 +17,9 @@ type RemarkChannelLinksOptions = { export default function remarkChannelLinks( options?: RemarkChannelLinksOptions, ) { - const channelPattern = buildPrefixPattern("#", options?.channelNames ?? []); + const channelPattern = buildPrefixPattern("#", options?.channelNames ?? [], { + fallbackToGeneric: true, + }); return createRemarkPrefixPlugin(channelPattern, (matchText) => { const channelName = matchText.slice(1); From 829d48fc6dac072663d6b9e12cf8d2c6988632e9 Mon Sep 17 00:00:00 2001 From: Will Pfleger Date: Mon, 8 Jun 2026 15:04:34 -0400 Subject: [PATCH 5/7] refactor: simplify mention extractor and eliminate double JSON parsing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The char-indexed scan allocated a new String at every '@' and collected the full content to Vec<char> upfront. Replace with str::match_indices + str::get() slicing — zero intermediate allocations, half the lines. Merge the two-phase profile parse in the CLI into a single pass that builds a name→pubkey HashMap, removing the MentionProfile indirection and the duplicated display_name/name fallback logic. Trim verbose doc comments to contract-only summaries. 
Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- crates/sprout-cli/src/commands/messages.rs | 137 +++++++++++------ crates/sprout-cli/src/validate.rs | 4 +- crates/sprout-sdk/src/mentions.rs | 162 ++++++--------------- desktop/src/shared/lib/mentionPattern.ts | 6 +- 4 files changed, 141 insertions(+), 168 deletions(-) diff --git a/crates/sprout-cli/src/commands/messages.rs b/crates/sprout-cli/src/commands/messages.rs index 997e195e6..727ea5ba9 100644 --- a/crates/sprout-cli/src/commands/messages.rs +++ b/crates/sprout-cli/src/commands/messages.rs @@ -8,9 +8,7 @@ use crate::validate::{ infer_language, parse_event_id, parse_uuid, read_or_stdin, truncate_diff, validate_content_size, validate_hex64, validate_uuid, MAX_DIFF_BYTES, }; -use sprout_sdk::mentions::{ - extract_at_mentions_with_known, match_names_to_profiles, MentionProfile, -}; +use sprout_sdk::mentions::extract_at_mentions_with_known; // --------------------------------------------------------------------------- // Helpers @@ -124,22 +122,15 @@ async fn resolve_channel_id(client: &SproutClient, event_id: &str) -> Result Vec { - // Quick pre-check: if there's no '@' at all we can skip all I/O. if !content.contains('@') { return vec![]; } @@ -166,43 +157,46 @@ async fn resolve_content_mentions( None => return vec![], }; - // 3. Build profile entries and extract display names for the two-pass extractor. - let entries: Vec> = profile_events - .iter() - .filter_map(|e| { - let pubkey = e.get("pubkey")?.as_str()?; - let content_json = e.get("content")?.as_str()?; - Some(MentionProfile { - pubkey, - content_json, - }) - }) - .collect(); - - // Extract the display names (with `name` fallback) from each profile so - // the two-pass extractor can match multi-word names like "Will Pfleger". 
- let display_names: Vec = entries - .iter() - .filter_map(|e| { - let v: serde_json::Value = serde_json::from_str(e.content_json).ok()?; - let name = v - .get("display_name") - .or_else(|| v.get("name")) - .and_then(|n| n.as_str()) - .filter(|n| !n.is_empty())?; - Some(name.to_string()) - }) - .collect(); + // 3. Single parse: extract (pubkey, display_name) pairs from profile JSON. + let mut name_to_pubkeys: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut display_names: Vec = Vec::new(); + for e in &profile_events { + let Some(pubkey) = e.get("pubkey").and_then(|v| v.as_str()) else { + continue; + }; + let Some(content_json) = e.get("content").and_then(|v| v.as_str()) else { + continue; + }; + let Ok(v) = serde_json::from_str::(content_json) else { + continue; + }; + let Some(name) = v + .get("display_name") + .or_else(|| v.get("name")) + .and_then(|n| n.as_str()) + .filter(|n| !n.is_empty()) + else { + continue; + }; + let lower = name.to_ascii_lowercase(); + name_to_pubkeys + .entry(lower) + .or_default() + .push(pubkey.to_string()); + display_names.push(name.to_string()); + } // 4. Two-pass extraction: known multi-word names first, single-word fallback. let known_refs: Vec<&str> = display_names.iter().map(|s| s.as_str()).collect(); let names = extract_at_mentions_with_known(content, &known_refs); - if names.is_empty() { - return vec![]; - } - // 5. Map matched names back to pubkeys via the shared profile matcher. - match_names_to_profiles(&names, &entries) + // 5. Look up matched names → pubkeys via the map we already built. + names + .iter() + .flat_map(|n| name_to_pubkeys.get(n).into_iter().flatten()) + .cloned() + .collect() } /// Fetch raw events for `filter` via the relay's `/query` endpoint. 
@@ -736,7 +730,9 @@ pub async fn dispatch( mod tests { use super::{find_root_from_tags, parse_member_pubkeys}; use serde_json::json; - use sprout_sdk::mentions::{extract_at_names, match_names_to_profiles, MentionProfile}; + use sprout_sdk::mentions::{ + extract_at_mentions_with_known, extract_at_names, match_names_to_profiles, MentionProfile, + }; const ID_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; const ID_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -866,6 +862,53 @@ mod tests { assert_eq!(resolved, vec![PK_VALID_A, PK_VALID_C]); } + #[test] + fn cli_pipeline_resolves_multiword_display_names() { + let profile_events: Vec = vec![ + json!({ + "pubkey": PK_VALID_A, + "content": r#"{"display_name":"Will Pfleger"}"#, + }), + json!({ + "pubkey": PK_VALID_B, + "content": r#"{"display_name":"Alice"}"#, + }), + ]; + + // Simulate the single-parse pipeline from resolve_content_mentions. + let mut name_to_pubkeys: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut display_names: Vec = Vec::new(); + for e in &profile_events { + let pubkey = e.get("pubkey").unwrap().as_str().unwrap(); + let content_json = e.get("content").unwrap().as_str().unwrap(); + let v: serde_json::Value = serde_json::from_str(content_json).unwrap(); + let name = v + .get("display_name") + .or_else(|| v.get("name")) + .and_then(|n| n.as_str()) + .filter(|n| !n.is_empty()) + .unwrap(); + let lower = name.to_ascii_lowercase(); + name_to_pubkeys + .entry(lower) + .or_default() + .push(pubkey.to_string()); + display_names.push(name.to_string()); + } + + let known_refs: Vec<&str> = display_names.iter().map(|s| s.as_str()).collect(); + let names = extract_at_mentions_with_known("hey @Will Pfleger and @alice!", &known_refs); + assert_eq!(names, vec!["will pfleger", "alice"]); + + let resolved: Vec = names + .iter() + .flat_map(|n| name_to_pubkeys.get(n).into_iter().flatten()) + .cloned() + .collect(); + 
assert_eq!(resolved, vec![PK_VALID_A, PK_VALID_B]); + } + #[test] fn cli_pipeline_returns_empty_when_no_at_names() { // Sanity: no `@names` in body → no profile match attempt needed. diff --git a/crates/sprout-cli/src/validate.rs b/crates/sprout-cli/src/validate.rs index 4d5dbeb22..4882e340a 100644 --- a/crates/sprout-cli/src/validate.rs +++ b/crates/sprout-cli/src/validate.rs @@ -372,8 +372,8 @@ mod tests { ); } - // Note: `extract_at_names`, `merge_mentions`, and `normalize_mention_pubkeys` - // moved to `sprout_sdk::mentions` and are tested there. + // Note: `extract_at_names`, `extract_at_mentions_with_known`, `merge_mentions`, + // and `normalize_mention_pubkeys` live in `sprout_sdk::mentions` and are tested there. // --- parse_event_id --- diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 2c1225115..1b4e4b79c 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -16,17 +16,9 @@ //! explicit mentions ──► normalize ──► merge_mentions ──► p-tags //! ``` //! -//! For callers that have the set of known member display names available -//! upfront, [`extract_at_mentions_with_known`] provides a two-pass approach -//! that correctly handles multi-word display names (e.g. "Will Pfleger"): -//! -//! ```text -//! body text + known_names ──► extract_at_mentions_with_known ──► names: Vec -//! │ -//! direct name→pubkey lookup -//! │ -//! p-tags (merged) -//! ``` +//! When the set of known member names is available upfront, +//! [`extract_at_mentions_with_known`] replaces the first step to correctly +//! handle multi-word display names. //! //! See [`crate::mentions::MENTION_CAP`] for the hard upper bound on tags. @@ -51,7 +43,10 @@ pub struct MentionProfile<'a> { pub content_json: &'a str, } -/// Extract `@mention` names from message content. +/// Extract single-word `@mention` names from message content. 
+/// +/// Prefer [`extract_at_mentions_with_known`] when known member names are +/// available — it correctly handles multi-word display names. /// /// Returns lowercased names found after `@` tokens. An `@name` only matches /// when the `@` is at start-of-string or preceded by an ASCII whitespace @@ -96,130 +91,65 @@ pub fn extract_at_names(content: &str) -> Vec { names } -/// Extract `@mention` names from message content when the set of known member -/// display names is available upfront. -/// -/// Uses a two-pass approach to correctly handle multi-word display names -/// (e.g. "Will Pfleger"): -/// -/// **Pass 1 — known-name matching:** At each `@` token (preceded by -/// start-of-string or ASCII whitespace), try each known name longest-first, -/// case-insensitively. A match is accepted only when the name is followed by a -/// word boundary (whitespace, common punctuation, or end-of-string). When a -/// known name matches, the lowercased name is emitted and the scan advances -/// past the entire matched name. -/// -/// **Pass 2 — single-word fallback:** If no known name matches at a given `@`, -/// falls back to the existing single-word tokenizer (alphanumeric + `.` `-` -/// `_`) so that `@alice` still works even when Alice's profile hasn't been -/// fetched yet. +/// Extract `@mention` names from message content using known member names. /// -/// `known_names` should be the display names (or `name` fallbacks) of all -/// channel members. Duplicates and empty strings are ignored. The function -/// does **not** require `known_names` to be pre-sorted — it sorts -/// longest-first internally. -/// -/// Returns lowercased names in first-seen order, deduplicated. +/// At each `@` preceded by whitespace or start-of-string, tries known names +/// longest-first (case-insensitive, word-boundary-checked), then falls back +/// to single-word tokenization. Returns lowercased names in first-seen order, +/// deduplicated. 
Empty/whitespace-only entries in `known_names` are ignored. pub fn extract_at_mentions_with_known(content: &str, known_names: &[&str]) -> Vec { if content.is_empty() || !content.contains('@') { return vec![]; } - // Sort known names longest-first so multi-word names beat their prefixes. - let mut sorted_known: Vec<&str> = known_names + let mut sorted: Vec<&str> = known_names .iter() .copied() .filter(|n| !n.trim().is_empty()) .collect(); - sorted_known.sort_by_key(|k| std::cmp::Reverse(k.len())); + sorted.sort_by_key(|k| std::cmp::Reverse(k.len())); - let mut names: Vec = Vec::new(); + let mut names = Vec::new(); let mut seen = HashSet::new(); - let chars: Vec = content.chars().collect(); - let len = chars.len(); - let mut i = 0; - - while i < len { - if chars[i] == '@' { - let preceded_by_ws = i == 0 || chars[i - 1].is_ascii_whitespace(); - if preceded_by_ws && i + 1 < len { - // Build the remaining content after '@' as a &str for prefix matching. - let after_at: String = chars[i + 1..].iter().collect(); - - // Pass 1: try each known name (longest-first). - let mut matched: Option<(String, usize)> = None; - for known in &sorted_known { - if after_at.len() < known.len() { - continue; - } - // Use get() to safely handle byte boundaries — known.len() - // may land mid-character when content contains multi-byte - // UTF-8 (e.g. CJK, emoji). If the slice isn't on a char - // boundary, skip this candidate. - let candidate = match after_at.get(..known.len()) { - Some(s) => s, - None => continue, - }; - if !candidate.eq_ignore_ascii_case(known) { - continue; - } - // Word-boundary check: must be followed by whitespace, - // common punctuation, or end-of-string. - let after_name = &after_at[known.len()..]; - let boundary = after_name.is_empty() - || after_name - .chars() - .next() - .map(|c| { - c.is_ascii_whitespace() - || matches!( - c, - ',' | ';' | '.' | '!' | '?' 
| ':' | ')' | ']' | '}' - ) - }) - .unwrap_or(true); - if boundary { - // Advance i past '@' + matched name length (in chars). - let name_char_len = known.chars().count(); - matched = Some((known.to_ascii_lowercase(), name_char_len)); - break; - } - } - if let Some((lower, char_len)) = matched { - if seen.insert(lower.clone()) { - names.push(lower); - } - // Skip past '@' + the matched name chars. - i += 1 + char_len; - continue; - } + for (i, _) in content.match_indices('@') { + let preceded = i == 0 || content.as_bytes()[i - 1].is_ascii_whitespace(); + if !preceded { + continue; + } + let rest = &content[i + 1..]; + if rest.is_empty() { + continue; + } - // Pass 2: single-word fallback (alphanumeric + . - _). - let start = i + 1; - let mut end = start; - while end < len { - let c = chars[end]; - if c.is_ascii_alphanumeric() || c == '.' || c == '-' || c == '_' { - end += 1; - } else { - break; - } - } - if end > start { - let name: String = chars[start..end].iter().collect(); - let lower = name.to_ascii_lowercase(); - if seen.insert(lower.clone()) { - names.push(lower); - } - } + let lower = if let Some(&known) = sorted.iter().find(|&&k| { + rest.get(..k.len()) + .is_some_and(|s| s.eq_ignore_ascii_case(k) && is_word_boundary(&rest[k.len()..])) + }) { + known.to_ascii_lowercase() + } else { + let end = rest + .find(|c: char| !c.is_ascii_alphanumeric() && !matches!(c, '.' | '-' | '_')) + .unwrap_or(rest.len()); + if end == 0 { + continue; } + rest[..end].to_ascii_lowercase() + }; + + if seen.insert(lower.clone()) { + names.push(lower); } - i += 1; } names } +fn is_word_boundary(s: &str) -> bool { + s.chars().next().is_none_or(|c| { + c.is_ascii_whitespace() || matches!(c, ',' | ';' | '.' | '!' | '?' | ':' | ')' | ']' | '}') + }) +} + /// Match extracted `@names` against channel-member profiles. 
/// /// For each profile, parses its `content_json` and reads the diff --git a/desktop/src/shared/lib/mentionPattern.ts b/desktop/src/shared/lib/mentionPattern.ts index ecd700cb2..73d12a7a4 100644 --- a/desktop/src/shared/lib/mentionPattern.ts +++ b/desktop/src/shared/lib/mentionPattern.ts @@ -5,6 +5,8 @@ export function escapeRegExp(str: string): string { return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } +const NEVER_MATCH = /(?!)/gi; + /** * Build a regex that matches a given prefix followed by known multi-word names * (longest-first to avoid partial matches). When known names are provided, @@ -33,9 +35,7 @@ export function buildPrefixPattern( if (options?.fallbackToGeneric) { return new RegExp(`${escapedPrefix}\\S+`, "gi"); } - // No known names and no fallback requested — don't highlight anything. - // Prevents false-positive mention styling for messages without p-tags. - return /(?!)/gi; // never matches + return NEVER_MATCH; } const nameAlternatives = sorted.map((name) => escapeRegExp(name)).join("|"); From 0d35331a582f3aaed451d02ac891dc67985cb87c Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Mon, 8 Jun 2026 17:17:45 -0400 Subject: [PATCH 6/7] feat(sdk): add NIP-27 nostr:npub mention extraction with markdown-aware escaping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add strip_code_regions() to remove fenced code blocks and inline code spans before scanning, and extract_nostr_uris() to decode nostr:npub1... URIs into hex pubkeys. Uses the nostr crate's PublicKey::from_bech32() for decoding — no new dependencies needed. Callers run extract_nostr_uris on strip_code_regions(content) and merge the resulting pubkeys into the existing p-tag set via merge_mentions, which handles deduplication against @name-resolved pubkeys. 
Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- crates/sprout-cli/TESTING.md | 4 + crates/sprout-cli/src/commands/messages.rs | 13 +- crates/sprout-sdk/src/mentions.rs | 317 +++++++++++++++++- .../src/managed_agents/nest_skill.md | 2 +- 4 files changed, 329 insertions(+), 7 deletions(-) diff --git a/crates/sprout-cli/TESTING.md b/crates/sprout-cli/TESTING.md index 0eb4021cd..c08367f93 100644 --- a/crates/sprout-cli/TESTING.md +++ b/crates/sprout-cli/TESTING.md @@ -204,6 +204,10 @@ REPLY_ID=$(echo "$REPLY" | jq -r '.event_id') # messages send with mentions — @name in content is auto-resolved, no flag needed sprout messages send --channel "$CHANNEL_ID" --content "Hey @someone" | jq . +# messages send with NIP-27 nostr:npub1… inline mention — auto-resolved to p-tag +sprout messages send --channel "$CHANNEL_ID" \ + --content "Check with nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg on this" | jq . + # messages send from stdin — safe path for content with shell metacharacters # (backticks, $vars, code blocks) that would otherwise be expanded by the shell. echo 'Body with `backticks` and $vars stays literal.' 
\ diff --git a/crates/sprout-cli/src/commands/messages.rs b/crates/sprout-cli/src/commands/messages.rs index 727ea5ba9..380250ae1 100644 --- a/crates/sprout-cli/src/commands/messages.rs +++ b/crates/sprout-cli/src/commands/messages.rs @@ -8,7 +8,10 @@ use crate::validate::{ infer_language, parse_event_id, parse_uuid, read_or_stdin, truncate_diff, validate_content_size, validate_hex64, validate_uuid, MAX_DIFF_BYTES, }; -use sprout_sdk::mentions::extract_at_mentions_with_known; +use sprout_sdk::mentions::{ + extract_at_mentions_with_known, extract_nostr_uris, merge_mentions, strip_code_regions, + MENTION_CAP, +}; // --------------------------------------------------------------------------- // Helpers @@ -422,7 +425,13 @@ pub async fn cmd_send_message( // Resolve @name mentions in the author-written body only — not the media markdown we // append above, which is derived from upload metadata and can't carry `@names`. - let auto_resolved = resolve_content_mentions(client, &p.channel_id, &p.content).await; + let mut auto_resolved = resolve_content_mentions(client, &p.channel_id, &p.content).await; + + // NIP-27: also extract nostr:npub1… inline references (skipping code regions) + let stripped = strip_code_regions(&p.content); + let uri_pubkeys = extract_nostr_uris(&stripped); + merge_mentions(&mut auto_resolved, &uri_pubkeys, MENTION_CAP); + let mention_refs: Vec<&str> = auto_resolved.iter().map(|s| s.as_str()).collect(); let builder = match p.kind { diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 1b4e4b79c..424834030 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -1,4 +1,4 @@ -//! `@name` mention resolution helpers for Sprout chat messages. +//! `@name` and NIP-27 `nostr:npub1…` mention resolution helpers for Sprout chat messages. //! //! These helpers are **pure** — no network calls, no async. Callers query //! 
channel membership (kind 39002) and profile (kind 0) events themselves, @@ -11,19 +11,26 @@ //! │ //! members + profiles (queried by caller) │ //! ▼ -//! match_names_to_profiles -//! │ -//! explicit mentions ──► normalize ──► merge_mentions ──► p-tags +//! match_names_to_profiles ──► pubkeys +//! │ +//! body text ──► strip_code_regions ──► extract_nostr_uris ─┤ +//! ▼ +//! explicit mentions ──► normalize ──► merge_mentions ──► p-tags //! ``` //! //! When the set of known member names is available upfront, //! [`extract_at_mentions_with_known`] replaces the first step to correctly //! handle multi-word display names. //! +//! [`extract_nostr_uris`] handles NIP-27 inline `nostr:npub1…` references, +//! skipping those inside code blocks/spans via [`strip_code_regions`]. +//! //! See [`crate::mentions::MENTION_CAP`] for the hard upper bound on tags. use std::collections::HashSet; +use nostr::{FromBech32, PublicKey}; + /// Maximum number of mention p-tags allowed on a single message. /// /// Matches the cap enforced by Sprout message builders and the legacy MCP @@ -229,6 +236,153 @@ pub fn normalize_mention_pubkeys(pubkeys: &[String], sender_pubkey: Option<&str> .collect() } +/// Remove fenced code blocks and inline code spans from content. +/// +/// Returns a copy of `content` with ` ```…``` ` blocks and `` `…` `` spans +/// replaced by spaces. Used only for mention scanning — the original +/// content is stored verbatim. Preserves valid UTF-8 throughout. 
+pub fn strip_code_regions(content: &str) -> String { + let mut out = String::with_capacity(content.len()); + let mut chars = content.char_indices().peekable(); + + while let Some(&(i, ch)) = chars.peek() { + // Fenced code block: ``` at line start (possibly after whitespace) + if ch == '`' && content[i..].starts_with("```") { + let is_fence_start = if i == 0 { + true + } else { + let before = &content[..i]; + before.ends_with('\n') + || before.chars().all(|c| c.is_ascii_whitespace()) + || before.rsplit_once('\n').is_some_and(|(_, after_nl)| { + after_nl.chars().all(|c| c.is_ascii_whitespace()) + }) + }; + + if is_fence_start { + // Find end of opening fence line + let after_fence = i + 3; + let rest = &content[after_fence..]; + let line_end = rest + .find('\n') + .map_or(content.len(), |p| after_fence + p + 1); + + // Find closing fence + let mut search_from = line_end; + let close_end = loop { + if search_from >= content.len() { + break content.len(); + } + if let Some(pos) = content[search_from..].find("```") { + let abs_pos = search_from + pos; + let at_line_start = abs_pos == 0 + || content.as_bytes()[abs_pos - 1] == b'\n' + || content[..abs_pos] + .rsplit_once('\n') + .is_some_and(|(_, after_nl)| { + after_nl.chars().all(|c| c.is_ascii_whitespace()) + }); + if at_line_start { + // Skip to end of closing fence line + let after_close = abs_pos + 3; + let end = content[after_close..] 
+ .find('\n') + .map_or(content.len(), |p| after_close + p + 1); + break end; + } + search_from = abs_pos + 3; + } else { + break content.len(); + } + }; + + out.push(' '); + // Advance chars iterator past the fenced block + while let Some(&(ci, _)) = chars.peek() { + if ci >= close_end { + break; + } + chars.next(); + } + continue; + } + } + + // Inline code span: `…` + if ch == '`' { + let after_tick = i + 1; + if after_tick < content.len() { + // Find closing backtick on same line + if let Some(rel_end) = content[after_tick..].find('`') { + let close_pos = after_tick + rel_end; + // Only treat as code span if no newline between the backticks + if !content[after_tick..close_pos].contains('\n') { + out.push(' '); + // Advance past closing backtick + while let Some(&(ci, _)) = chars.peek() { + if ci > close_pos { + break; + } + chars.next(); + } + continue; + } + } + } + } + + out.push(ch); + chars.next(); + } + + out +} + +/// Bech32 alphabet used by NIP-19. +// NIP-19 allows uppercase; normalize before decode +fn is_bech32_char(c: char) -> bool { + matches!(c, '0'..='9' | 'a'..='z' | 'A'..='Z') +} + +/// Extract pubkeys from NIP-27 `nostr:npub1…` URIs in content. +/// +/// Scans `content` (which should already have code regions stripped via +/// [`strip_code_regions`]) for `nostr:npub1` followed by 58 bech32 characters. +/// Decodes each to a 32-byte pubkey hex string. Invalid bech32 is silently +/// skipped. Returns deduplicated lowercase hex pubkeys. 
+pub fn extract_nostr_uris(content: &str) -> Vec { + const PREFIX: &str = "nostr:npub1"; + const BECH32_SUFFIX_LEN: usize = 58; // chars after "npub1" + + let mut pubkeys = Vec::new(); + let mut seen = HashSet::new(); + + for (start, _) in content.match_indices(PREFIX) { + let bech32_start = start + "nostr:".len(); + let bech32_end = bech32_start + 5 + BECH32_SUFFIX_LEN; // "npub1" + 58 + + if bech32_end > content.len() { + continue; + } + + let candidate = &content[bech32_start..bech32_end]; + if !candidate.chars().all(is_bech32_char) { + continue; + } + + // NIP-19 allows uppercase; normalize before decode + let normalized = candidate.to_ascii_lowercase(); + if let Ok(pk) = PublicKey::from_bech32(&normalized) { + let hex = pk.to_hex(); + if seen.insert(hex.clone()) { + pubkeys.push(hex); + } + } + } + + pubkeys +} + #[cfg(test)] mod tests { use super::*; @@ -511,4 +665,159 @@ mod tests { fn normalize_empty_input() { assert!(normalize_mention_pubkeys(&[], Some("anything")).is_empty()); } + + // ── strip_code_regions ────────────────────────────────────────────── + + #[test] + fn strip_code_regions_removes_fenced_block() { + let input = "before\n```rust\nlet x = 1;\n```\nafter"; + let stripped = strip_code_regions(input); + assert!(!stripped.contains("let x = 1")); + assert!(stripped.contains("before")); + assert!(stripped.contains("after")); + } + + #[test] + fn strip_code_regions_removes_inline_code() { + let input = + "see `nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg` here"; + let stripped = strip_code_regions(input); + assert!(!stripped.contains("npub1")); + assert!(stripped.contains("see")); + assert!(stripped.contains("here")); + } + + #[test] + fn strip_code_regions_preserves_prose() { + let input = + "hello nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg world"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("nostr:npub1")); + } + + #[test] + fn strip_code_regions_handles_empty() { + 
assert_eq!(strip_code_regions(""), ""); + } + + #[test] + fn strip_code_regions_unclosed_backtick_preserved() { + // A lone backtick without a closing one is not a code span + let input = "hello `world"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("world")); + } + + // ── extract_nostr_uris ────────────────────────────────────────────── + + const TEST_NPUB1: &str = "npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg"; + const TEST_HEX1: &str = "7e7e9c42a91bfef19fa929e5fda1b72e0ebc1a4c1141673e2794234d86addf4e"; + const TEST_NPUB2: &str = "npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw"; + const TEST_HEX2: &str = "4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca"; + + #[test] + fn extract_nostr_uris_valid_in_prose() { + let content = format!("hello nostr:{} world", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } + + #[test] + fn extract_nostr_uris_not_extracted_in_backticks() { + let content = format!("see `nostr:{}` here", TEST_NPUB1); + let stripped = strip_code_regions(&content); + let result = extract_nostr_uris(&stripped); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_not_extracted_in_fenced_code() { + let content = format!("before\n```\nnostr:{}\n```\nafter", TEST_NPUB1); + let stripped = strip_code_regions(&content); + let result = extract_nostr_uris(&stripped); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_invalid_bech32_skipped() { + // Corrupt the last few chars to make invalid bech32 + let invalid = "npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjaaaa"; + let content = format!("nostr:{}", invalid); + let result = extract_nostr_uris(&content); + // Should not panic, just skip + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_deduplicates() { + let content = format!("nostr:{} and again nostr:{}", TEST_NPUB1, TEST_NPUB1); + let result = 
extract_nostr_uris(&content); + assert_eq!(result.len(), 1); + assert_eq!(result[0], TEST_HEX1); + } + + #[test] + fn extract_nostr_uris_multiple_different() { + let content = format!("nostr:{} and nostr:{}", TEST_NPUB1, TEST_NPUB2); + let result = extract_nostr_uris(&content); + assert_eq!(result.len(), 2); + assert!(result.contains(&TEST_HEX1.to_string())); + assert!(result.contains(&TEST_HEX2.to_string())); + } + + #[test] + fn extract_nostr_uris_at_name_and_npub_dedup() { + // Simulates the integration: @name resolves to same pubkey as nostr:npub + // The dedup happens at the merge_mentions level, but extract_nostr_uris + // itself deduplicates within its own output. + let content = format!("nostr:{}", TEST_NPUB1); + let uri_pubkeys = extract_nostr_uris(&content); + let name_pubkeys = vec![TEST_HEX1.to_string()]; + + // merge_mentions deduplicates + let mut merged = name_pubkeys; + merge_mentions(&mut merged, &uri_pubkeys, MENTION_CAP); + assert_eq!(merged.len(), 1); + assert_eq!(merged[0], TEST_HEX1); + } + + #[test] + fn extract_nostr_uris_empty_content() { + assert!(extract_nostr_uris("").is_empty()); + } + + #[test] + fn extract_nostr_uris_no_prefix() { + // npub without "nostr:" prefix should not match + let content = format!("just {} in text", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_after_unicode_does_not_panic() { + // Multi-byte UTF-8 before a nostr: URI must not cause panics + let content = format!("こんにちは nostr:{}", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } + + #[test] + fn strip_code_regions_preserves_unicode() { + let input = "こんにちは `code` 世界"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("こんにちは")); + assert!(stripped.contains("世界")); + assert!(!stripped.contains("code")); + } + + #[test] + fn extract_nostr_uris_uppercase_bech32_chars() { + // NIP-19 allows uppercase bech32 
characters in the suffix + let upper_suffix = &TEST_NPUB1[5..].to_uppercase(); // uppercase the 58 chars after "npub1" + let npub_mixed = format!("npub1{}", upper_suffix); + let content = format!("nostr:{}", npub_mixed); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } } diff --git a/desktop/src-tauri/src/managed_agents/nest_skill.md b/desktop/src-tauri/src/managed_agents/nest_skill.md index 13f29d061..5117d396a 100644 --- a/desktop/src-tauri/src/managed_agents/nest_skill.md +++ b/desktop/src-tauri/src/managed_agents/nest_skill.md @@ -55,7 +55,7 @@ Write commands are unaffected. `--format json` (default) returns full fields. ## Communication Patterns -**Mentions that notify:** Use `@Name` directly in message content — the CLI auto-resolves channel members by name and adds the required p-tags. No `--mention` flag exists or is needed. +**Mentions that notify:** Use `@Name` directly in message content — the CLI auto-resolves channel members by name and adds the required p-tags. No `--mention` flag exists or is needed. `nostr:npub1…` inline references are also auto-resolved to p-tags without needing a flag. ```bash # ✅ Correct — notification delivered automatically From fa63e9626f9a952663b6c74b050aa8afa9b795b5 Mon Sep 17 00:00:00 2001 From: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 Date: Mon, 8 Jun 2026 18:36:03 -0400 Subject: [PATCH 7/7] fix(sdk): guard nostr URI slice against mid-character byte index extract_nostr_uris computes a fixed 58-byte suffix window after the nostr:npub1 prefix. The existing length guard let bech32_end land inside a multi-byte UTF-8 character when non-ASCII followed the prefix, panicking on the &content[..bech32_end] slice. A valid bech32 suffix is pure ASCII, so a non-boundary index is always a non-match; skip it explicitly. 
Signed-off-by: npub1mn7jgtj4w2pd0g0zeuhxsa6jy6p0rewxz4kujt98my82ahfmp72sxjexk7 --- crates/sprout-sdk/src/mentions.rs | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 424834030..5a7b2fe52 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -361,7 +361,10 @@ pub fn extract_nostr_uris(content: &str) -> Vec { let bech32_start = start + "nostr:".len(); let bech32_end = bech32_start + 5 + BECH32_SUFFIX_LEN; // "npub1" + 58 - if bech32_end > content.len() { + // The fixed-width window can land mid-character when multi-byte UTF-8 + // follows the prefix; slicing a non-boundary would panic. A real bech32 + // suffix is 58 ASCII bytes, so any non-boundary here is a non-match. + if bech32_end > content.len() || !content.is_char_boundary(bech32_end) { continue; } @@ -802,6 +805,14 @@ mod tests { assert_eq!(result, vec![TEST_HEX1]); } + #[test] + fn extract_nostr_uris_multibyte_inside_window_does_not_panic() { + // Multi-byte UTF-8 within the fixed 58-char suffix window would make + // bech32_end land mid-character; the boundary guard must skip it. + let content = format!("nostr:npub1{}", "あ".repeat(20)); + assert!(extract_nostr_uris(&content).is_empty()); + } + #[test] fn strip_code_regions_preserves_unicode() { let input = "こんにちは `code` 世界";