diff --git a/crates/sprout-cli/TESTING.md b/crates/sprout-cli/TESTING.md index 0eb4021cd..c08367f93 100644 --- a/crates/sprout-cli/TESTING.md +++ b/crates/sprout-cli/TESTING.md @@ -204,6 +204,10 @@ REPLY_ID=$(echo "$REPLY" | jq -r '.event_id') # messages send with mentions — @name in content is auto-resolved, no flag needed sprout messages send --channel "$CHANNEL_ID" --content "Hey @someone" | jq . +# messages send with NIP-27 nostr:npub1… inline mention — auto-resolved to p-tag +sprout messages send --channel "$CHANNEL_ID" \ + --content "Check with nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg on this" | jq . + # messages send from stdin — safe path for content with shell metacharacters # (backticks, $vars, code blocks) that would otherwise be expanded by the shell. echo 'Body with `backticks` and $vars stays literal.' \ diff --git a/crates/sprout-cli/src/commands/messages.rs b/crates/sprout-cli/src/commands/messages.rs index bf08d996f..380250ae1 100644 --- a/crates/sprout-cli/src/commands/messages.rs +++ b/crates/sprout-cli/src/commands/messages.rs @@ -8,7 +8,10 @@ use crate::validate::{ infer_language, parse_event_id, parse_uuid, read_or_stdin, truncate_diff, validate_content_size, validate_hex64, validate_uuid, MAX_DIFF_BYTES, }; -use sprout_sdk::mentions::{extract_at_names, match_names_to_profiles, MentionProfile}; +use sprout_sdk::mentions::{ + extract_at_mentions_with_known, extract_nostr_uris, merge_mentions, strip_code_regions, + MENTION_CAP, +}; // --------------------------------------------------------------------------- // Helpers @@ -122,20 +125,19 @@ async fn resolve_channel_id(client: &SproutClient, event_id: &str) -> Result Vec { - let names = extract_at_names(content); - if names.is_empty() { + if !content.contains('@') { return vec![]; } + // 1. Membership list (kind 39002 is parameterized-replaceable, addressed by `d` tag). 
let members_filter = serde_json::json!({ "kinds": [39002], @@ -158,19 +160,46 @@ async fn resolve_content_mentions( None => return vec![], }; - // 3. Hand the parsed profile content + pubkey to the shared matcher. - let entries: Vec> = profile_events + // 3. Single parse: extract (pubkey, display_name) pairs from profile JSON. + let mut name_to_pubkeys: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut display_names: Vec = Vec::new(); + for e in &profile_events { + let Some(pubkey) = e.get("pubkey").and_then(|v| v.as_str()) else { + continue; + }; + let Some(content_json) = e.get("content").and_then(|v| v.as_str()) else { + continue; + }; + let Ok(v) = serde_json::from_str::(content_json) else { + continue; + }; + let Some(name) = v + .get("display_name") + .or_else(|| v.get("name")) + .and_then(|n| n.as_str()) + .filter(|n| !n.is_empty()) + else { + continue; + }; + let lower = name.to_ascii_lowercase(); + name_to_pubkeys + .entry(lower) + .or_default() + .push(pubkey.to_string()); + display_names.push(name.to_string()); + } + + // 4. Two-pass extraction: known multi-word names first, single-word fallback. + let known_refs: Vec<&str> = display_names.iter().map(|s| s.as_str()).collect(); + let names = extract_at_mentions_with_known(content, &known_refs); + + // 5. Look up matched names → pubkeys via the map we already built. + names .iter() - .filter_map(|e| { - let pubkey = e.get("pubkey")?.as_str()?; - let content_json = e.get("content")?.as_str()?; - Some(MentionProfile { - pubkey, - content_json, - }) - }) - .collect(); - match_names_to_profiles(&names, &entries) + .flat_map(|n| name_to_pubkeys.get(n).into_iter().flatten()) + .cloned() + .collect() } /// Fetch raw events for `filter` via the relay's `/query` endpoint. @@ -396,7 +425,13 @@ pub async fn cmd_send_message( // Resolve @name mentions in the author-written body only — not the media markdown we // append above, which is derived from upload metadata and can't carry `@names`. 
- let auto_resolved = resolve_content_mentions(client, &p.channel_id, &p.content).await; + let mut auto_resolved = resolve_content_mentions(client, &p.channel_id, &p.content).await; + + // NIP-27: also extract nostr:npub1… inline references (skipping code regions) + let stripped = strip_code_regions(&p.content); + let uri_pubkeys = extract_nostr_uris(&stripped); + merge_mentions(&mut auto_resolved, &uri_pubkeys, MENTION_CAP); + let mention_refs: Vec<&str> = auto_resolved.iter().map(|s| s.as_str()).collect(); let builder = match p.kind { @@ -704,7 +739,9 @@ pub async fn dispatch( mod tests { use super::{find_root_from_tags, parse_member_pubkeys}; use serde_json::json; - use sprout_sdk::mentions::{extract_at_names, match_names_to_profiles, MentionProfile}; + use sprout_sdk::mentions::{ + extract_at_mentions_with_known, extract_at_names, match_names_to_profiles, MentionProfile, + }; const ID_A: &str = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"; const ID_B: &str = "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; @@ -834,6 +871,53 @@ mod tests { assert_eq!(resolved, vec![PK_VALID_A, PK_VALID_C]); } + #[test] + fn cli_pipeline_resolves_multiword_display_names() { + let profile_events: Vec = vec![ + json!({ + "pubkey": PK_VALID_A, + "content": r#"{"display_name":"Will Pfleger"}"#, + }), + json!({ + "pubkey": PK_VALID_B, + "content": r#"{"display_name":"Alice"}"#, + }), + ]; + + // Simulate the single-parse pipeline from resolve_content_mentions. 
+ let mut name_to_pubkeys: std::collections::HashMap> = + std::collections::HashMap::new(); + let mut display_names: Vec = Vec::new(); + for e in &profile_events { + let pubkey = e.get("pubkey").unwrap().as_str().unwrap(); + let content_json = e.get("content").unwrap().as_str().unwrap(); + let v: serde_json::Value = serde_json::from_str(content_json).unwrap(); + let name = v + .get("display_name") + .or_else(|| v.get("name")) + .and_then(|n| n.as_str()) + .filter(|n| !n.is_empty()) + .unwrap(); + let lower = name.to_ascii_lowercase(); + name_to_pubkeys + .entry(lower) + .or_default() + .push(pubkey.to_string()); + display_names.push(name.to_string()); + } + + let known_refs: Vec<&str> = display_names.iter().map(|s| s.as_str()).collect(); + let names = extract_at_mentions_with_known("hey @Will Pfleger and @alice!", &known_refs); + assert_eq!(names, vec!["will pfleger", "alice"]); + + let resolved: Vec = names + .iter() + .flat_map(|n| name_to_pubkeys.get(n).into_iter().flatten()) + .cloned() + .collect(); + assert_eq!(resolved, vec![PK_VALID_A, PK_VALID_B]); + } + #[test] fn cli_pipeline_returns_empty_when_no_at_names() { // Sanity: no `@names` in body → no profile match attempt needed. diff --git a/crates/sprout-cli/src/validate.rs b/crates/sprout-cli/src/validate.rs index 4d5dbeb22..4882e340a 100644 --- a/crates/sprout-cli/src/validate.rs +++ b/crates/sprout-cli/src/validate.rs @@ -372,8 +372,8 @@ mod tests { ); } - // Note: `extract_at_names`, `merge_mentions`, and `normalize_mention_pubkeys` - // moved to `sprout_sdk::mentions` and are tested there. + // Note: `extract_at_names`, `extract_at_mentions_with_known`, `merge_mentions`, + // and `normalize_mention_pubkeys` live in `sprout_sdk::mentions` and are tested there. // --- parse_event_id --- diff --git a/crates/sprout-sdk/src/mentions.rs b/crates/sprout-sdk/src/mentions.rs index 1742f5340..5a7b2fe52 100644 --- a/crates/sprout-sdk/src/mentions.rs +++ b/crates/sprout-sdk/src/mentions.rs @@ -1,4 +1,4 @@ -//! 
`@name` mention resolution helpers for Sprout chat messages. +//! `@name` and NIP-27 `nostr:npub1…` mention resolution helpers for Sprout chat messages. //! //! These helpers are **pure** — no network calls, no async. Callers query //! channel membership (kind 39002) and profile (kind 0) events themselves, @@ -11,15 +11,26 @@ //! │ //! members + profiles (queried by caller) │ //! ▼ -//! match_names_to_profiles -//! │ -//! explicit mentions ──► normalize ──► merge_mentions ──► p-tags +//! match_names_to_profiles ──► pubkeys +//! │ +//! body text ──► strip_code_regions ──► extract_nostr_uris ─┤ +//! ▼ +//! explicit mentions ──► normalize ──► merge_mentions ──► p-tags //! ``` //! +//! When the set of known member names is available upfront, +//! [`extract_at_mentions_with_known`] replaces the first step to correctly +//! handle multi-word display names. +//! +//! [`extract_nostr_uris`] handles NIP-27 inline `nostr:npub1…` references, +//! skipping those inside code blocks/spans via [`strip_code_regions`]. +//! //! See [`crate::mentions::MENTION_CAP`] for the hard upper bound on tags. use std::collections::HashSet; +use nostr::{FromBech32, PublicKey}; + /// Maximum number of mention p-tags allowed on a single message. /// /// Matches the cap enforced by Sprout message builders and the legacy MCP @@ -39,7 +50,10 @@ pub struct MentionProfile<'a> { pub content_json: &'a str, } -/// Extract `@mention` names from message content. +/// Extract single-word `@mention` names from message content. +/// +/// Prefer [`extract_at_mentions_with_known`] when known member names are +/// available — it correctly handles multi-word display names. /// /// Returns lowercased names found after `@` tokens. An `@name` only matches /// when the `@` is at start-of-string or preceded by an ASCII whitespace @@ -84,6 +98,65 @@ pub fn extract_at_names(content: &str) -> Vec { names } +/// Extract `@mention` names from message content using known member names. 
/// Extract `@mention` names from message content using known member names.
///
/// An `@` only starts a mention when it is at start-of-string or directly
/// after ASCII whitespace (so `user@example.com` is ignored). At each such
/// `@`, `known_names` are tried longest-first, compared ASCII
/// case-insensitively, and accepted only when followed by a word boundary
/// (see [`is_word_boundary`]). If no known name matches, the single-word
/// tokenizer takes over: the run of ASCII alphanumerics plus `.`, `-`, `_`
/// that follows the `@`.
///
/// Returns ASCII-lowercased names in first-seen order, deduplicated.
/// Empty/whitespace-only entries in `known_names` are ignored.
pub fn extract_at_mentions_with_known(content: &str, known_names: &[&str]) -> Vec<String> {
    if !content.contains('@') {
        return Vec::new();
    }

    // Longest-first so "Will Pfleger" wins over "Will" at the same `@`.
    let mut candidates: Vec<&str> = known_names
        .iter()
        .copied()
        .filter(|name| !name.trim().is_empty())
        .collect();
    candidates.sort_by_key(|name| std::cmp::Reverse(name.len()));

    let mut names = Vec::new();
    let mut seen = HashSet::new();

    for (at, _) in content.match_indices('@') {
        let preceded = at == 0 || content.as_bytes()[at - 1].is_ascii_whitespace();
        if !preceded {
            continue;
        }
        let rest = &content[at + 1..];

        // `get` (not slicing) so a known name whose byte length lands inside a
        // multi-byte character of `rest` is skipped instead of panicking.
        let known_hit = candidates.iter().copied().find(|name| {
            rest.get(..name.len()).is_some_and(|head| {
                head.eq_ignore_ascii_case(name) && is_word_boundary(&rest[name.len()..])
            })
        });

        let lowered = match known_hit {
            Some(name) => name.to_ascii_lowercase(),
            None => {
                let end = rest
                    .find(|c: char| !is_fallback_name_char(c))
                    .unwrap_or(rest.len());
                if end == 0 {
                    // Bare `@`, or `@` followed by a non-name character.
                    continue;
                }
                rest[..end].to_ascii_lowercase()
            }
        };

        if seen.insert(lowered.clone()) {
            names.push(lowered);
        }
    }
    names
}

/// Characters the single-word fallback tokenizer accepts inside a name.
fn is_fallback_name_char(c: char) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, '.' | '-' | '_')
}

/// Whether `s` (the text right after a candidate known name) begins at a
/// word boundary: end of input, ASCII whitespace, or closing punctuation.
///
/// `.` is also a name character for the fallback tokenizer, so it only counts
/// as a boundary when it is not immediately followed by an ASCII letter or
/// digit. Otherwise `@alice.smith` would resolve to the known member `alice`
/// and notify the wrong person; sentence-final `@alice.` still matches.
fn is_word_boundary(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        None => true,
        Some('.') => !chars.next().is_some_and(|c| c.is_ascii_alphanumeric()),
        Some(c) => {
            c.is_ascii_whitespace() || matches!(c, ',' | ';' | '!' | '?' | ':' | ')' | ']' | '}')
        }
    }
}
/// Remove fenced code blocks and inline code spans from content.
///
/// Returns a copy of `content` in which every fenced block and every inline
/// code span is replaced by a single space. Used only for mention scanning —
/// the caller keeps the original content for storage.
///
/// Rules (a pragmatic subset of CommonMark):
/// * A fence opens on a run of three or more backticks that is preceded on
///   its line only by ASCII whitespace and whose info string (the rest of
///   that line) contains no backtick. It closes on a later line-leading run
///   at least as long as the opener; an unclosed fence runs to end of input.
/// * Any other backtick run of length N opens an inline span that closes at
///   the next run of exactly N backticks on the same line. A run with no
///   such closer is copied through verbatim.
///
/// All cut points are at backticks, newlines, or end of input, so multi-byte
/// UTF-8 text outside code regions is copied through intact.
pub fn strip_code_regions(content: &str) -> String {
    let bytes = content.as_bytes();
    let mut out = String::with_capacity(content.len());
    let mut pos = 0;

    while pos < content.len() {
        if bytes[pos] != b'`' {
            // Copy plain text up to the next backtick in one go.
            let next_tick = content[pos..]
                .find('`')
                .map_or(content.len(), |rel| pos + rel);
            out.push_str(&content[pos..next_tick]);
            pos = next_tick;
            continue;
        }

        let run = backtick_run_len(bytes, pos);

        if run >= 3 && is_indented_line_start(content, pos) {
            let body_start = next_line_start(content, pos + run);
            // A backtick in the info string means this is an inline span such
            // as "```code``` more text", not a fence; treating it as a fence
            // would swallow the rest of the message.
            if !content[pos + run..body_start].contains('`') {
                out.push(' ');
                pos = fenced_block_end(content, body_start, run);
                continue;
            }
        }

        match inline_span_end(content, pos, run) {
            Some(end) => {
                out.push(' ');
                pos = end;
            }
            None => {
                // Unmatched run: literal backticks, not code.
                out.push_str(&content[pos..pos + run]);
                pos += run;
            }
        }
    }

    out
}

/// Length of the backtick run starting at byte offset `start`.
fn backtick_run_len(bytes: &[u8], start: usize) -> usize {
    bytes[start..].iter().take_while(|&&b| b == b'`').count()
}

/// Byte offset just past the first `\n` at or after `from`, or end of input.
fn next_line_start(content: &str, from: usize) -> usize {
    content[from..]
        .find('\n')
        .map_or(content.len(), |rel| from + rel + 1)
}

/// True when only ASCII whitespace sits between the start of the line and `idx`.
fn is_indented_line_start(content: &str, idx: usize) -> bool {
    let line_start = content[..idx].rfind('\n').map_or(0, |nl| nl + 1);
    content.as_bytes()[line_start..idx]
        .iter()
        .all(u8::is_ascii_whitespace)
}

/// Offset just past the closing-fence line for a block whose body begins at
/// `body_start`, or end of input when the fence is never closed.
fn fenced_block_end(content: &str, body_start: usize, open_run: usize) -> usize {
    let bytes = content.as_bytes();
    let mut cursor = body_start;
    while let Some(rel) = content[cursor..].find("```") {
        let candidate = cursor + rel;
        let run = backtick_run_len(bytes, candidate);
        // A shorter run cannot close a longer fence; this lets a four-backtick
        // fence contain literal three-backtick lines.
        if run >= open_run && is_indented_line_start(content, candidate) {
            return next_line_start(content, candidate + run);
        }
        cursor = candidate + run;
    }
    content.len()
}

/// Offset just past the closing run of an inline span opened by `run`
/// backticks at `open`, or `None` when no run of exactly that length follows
/// on the same line.
fn inline_span_end(content: &str, open: usize, run: usize) -> Option<usize> {
    let bytes = content.as_bytes();
    let body_start = open + run;
    let line_end = content[body_start..]
        .find('\n')
        .map_or(content.len(), |rel| body_start + rel);

    let mut cursor = body_start;
    while cursor < line_end {
        let candidate = cursor + content[cursor..line_end].find('`')?;
        let len = backtick_run_len(bytes, candidate);
        if len == run {
            return Some(candidate + len);
        }
        cursor = candidate + len;
    }
    None
}

/// Cheap pre-filter for `npub1…` candidates: ASCII letters and digits only.
///
/// This is deliberately NOT the strict bech32 data charset. The caller runs it
/// over the whole candidate, including the `npub1` prefix, whose `b` and `1`
/// are outside that charset. Real validation (charset, checksum, prefix) is
/// left to the bech32 decoder. Uppercase is accepted here because the caller
/// lowercases the candidate before decoding.
fn is_bech32_char(c: char) -> bool {
    c.is_ascii_alphanumeric()
}
+pub fn extract_nostr_uris(content: &str) -> Vec { + const PREFIX: &str = "nostr:npub1"; + const BECH32_SUFFIX_LEN: usize = 58; // chars after "npub1" + + let mut pubkeys = Vec::new(); + let mut seen = HashSet::new(); + + for (start, _) in content.match_indices(PREFIX) { + let bech32_start = start + "nostr:".len(); + let bech32_end = bech32_start + 5 + BECH32_SUFFIX_LEN; // "npub1" + 58 + + // The fixed-width window can land mid-character when multi-byte UTF-8 + // follows the prefix; slicing a non-boundary would panic. A real bech32 + // suffix is 58 ASCII bytes, so any non-boundary here is a non-match. + if bech32_end > content.len() || !content.is_char_boundary(bech32_end) { + continue; + } + + let candidate = &content[bech32_start..bech32_end]; + if !candidate.chars().all(is_bech32_char) { + continue; + } + + // NIP-19 allows uppercase; normalize before decode + let normalized = candidate.to_ascii_lowercase(); + if let Ok(pk) = PublicKey::from_bech32(&normalized) { + let hex = pk.to_hex(); + if seen.insert(hex.clone()) { + pubkeys.push(hex); + } + } + } + + pubkeys +} + #[cfg(test)] mod tests { use super::*; @@ -205,6 +428,124 @@ mod tests { assert!(extract_at_names("hello @").is_empty()); } + // ── extract_at_mentions_with_known ────────────────────────────────── + + #[test] + fn known_multiword_name_matches_fully() { + // "Will Pfleger" should match @Will Pfleger, not just @Will. + let result = extract_at_mentions_with_known("hello @Will Pfleger!", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn partial_first_word_does_not_match_multiword_name() { + // @Will alone must NOT match "Will Pfleger" — partial matches are rejected. + let result = extract_at_mentions_with_known("hey @Will how are you", &["Will Pfleger"]); + // No known name matches @Will (boundary check: 'Will' is followed by ' h' + // which would match "Will Pfleger" only if the full name follows). + // Falls back to single-word tokenizer → emits "will". 
+ assert_eq!(result, vec!["will"]); + } + + #[test] + fn longest_first_wins_over_prefix() { + // With both "Will" and "Will Pfleger" known, "@Will Pfleger" should + // match the longer name, not just "Will". + let result = extract_at_mentions_with_known( + "@Will Pfleger sent a message", + &["Will", "Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn single_word_known_name_matches() { + let result = extract_at_mentions_with_known("ping @alice please", &["Alice"]); + assert_eq!(result, vec!["alice"]); + } + + #[test] + fn unknown_name_falls_back_to_single_word() { + // @alice is not in known_names but single-word fallback still emits it. + let result = extract_at_mentions_with_known("hey @alice", &["Bob"]); + assert_eq!(result, vec!["alice"]); + } + + #[test] + fn multiple_mentions_mixed_known_and_unknown() { + let result = extract_at_mentions_with_known( + "@Will Pfleger and @alice should review", + &["Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger", "alice"]); + } + + #[test] + fn deduplicates_case_insensitively() { + let result = extract_at_mentions_with_known( + "@Will Pfleger and @will pfleger again", + &["Will Pfleger"], + ); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn multiword_name_at_end_of_string() { + let result = extract_at_mentions_with_known("cc @Will Pfleger", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn multiword_name_followed_by_punctuation() { + let result = + extract_at_mentions_with_known("thanks @Will Pfleger, great work", &["Will Pfleger"]); + assert_eq!(result, vec!["will pfleger"]); + } + + #[test] + fn email_address_not_matched() { + let result = extract_at_mentions_with_known("user@example.com", &["example.com"]); + assert!(result.is_empty()); + } + + #[test] + fn empty_content_returns_empty() { + let result = extract_at_mentions_with_known("", &["Alice"]); + assert!(result.is_empty()); + } + + #[test] + fn 
empty_known_names_uses_single_word_fallback() { + let result = extract_at_mentions_with_known("hey @alice", &[]); + assert_eq!(result, vec!["alice"]); + } + + #[test] + fn unicode_content_does_not_panic() { + // Known name byte-length may land mid-character in multi-byte content. + // e.g. known "ab" (2 bytes) vs content starting with 日 (3 bytes) — + // byte offset 2 is not a char boundary. Must not panic; gracefully + // skips the candidate via get() returning None. + let result = extract_at_mentions_with_known("@日本語 hello", &["ab"]); + // "ab" doesn't match — falls through to single-word fallback which + // stops at non-ASCII, so no match. The key assertion: no panic. + assert!(result.is_empty()); + } + + #[test] + fn unicode_known_name_matches_with_boundary() { + // Multi-byte known name followed by a space (valid boundary). + let result = extract_at_mentions_with_known("@日本 hello", &["日本"]); + assert_eq!(result, vec!["日本"]); + } + + #[test] + fn unicode_known_name_with_ascii_content_no_panic() { + // Reverse case: multi-byte known name against ASCII content. 
+ let result = extract_at_mentions_with_known("@alice hello", &["日本語"]); + assert_eq!(result, vec!["alice"]); + } + // ── match_names_to_profiles ───────────────────────────────────────── fn profile<'a>(pk: &'a str, json: &'a str) -> MentionProfile<'a> { @@ -327,4 +668,167 @@ mod tests { fn normalize_empty_input() { assert!(normalize_mention_pubkeys(&[], Some("anything")).is_empty()); } + + // ── strip_code_regions ────────────────────────────────────────────── + + #[test] + fn strip_code_regions_removes_fenced_block() { + let input = "before\n```rust\nlet x = 1;\n```\nafter"; + let stripped = strip_code_regions(input); + assert!(!stripped.contains("let x = 1")); + assert!(stripped.contains("before")); + assert!(stripped.contains("after")); + } + + #[test] + fn strip_code_regions_removes_inline_code() { + let input = + "see `nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg` here"; + let stripped = strip_code_regions(input); + assert!(!stripped.contains("npub1")); + assert!(stripped.contains("see")); + assert!(stripped.contains("here")); + } + + #[test] + fn strip_code_regions_preserves_prose() { + let input = + "hello nostr:npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg world"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("nostr:npub1")); + } + + #[test] + fn strip_code_regions_handles_empty() { + assert_eq!(strip_code_regions(""), ""); + } + + #[test] + fn strip_code_regions_unclosed_backtick_preserved() { + // A lone backtick without a closing one is not a code span + let input = "hello `world"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("world")); + } + + // ── extract_nostr_uris ────────────────────────────────────────────── + + const TEST_NPUB1: &str = "npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjptg"; + const TEST_HEX1: &str = "7e7e9c42a91bfef19fa929e5fda1b72e0ebc1a4c1141673e2794234d86addf4e"; + const TEST_NPUB2: &str = 
"npub1fgdl5qqnh3k3f2xkqrvt7cujalhm623x4s7fdjdj5yrtp5fzjl9qrjpucw"; + const TEST_HEX2: &str = "4a1bfa0013bc6d14a8d600d8bf6392efefbd2a26ac3c96c9b2a106b0d12297ca"; + + #[test] + fn extract_nostr_uris_valid_in_prose() { + let content = format!("hello nostr:{} world", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } + + #[test] + fn extract_nostr_uris_not_extracted_in_backticks() { + let content = format!("see `nostr:{}` here", TEST_NPUB1); + let stripped = strip_code_regions(&content); + let result = extract_nostr_uris(&stripped); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_not_extracted_in_fenced_code() { + let content = format!("before\n```\nnostr:{}\n```\nafter", TEST_NPUB1); + let stripped = strip_code_regions(&content); + let result = extract_nostr_uris(&stripped); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_invalid_bech32_skipped() { + // Corrupt the last few chars to make invalid bech32 + let invalid = "npub10elfcs4fr0l0r8af98jlmgdh9c8tcxjvz9qkw038js35mp4dma8qzvjaaaa"; + let content = format!("nostr:{}", invalid); + let result = extract_nostr_uris(&content); + // Should not panic, just skip + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_deduplicates() { + let content = format!("nostr:{} and again nostr:{}", TEST_NPUB1, TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert_eq!(result.len(), 1); + assert_eq!(result[0], TEST_HEX1); + } + + #[test] + fn extract_nostr_uris_multiple_different() { + let content = format!("nostr:{} and nostr:{}", TEST_NPUB1, TEST_NPUB2); + let result = extract_nostr_uris(&content); + assert_eq!(result.len(), 2); + assert!(result.contains(&TEST_HEX1.to_string())); + assert!(result.contains(&TEST_HEX2.to_string())); + } + + #[test] + fn extract_nostr_uris_at_name_and_npub_dedup() { + // Simulates the integration: @name resolves to same pubkey as nostr:npub + // The dedup happens at the merge_mentions 
level, but extract_nostr_uris + // itself deduplicates within its own output. + let content = format!("nostr:{}", TEST_NPUB1); + let uri_pubkeys = extract_nostr_uris(&content); + let name_pubkeys = vec![TEST_HEX1.to_string()]; + + // merge_mentions deduplicates + let mut merged = name_pubkeys; + merge_mentions(&mut merged, &uri_pubkeys, MENTION_CAP); + assert_eq!(merged.len(), 1); + assert_eq!(merged[0], TEST_HEX1); + } + + #[test] + fn extract_nostr_uris_empty_content() { + assert!(extract_nostr_uris("").is_empty()); + } + + #[test] + fn extract_nostr_uris_no_prefix() { + // npub without "nostr:" prefix should not match + let content = format!("just {} in text", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert!(result.is_empty()); + } + + #[test] + fn extract_nostr_uris_after_unicode_does_not_panic() { + // Multi-byte UTF-8 before a nostr: URI must not cause panics + let content = format!("こんにちは nostr:{}", TEST_NPUB1); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } + + #[test] + fn extract_nostr_uris_multibyte_inside_window_does_not_panic() { + // Multi-byte UTF-8 within the fixed 58-char suffix window would make + // bech32_end land mid-character; the boundary guard must skip it. 
+ let content = format!("nostr:npub1{}", "あ".repeat(20)); + assert!(extract_nostr_uris(&content).is_empty()); + } + + #[test] + fn strip_code_regions_preserves_unicode() { + let input = "こんにちは `code` 世界"; + let stripped = strip_code_regions(input); + assert!(stripped.contains("こんにちは")); + assert!(stripped.contains("世界")); + assert!(!stripped.contains("code")); + } + + #[test] + fn extract_nostr_uris_uppercase_bech32_chars() { + // NIP-19 allows uppercase bech32 characters in the suffix + let upper_suffix = &TEST_NPUB1[5..].to_uppercase(); // uppercase the 58 chars after "npub1" + let npub_mixed = format!("npub1{}", upper_suffix); + let content = format!("nostr:{}", npub_mixed); + let result = extract_nostr_uris(&content); + assert_eq!(result, vec![TEST_HEX1]); + } } diff --git a/desktop/src-tauri/src/managed_agents/nest_skill.md b/desktop/src-tauri/src/managed_agents/nest_skill.md index 13f29d061..5117d396a 100644 --- a/desktop/src-tauri/src/managed_agents/nest_skill.md +++ b/desktop/src-tauri/src/managed_agents/nest_skill.md @@ -55,7 +55,7 @@ Write commands are unaffected. `--format json` (default) returns full fields. ## Communication Patterns -**Mentions that notify:** Use `@Name` directly in message content — the CLI auto-resolves channel members by name and adds the required p-tags. No `--mention` flag exists or is needed. +**Mentions that notify:** Use `@Name` directly in message content — the CLI auto-resolves channel members by name and adds the required p-tags. No `--mention` flag exists or is needed. `nostr:npub1…` inline references are also auto-resolved to p-tags without needing a flag. 
```bash # ✅ Correct — notification delivered automatically diff --git a/desktop/src/shared/lib/mentionPattern.ts b/desktop/src/shared/lib/mentionPattern.ts index d0c636836..73d12a7a4 100644 --- a/desktop/src/shared/lib/mentionPattern.ts +++ b/desktop/src/shared/lib/mentionPattern.ts @@ -5,16 +5,25 @@ export function escapeRegExp(str: string): string { return str.replace(/[.*+?^${}()|[\]\\]/g, "\\$&"); } +const NEVER_MATCH = /(?!)/gi; + /** * Build a regex that matches a given prefix followed by known multi-word names * (longest-first to avoid partial matches). When known names are provided, - * only those names are matched — no generic fallback. When no names are - * available, falls back to prefix + \S+ for backwards compatibility (e.g. - * old messages without proper p-tags, or while profiles are loading). + * only those names are matched — no generic fallback. + * + * When no names are available: + * - If `options.fallbackToGeneric` is true, falls back to `prefix + \S+` so + * that patterns like `#channel` still render while channel names are loading + * asynchronously (used by remarkChannelLinks). + * - Otherwise returns a never-matching regex, preventing arbitrary `@word` + * patterns from being highlighted as valid mentions when no p-tags are + * present (used by remarkMentions / buildMentionPattern). 
*/ export function buildPrefixPattern( prefix: string, knownNames: string[], + options?: { fallbackToGeneric?: boolean }, ): RegExp { const sorted = [...new Set(knownNames)] .filter((name) => name.trim().length > 0) @@ -23,7 +32,10 @@ export function buildPrefixPattern( const escapedPrefix = escapeRegExp(prefix); if (sorted.length === 0) { - return new RegExp(`${escapedPrefix}\\S+`, "gi"); + if (options?.fallbackToGeneric) { + return new RegExp(`${escapedPrefix}\\S+`, "gi"); + } + return NEVER_MATCH; } const nameAlternatives = sorted.map((name) => escapeRegExp(name)).join("|"); @@ -32,8 +44,10 @@ export function buildPrefixPattern( } /** - * Build a regex that matches @mentions, trying known multi-word names first - * (longest-first to avoid partial matches), then falling back to @\S+. + * Build a regex that matches @mentions for known multi-word names + * (longest-first to avoid partial matches). When no known names are provided, + * returns a never-matching regex — @word patterns are not highlighted unless + * they correspond to an actual p-tagged member. */ export function buildMentionPattern(mentionNames: string[]): RegExp { return buildPrefixPattern("@", mentionNames); diff --git a/desktop/src/shared/lib/remarkChannelLinks.ts b/desktop/src/shared/lib/remarkChannelLinks.ts index ca1225a43..008a68540 100644 --- a/desktop/src/shared/lib/remarkChannelLinks.ts +++ b/desktop/src/shared/lib/remarkChannelLinks.ts @@ -2,9 +2,9 @@ * Remark plugin that detects #channel-name patterns in text nodes and wraps them * in custom HAST `channel-link` elements for styled rendering via react-markdown. * - * When `channelNames` is provided, multi-word channel names (e.g. "my channel") - * are matched first (longest-first to avoid partial matches), then the plugin - * falls back to the generic `#\S+` pattern for unknown channels. + * Known channel names are matched longest-first to avoid partial matches. 
When + * no known names are provided, falls back to `#\S+` so that channel links still + * render while the channel list is loading asynchronously. */ import { createRemarkPrefixPlugin } from "./createRemarkPrefixPlugin"; @@ -17,7 +17,9 @@ type RemarkChannelLinksOptions = { export default function remarkChannelLinks( options?: RemarkChannelLinksOptions, ) { - const channelPattern = buildPrefixPattern("#", options?.channelNames ?? []); + const channelPattern = buildPrefixPattern("#", options?.channelNames ?? [], { + fallbackToGeneric: true, + }); return createRemarkPrefixPlugin(channelPattern, (matchText) => { const channelName = matchText.slice(1); diff --git a/desktop/src/shared/lib/remarkMentions.ts b/desktop/src/shared/lib/remarkMentions.ts index b958ea1bb..7413df6f3 100644 --- a/desktop/src/shared/lib/remarkMentions.ts +++ b/desktop/src/shared/lib/remarkMentions.ts @@ -2,9 +2,9 @@ * Remark plugin that detects @mention patterns in text nodes and wraps them * in custom HAST `mention` elements for styled rendering via react-markdown. * - * When `mentionNames` is provided, multi-word display names (e.g. "John Doe") - * are matched first (longest-first to avoid partial matches), then the plugin - * falls back to the generic `@\S+` pattern for unknown mentions. + * Only p-tagged member names are highlighted — multi-word display names + * (e.g. "John Doe") are matched longest-first to avoid partial matches. + * When no known names are provided, nothing is highlighted. */ import { createRemarkPrefixPlugin } from "./createRemarkPrefixPlugin";