diff --git a/desktop/scripts/check-file-sizes.mjs b/desktop/scripts/check-file-sizes.mjs index ba850f11c..d27752bc8 100644 --- a/desktop/scripts/check-file-sizes.mjs +++ b/desktop/scripts/check-file-sizes.mjs @@ -68,7 +68,7 @@ const overrides = new Map([ ["src-tauri/src/events.rs", 610], // event builders + build_huddle_guidelines (kind:48106) + post_event_raw transport helper + participant p-tag on join/leave + NIP-43 relay admin builders (add/remove/change-role) + check_relay_role + DM/presence/workflow command builders ["src-tauri/src/huddle/kokoro.rs", 980], // Kokoro ONNX TTS engine + three-tier G2P + ARPAbet→IPA + CoreML + synth_chunk() public API + style validation + hyphenated compound splitting + 23 unit tests ["src-tauri/src/huddle/mod.rs", 1020], // huddle state machine + Tauri commands + sync protocol doc; state/relay/pipeline extracted + emit_huddle_state_changed wiring - ["src-tauri/src/huddle/models.rs", 850], // model download manager for Moonshine STT + Kokoro TTS with streaming downloads + SHA-256 verification + Rust-native tar extraction + version manifest + atomic swap + hot-start signaling + ["src-tauri/src/huddle/models.rs", 900], // model download manager for Parakeet TDT-CTC STT + Kokoro TTS with streaming downloads + SHA-256 verification + Rust-native tar extraction + version manifest + atomic swap + hot-start signaling + CC-BY-4.0 attribution sidecar + idempotent legacy Moonshine dir cleanup ["src-tauri/src/huddle/stt.rs", 580], // STT pipeline + PTT edge-detection flush + PTT gating (is_speech AND ptt_active) + barge-in for VAD mode + rubato resampler + earshot VAD + sherpa-onnx transcription ["src-tauri/src/huddle/preprocessing.rs", 670], // TTS text preprocessing pipeline + unified split_sentences + int_to_words 0-999999 + URL trailing punctuation preservation + 23 unit tests ["src-tauri/src/huddle/relay_api.rs", 520], // audio relay recv task + per-peer frame counting for remote human TTS interrupt + NIP-98 channel member query diff 
--git a/desktop/src-tauri/src/huddle/mod.rs b/desktop/src-tauri/src/huddle/mod.rs index 892dd025b..d9bede659 100644 --- a/desktop/src-tauri/src/huddle/mod.rs +++ b/desktop/src-tauri/src/huddle/mod.rs @@ -658,8 +658,8 @@ pub async fn check_pipeline_hotstart(state: State<'_, AppState>) -> Result<(), S }; // Check if models just became ready (one-shot flags). - let moonshine_ready = models::global_model_manager() - .map(|m| m.take_moonshine_ready()) + let stt_ready = models::global_model_manager() + .map(|m| m.take_stt_ready()) .unwrap_or(false); let kokoro_ready = models::global_model_manager() .map(|m| m.take_kokoro_ready()) @@ -672,7 +672,7 @@ pub async fn check_pipeline_hotstart(state: State<'_, AppState>) -> Result<(), S } } - if !has_stt && (moonshine_ready || models::is_moonshine_ready()) { + if !has_stt && (stt_ready || models::is_stt_ready()) { if let Some(eph_id) = &ephemeral_channel_id { if let Err(e) = maybe_start_stt_pipeline(&state, eph_id).await { eprintln!("sprout-desktop: STT hotstart failed: {e}"); @@ -746,12 +746,12 @@ pub async fn start_stt_pipeline(state: State<'_, AppState>) -> Result<(), String match maybe_start_stt_pipeline(&state, &ephemeral_channel_id).await { Ok(true) => Ok(()), - Ok(false) => Err("Moonshine model not ready".to_string()), + Ok(false) => Err("STT model not ready".to_string()), Err(e) => Err(e), } } -/// Trigger a background download of voice models (Moonshine STT + Kokoro TTS). +/// Trigger a background download of voice models (Parakeet STT + Kokoro TTS). /// /// Returns immediately — downloads run in tokio background tasks. /// Poll `get_model_status` to track progress. 
@@ -760,7 +760,7 @@ pub async fn start_stt_pipeline(state: State<'_, AppState>) -> Result<(), String pub async fn download_voice_models(state: State<'_, AppState>) -> Result<(), String> { let manager = models::global_model_manager() .ok_or("model manager unavailable (home directory could not be resolved)")?; - manager.start_moonshine_download(state.http_client.clone()); + manager.start_stt_download(state.http_client.clone()); manager.start_kokoro_download(state.http_client.clone()); Ok(()) } @@ -771,8 +771,7 @@ pub fn get_model_status(_state: State<'_, AppState>) -> Result`, and // update the corresponding constant. -/// SHA-256 hash of the Moonshine archive (sherpa-onnx-moonshine-tiny-en-int8.tar.bz2). +/// SHA-256 hash of the STT archive +/// (sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8.tar.bz2). /// Computed from a known-good download. Update when upgrading model versions. -const MOONSHINE_ARCHIVE_SHA256: &str = - "d5fe6ec4334fef36255b2a4010412cad4c007e33103fec62fb5d17cad88086f2"; +const STT_ARCHIVE_SHA256: &str = "17f945007b52ccd8b7200ffc7c5652e9e8e961dfdf479cefcabd06cf5703630b"; /// SHA-256 hashes for individual Kokoro model files. /// Computed from known-good downloads. Update when upgrading model versions. @@ -51,8 +58,12 @@ const KOKORO_FILE_HASHES: &[(&str, &str)] = &[ // If the on-disk manifest doesn't match the compiled-in version, the model is // considered stale and re-downloaded. Increment when upgrading model files. -/// Model manifest version for Moonshine. Increment when upgrading model files. -const MOONSHINE_MODEL_VERSION: &str = "1"; +/// Model manifest version for the STT model. Increment when upgrading model files. +/// Bumped from "1" → "2" alongside the migration from Moonshine Tiny to +/// Parakeet TDT-CTC 110M — the model directory name also changed, so this +/// is technically belt-and-suspenders, but it keeps the manifest semantics +/// honest (each version tag identifies one specific set of model bytes). 
+const STT_MODEL_VERSION: &str = "2"; /// Model manifest version for Kokoro. Increment when upgrading model files. const KOKORO_MODEL_VERSION: &str = "1"; @@ -62,30 +73,57 @@ const MANIFEST_FILENAME: &str = ".sprout-model-manifest"; // ── Constants ───────────────────────────────────────────────────────────────── -/// Maximum expected Moonshine archive size (200 MB — actual is ~50 MB). -const MAX_MOONSHINE_DOWNLOAD_BYTES: u64 = 200 * 1024 * 1024; +/// Maximum expected STT archive size (200 MB — actual is ~100 MB). +const MAX_STT_DOWNLOAD_BYTES: u64 = 200 * 1024 * 1024; /// Maximum expected Kokoro file size (200 MB per file — model is 86 MB). const MAX_KOKORO_FILE_BYTES: u64 = 200 * 1024 * 1024; -const MOONSHINE_DOWNLOAD_URL: &str = +/// NVIDIA Parakeet TDT-CTC 110M (English, int8) — packaged for sherpa-onnx by +/// k2-fsa. Single ONNX file (CTC head) + tokens.txt. Avg WER ~7.5% across +/// the OpenASR-style benchmarks; ~half the WER of Moonshine Tiny at ~2× the +/// disk footprint. CTC blank-token decoding eliminates the silence/cut-audio +/// hallucination class that hurts encoder-decoder models on noisy huddle audio. +/// License: CC-BY-4.0 (attribution required — see About dialog). +const STT_DOWNLOAD_URL: &str = "https://github.com/k2-fsa/sherpa-onnx/releases/download/asr-models/\ - sherpa-onnx-moonshine-tiny-en-int8.tar.bz2"; + sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8.tar.bz2"; /// Subdirectory name produced by `tar xjf` on the archive. -const MOONSHINE_ARCHIVE_SUBDIR: &str = "sherpa-onnx-moonshine-tiny-en-int8"; +const STT_ARCHIVE_SUBDIR: &str = "sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000-int8"; /// Final directory name under `~/.sprout/models/`. -const MOONSHINE_MODEL_DIR_NAME: &str = "moonshine-tiny"; +const STT_MODEL_DIR_NAME: &str = "parakeet-tdt-ctc-110m-en"; /// All files that must be present for the model to be considered ready. 
-const MOONSHINE_EXPECTED_FILES: &[&str] = &[ - "preprocess.onnx", - "encode.int8.onnx", - "cached_decode.int8.onnx", - "uncached_decode.int8.onnx", - "tokens.txt", -]; +/// +/// Includes the attribution sidecar written by Sprout during install. The +/// upstream archive does not ship a license file, so readiness should require +/// the local CC-BY-4.0 attribution to travel with the cached model bytes. +const STT_EXPECTED_FILES: &[&str] = &["model.int8.onnx", "tokens.txt", STT_LICENSE_FILE_NAME]; + +/// CC-BY-4.0 §3(a)(1) attribution block written next to the STT model files +/// after install. Travels with the bytes — if a user copies the model +/// directory, the attribution comes with it. Mirrored in About/Credits. +/// +/// Covers all five §3(a)(1) bullets: creator, copyright notice, license +/// notice, warranty disclaimer reference, and URI to the source material. +const STT_LICENSE_FILE_NAME: &str = "MODEL_LICENSE.txt"; +const STT_LICENSE_TEXT: &str = "\ +NVIDIA Parakeet TDT-CTC 110M (English) +© NVIDIA Corporation. + +Licensed under the Creative Commons Attribution 4.0 International License +(CC-BY-4.0). License text: https://creativecommons.org/licenses/by/4.0/ + +Original model: https://huggingface.co/nvidia/parakeet-tdt_ctc-110m +Converted to ONNX with int8 quantization by the sherpa-onnx project +(https://github.com/k2-fsa/sherpa-onnx); Sprout ships this conversion +unmodified. + +Provided \"AS IS\", without warranty of any kind, express or implied. See the +license text for full warranty disclaimer. +"; // ── Kokoro TTS model ───────────────────────────────────────────────────────── @@ -131,9 +169,13 @@ pub enum ModelStatus { } /// Combined status for all voice models (returned to the frontend). +/// +/// `stt` is the speech-to-text model status (currently Parakeet TDT-CTC 110M; +/// historically Moonshine Tiny). The field name describes the role, not the +/// specific model, so future model swaps don't ripple into the API surface. 
#[derive(Debug, Clone, Serialize, Deserialize)] pub struct VoiceModelStatus { - pub moonshine: ModelStatus, + pub stt: ModelStatus, pub kokoro: ModelStatus, } @@ -297,7 +339,7 @@ where // ── ModelSlot ───────────────────────────────────────────────────────────────── -/// Per-model state + config. `ModelManager` owns two of these (moonshine, kokoro). +/// Per-model state + config. `ModelManager` owns two of these (stt, kokoro). #[derive(Clone)] struct ModelSlot { dir_name: &'static str, // subdir under ~/.sprout/models/ @@ -390,7 +432,7 @@ impl ModelSlot { } /// Verify files in `source_dir`, atomic-swap into final location, write manifest, signal ready. - /// `temp_cleanup`: optional extra dir to remove (e.g. outer extraction dir for Moonshine). + /// `temp_cleanup`: optional extra dir to remove (e.g. outer extraction dir for STT archive). async fn verify_and_install( &self, models_dir: &Path, @@ -450,7 +492,7 @@ impl ModelSlot { pub struct ModelManager { /// `~/.sprout/models/` models_dir: PathBuf, - moonshine: ModelSlot, + stt: ModelSlot, kokoro: ModelSlot, } @@ -462,11 +504,7 @@ impl ModelManager { let models_dir = dirs::home_dir()?.join(".sprout").join("models"); Some(Self { models_dir, - moonshine: ModelSlot::new( - MOONSHINE_MODEL_DIR_NAME, - MOONSHINE_EXPECTED_FILES, - MOONSHINE_MODEL_VERSION, - ), + stt: ModelSlot::new(STT_MODEL_DIR_NAME, STT_EXPECTED_FILES, STT_MODEL_VERSION), kokoro: ModelSlot::new( KOKORO_MODEL_DIR_NAME, KOKORO_EXPECTED_FILES, @@ -475,23 +513,23 @@ impl ModelManager { }) } - // ── Moonshine accessors ─────────────────────────────────────────────────── + // ── STT accessors ──────────────────────────────────────────────────────── - /// Path to the Moonshine model directory, or `None` if not ready. - pub fn moonshine_model_dir(&self) -> Option { - self.moonshine.dir_if_ready(&self.models_dir) + /// Path to the STT model directory, or `None` if not ready. 
+ pub fn stt_model_dir(&self) -> Option { + self.stt.dir_if_ready(&self.models_dir) } - /// `true` if all Moonshine files are present and the manifest version matches. - pub fn is_moonshine_ready(&self) -> bool { - self.moonshine.is_ready(&self.models_dir) + /// `true` if all STT files are present and the manifest version matches. + pub fn is_stt_ready(&self) -> bool { + self.stt.is_ready(&self.models_dir) } - /// Current Moonshine download status. - pub fn moonshine_status(&self) -> ModelStatus { - self.moonshine.status() + /// Current STT download status. + pub fn stt_status(&self) -> ModelStatus { + self.stt.status() } - /// Returns `true` once when Moonshine just became ready. Resets the flag. - pub fn take_moonshine_ready(&self) -> bool { - self.moonshine.take_ready() + /// Returns `true` once when the STT model just became ready. Resets the flag. + pub fn take_stt_ready(&self) -> bool { + self.stt.take_ready() } // ── Kokoro accessors ────────────────────────────────────────────────────── @@ -515,15 +553,34 @@ impl ModelManager { // ── Download triggers ───────────────────────────────────────────────────── - /// Start a background Moonshine download. No-op if already ready or downloading. - pub fn start_moonshine_download(&self, http_client: reqwest::Client) { + /// Start a background STT model download. No-op if already ready or downloading. + /// + /// Also schedules a best-effort cleanup of the legacy Moonshine model + /// directory — but **only when the new STT model is already on disk and + /// Ready**. This covers the "fast-path" upgrade scenario (new model + /// installed by a previous build, `download_stt_model` short-circuits, the + /// post-install cleanup never runs). For users mid-migration (old model + /// present, new model still downloading) we keep the old files until the + /// Parakeet install finishes, avoiding unnecessary data loss if the + /// ~100 MB download fails. 
The post-install path inside + /// `download_stt_model` handles cleanup once the new install reaches Ready. + pub fn start_stt_download(&self, http_client: reqwest::Client) { let manager = self.clone(); - self.moonshine.start_download( + self.stt.start_download( &self.models_dir, http_client, - "moonshine", - move |client| async move { manager.download_moonshine_model(client).await }, + "stt", + move |client| async move { manager.download_stt_model(client).await }, ); + if self.stt.is_ready(&self.models_dir) { + // Detached cleanup task — must not block startup. Gated above on + // the new model being Ready, so a mid-migration user keeps their + // existing moonshine-tiny files until Parakeet install completes. + let models_dir = self.models_dir.clone(); + tauri::async_runtime::spawn(async move { + cleanup_legacy_moonshine_dir(&models_dir).await; + }); + } } /// Start a background Kokoro download (~87 MB). No-op if already ready or downloading. @@ -539,24 +596,28 @@ impl ModelManager { // ── Private download implementations ───────────────────────────────────── - /// Download, extract, and verify the Moonshine model archive. - async fn download_moonshine_model(&self, http_client: reqwest::Client) -> Result<(), String> { + /// Download, extract, and verify the STT model archive. + async fn download_stt_model(&self, http_client: reqwest::Client) -> Result<(), String> { tokio::fs::create_dir_all(&self.models_dir) .await .map_err(|e| format!("create models dir: {e}"))?; - let archive_path = self.models_dir.join("moonshine-tiny.tar.bz2"); - let temp_dir = self.models_dir.join("moonshine-tiny.tmp"); + // Temp filenames derive from the final directory name to avoid colliding + // with leftovers from any previous STT model (e.g. moonshine-tiny.*). 
+ let archive_path = self + .models_dir + .join(format!("{STT_MODEL_DIR_NAME}.tar.bz2")); + let temp_dir = self.models_dir.join(format!("{STT_MODEL_DIR_NAME}.tmp")); - eprintln!("sprout-desktop: downloading Moonshine model from {MOONSHINE_DOWNLOAD_URL}"); - let response = fetch_url(&http_client, MOONSHINE_DOWNLOAD_URL, "moonshine archive").await?; + eprintln!("sprout-desktop: downloading STT model from {STT_DOWNLOAD_URL}"); + let response = fetch_url(&http_client, STT_DOWNLOAD_URL, "stt archive").await?; - let slot = self.moonshine.clone(); + let slot = self.stt.clone(); let bytes = download_file( response, &archive_path, - MAX_MOONSHINE_DOWNLOAD_BYTES, - "moonshine archive", + MAX_STT_DOWNLOAD_BYTES, + "stt archive", |downloaded, content_length| { if let Some(total) = content_length { if total > 0 { @@ -573,35 +634,46 @@ impl ModelManager { // Verify archive integrity before extraction. let hash = sha256_file(&archive_path).await?; - if hash != MOONSHINE_ARCHIVE_SHA256 { + if hash != STT_ARCHIVE_SHA256 { let _ = tokio::fs::remove_file(&archive_path).await; return Err(format!( - "Moonshine archive integrity check failed: expected {MOONSHINE_ARCHIVE_SHA256}, got {hash}" + "STT archive integrity check failed: expected {STT_ARCHIVE_SHA256}, got {hash}" )); } - self.moonshine.set_status(ModelStatus::Downloading { + self.stt.set_status(ModelStatus::Downloading { progress_percent: 90, }); fresh_temp_dir(&temp_dir).await?; - eprintln!("sprout-desktop: extracting Moonshine archive…"); + eprintln!("sprout-desktop: extracting STT archive…"); let (ap, td) = (archive_path.clone(), temp_dir.clone()); tokio::task::spawn_blocking(move || extract_archive(&ap, &td)) .await .map_err(|e| format!("tar task panicked: {e}"))??; - let extracted_subdir = temp_dir.join(MOONSHINE_ARCHIVE_SUBDIR); + let extracted_subdir = temp_dir.join(STT_ARCHIVE_SUBDIR); if !extracted_subdir.is_dir() { let _ = tokio::fs::remove_dir_all(&temp_dir).await; return Err(format!( - "expected subdir 
'{MOONSHINE_ARCHIVE_SUBDIR}' not found after extraction" + "expected subdir '{STT_ARCHIVE_SUBDIR}' not found after extraction" )); } + // Write the CC-BY-4.0 attribution sidecar before the atomic install, + // so it lands in the final model dir as part of the same rename. The + // upstream tarball ships no LICENSE/NOTICE, so we provide it ourselves + // per §3(a)(1) (license must travel with Shared material). + let license_path = extracted_subdir.join(STT_LICENSE_FILE_NAME); + if let Err(e) = tokio::fs::write(&license_path, STT_LICENSE_TEXT).await { + let _ = tokio::fs::remove_dir_all(&temp_dir).await; + let _ = tokio::fs::remove_file(&archive_path).await; + return Err(format!("write model license sidecar: {e}")); + } + // verify_and_install takes the subdir (actual model files); temp_cleanup removes outer dir. if let Err(e) = self - .moonshine + .stt .verify_and_install(&self.models_dir, &extracted_subdir, Some(&temp_dir)) .await { @@ -611,9 +683,16 @@ impl ModelManager { } let _ = tokio::fs::remove_file(&archive_path).await; + // Best-effort cleanup of the previous default STT model dir (Moonshine + // Tiny, ~70 MB). Runs only after the new install reaches Ready, so a + // failed download never removes the previous on-disk model during + // migration. The same cleanup also runs from `start_stt_download` to + // cover users who already have the new model installed. + cleanup_legacy_moonshine_dir(&self.models_dir).await; + eprintln!( - "sprout-desktop: Moonshine model ready at {}", - self.moonshine.model_dir(&self.models_dir).display() + "sprout-desktop: STT model ready at {}", + self.stt.model_dir(&self.models_dir).display() ); Ok(()) } @@ -737,18 +816,46 @@ pub fn global_model_manager() -> Option<&'static ModelManager> { // ── Standalone helpers ──────────────────────────────────────────────────────── -/// Path to the Moonshine model directory, or `None` if not ready. 
-pub fn moonshine_model_dir() -> Option {
-    global_model_manager()?.moonshine_model_dir()
+/// Path to the STT model directory, or `None` if not ready.
+pub fn stt_model_dir() -> Option {
+    global_model_manager()?.stt_model_dir()
 }
 
-/// `true` if all expected Moonshine model files are present on disk.
-pub fn is_moonshine_ready() -> bool {
+/// `true` if all expected STT model files are present on disk.
+pub fn is_stt_ready() -> bool {
     global_model_manager()
-        .map(|m| m.is_moonshine_ready())
+        .map(|m| m.is_stt_ready())
         .unwrap_or(false)
 }
 
+/// Best-effort cleanup of the legacy Moonshine STT model directory.
+///
+/// Removes `~/.sprout/models/moonshine-tiny/` if present (~70 MB on disk).
+/// Idempotent — a missing directory is a silent no-op. Other errors are logged
+/// and swallowed; the leftover is harmless and the user can remove it manually.
+///
+/// This is intentionally a free function rather than a method: it has no
+/// dependency on `ModelManager` state, runs from both pre- and post-install
+/// code paths, and the call site is meant to be easy to delete in a future
+/// release once we're confident no users are still on the old model dir.
+async fn cleanup_legacy_moonshine_dir(models_dir: &Path) {
+    let legacy = models_dir.join("moonshine-tiny");
+    // Remove unconditionally and treat `NotFound` as "nothing to do": this avoids
+    // the blocking `Path::exists()` probe on the async runtime and its TOCTOU gap.
+    match tokio::fs::remove_dir_all(&legacy).await {
+        Ok(()) => eprintln!(
+            "sprout-desktop: removed legacy STT model dir {}",
+            legacy.display()
+        ),
+        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {}
+        Err(e) => eprintln!(
+            "sprout-desktop: could not remove legacy STT model dir {}: {e} \
+            (harmless — remove manually to reclaim disk space)",
+            legacy.display()
+        ),
+    }
+}
+
 /// Path to the Kokoro model directory, or `None` if not ready.
pub fn kokoro_model_dir() -> Option { global_model_manager()?.kokoro_model_dir() diff --git a/desktop/src-tauri/src/huddle/pipeline.rs b/desktop/src-tauri/src/huddle/pipeline.rs index 7c12455ae..4b1c0eb76 100644 --- a/desktop/src-tauri/src/huddle/pipeline.rs +++ b/desktop/src-tauri/src/huddle/pipeline.rs @@ -40,7 +40,7 @@ pub(crate) async fn post_connect_setup( // Ensure voice models are downloading (idempotent). if let Some(mgr) = models::global_model_manager() { - mgr.start_moonshine_download(state.http_client.clone()); + mgr.start_stt_download(state.http_client.clone()); mgr.start_kokoro_download(state.http_client.clone()); } @@ -81,11 +81,10 @@ pub(crate) async fn maybe_start_stt_pipeline( state: &AppState, ephemeral_channel_id: &str, ) -> Result { - if !models::is_moonshine_ready() { + if !models::is_stt_ready() { return Ok(false); // Models not downloaded yet — voice-only mode. } - let model_dir = - models::moonshine_model_dir().ok_or_else(|| "Moonshine model directory not found")?; + let model_dir = models::stt_model_dir().ok_or_else(|| "STT model directory not found")?; let channel_uuid = parse_channel_uuid(ephemeral_channel_id)?; diff --git a/desktop/src-tauri/src/huddle/stt.rs b/desktop/src-tauri/src/huddle/stt.rs index a67e8e68a..8a9cac002 100644 --- a/desktop/src-tauri/src/huddle/stt.rs +++ b/desktop/src-tauri/src/huddle/stt.rs @@ -9,7 +9,7 @@ //! → stt_worker thread //! rubato: 48 kHz → 16 kHz mono //! earshot VAD: accumulate speech frames -//! sherpa-onnx Moonshine: transcribe on silence +//! sherpa-onnx Parakeet TDT-CTC 110M: transcribe on silence //! → text_rx [mpsc channel] //! → tokio task (start_stt_pipeline) //! builds kind:9 event → relay @@ -190,6 +190,17 @@ const RECV_TIMEOUT: Duration = Duration::from_millis(50); /// immediately after the agent finished. const TTS_COOLDOWN: Duration = Duration::from_millis(50); +/// Number of ONNX Runtime intra-op threads used by the offline recognizer. 
+/// +/// Held at 1 (conservative) until we have a local A/B on real huddle audio. +/// Sherpa-onnx's Parakeet example uses 2 and most published RTF numbers are +/// at 2 threads on x86_64 server class hardware, but the encoder runs only +/// on VAD chunk boundaries on a dedicated thread, so the threading knob +/// trades worker latency against potential oversubscription with the audio +/// worklet on small Macs (4-core Intel especially). Bump to 2 once the A/B +/// shows it's safe on the minimum-spec target. +const STT_NUM_THREADS: i32 = 1; + fn stt_worker( model_dir: PathBuf, audio_rx: Receiver>, @@ -216,31 +227,32 @@ fn stt_worker( let mut vad = Detector::new(DefaultPredictor::new()); // ── 3. Initialise sherpa-onnx recognizer ───────────────────────────────── - use sherpa_onnx::{OfflineMoonshineModelConfig, OfflineRecognizer, OfflineRecognizerConfig}; + // + // Parakeet TDT-CTC 110M ships as a single `model.int8.onnx` (CTC head) plus + // `tokens.txt`. sherpa-onnx infers the model family from which inner config + // has a `model` path set, so we don't need to set `model_type` explicitly. + // (See rust-api-examples/parakeet_tdt_ctc_simulate_streaming_microphone.rs + // in k2-fsa/sherpa-onnx.) 
+ use sherpa_onnx::{OfflineRecognizer, OfflineRecognizerConfig}; let tokens_path = model_dir.join("tokens.txt"); - if !tokens_path.exists() { + let model_path = model_dir.join("model.int8.onnx"); + if !tokens_path.exists() || !model_path.exists() { eprintln!( - "sprout-desktop: STT models not found at {} — STT disabled", + "sprout-desktop: STT model not found at {} — STT disabled", model_dir.display() ); drain_until_shutdown(audio_rx, &shutdown); return; } - let model_dir_str = model_dir.to_string_lossy().into_owned(); - let mut cfg = OfflineRecognizerConfig::default(); - cfg.model_config.moonshine = OfflineMoonshineModelConfig { - preprocessor: Some(format!("{model_dir_str}/preprocess.onnx")), - encoder: Some(format!("{model_dir_str}/encode.int8.onnx")), - uncached_decoder: Some(format!("{model_dir_str}/uncached_decode.int8.onnx")), - cached_decoder: Some(format!("{model_dir_str}/cached_decode.int8.onnx")), - merged_decoder: None, - }; + cfg.model_config.nemo_ctc.model = Some(model_path.to_string_lossy().into_owned()); cfg.model_config.tokens = Some(tokens_path.to_string_lossy().into_owned()); - cfg.model_config.num_threads = 1; - cfg.model_config.model_type = Some("moonshine".into()); + cfg.model_config.num_threads = STT_NUM_THREADS; + // Explicit — defaults are not part of the API contract, and noisy debug + // logging in release builds would be expensive on every VAD chunk. + cfg.model_config.debug = false; let recognizer = match OfflineRecognizer::create(&cfg) { Some(r) => r, diff --git a/desktop/src-tauri/src/lib.rs b/desktop/src-tauri/src/lib.rs index 5e71c2579..f0d024772 100644 --- a/desktop/src-tauri/src/lib.rs +++ b/desktop/src-tauri/src/lib.rs @@ -395,9 +395,9 @@ pub fn run() { // Pre-download voice models in the background so they're ready // when the user starts their first huddle. Idempotent — no-op if - // already downloaded. ~87 MB total (50 MB Moonshine + 87 MB Kokoro). + // already downloaded. ~187 MB total (~100 MB Parakeet STT + ~87 MB Kokoro). 
if let Some(mgr) = huddle::models::global_model_manager() { - mgr.start_moonshine_download(state.http_client.clone()); + mgr.start_stt_download(state.http_client.clone()); mgr.start_kokoro_download(state.http_client.clone()); } diff --git a/desktop/src/features/huddle/components/HuddleBar.tsx b/desktop/src/features/huddle/components/HuddleBar.tsx index f1dc3a967..e335ea75f 100644 --- a/desktop/src/features/huddle/components/HuddleBar.tsx +++ b/desktop/src/features/huddle/components/HuddleBar.tsx @@ -65,7 +65,7 @@ export function HuddleBar({ className }: HuddleBarProps) { const [showAddAgent, setShowAddAgent] = React.useState(false); const [agentAddError, setAgentAddError] = React.useState(null); const [modelStatus, setModelStatus] = React.useState<{ - moonshine: string; + stt: string; kokoro: string; } | null>(null); // Huddle state: event-driven + 10s fallback poll. @@ -132,13 +132,13 @@ export function HuddleBar({ className }: HuddleBarProps) { async function pollModels() { try { const status = await invoke<{ - moonshine: unknown; + stt: unknown; kokoro: unknown; }>("get_model_status"); if (cancelled) return; setModelStatus({ - moonshine: fmt(status.moonshine), + stt: fmt(status.stt), kokoro: fmt(status.kokoro), }); } catch { @@ -228,15 +228,13 @@ export function HuddleBar({ className }: HuddleBarProps) { {/* Model download progress */} {modelStatus && - (modelStatus.moonshine !== "ready" || - modelStatus.kokoro !== "ready") && ( + (modelStatus.stt !== "ready" || modelStatus.kokoro !== "ready") && ( - {modelStatus.moonshine !== "ready" && - modelStatus.kokoro !== "ready" - ? `Voice models: STT ${modelStatus.moonshine}, TTS ${modelStatus.kokoro}` - : modelStatus.moonshine !== "ready" - ? `STT model: ${modelStatus.moonshine}` + {modelStatus.stt !== "ready" && modelStatus.kokoro !== "ready" + ? `Voice models: STT ${modelStatus.stt}, TTS ${modelStatus.kokoro}` + : modelStatus.stt !== "ready" + ? 
`STT model: ${modelStatus.stt}` : `TTS model: ${modelStatus.kokoro}`} @@ -463,8 +461,8 @@ export function HuddleBar({ className }: HuddleBarProps) { : "In huddle, no microphone"} {`, voice input: ${isPttMode ? "push to talk, press Ctrl+Space to transmit" : "voice activity detection"}`} {modelStatus && - modelStatus.moonshine !== "ready" && - `, STT model ${modelStatus.moonshine}`} + modelStatus.stt !== "ready" && + `, STT model ${modelStatus.stt}`} {modelStatus && modelStatus.kokoro !== "ready" && `, TTS model ${modelStatus.kokoro}`}