From 0c00fb4cefe77432461615593825c0074e236b36 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 28 Aug 2025 20:14:53 +0800 Subject: [PATCH 1/2] fix: reduce allocations in chain traversal --- src/ipld/util.rs | 2 +- src/tool/subcommands/benchmark_cmd.rs | 3 +-- src/utils/encoding/cid_de_cbor.rs | 15 +++++++++------ 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/ipld/util.rs b/src/ipld/util.rs index 5080dc167420..c8403d5670a3 100644 --- a/src/ipld/util.rs +++ b/src/ipld/util.rs @@ -428,7 +428,7 @@ impl< let block_sender = block_sender.clone(); handles.spawn(async move { 'main: while let Ok(cid) = extract_receiver.recv_async().await { - let mut cid_vec = vec![cid]; + let mut cid_vec: smallvec::SmallVec<[_; 8]> = smallvec::smallvec![cid]; while let Some(cid) = cid_vec.pop() { if should_save_block_to_snapshot(cid) && seen.lock().insert(cid) { if let Some(data) = db.get(&cid)? { diff --git a/src/tool/subcommands/benchmark_cmd.rs b/src/tool/subcommands/benchmark_cmd.rs index 7522f8075f01..072ead19ed20 100644 --- a/src/tool/subcommands/benchmark_cmd.rs +++ b/src/tool/subcommands/benchmark_cmd.rs @@ -13,7 +13,6 @@ use crate::utils::db::car_stream::{CarBlock, CarStream}; use crate::utils::encoding::extract_cids; use crate::utils::stream::par_buffer; use anyhow::Context as _; -use cid::Cid; use clap::Subcommand; use futures::{StreamExt, TryStreamExt}; use fvm_ipld_encoding::DAG_CBOR; @@ -148,7 +147,7 @@ async fn benchmark_car_streaming_inspect(input: Vec<PathBuf>) -> anyhow::Result< while let Some(block) = s.try_next().await? { let block: CarBlock = block; if block.cid.codec() == DAG_CBOR { - let cid_vec: Vec<Cid> = extract_cids(&block.data)?; + let cid_vec = extract_cids(&block.data)?; let _ = cid_vec.iter().unique().count(); } sink.write_all(&block.data).await? 
diff --git a/src/utils/encoding/cid_de_cbor.rs b/src/utils/encoding/cid_de_cbor.rs index 88cae4a27ef2..44874b1e0045 100644 --- a/src/utils/encoding/cid_de_cbor.rs +++ b/src/utils/encoding/cid_de_cbor.rs @@ -6,22 +6,25 @@ use cid::Cid; use cid::serde::BytesToCidVisitor; use serde::Deserializer; use serde::de::{self, DeserializeSeed, SeqAccess, Visitor}; +use smallvec::SmallVec; use std::fmt; +pub type SmallCidVec = SmallVec<[Cid; 8]>; + /// Find and extract all the [`Cid`] from a `DAG_CBOR`-encoded blob without employing any /// intermediate recursive structures, eliminating unnecessary allocations. -pub fn extract_cids(cbor_blob: &[u8]) -> anyhow::Result<Vec<Cid>> { +pub fn extract_cids(cbor_blob: &[u8]) -> anyhow::Result<SmallCidVec> { let CidVec(v) = from_slice_with_fallback(cbor_blob)?; Ok(v) } /// [`CidVec`] allows for efficient zero-copy de-serialization of `DAG_CBOR`-encoded nodes into a /// vector of [`Cid`]. -struct CidVec(Vec<Cid>); +struct CidVec(SmallCidVec); /// [`FilterCids`] traverses an [`ipld_core::ipld::Ipld`] tree, appending [`Cid`]s (and only CIDs) to a single vector. /// This is much faster than constructing an [`ipld_core::ipld::Ipld`] tree and then performing the filtering. -struct FilterCids<'a>(&'a mut Vec<Cid>); +struct FilterCids<'a>(&'a mut SmallCidVec); impl<'de> DeserializeSeed<'de> for FilterCids<'_> { type Value = (); @@ -30,7 +33,7 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> { where D: Deserializer<'de>, { - struct FilterCidsVisitor<'a>(&'a mut Vec<Cid>); + struct FilterCidsVisitor<'a>(&'a mut SmallCidVec); impl<'de> Visitor<'de> for FilterCidsVisitor<'_> { type Value = (); @@ -50,7 +53,7 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> { // This is where recursion happens, we unravel each [`Ipld`] till we reach all // the nodes. while visitor - .next_entry_seed(FilterCids(&mut Vec::new()), FilterCids(self.0))? + .next_entry_seed(FilterCids(&mut SmallCidVec::new()), FilterCids(self.0))? .is_some() { // Nothing to do; inner map values have been into `vec`. 
@@ -171,7 +174,7 @@ impl<'de> de::Deserialize<'de> for CidVec { where D: de::Deserializer<'de>, { - let mut vec = CidVec(Vec::new()); + let mut vec = CidVec(SmallCidVec::new()); FilterCids(&mut vec.0).deserialize(deserializer)?; Ok(vec) } From 3b0a663d89d3f18cbacb9765de8da08de83c54d8 Mon Sep 17 00:00:00 2001 From: hanabi1224 Date: Thu, 28 Aug 2025 21:58:44 +0800 Subject: [PATCH 2/2] apply AI suggestions --- src/utils/encoding/cid_de_cbor.rs | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/utils/encoding/cid_de_cbor.rs b/src/utils/encoding/cid_de_cbor.rs index 44874b1e0045..accb21adc274 100644 --- a/src/utils/encoding/cid_de_cbor.rs +++ b/src/utils/encoding/cid_de_cbor.rs @@ -33,6 +33,20 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> { where D: Deserializer<'de>, { + struct IgnoredSeed; + + impl<'de> DeserializeSeed<'de> for IgnoredSeed { + type Value = (); + + fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error> + where + D: Deserializer<'de>, + { + deserializer.deserialize_ignored_any(de::IgnoredAny)?; + Ok(()) + } + } + struct FilterCidsVisitor<'a>(&'a mut SmallCidVec); impl<'de> Visitor<'de> for FilterCidsVisitor<'_> { @@ -53,7 +67,7 @@ impl<'de> DeserializeSeed<'de> for FilterCids<'_> { // This is where recursion happens, we unravel each [`Ipld`] till we reach all // the nodes. while visitor - .next_entry_seed(FilterCids(&mut SmallCidVec::new()), FilterCids(self.0))? + .next_entry_seed(IgnoredSeed, FilterCids(self.0))? .is_some() { // Nothing to do; inner map values have been into `vec`.