Skip to content

Commit ede4683

Browse files
committed
fix: add 128-byte threshold to prevent large Uint8Array string duplication
Applied the same 128-byte threshold to normalizeValue() that the hashing function uses. This avoids building giant strings in memory when indexing large Uint8Arrays (> 128 bytes). Arrays larger than 128 bytes fall back to reference equality, which is acceptable because the fix primarily targets ID use cases (ULIDs and UUIDs are both 16 bytes). Added test coverage to verify that the threshold behaves as expected.
1 parent 4a1d114 commit ede4683

2 files changed

Lines changed: 74 additions & 3 deletions

File tree

packages/db/src/utils/comparison.ts

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,14 @@ function areUint8ArraysEqual(a: Uint8Array, b: Uint8Array): boolean {
126126
return true
127127
}
128128

129+
/**
130+
* Threshold for normalizing Uint8Arrays to string representations.
131+
* Arrays larger than this will use reference equality to avoid memory overhead.
132+
* 128 bytes is enough for common ID formats (ULIDs are 16 bytes, UUIDs are 16 bytes)
133+
* while avoiding excessive string allocation for large binary data.
134+
*/
135+
const UINT8ARRAY_NORMALIZE_THRESHOLD = 128
136+
129137
/**
130138
* Normalize a value for comparison and Map key usage
131139
* Converts values that can't be directly compared or used as Map keys
@@ -143,9 +151,14 @@ export function normalizeValue(value: any): any {
143151
value instanceof Uint8Array
144152

145153
if (isUint8Array) {
146-
// Convert to a string representation that can be used as a Map key
147-
// Use a special prefix to avoid collisions with user strings
148-
return `__u8__${Array.from(value).join(`,`)}`
154+
// Only normalize small arrays to avoid memory overhead for large binary data
155+
if (value.byteLength <= UINT8ARRAY_NORMALIZE_THRESHOLD) {
156+
// Convert to a string representation that can be used as a Map key
157+
// Use a special prefix to avoid collisions with user strings
158+
return `__u8__${Array.from(value).join(`,`)}`
159+
}
160+
// For large arrays, fall back to reference equality
161+
// Users working with large binary data should use a derived key if needed
149162
}
150163

151164
return value

packages/db/tests/integration/uint8array-id-comparison.test.ts

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,4 +78,62 @@ describe(`Uint8Array ID comparison (user reproduction)`, () => {
7878
expect(resultByName).toBeDefined()
7979
expect(resultByName?.name).toBe(makeItemName(selectedItemIndex))
8080
})
81+
82+
it(`should use reference equality for large Uint8Arrays (> 128 bytes)`, async () => {
83+
// Create a large Uint8Array (> 128 bytes) that should use reference equality
84+
const largeId = new Uint8Array(200).fill(42)
85+
86+
interface LargeItem {
87+
id: Uint8Array
88+
name: string
89+
}
90+
91+
const data: Array<LargeItem> = [
92+
{ id: largeId, name: `Large Item` },
93+
{ id: new Uint8Array(200).fill(99), name: `Other Large Item` },
94+
]
95+
96+
const collection = createCollection(
97+
mockSyncCollectionOptions<LargeItem>({
98+
id: `large-uint8array-test`,
99+
getKey: (item) => item.name,
100+
initialData: data,
101+
autoIndex: `eager`,
102+
})
103+
)
104+
105+
// Query with the exact same reference - this should work
106+
const queryWithSameRef = createLiveQueryCollection((q) =>
107+
q
108+
.from({ item: collection })
109+
.where(({ item }) => eq(item.id, largeId))
110+
.findOne()
111+
)
112+
113+
await queryWithSameRef.preload()
114+
const resultWithSameRef = Array.from(queryWithSameRef.entries())[0]?.[1]
115+
116+
// Should find the item because we're using the same reference
117+
expect(resultWithSameRef).toBeDefined()
118+
expect(resultWithSameRef?.name).toBe(`Large Item`)
119+
120+
// Query with a different instance but same content - this will NOT work
121+
// because large arrays use reference equality
122+
const differentInstance = new Uint8Array(200).fill(42)
123+
const queryWithDifferentRef = createLiveQueryCollection((q) =>
124+
q
125+
.from({ item: collection })
126+
.where(({ item }) => eq(item.id, differentInstance))
127+
.findOne()
128+
)
129+
130+
await queryWithDifferentRef.preload()
131+
const resultWithDifferentRef = Array.from(
132+
queryWithDifferentRef.entries()
133+
)[0]?.[1]
134+
135+
// Should NOT find the item because large arrays use reference equality
136+
// This is expected behavior to avoid memory overhead
137+
expect(resultWithDifferentRef).toBeUndefined()
138+
})
81139
})

0 commit comments

Comments
 (0)