Skip to content

Commit 8f4d7c9

Browse files
authored
[8.2] [MOD-12346] Remove main-thread graph repair from SVS delete operations (#831) (#841)
* [MOD-12346] Remove main-thread graph repair from SVS delete operations (#831) * remove consolidate from delete * indexSize -> includes marked deleted indexLabelCount -> only valid vectors * fix flow tests * Revert "fix flow tests" This reverts commit 831c08c. * Revert "indexSize -> includes marked deleted" This reverts commit 82758c2. * TO REVERT: bump to main SVS * BUMP TO ib/buffer * SVS v0.0.11: take updated bunaries and SVS public * Reapply "indexSize -> includes marked deleted" This reverts commit 3ae28cd. * Reapply "fix flow tests" This reverts commit 45462d4. (cherry picked from commit 11ea88d) * remove test * remove test * merge meriavg_fix_fp16_svs * format
1 parent b7f6621 commit 8f4d7c9

File tree

6 files changed

+97
-70
lines changed

6 files changed

+97
-70
lines changed

src/VecSim/algorithms/svs/svs.h

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
240240
}
241241

242242
int deleteVectorsImpl(const labelType *labels, size_t n) {
243-
if (indexSize() == 0) {
243+
if (indexLabelCount() == 0) {
244244
return 0;
245245
}
246246

@@ -280,22 +280,13 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
280280
return;
281281

282282
// SVS index instance should not be empty
283-
if (indexSize() == 0) {
283+
if (indexLabelCount() == 0) {
284284
this->impl_.reset();
285285
num_marked_deleted = 0;
286286
return;
287287
}
288288

289289
num_marked_deleted += n;
290-
// consolidate index if number of changes bigger than 50% of index size
291-
const float consolidation_threshold = .5f;
292-
// indexSize() should not be 0 see above lines
293-
assert(indexSize() > 0);
294-
// Note: if this function is called after deleteVectorsImpl, indexSize is already updated
295-
if (static_cast<float>(num_marked_deleted) / indexSize() > consolidation_threshold) {
296-
impl_->consolidate();
297-
num_marked_deleted = 0;
298-
}
299290
}
300291

301292
bool isTwoLevelLVQ(const VecSimSvsQuantBits &qbits) {
@@ -330,7 +321,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
330321

331322
~SVSIndex() = default;
332323

333-
size_t indexSize() const override { return impl_ ? impl_->size() : 0; }
324+
size_t indexSize() const override { return indexStorageSize(); }
334325

335326
size_t indexStorageSize() const override { return impl_ ? impl_->view_data().size() : 0; }
336327

@@ -342,7 +333,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
342333
if constexpr (isMulti) {
343334
return impl_ ? impl_->labelcount() : 0;
344335
} else {
345-
return indexSize();
336+
return impl_ ? impl_->size() : 0;
346337
}
347338
}
348339

@@ -524,7 +515,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
524515
VecSimQueryParams *queryParams) const override {
525516
auto rep = new VecSimQueryReply(this->allocator);
526517
this->lastMode = STANDARD_KNN;
527-
if (k == 0 || this->indexSize() == 0) {
518+
if (k == 0 || this->indexLabelCount() == 0) {
528519
return rep;
529520
}
530521

@@ -569,7 +560,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
569560
VecSimQueryParams *queryParams) const override {
570561
auto rep = new VecSimQueryReply(this->allocator);
571562
this->lastMode = RANGE_QUERY;
572-
if (radius == 0 || this->indexSize() == 0) {
563+
if (radius == 0 || this->indexLabelCount() == 0) {
573564
return rep;
574565
}
575566

@@ -642,7 +633,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
642633
// take ownership of the blob copy and pass it to the batch iterator.
643634
auto *queryBlobCopyPtr = queryBlobCopy.release();
644635
// Ownership of queryBlobCopy moves to VecSimBatchIterator that will free it at the end.
645-
if (indexSize() == 0) {
636+
if (indexLabelCount() == 0) {
646637
return new (this->getAllocator())
647638
NullSVS_BatchIterator(queryBlobCopyPtr, queryParams, this->getAllocator());
648639
} else {
@@ -652,7 +643,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
652643
}
653644

654645
bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override {
655-
size_t index_size = this->indexSize();
646+
size_t index_size = this->indexLabelCount();
656647

657648
// Calculate the ratio of the subset size to the total index size.
658649
double subsetRatio = (index_size == 0) ? 0.f : static_cast<double>(subsetSize) / index_size;

tests/flow/test_svs_tiered.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def test_recall_after_deletion(test_logger):
404404
test_logger.info(f"Done deleting half of the index")
405405
assert index.svs_label_count() >= (num_elements // 2) - indices_ctx.tiered_svs_params.updateTriggerThreshold
406406
assert index.svs_label_count() <= (num_elements // 2) + indices_ctx.tiered_svs_params.updateTriggerThreshold
407-
assert svs_index.index_size() == (num_elements // 2)
407+
assert svs_index.index_size() == num_elements
408408

409409
# Create a list of tuples of the vectors that left.
410410
vectors = [vectors[i] for i in range(1, num_elements, 2)]

tests/unit/test_svs.cpp

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -262,20 +262,18 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) {
262262
runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);
263263

264264
// Delete almost all vectors
265-
// First delete small amount of vector to prevent consolidation.
266-
const size_t first_batch_deletion = 10;
267-
ASSERT_EQ(svs_index->deleteVectors(ids.data(), first_batch_deletion), first_batch_deletion);
268-
ASSERT_EQ(VecSimIndex_IndexSize(index), n - first_batch_deletion);
269-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), first_batch_deletion);
270-
271-
// Now delete enough vectors to trigger consolidation.
272265
const size_t keep_num = 1;
273-
ASSERT_EQ(svs_index->deleteVectors(ids.data() + first_batch_deletion,
274-
n - keep_num - first_batch_deletion),
275-
n - keep_num - first_batch_deletion);
276-
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
277-
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
266+
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
267+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
268+
ASSERT_EQ(index->indexLabelCount(), keep_num);
269+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);
278270

271+
// Delete rest of the vectors
272+
// num_marked_deleted should reset.
273+
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
274+
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
275+
ASSERT_EQ(index->indexLabelCount(), 0);
276+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
279277
VecSimIndex_Free(index);
280278
}
281279

@@ -453,14 +451,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector) {
453451
for (size_t i = 0; i < n - 1; i++) {
454452
VecSimIndex_DeleteVector(index, i);
455453
}
456-
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
454+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
455+
ASSERT_EQ(index->indexLabelCount(), 1);
456+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
457457

458458
// Reinsert the same vectors under the same ids.
459459
for (size_t i = 0; i < n; i++) {
460460
// i / 10 is in integer (take the "floor value).
461461
GenerateAndAddVector<TEST_DATA_T>(index, dim, i, i / 10);
462462
}
463-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
463+
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
464+
ASSERT_EQ(index->indexLabelCount(), n);
465+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
464466

465467
// Run the same query again.
466468
runTopKSearchTest(index, query, k, verify_res);
@@ -513,14 +515,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector_different_id) {
513515
for (size_t i = 0; i < n - 1; i++) {
514516
VecSimIndex_DeleteVector(index, i);
515517
}
516-
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
518+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
519+
ASSERT_EQ(index->indexLabelCount(), 1);
520+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
517521

518522
// Reinsert the same vectors under different ids than before.
519523
for (size_t i = 0; i < n; i++) {
520524
GenerateAndAddVector<TEST_DATA_T>(index, dim, i + 10,
521525
i / 10); // i / 10 is in integer (take the "floor" value).
522526
}
523-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
527+
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
528+
ASSERT_EQ(index->indexLabelCount(), n);
529+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
524530

525531
// Run the same query again.
526532
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
@@ -920,7 +926,8 @@ TYPED_TEST(SVSTest, test_delete_vector) {
920926

921927
// Here the shift should happen.
922928
VecSimIndex_DeleteVector(index, 1);
923-
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
929+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
930+
ASSERT_EQ(index->indexLabelCount(), n - 1);
924931

925932
TEST_DATA_T query[] = {0.0, 0.0};
926933
auto verify_res = [&](size_t id, double score, size_t index) {
@@ -3024,7 +3031,8 @@ TYPED_TEST(SVSTest, logging_runtime_params) {
30243031
index->addVector(v[i].data(), ids[i]);
30253032
}
30263033
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 10);
3027-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
3034+
ASSERT_EQ(VecSimIndex_IndexSize(index), n + 10);
3035+
ASSERT_EQ(index->indexLabelCount(), n);
30283036

30293037
float query[] = {50, 50, 50, 50};
30303038
auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); };

tests/unit/test_svs_fp16.cpp

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,9 @@ TYPED_TEST(FP16SVSTest, svs_bulk_vectors_add_delete_test) {
242242
// Delete almost all vectors
243243
const size_t keep_num = 10;
244244
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
245-
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
245+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
246+
ASSERT_EQ(index->indexLabelCount(), keep_num);
247+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);
246248

247249
auto verify_res_after_delete = [&](size_t id, double score, size_t index) {
248250
EXPECT_EQ(id, n - keep_num + index);
@@ -252,6 +254,12 @@ TYPED_TEST(FP16SVSTest, svs_bulk_vectors_add_delete_test) {
252254
// Thread 0: Couldn't find key.
253255
runTopKSearchTest(index, query, keep_num, verify_res_after_delete, nullptr, BY_ID);
254256

257+
// Delete rest of the vectors
258+
// num_marked_deleted should reset.
259+
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
260+
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
261+
ASSERT_EQ(index->indexLabelCount(), 0);
262+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
255263
VecSimIndex_Free(index);
256264
}
257265

@@ -334,14 +342,18 @@ TYPED_TEST(FP16SVSTest, svs_reindexing_same_vector) {
334342
for (size_t i = 0; i < n - 1; i++) {
335343
VecSimIndex_DeleteVector(index, i);
336344
}
337-
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
345+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
346+
ASSERT_EQ(index->indexLabelCount(), 1);
347+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
338348

339349
// Reinsert the same vectors under the same ids.
340350
for (size_t i = 0; i < n; i++) {
341351
// i / 10 is in integer (take the "floor value).
342352
this->GenerateAndAddVector(index, dim, i, i / 10);
343353
}
344-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
354+
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
355+
ASSERT_EQ(index->indexLabelCount(), n);
356+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
345357

346358
// Run the same query again.
347359
runTopKSearchTest(index, query, k, verify_res);
@@ -388,14 +400,18 @@ TYPED_TEST(FP16SVSTest, svs_reindexing_same_vector_different_id) {
388400
for (size_t i = 0; i < n - 1; i++) {
389401
VecSimIndex_DeleteVector(index, i);
390402
}
391-
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
403+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
404+
ASSERT_EQ(index->indexLabelCount(), 1);
405+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);
392406

393407
// Reinsert the same vectors under different ids than before.
394408
for (size_t i = 0; i < n; i++) {
395409
this->GenerateAndAddVector(index, dim, i + 10,
396410
i / 10); // i / 10 is in integer (take the "floor" value).
397411
}
398-
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
412+
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
413+
ASSERT_EQ(index->indexLabelCount(), n);
414+
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);
399415

400416
// Run the same query again.
401417
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
@@ -787,7 +803,8 @@ TYPED_TEST(FP16SVSTest, test_delete_vector) {
787803

788804
// Here the shift should happen.
789805
VecSimIndex_DeleteVector(index, 1);
790-
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
806+
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
807+
ASSERT_EQ(index->indexLabelCount(), n - 1);
791808

792809
float16 query[dim];
793810
this->GenerateVector(query, dim, 0.0);
@@ -2627,7 +2644,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
26272644
VecSimIndex_DeleteVector(svs_index, i);
26282645
}
26292646
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
2630-
ASSERT_EQ(svs_index->indexSize(), n / 2);
2647+
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
26312648
k = n * 2 / 3;
26322649
cur_memory_usage = allocator->getAllocationSize();
26332650
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
@@ -2642,7 +2659,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
26422659
VecSimIndex_DeleteVector(flat_index, i);
26432660
}
26442661
ASSERT_EQ(flat_index->indexSize(), n / 6);
2645-
ASSERT_EQ(svs_index->indexSize(), n / 2);
2662+
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
26462663
k = n / 4;
26472664
cur_memory_usage = allocator->getAllocationSize();
26482665
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
@@ -2656,7 +2673,7 @@ TYPED_TEST(FP16SVSTieredIndexTest, KNNSearch) {
26562673
this->GenerateAndAddVector(flat_index, dim, i, i);
26572674
}
26582675
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
2659-
ASSERT_EQ(svs_index->indexSize(), 0);
2676+
ASSERT_EQ(svs_index->indexLabelCount(), 0);
26602677
k = n / 3;
26612678
cur_memory_usage = allocator->getAllocationSize();
26622679
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);

tests/unit/test_svs_multi.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ TYPED_TEST(SVSMultiTest, test_dynamic_svs_info_iterator) {
536536
VecSimIndex_DeleteVector(index, 0);
537537
info = VecSimIndex_DebugInfo(index);
538538
infoIter = VecSimIndex_DebugInfoIterator(index);
539-
ASSERT_EQ(2, info.commonInfo.indexSize);
539+
ASSERT_EQ(4, info.commonInfo.indexSize);
540540
ASSERT_EQ(1, info.commonInfo.indexLabelCount);
541541
compareSVSIndexInfoToIterator(info, infoIter);
542542
VecSimDebugInfoIterator_Free(infoIter);

0 commit comments

Comments
 (0)