diff --git a/c_api/IndexBinary_c_ex.cpp b/c_api/IndexBinary_c_ex.cpp index 2e00ee5e53..583cb325c8 100644 --- a/c_api/IndexBinary_c_ex.cpp +++ b/c_api/IndexBinary_c_ex.cpp @@ -10,6 +10,7 @@ #include "IndexBinary_c_ex.h" #include +#include #include "macros_impl.h" extern "C" { @@ -33,9 +34,23 @@ int faiss_IndexBinary_search_with_params( CATCH_AND_HANDLE } -size_t faiss_IndexBinary_size(FaissIndexBinary* index) { - auto xIndex = reinterpret_cast(index); - size_t rv = sizeof(xIndex); - return rv; +int faiss_IndexBinary_size(const FaissIndexBinary* index, size_t* p_size) { + try { + const faiss::IndexBinary* idx = reinterpret_cast(index); + // Base: raw binary codes (d / 8 bytes per vector). + size_t size = (size_t)idx->ntotal * idx->code_size; + // IVF-specific overhead not captured by code_size: + // centroids: quantizer->ntotal * quantizer->sa_code_size() + // stored IDs: ntotal * sizeof(idx_t) (per-vector ID in each inverted list) + if (auto ivf = dynamic_cast(idx)) { + auto ivfQuantizer = ivf->quantizer; + if (ivfQuantizer != nullptr) { + size += (size_t)ivfQuantizer->ntotal * ivfQuantizer->sa_code_size(); + } + size += (size_t)ivf->ntotal * sizeof(faiss::idx_t); + } + *p_size = size; + } + CATCH_AND_HANDLE } } diff --git a/c_api/IndexBinary_c_ex.h b/c_api/IndexBinary_c_ex.h index d43a3c6318..bd71d0cef7 100644 --- a/c_api/IndexBinary_c_ex.h +++ b/c_api/IndexBinary_c_ex.h @@ -38,11 +38,18 @@ int faiss_IndexBinary_search_with_params( int32_t* distances, idx_t* labels); -/** return the size of the binary index - * +/** return an approximate size estimate in bytes for the binary index. + * The estimate accounts for stored codes and, for IVF-based binary indices, + * includes centroid and stored ID overhead. + * + * This is not a complete in-memory footprint: it does not attempt to include + * all internal allocations such as inverted-list container overhead, + * direct_map, or quantizer internals beyond centroid storage. + * * @param index opaque pointer to index object + * @param p_size output approximate size in bytes */ -size_t faiss_IndexBinary_size(FaissIndexBinary* index); +int faiss_IndexBinary_size(const FaissIndexBinary* index, size_t* p_size); #ifdef __cplusplus } diff --git a/c_api/Index_c_ex.cpp b/c_api/Index_c_ex.cpp index c5c1045142..6d299530d9 100644 --- a/c_api/Index_c_ex.cpp +++ b/c_api/Index_c_ex.cpp @@ -13,6 +13,7 @@ #include "macros_impl.h" #include #include +#include extern "C" { @@ -39,10 +40,24 @@ int faiss_Index_merge_from( CATCH_AND_HANDLE } -size_t faiss_Index_size(FaissIndex* index) { - auto xIndex = reinterpret_cast(index); - size_t rv = sizeof(xIndex); - return rv; +int faiss_Index_size(const FaissIndex* index, size_t* p_size) { + try { + const faiss::Index* idx = reinterpret_cast(index); + // Base: raw vector codes (works for Flat, SQ, and all other types). + size_t size = (size_t)idx->ntotal * idx->sa_code_size(); + // IVF-specific overhead not captured by sa_code_size(): + // centroids: quantizer->ntotal * quantizer->sa_code_size() + // stored IDs: ntotal * sizeof(idx_t) (per-vector ID in each inverted list) + if (auto ivf = dynamic_cast(idx)) { + auto ivfQuantizer = ivf->quantizer; + if (ivfQuantizer != nullptr) { + size += (size_t)ivfQuantizer->ntotal * ivfQuantizer->sa_code_size(); + } + size += (size_t)ivf->ntotal * sizeof(faiss::idx_t); + } + *p_size = size; + } + CATCH_AND_HANDLE } int faiss_Index_dist_compute( diff --git a/c_api/Index_c_ex.h b/c_api/Index_c_ex.h index f8189f0844..5bc4f86e2f 100644 --- a/c_api/Index_c_ex.h +++ b/c_api/Index_c_ex.h @@ -28,7 +28,18 @@ int faiss_Index_reconstruct_batch( int faiss_Index_merge_from(FaissIndex* index, FaissIndex* other, idx_t add_id); -size_t faiss_Index_size(FaissIndex* index); +/** Estimate the size of the index in bytes. + * + * The returned value is an approximation based on the stored vector + * codes and any additional known overhead (for example centroids and + * stored IDs for IVF indices). It does not imply an exact total memory + * footprint, and may not be available for index types that do not + * support this estimate. + * + * @param index opaque pointer to index object + * @param p_size pointer to size_t to store the estimated size + */ +int faiss_Index_size(const FaissIndex* index, size_t* p_size); /** Compute distances between a query vector and a set of vectors *