diff --git a/c++/include/orc/MemoryPool.hh b/c++/include/orc/MemoryPool.hh index a914e5f260..8a7d1003cb 100644 --- a/c++/include/orc/MemoryPool.hh +++ b/c++/include/orc/MemoryPool.hh @@ -42,13 +42,15 @@ namespace orc { uint64_t currentSize_; // maximal capacity (actual allocated memory) uint64_t currentCapacity_; + // whether this buffer owns memory lifecycle + bool ownBuffer_; // not implemented DataBuffer(DataBuffer& buffer); DataBuffer& operator=(DataBuffer& buffer); public: - DataBuffer(MemoryPool& pool, uint64_t size = 0); + DataBuffer(MemoryPool& pool, uint64_t size = 0, bool ownBuf = true); DataBuffer(DataBuffer&& buffer) noexcept; @@ -81,6 +83,9 @@ namespace orc { void reserve(uint64_t size); void resize(uint64_t size); void zeroOut(); + + // Attach to an external raw buffer. bufSize is in bytes. + void setData(T* buf, size_t bufSize); }; // Specializations for char diff --git a/c++/src/MemoryPool.cc b/c++/src/MemoryPool.cc index ed7fee7373..f51b210517 100644 --- a/c++/src/MemoryPool.cc +++ b/c++/src/MemoryPool.cc @@ -24,6 +24,7 @@ #include #include #include +#include namespace orc { @@ -52,10 +53,16 @@ namespace orc { } template - DataBuffer::DataBuffer(MemoryPool& pool, uint64_t newSize) - : memoryPool_(pool), buf_(nullptr), currentSize_(0), currentCapacity_(0) { - reserve(newSize); - currentSize_ = newSize; + DataBuffer::DataBuffer(MemoryPool& pool, uint64_t newSize, bool ownBuf) + : memoryPool_(pool), + buf_(nullptr), + currentSize_(0), + currentCapacity_(0), + ownBuffer_(ownBuf) { + if (ownBuffer_) { + reserve(newSize); + currentSize_ = newSize; + } } template @@ -63,24 +70,34 @@ namespace orc { : memoryPool_(buffer.memoryPool_), buf_(buffer.buf_), currentSize_(buffer.currentSize_), - currentCapacity_(buffer.currentCapacity_) { + currentCapacity_(buffer.currentCapacity_), + ownBuffer_(buffer.ownBuffer_) { buffer.buf_ = nullptr; buffer.currentSize_ = 0; buffer.currentCapacity_ = 0; + buffer.ownBuffer_ = true; } template DataBuffer::~DataBuffer() { + if (!ownBuffer_) { + return; + } for (uint64_t i = currentSize_; i > 0; --i) { (buf_ + i - 1)->~T(); } if (buf_) { + static_assert(std::is_trivially_copyable::value, + "Only trivially copyable type is supported for DataBuffer reserve"); memoryPool_.free(reinterpret_cast(buf_)); } } template void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (currentSize_ > newSize) { for (uint64_t i = currentSize_; i > newSize; --i) { @@ -96,6 +113,9 @@ namespace orc { template void DataBuffer::reserve(uint64_t newCapacity) { + if (!ownBuffer_) { + return; + } if (newCapacity > currentCapacity_ || !buf_) { if (buf_) { T* buf_old = buf_; @@ -114,6 +134,18 @@ namespace orc { memset(buf_, 0, sizeof(T) * currentCapacity_); } + template + void DataBuffer::setData(T* buffer, size_t bufSize) { + if (ownBuffer_ && buf_) { + static_assert(std::is_trivially_copyable::value, + "Only trivially copyable type is supported for DataBuffer reserve"); + memoryPool_.free(reinterpret_cast(buf_)); + } + ownBuffer_ = false; + buf_ = buffer; + currentSize_ = currentCapacity_ = static_cast(bufSize / sizeof(T)); + } + // Specializations for Int128 template <> void DataBuffer::zeroOut() { @@ -126,13 +158,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, newSize - currentSize_); @@ -144,13 +179,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(char*)); @@ -162,13 +200,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(double)); @@ -180,13 +221,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(float)); @@ -198,13 +242,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int64_t)); @@ -216,13 +263,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int32_t)); @@ -234,13 +284,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int16_t)); @@ -252,13 +305,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(int8_t)); @@ -270,13 +326,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, (newSize - currentSize_) * sizeof(uint64_t)); @@ -288,13 +347,16 @@ namespace orc { template <> DataBuffer::~DataBuffer() { - if (buf_) { + if (ownBuffer_ && buf_) { memoryPool_.free(reinterpret_cast(buf_)); } } template <> void DataBuffer::resize(uint64_t newSize) { + if (!ownBuffer_) { + return; + } reserve(newSize); if (newSize > currentSize_) { memset(buf_ + currentSize_, 0, newSize - currentSize_); diff --git a/c++/test/TestCache.cc b/c++/test/TestCache.cc index 496ba3ec90..ad7023bd46 100644 --- a/c++/test/TestCache.cc +++ b/c++/test/TestCache.cc @@ -26,6 +26,22 @@ namespace orc { + class CountingMemoryPool : public MemoryPool { + public: + uint64_t allocCount = 0; + uint64_t freeCount = 0; + + char* malloc(uint64_t size) override { + ++allocCount; + return static_cast(std::malloc(size)); + } + + void free(char* p) override { + ++freeCount; + std::free(p); + } + }; + TEST(TestReadRangeCombiner, testBasics) { ReadRangeCombiner combinator{0, 100}; /// Ranges with partial overlap and identical offsets @@ -139,4 +155,24 @@ namespace orc { slice = cache.read({20, 2}); assert_slice_equal(slice, "uv"); } + + TEST(TestDataBuffer, testExternalBufferNonOwning) { + CountingMemoryPool pool; + char external[16] = {0}; + uint64_t freeCountAfterSetData = 0; + + { + DataBuffer buffer(pool, 0); + buffer.setData(external, sizeof(external)); + freeCountAfterSetData = pool.freeCount; + + // Non-owning buffer should keep external size and ignore resize. + EXPECT_EQ(sizeof(external), buffer.size()); + buffer.resize(8); + EXPECT_EQ(sizeof(external), buffer.size()); + } + + // setData may release previously owned internal memory, but destruction should not free external. + EXPECT_EQ(freeCountAfterSetData, pool.freeCount); + } } // namespace orc