From 4ab17684e9e09f9ff1425977da6750a75dd4c7a0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 7 May 2026 21:43:28 +0000 Subject: [PATCH 1/3] Avoid off-heap buffer cache growth in MEMORY mode `FileChannel.read(ByteBuffer)` with a heap-backed buffer causes the JDK to substitute a temporary direct buffer obtained from a per-thread cache (`sun.nio.ch.Util.BufferCache`). With chunk sizes near `Integer.MAX_VALUE`, a single MEMORY-mode database load leaves up to ~2 GB of direct memory cached on the loading thread for that thread's lifetime. Repeated loads on different threads compound the growth. Open the database via `FileInputStream` and delegate to the existing chunked `InputStream` read path. `FileInputStream.read(byte[])` is implemented natively without going through the NIO buffer cache, so it avoids the leak entirely. The MMAP path is unchanged, since `FileChannel.map()` does not use the cache. Note: `Files.readAllBytes()` and `Files.newInputStream()` would NOT fix this, as both are backed by `Channels.newInputStream(FileChannel)` internally and still trigger the cache. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 12 +++ .../java/com/maxmind/db/BufferHolder.java | 96 ++++++++++--------- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d8dd3eb..f3ad77ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ CHANGELOG ========= +4.1.0 +------------------ + +* Fixed unbounded off-heap memory growth when initializing the reader in + `FileMode.MEMORY`. The previous implementation read the database via + `FileChannel.read()` into a heap buffer, which causes the JDK to cache + temporary direct ByteBuffers in per-thread storage + (`sun.nio.ch.Util.BufferCache`). Repeated initialization across different + threads could grow this cache without bound. The reader now uses + `FileInputStream` for `MEMORY` mode, which bypasses the cache. + `FileMode.MEMORY_MAPPED` was unaffected. 
+ 4.0.2 (2025-12-08) ------------------ diff --git a/src/main/java/com/maxmind/db/BufferHolder.java b/src/main/java/com/maxmind/db/BufferHolder.java index 834a2b87..1fbcf889 100644 --- a/src/main/java/com/maxmind/db/BufferHolder.java +++ b/src/main/java/com/maxmind/db/BufferHolder.java @@ -3,6 +3,7 @@ import com.maxmind.db.Reader.FileMode; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; @@ -23,53 +24,36 @@ final class BufferHolder { } BufferHolder(File database, FileMode mode, int chunkSize) throws IOException { - try (RandomAccessFile file = new RandomAccessFile(database, "r"); - FileChannel channel = file.getChannel()) { - long size = channel.size(); - if (mode == FileMode.MEMORY) { + if (mode == FileMode.MEMORY) { + // FileInputStream avoids the per-thread direct ByteBuffer cache that + // FileChannel.read() populates when reading into a heap buffer. That cache + // retains the largest direct buffer ever requested — under chunked MEMORY + // mode that would mean chunkSize bytes of off-heap memory held per loader + // thread for the JVM's lifetime. + try (FileInputStream stream = new FileInputStream(database)) { + long size = database.length(); + var name = database.getName(); if (size <= chunkSize) { - // Allocate, read, and make read-only - ByteBuffer buffer = ByteBuffer.allocate((int) size); - if (channel.read(buffer) != size) { - throw new IOException("Unable to read " - + database.getName() - + " into memory. Unexpected end of stream."); - } - buffer.flip(); - this.buffer = new SingleBuffer(buffer); + this.buffer = SingleBuffer.wrap(readFully(stream, (int) size, name)); } else { - // Allocate chunks, read, and make read-only var fullChunks = (int) (size / chunkSize); var remainder = (int) (size % chunkSize); var totalChunks = fullChunks + (remainder > 0 ? 
1 : 0); var buffers = new ByteBuffer[totalChunks]; - for (int i = 0; i < fullChunks; i++) { - buffers[i] = ByteBuffer.allocate(chunkSize); + buffers[i] = ByteBuffer.wrap(readFully(stream, chunkSize, name)); } if (remainder > 0) { - buffers[totalChunks - 1] = ByteBuffer.allocate(remainder); + buffers[totalChunks - 1] = ByteBuffer.wrap( + readFully(stream, remainder, name)); } - - var totalRead = 0L; - for (var buffer : buffers) { - var read = channel.read(buffer); - if (read == -1) { - break; - } - totalRead += read; - buffer.flip(); - } - - if (totalRead != size) { - throw new IOException("Unable to read " - + database.getName() - + " into memory. Unexpected end of stream."); - } - this.buffer = new MultiBuffer(buffers, chunkSize); } - } else { + } + } else { + try (RandomAccessFile file = new RandomAccessFile(database, "r"); + FileChannel channel = file.getChannel()) { + long size = channel.size(); if (size <= chunkSize) { this.buffer = SingleBuffer.mapFromChannel(channel); } else { @@ -79,11 +63,32 @@ final class BufferHolder { } } - BufferHolder(InputStream stream, int chunkSize) throws IOException { + BufferHolder(InputStream stream, int chunkSize) throws IOException { if (null == stream) { throw new NullPointerException("Unable to use a NULL InputStream"); } + this.buffer = readFromStream(stream, chunkSize); + } + + // Pre-allocates exactly len bytes. Used by file-backed MEMORY mode where the size is + // known up front, avoiding the transient peak from ByteArrayOutputStream.grow() and + // the defensive copy in toByteArray(). + private static byte[] readFully(InputStream stream, int len, String name) throws IOException { + var data = new byte[len]; + var totalRead = 0; + while (totalRead < len) { + var n = stream.read(data, totalRead, len - totalRead); + if (n < 0) { + throw new IOException("Unable to read " + + name + + " into memory. 
Unexpected end of stream."); + } + totalRead += n; + } + return data; + } + private static Buffer readFromStream(InputStream stream, int chunkSize) throws IOException { // Read data from the stream in chunks to support databases >2GB. // Invariant: All chunks except the last are exactly chunkSize bytes. var chunks = new ArrayList(); @@ -116,17 +121,16 @@ final class BufferHolder { if (chunks.size() == 1) { // For databases that fit in a single chunk, use SingleBuffer - this.buffer = SingleBuffer.wrap(chunks.get(0)); - } else { - // For large databases, wrap chunks in ByteBuffers and use MultiBuffer - // Guaranteed: chunks[0..n-2] all have length == chunkSize - // chunks[n-1] may have length < chunkSize - var buffers = new ByteBuffer[chunks.size()]; - for (var i = 0; i < chunks.size(); i++) { - buffers[i] = ByteBuffer.wrap(chunks.get(i)); - } - this.buffer = new MultiBuffer(buffers, chunkSize); + return SingleBuffer.wrap(chunks.get(0)); + } + // For large databases, wrap chunks in ByteBuffers and use MultiBuffer + // Guaranteed: chunks[0..n-2] all have length == chunkSize + // chunks[n-1] may have length < chunkSize + var buffers = new ByteBuffer[chunks.size()]; + for (var i = 0; i < chunks.size(); i++) { + buffers[i] = ByteBuffer.wrap(chunks.get(i)); } + return new MultiBuffer(buffers, chunkSize); } /* From e2be3a536dbe09560474498fd7b7621a3e1cc984 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 7 May 2026 21:44:37 +0000 Subject: [PATCH 2/3] Add mise configuration Manages local Java and Maven versions via mise, matching the setup in minfraud-api-java. CI is unaffected since the GitHub Actions workflows use setup-java directly. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- mise.lock | 34 ++++++++++++++++++++++++++++++++++ mise.toml | 18 ++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 mise.lock create mode 100644 mise.toml diff --git a/mise.lock b/mise.lock new file mode 100644 index 00000000..f6b3d50c --- /dev/null +++ b/mise.lock @@ -0,0 +1,34 @@ +# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html + +[[tools.java]] +version = "26.0.1" +backend = "core:java" + +[tools.java."platforms.linux-x64"] +checksum = "sha256:2f2802d57b5fc414f1ddf6648ba12cc9a6454cf67b32ac95407c018f2e6ab0b0" +url = "https://download.java.net/java/GA/jdk26.0.1/458fda22e4c54d5ba572ab8d2b22eb83/8/GPL/openjdk-26.0.1_linux-x64_bin.tar.gz" + +[[tools.maven]] +version = "3.9.15" +backend = "aqua:apache/maven" + +[tools.maven."platforms.linux-arm64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-arm64-musl"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-x64-musl"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.macos-arm64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.macos-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.windows-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" diff --git a/mise.toml b/mise.toml new file mode 100644 index 00000000..c4027682 --- /dev/null +++ b/mise.toml @@ -0,0 +1,18 @@ +[settings] +experimental 
= true
+lockfile = true
+disable_backends = [
+    "asdf",
+    "vfox",
+]
+
+[tools]
+java = "latest"
+maven = "latest"
+
+[hooks]
+enter = "mise install --quiet --locked"
+
+[[watch_files]]
+patterns = ["mise.toml", "mise.lock"]
+run = "mise install --quiet --locked"
From cbf83d9ae0951b3bf2d31539592b3e5e596157bd Mon Sep 17 00:00:00 2001
From: Gregory Oschwald
Date: Thu, 7 May 2026 22:18:26 +0000
Subject: [PATCH 3/3] Test MEMORY mode under chunkSizes matrix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing chunkSizes parameterized matrix in ReaderTest only
routed through Reader(File, int chunkSize), which hardcodes
FileMode.MEMORY_MAPPED. As a result the chunked file-MEMORY load
path in BufferHolder had no integration coverage — a remainder-chunk
sizing regression or an EOF-handling change could ship silently.

Add a package-private Reader(File, FileMode, int chunkSize)
constructor and a testMemoryMode(int chunkSize) test that mirrors
test(int) but in MEMORY mode. With chunk sizes 512/2048 against the
test DBs (1285 and 2794 bytes), the multi-chunk + remainder branch
is now exercised end to end.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main/java/com/maxmind/db/Reader.java | 6 +++++- src/test/java/com/maxmind/db/ReaderTest.java | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/maxmind/db/Reader.java b/src/main/java/com/maxmind/db/Reader.java index f19872cc..64165468 100644 --- a/src/main/java/com/maxmind/db/Reader.java +++ b/src/main/java/com/maxmind/db/Reader.java @@ -59,8 +59,12 @@ public Reader(File database) throws IOException { } Reader(File database, int chunkSize) throws IOException { + this(database, FileMode.MEMORY_MAPPED, chunkSize); + } + + Reader(File database, FileMode fileMode, int chunkSize) throws IOException { this( - new BufferHolder(database, FileMode.MEMORY_MAPPED, chunkSize), + new BufferHolder(database, fileMode, chunkSize), database.getName(), NoCache.getInstance() ); diff --git a/src/test/java/com/maxmind/db/ReaderTest.java b/src/test/java/com/maxmind/db/ReaderTest.java index adfe2f3d..9188677f 100644 --- a/src/test/java/com/maxmind/db/ReaderTest.java +++ b/src/test/java/com/maxmind/db/ReaderTest.java @@ -12,6 +12,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import com.maxmind.db.Reader.FileMode; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -83,6 +84,24 @@ public void test(int chunkSize) throws IOException { } } + @ParameterizedTest + @MethodSource("chunkSizes") + public void testMemoryMode(int chunkSize) throws IOException { + for (long recordSize : new long[] {24, 28, 32}) { + for (int ipVersion : new int[] {4, 6}) { + var file = getFile("MaxMind-DB-test-ipv" + ipVersion + "-" + recordSize + ".mmdb"); + try (var reader = new Reader(file, FileMode.MEMORY, chunkSize)) { + this.testMetadata(reader, ipVersion, recordSize); + if (ipVersion == 4) { + this.testIpV4(reader, file); + } else { + this.testIpV6(reader, file); + } + } + } + } + } + static class 
GetRecordTest { InetAddress ip; File db;