From 4ab17684e9e09f9ff1425977da6750a75dd4c7a0 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 7 May 2026 21:43:28 +0000 Subject: [PATCH 1/3] Avoid off-heap buffer cache growth in MEMORY mode `FileChannel.read(ByteBuffer)` with a heap-backed buffer causes the JDK to substitute a temporary direct buffer obtained from a per-thread cache (`sun.nio.ch.Util.BufferCache`). With chunk sizes near `Integer.MAX_VALUE`, a single MEMORY-mode database load leaves up to ~2 GB of direct memory cached on the loading thread for that thread's lifetime. Repeated loads on different threads compound the growth. Open the database via `FileInputStream` and delegate to the existing chunked `InputStream` read path. `FileInputStream.read(byte[])` is implemented natively without going through the NIO buffer cache, so it avoids the leak entirely. The MMAP path is unchanged, since `FileChannel.map()` does not use the cache. Note: `Files.readAllBytes()` and `Files.newInputStream()` would NOT fix this, as both are backed by `Channels.newInputStream(FileChannel)` internally and still trigger the cache. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 12 +++ .../java/com/maxmind/db/BufferHolder.java | 96 ++++++++++--------- 2 files changed, 62 insertions(+), 46 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d8dd3eb..f3ad77ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ CHANGELOG ========= +4.1.0 +------------------ + +* Fixed unbounded off-heap memory growth when initializing the reader in + `FileMode.MEMORY`. The previous implementation read the database via + `FileChannel.read()` into a heap buffer, which causes the JDK to cache + temporary direct ByteBuffers in per-thread storage + (`sun.nio.ch.Util.BufferCache`). Repeated initialization across different + threads could grow this cache without bound. The reader now uses + `FileInputStream` for `MEMORY` mode, which bypasses the cache. + `FileMode.MEMORY_MAPPED` was unaffected. 
+ 4.0.2 (2025-12-08) ------------------ diff --git a/src/main/java/com/maxmind/db/BufferHolder.java b/src/main/java/com/maxmind/db/BufferHolder.java index 834a2b87..1fbcf889 100644 --- a/src/main/java/com/maxmind/db/BufferHolder.java +++ b/src/main/java/com/maxmind/db/BufferHolder.java @@ -3,6 +3,7 @@ import com.maxmind.db.Reader.FileMode; import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.RandomAccessFile; @@ -23,53 +24,36 @@ final class BufferHolder { } BufferHolder(File database, FileMode mode, int chunkSize) throws IOException { - try (RandomAccessFile file = new RandomAccessFile(database, "r"); - FileChannel channel = file.getChannel()) { - long size = channel.size(); - if (mode == FileMode.MEMORY) { + if (mode == FileMode.MEMORY) { + // FileInputStream avoids the per-thread direct ByteBuffer cache that + // FileChannel.read() populates when reading into a heap buffer. That cache + // retains the largest direct buffer ever requested — under chunked MEMORY + // mode that would mean chunkSize bytes of off-heap memory held per loader + // thread for the JVM's lifetime. + try (FileInputStream stream = new FileInputStream(database)) { + long size = database.length(); + var name = database.getName(); if (size <= chunkSize) { - // Allocate, read, and make read-only - ByteBuffer buffer = ByteBuffer.allocate((int) size); - if (channel.read(buffer) != size) { - throw new IOException("Unable to read " - + database.getName() - + " into memory. Unexpected end of stream."); - } - buffer.flip(); - this.buffer = new SingleBuffer(buffer); + this.buffer = SingleBuffer.wrap(readFully(stream, (int) size, name)); } else { - // Allocate chunks, read, and make read-only var fullChunks = (int) (size / chunkSize); var remainder = (int) (size % chunkSize); var totalChunks = fullChunks + (remainder > 0 ? 
1 : 0); var buffers = new ByteBuffer[totalChunks]; - for (int i = 0; i < fullChunks; i++) { - buffers[i] = ByteBuffer.allocate(chunkSize); + buffers[i] = ByteBuffer.wrap(readFully(stream, chunkSize, name)); } if (remainder > 0) { - buffers[totalChunks - 1] = ByteBuffer.allocate(remainder); + buffers[totalChunks - 1] = ByteBuffer.wrap( + readFully(stream, remainder, name)); } - - var totalRead = 0L; - for (var buffer : buffers) { - var read = channel.read(buffer); - if (read == -1) { - break; - } - totalRead += read; - buffer.flip(); - } - - if (totalRead != size) { - throw new IOException("Unable to read " - + database.getName() - + " into memory. Unexpected end of stream."); - } - this.buffer = new MultiBuffer(buffers, chunkSize); } - } else { + } + } else { + try (RandomAccessFile file = new RandomAccessFile(database, "r"); + FileChannel channel = file.getChannel()) { + long size = channel.size(); if (size <= chunkSize) { this.buffer = SingleBuffer.mapFromChannel(channel); } else { @@ -79,11 +63,32 @@ final class BufferHolder { } } - BufferHolder(InputStream stream, int chunkSize) throws IOException { + BufferHolder(InputStream stream, int chunkSize) throws IOException { if (null == stream) { throw new NullPointerException("Unable to use a NULL InputStream"); } + this.buffer = readFromStream(stream, chunkSize); + } + + // Pre-allocates exactly len bytes. Used by file-backed MEMORY mode where the size is + // known up front, avoiding the transient peak from ByteArrayOutputStream.grow() and + // the defensive copy in toByteArray(). + private static byte[] readFully(InputStream stream, int len, String name) throws IOException { + var data = new byte[len]; + var totalRead = 0; + while (totalRead < len) { + var n = stream.read(data, totalRead, len - totalRead); + if (n < 0) { + throw new IOException("Unable to read " + + name + + " into memory. 
Unexpected end of stream."); + } + totalRead += n; + } + return data; + } + private static Buffer readFromStream(InputStream stream, int chunkSize) throws IOException { // Read data from the stream in chunks to support databases >2GB. // Invariant: All chunks except the last are exactly chunkSize bytes. var chunks = new ArrayList(); @@ -116,17 +121,16 @@ final class BufferHolder { if (chunks.size() == 1) { // For databases that fit in a single chunk, use SingleBuffer - this.buffer = SingleBuffer.wrap(chunks.get(0)); - } else { - // For large databases, wrap chunks in ByteBuffers and use MultiBuffer - // Guaranteed: chunks[0..n-2] all have length == chunkSize - // chunks[n-1] may have length < chunkSize - var buffers = new ByteBuffer[chunks.size()]; - for (var i = 0; i < chunks.size(); i++) { - buffers[i] = ByteBuffer.wrap(chunks.get(i)); - } - this.buffer = new MultiBuffer(buffers, chunkSize); + return SingleBuffer.wrap(chunks.get(0)); + } + // For large databases, wrap chunks in ByteBuffers and use MultiBuffer + // Guaranteed: chunks[0..n-2] all have length == chunkSize + // chunks[n-1] may have length < chunkSize + var buffers = new ByteBuffer[chunks.size()]; + for (var i = 0; i < chunks.size(); i++) { + buffers[i] = ByteBuffer.wrap(chunks.get(i)); } + return new MultiBuffer(buffers, chunkSize); } /* From e2be3a536dbe09560474498fd7b7621a3e1cc984 Mon Sep 17 00:00:00 2001 From: Gregory Oschwald Date: Thu, 7 May 2026 21:44:37 +0000 Subject: [PATCH 2/3] Add mise configuration Manages local Java and Maven versions via mise, matching the setup in minfraud-api-java. CI is unaffected since the GitHub Actions workflows use setup-java directly. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- mise.lock | 34 ++++++++++++++++++++++++++++++++++ mise.toml | 18 ++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 mise.lock create mode 100644 mise.toml diff --git a/mise.lock b/mise.lock new file mode 100644 index 00000000..f6b3d50c --- /dev/null +++ b/mise.lock @@ -0,0 +1,34 @@ +# @generated - this file is auto-generated by `mise lock` https://mise.jdx.dev/dev-tools/mise-lock.html + +[[tools.java]] +version = "26.0.1" +backend = "core:java" + +[tools.java."platforms.linux-x64"] +checksum = "sha256:2f2802d57b5fc414f1ddf6648ba12cc9a6454cf67b32ac95407c018f2e6ab0b0" +url = "https://download.java.net/java/GA/jdk26.0.1/458fda22e4c54d5ba572ab8d2b22eb83/8/GPL/openjdk-26.0.1_linux-x64_bin.tar.gz" + +[[tools.maven]] +version = "3.9.15" +backend = "aqua:apache/maven" + +[tools.maven."platforms.linux-arm64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-arm64-musl"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.linux-x64-musl"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.macos-arm64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.macos-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" + +[tools.maven."platforms.windows-x64"] +url = "https://archive.apache.org/dist/maven/maven-3/3.9.15/binaries/apache-maven-3.9.15-bin.tar.gz" diff --git a/mise.toml b/mise.toml new file mode 100644 index 00000000..c4027682 --- /dev/null +++ b/mise.toml @@ -0,0 +1,18 @@ +[settings] +experimental 
= true
+lockfile = true
+disable_backends = [
+    "asdf",
+    "vfox",
+]
+
+[tools]
+java = "latest"
+maven = "latest"
+
+[hooks]
+enter = "mise install --quiet --locked"
+
+[[watch_files]]
+patterns = ["mise.toml", "mise.lock"]
+run = "mise install --quiet --locked"
From cbf83d9ae0951b3bf2d31539592b3e5e596157bd Mon Sep 17 00:00:00 2001
From: Gregory Oschwald
Date: Thu, 7 May 2026 22:18:26 +0000
Subject: [PATCH 3/3] Test MEMORY mode under chunkSizes matrix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The existing chunkSizes parameterized matrix in ReaderTest only
routed through Reader(File, int chunkSize), which hardcodes
FileMode.MEMORY_MAPPED. As a result the chunked file-MEMORY load
path in BufferHolder had no integration coverage — a remainder-chunk
sizing regression or an EOF-handling change could ship silently.

Add a package-private Reader(File, FileMode, int chunkSize)
constructor and a testMemoryMode(int chunkSize) test that mirrors
test(int) but in MEMORY mode. With chunk sizes 512/2048 against the
test DBs (1285 and 2794 bytes), the multi-chunk + remainder branch
is now exercised end to end.
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/main/java/com/maxmind/db/Reader.java | 6 +++++- src/test/java/com/maxmind/db/ReaderTest.java | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/main/java/com/maxmind/db/Reader.java b/src/main/java/com/maxmind/db/Reader.java index f19872cc..64165468 100644 --- a/src/main/java/com/maxmind/db/Reader.java +++ b/src/main/java/com/maxmind/db/Reader.java @@ -59,8 +59,12 @@ public Reader(File database) throws IOException { } Reader(File database, int chunkSize) throws IOException { + this(database, FileMode.MEMORY_MAPPED, chunkSize); + } + + Reader(File database, FileMode fileMode, int chunkSize) throws IOException { this( - new BufferHolder(database, FileMode.MEMORY_MAPPED, chunkSize), + new BufferHolder(database, fileMode, chunkSize), database.getName(), NoCache.getInstance() ); diff --git a/src/test/java/com/maxmind/db/ReaderTest.java b/src/test/java/com/maxmind/db/ReaderTest.java index adfe2f3d..9188677f 100644 --- a/src/test/java/com/maxmind/db/ReaderTest.java +++ b/src/test/java/com/maxmind/db/ReaderTest.java @@ -12,6 +12,7 @@ import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; +import com.maxmind.db.Reader.FileMode; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -83,6 +84,24 @@ public void test(int chunkSize) throws IOException { } } + @ParameterizedTest + @MethodSource("chunkSizes") + public void testMemoryMode(int chunkSize) throws IOException { + for (long recordSize : new long[] {24, 28, 32}) { + for (int ipVersion : new int[] {4, 6}) { + var file = getFile("MaxMind-DB-test-ipv" + ipVersion + "-" + recordSize + ".mmdb"); + try (var reader = new Reader(file, FileMode.MEMORY, chunkSize)) { + this.testMetadata(reader, ipVersion, recordSize); + if (ipVersion == 4) { + this.testIpV4(reader, file); + } else { + this.testIpV6(reader, file); + } + } + } + } + } + static class 
GetRecordTest { InetAddress ip; File db;