diff --git a/gitindex/catfile.go b/gitindex/catfile.go
new file mode 100644
index 000000000..0c7c7753c
--- /dev/null
+++ b/gitindex/catfile.go
@@ -0,0 +1,242 @@
+// Copyright 2016 Google Inc. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package gitindex
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/hex"
+	"fmt"
+	"io"
+	"os/exec"
+	"strconv"
+	"sync"
+	"syscall"
+
+	"github.com/go-git/go-git/v5/plumbing"
+)
+
+// catfileReader provides streaming access to git blob objects via a pipelined
+// "git cat-file --batch --buffer" process. A writer goroutine feeds all blob
+// SHAs to stdin while the caller reads responses one at a time, similar to
+// archive/tar.Reader.
+//
+// The --buffer flag switches git's output from per-object flush (write_or_die)
+// to libc stdio buffering (fwrite), reducing syscalls. After stdin EOF, git
+// calls fflush(stdout) to deliver any remaining output.
+//
+// Usage:
+//
+//	cr, err := newCatfileReader(repoDir, ids)
+//	if err != nil { ... }
+//	defer cr.Close()
+//
+//	for {
+//		size, missing, err := cr.Next()
+//		if err == io.EOF { break }
+//		if missing { continue }
+//		if size > maxSize { continue } // unread bytes auto-skipped
+//		content := make([]byte, size)
+//		io.ReadFull(cr, content)
+//	}
+type catfileReader struct {
+	cmd      *exec.Cmd
+	reader   *bufio.Reader
+	writeErr <-chan error
+
+	// pending tracks unread content bytes + trailing LF for the current
+	// entry. Next() discards any pending bytes before reading the next
+	// header.
+	pending int
+
+	closeOnce sync.Once
+	closeErr  error
+}
+
+// newCatfileReader starts a "git cat-file --batch --buffer" process and feeds
+// all ids to its stdin via a background goroutine. The caller must call Close
+// when done.
+func newCatfileReader(repoDir string, ids []plumbing.Hash) (*catfileReader, error) {
+	cmd := exec.Command("git", "cat-file", "--batch", "--buffer")
+	cmd.Dir = repoDir
+
+	stdin, err := cmd.StdinPipe()
+	if err != nil {
+		return nil, fmt.Errorf("stdin pipe: %w", err)
+	}
+
+	stdout, err := cmd.StdoutPipe()
+	if err != nil {
+		stdin.Close()
+		return nil, fmt.Errorf("stdout pipe: %w", err)
+	}
+
+	if err := cmd.Start(); err != nil {
+		stdin.Close()
+		stdout.Close()
+		return nil, fmt.Errorf("start git cat-file: %w", err)
+	}
+
+	// Writer goroutine: feed all SHAs then close stdin to trigger flush.
+	writeErr := make(chan error, 1)
+	go func() {
+		defer close(writeErr)
+		defer stdin.Close()
+		bw := bufio.NewWriterSize(stdin, 64*1024)
+		var hexBuf [41]byte
+		hexBuf[40] = '\n'
+		for _, id := range ids {
+			hex.Encode(hexBuf[:40], id[:])
+			if _, err := bw.Write(hexBuf[:]); err != nil {
+				writeErr <- err
+				return
+			}
+		}
+		writeErr <- bw.Flush()
+	}()
+
+	return &catfileReader{
+		cmd:      cmd,
+		reader:   bufio.NewReaderSize(stdout, 512*1024),
+		writeErr: writeErr,
+	}, nil
+}
+
+// Next advances to the next blob entry. It returns the blob's size and whether
+// it is missing. Any unread content from the previous entry is automatically
+// discarded. Returns io.EOF when all entries have been consumed, and
+// io.ErrUnexpectedEOF if the stream ends in the middle of a header.
+//
+// After Next returns successfully with missing=false, call Read to consume the
+// blob content, or call Next again to skip it.
+func (cr *catfileReader) Next() (size int, missing bool, err error) {
+	// Discard unread content from the previous entry.
+	if cr.pending > 0 {
+		if _, err := cr.reader.Discard(cr.pending); err != nil {
+			return 0, false, fmt.Errorf("discard pending bytes: %w", err)
+		}
+		cr.pending = 0
+	}
+
+	headerBytes, err := cr.reader.ReadBytes('\n')
+	if err != nil {
+		if err == io.EOF {
+			if len(headerBytes) > 0 {
+				// A partial header means git died mid-write; don't
+				// report it as a clean end of stream.
+				return 0, false, io.ErrUnexpectedEOF
+			}
+			return 0, false, io.EOF
+		}
+		return 0, false, fmt.Errorf("read header: %w", err)
+	}
+	header := headerBytes[:len(headerBytes)-1] // trim \n
+
+	if bytes.HasSuffix(header, []byte(" missing")) {
+		return 0, true, nil
+	}
+
+	// Parse size from "<sha> <type> <size>".
+	lastSpace := bytes.LastIndexByte(header, ' ')
+	if lastSpace == -1 {
+		return 0, false, fmt.Errorf("unexpected header: %q", header)
+	}
+	size, err = strconv.Atoi(string(header[lastSpace+1:]))
+	if err != nil {
+		return 0, false, fmt.Errorf("parse size from %q: %w", header, err)
+	}
+
+	// Track pending bytes: content + trailing LF.
+	cr.pending = size + 1
+	return size, false, nil
+}
+
+// Read reads from the current blob's content. Implements io.Reader. Returns
+// io.EOF when the blob's content has been fully read (the trailing LF
+// delimiter is consumed automatically), and io.ErrUnexpectedEOF if the
+// stream ends before the announced size was delivered.
+func (cr *catfileReader) Read(p []byte) (int, error) {
+	if cr.pending <= 0 {
+		return 0, io.EOF
+	}
+
+	// Don't read into the trailing LF byte — reserve it.
+	contentRemaining := cr.pending - 1
+	if contentRemaining <= 0 {
+		// Only the trailing LF remains; consume it and signal EOF.
+		if _, err := cr.reader.ReadByte(); err != nil {
+			return 0, fmt.Errorf("read trailing LF: %w", err)
+		}
+		cr.pending = 0
+		return 0, io.EOF
+	}
+
+	// Limit the read to the remaining content bytes.
+	if len(p) > contentRemaining {
+		p = p[:contentRemaining]
+	}
+	n, err := cr.reader.Read(p)
+	cr.pending -= n
+	if err == io.EOF {
+		// pending > 0 means git promised more bytes; a bare EOF here is
+		// a truncated stream, not a clean end of the blob. Returning
+		// io.EOF would make chunked-read loops accept short content.
+		return n, io.ErrUnexpectedEOF
+	}
+	if err != nil {
+		return n, err
+	}
+
+	// If we've consumed all content bytes, also consume the trailing LF.
+	if cr.pending == 1 {
+		if _, err := cr.reader.ReadByte(); err != nil {
+			return n, fmt.Errorf("read trailing LF: %w", err)
+		}
+		cr.pending = 0
+	}
+
+	return n, nil
+}
+
+// Close shuts down the cat-file process and waits for it to exit.
+// It is safe to call Close multiple times or concurrently.
+func (cr *catfileReader) Close() error {
+	cr.closeOnce.Do(func() {
+		// Kill first to avoid blocking on drain when there are many
+		// unconsumed entries. Gitaly uses the same kill-first pattern.
+		_ = cr.cmd.Process.Kill()
+		// Drain any buffered stdout so the pipe closes cleanly.
+		// Must complete before cmd.Wait(), which closes the pipe.
+		_, _ = io.Copy(io.Discard, cr.reader)
+		// Wait for writer goroutine (unblocks via broken pipe from Kill).
+		<-cr.writeErr
+		err := cr.cmd.Wait()
+		// Suppress the expected "signal: killed" error from our own Kill().
+		if isKilledErr(err) {
+			err = nil
+		}
+		cr.closeErr = err
+	})
+	return cr.closeErr
+}
+
+// isKilledErr reports whether err is an exec.ExitError caused by SIGKILL.
+func isKilledErr(err error) bool {
+	exitErr, ok := err.(*exec.ExitError)
+	if !ok {
+		return false
+	}
+	ws, ok := exitErr.Sys().(syscall.WaitStatus)
+	return ok && ws.Signal() == syscall.SIGKILL
+}
diff --git a/gitindex/catfile_bench_test.go b/gitindex/catfile_bench_test.go
new file mode 100644
index 000000000..ec2626a7b
--- /dev/null
+++ b/gitindex/catfile_bench_test.go
@@ -0,0 +1,159 @@
+package gitindex
+
+import (
+	"fmt"
+	"io"
+	"os"
+	"testing"
+
+	"github.com/go-git/go-git/v5/plumbing"
+)
+
+// Set ZOEKT_BENCH_REPO to a git checkout to enable these benchmarks.
+// +// git clone --depth=1 https://github.com/kubernetes/kubernetes /tmp/k8s +// ZOEKT_BENCH_REPO=/tmp/k8s go test ./gitindex/ -bench=BenchmarkBlobRead -benchmem -count=5 -timeout=600s + +func requireBenchGitRepo(b *testing.B) string { + b.Helper() + dir := os.Getenv("ZOEKT_BENCH_REPO") + if dir == "" { + b.Skip("ZOEKT_BENCH_REPO not set") + } + return dir +} + +// collectBlobKeys opens the repo, walks HEAD, and returns all fileKeys with +// their BlobLocations plus the repo directory path. +func collectBlobKeys(b *testing.B, repoDir string) (map[fileKey]BlobLocation, string) { + b.Helper() + + repo, closer, err := openRepo(repoDir) + if err != nil { + b.Fatalf("openRepo: %v", err) + } + b.Cleanup(func() { closer.Close() }) + + head, err := repo.Head() + if err != nil { + b.Fatalf("Head: %v", err) + } + + commit, err := repo.CommitObject(head.Hash()) + if err != nil { + b.Fatalf("CommitObject: %v", err) + } + + tree, err := commit.Tree() + if err != nil { + b.Fatalf("Tree: %v", err) + } + + rw := NewRepoWalker(repo, "https://example.com/repo", nil) + if _, err := rw.CollectFiles(tree, "HEAD", nil); err != nil { + b.Fatalf("CollectFiles: %v", err) + } + + return rw.Files, repoDir +} + +// sortedBlobKeys returns fileKeys for deterministic iteration. +func sortedBlobKeys(files map[fileKey]BlobLocation) []fileKey { + keys := make([]fileKey, 0, len(files)) + for k := range files { + keys = append(keys, k) + } + return keys +} + +// BenchmarkBlobRead_GoGit measures the current go-git BlobObject approach: +// sequential calls to repo.GitRepo.BlobObject(hash) for each file. 
+func BenchmarkBlobRead_GoGit(b *testing.B) { + repoDir := requireBenchGitRepo(b) + files, _ := collectBlobKeys(b, repoDir) + keys := sortedBlobKeys(files) + b.Logf("collected %d blob keys", len(keys)) + + for _, n := range []int{1_000, 5_000, len(keys)} { + n = min(n, len(keys)) + subset := keys[:n] + + b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { + b.ReportAllocs() + var totalBytes int64 + for b.Loop() { + totalBytes = 0 + for _, key := range subset { + loc := files[key] + blob, err := loc.GitRepo.BlobObject(key.ID) + if err != nil { + b.Fatalf("BlobObject(%s): %v", key.ID, err) + } + r, err := blob.Reader() + if err != nil { + b.Fatalf("Reader: %v", err) + } + n, err := io.Copy(io.Discard, r) + r.Close() + if err != nil { + b.Fatalf("Read: %v", err) + } + totalBytes += n + } + } + b.ReportMetric(float64(totalBytes), "content-bytes/op") + b.ReportMetric(float64(len(subset)), "files/op") + }) + } +} + +// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader approach: +// all SHAs written to stdin at once via --buffer, responses read one at a time. +// This is the production path used by indexGitRepo. 
+func BenchmarkBlobRead_CatfileReader(b *testing.B) { + repoDir := requireBenchGitRepo(b) + files, gitDir := collectBlobKeys(b, repoDir) + keys := sortedBlobKeys(files) + b.Logf("collected %d blob keys", len(keys)) + + ids := make([]plumbing.Hash, len(keys)) + for i, k := range keys { + ids[i] = k.ID + } + + for _, n := range []int{1_000, 5_000, len(keys)} { + n = min(n, len(keys)) + subset := ids[:n] + + b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) { + b.ReportAllocs() + var totalBytes int64 + for b.Loop() { + totalBytes = 0 + cr, err := newCatfileReader(gitDir, subset) + if err != nil { + b.Fatalf("newCatfileReader: %v", err) + } + for range subset { + size, missing, err := cr.Next() + if err != nil { + cr.Close() + b.Fatalf("Next: %v", err) + } + if missing { + continue + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + cr.Close() + b.Fatalf("ReadFull: %v", err) + } + totalBytes += int64(len(content)) + } + cr.Close() + } + b.ReportMetric(float64(totalBytes), "content-bytes/op") + b.ReportMetric(float64(len(subset)), "files/op") + }) + } +} diff --git a/gitindex/catfile_hardening_test.go b/gitindex/catfile_hardening_test.go new file mode 100644 index 000000000..06a730a25 --- /dev/null +++ b/gitindex/catfile_hardening_test.go @@ -0,0 +1,813 @@ +package gitindex + +import ( + "bytes" + "fmt" + "io" + "os" + "os/exec" + "path/filepath" + "sync" + "testing" + "time" + + "github.com/go-git/go-git/v5/plumbing" +) + +// --- Close lifecycle tests --- + +// TestCatfileReader_DoubleClose verifies that Close is idempotent. +// Calling Close twice must not deadlock or panic. +func TestCatfileReader_DoubleClose(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + + // Consume the entry so the process can exit cleanly. 
+ if _, _, err := cr.Next(); err != nil { + t.Fatal(err) + } + + if err := cr.Close(); err != nil { + t.Fatalf("first Close: %v", err) + } + + // Second Close must not deadlock or panic. + done := make(chan error, 1) + go func() { + done <- cr.Close() + }() + + select { + case <-done: + // Success — whether err is nil or not, it didn't block. + case <-time.After(5 * time.Second): + t.Fatal("second Close() deadlocked — writeErr channel was never closed") + } +} + +// TestCatfileReader_ConcurrentClose verifies that calling Close from +// multiple goroutines simultaneously does not panic, deadlock, or +// corrupt state. +func TestCatfileReader_ConcurrentClose(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{ + blobs["hello.txt"], + blobs["large.bin"], + blobs["binary.bin"], + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + + // Read one entry, leave two unconsumed. + if _, _, err := cr.Next(); err != nil { + t.Fatal(err) + } + + const goroutines = 5 + var wg sync.WaitGroup + wg.Add(goroutines) + barrier := make(chan struct{}) + + for i := 0; i < goroutines; i++ { + go func() { + defer wg.Done() + <-barrier // all start at once + cr.Close() + }() + } + + done := make(chan struct{}) + go func() { + close(barrier) + wg.Wait() + close(done) + }() + + select { + case <-done: + // All goroutines returned. + case <-time.After(10 * time.Second): + t.Fatal("concurrent Close() deadlocked") + } +} + +// TestCatfileReader_CloseWithoutReading verifies that closing +// immediately after creation (without reading any entries) completes +// without hanging. 
+func TestCatfileReader_CloseWithoutReading(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{ + blobs["hello.txt"], + blobs["large.bin"], + blobs["binary.bin"], + blobs["empty.txt"], + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + + done := make(chan error, 1) + go func() { + done <- cr.Close() + }() + + select { + case err := <-done: + if err != nil { + t.Fatalf("Close: %v", err) + } + case <-time.After(10 * time.Second): + t.Fatal("Close() without reading any entries hung") + } +} + +// TestCatfileReader_CloseBeforeExhausted_ManyBlobs simulates early +// termination (e.g., builder.Add error) with many unconsumed blobs. +// Close should complete promptly — not drain the entire git output. +func TestCatfileReader_CloseBeforeExhausted_ManyBlobs(t *testing.T) { + // Create a repo with many non-trivial files. + dir := t.TempDir() + repoDir := filepath.Join(dir, "repo") + + script := ` +set -e +git init -b main repo +cd repo +git config user.email "test@test.com" +git config user.name "Test" +for i in $(seq 1 200); do + dd if=/dev/urandom bs=1024 count=10 of="file_$i.bin" 2>/dev/null +done +git add -A +git commit -m "many files" +` + cmd := exec.Command("/bin/sh", "-c", script) + cmd.Dir = dir + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + t.Fatalf("create test repo: %v", err) + } + + var ids []plumbing.Hash + for i := 1; i <= 200; i++ { + name := fmt.Sprintf("file_%d.bin", i) + out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD:"+name).Output() + if err != nil { + t.Fatalf("rev-parse %s: %v", name, err) + } + ids = append(ids, plumbing.NewHash(string(out[:len(out)-1]))) + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + + // Read only 1 of 200 entries. + if _, _, err := cr.Next(); err != nil { + t.Fatal(err) + } + + // Close should be fast (kill, not drain). With drain it still works but + // is slow — we enforce a generous bound. 
+ start := time.Now() + done := make(chan error, 1) + go func() { + done <- cr.Close() + }() + + select { + case <-done: + elapsed := time.Since(start) + // With Kill: sub-millisecond. Draining 200×10KB is fast too, so we + // use a generous 3s bound that still catches pathological stalls. + if elapsed > 3*time.Second { + t.Errorf("Close took %v after reading 1 of 200 entries — consider killing instead of draining", elapsed) + } + case <-time.After(30 * time.Second): + t.Fatal("Close() deadlocked with many unconsumed blobs") + } +} + +// --- Read edge-case tests --- + +// TestCatfileReader_ReadWithoutNext verifies that calling Read +// before calling Next returns io.EOF, not a panic or garbage data. +func TestCatfileReader_ReadWithoutNext(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + buf := make([]byte, 10) + n, err := cr.Read(buf) + if n != 0 || err != io.EOF { + t.Fatalf("Read without Next: n=%d err=%v, want n=0 err=io.EOF", n, err) + } +} + +// TestCatfileReader_ReadAfterFullConsumption verifies that extra Read +// calls after a blob is fully consumed return io.EOF, not duplicate +// data or trailing LF bytes. +func TestCatfileReader_ReadAfterFullConsumption(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _ := cr.Next() + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + + // Blob is fully read — additional Reads must return EOF. 
+ for i := 0; i < 3; i++ { + buf := make([]byte, 10) + n, err := cr.Read(buf) + if n != 0 || err != io.EOF { + t.Fatalf("Read #%d after full consumption: n=%d err=%v, want n=0 err=io.EOF", i, n, err) + } + } +} + +// TestCatfileReader_SmallBufferReads reads a blob one byte at a time +// and verifies the entire content is reconstructed correctly without +// any trailing LF leaking into user content. +func TestCatfileReader_SmallBufferReads(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _ := cr.Next() + + var result []byte + buf := make([]byte, 1) + for { + n, err := cr.Read(buf) + if n > 0 { + result = append(result, buf[:n]...) + } + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + } + + if len(result) != size { + t.Fatalf("read %d bytes, want %d", len(result), size) + } + if string(result) != "hello world\n" { + t.Errorf("content = %q, want %q", result, "hello world\n") + } +} + +// TestCatfileReader_PartialReadThenNext reads only part of a blob's +// content, then advances to the next entry. Verifies that the discard +// of pending bytes doesn't corrupt the stream. +func TestCatfileReader_PartialReadThenNext(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{ + blobs["hello.txt"], // 12 bytes: "hello world\n" + blobs["binary.bin"], // variable, starts with 0x00 + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + // Read only 5 of 12 bytes from hello.txt. 
+ size, _, _ := cr.Next() + if size != 12 { + t.Fatalf("hello.txt size = %d, want 12", size) + } + partial := make([]byte, 5) + if _, err := io.ReadFull(cr, partial); err != nil { + t.Fatal(err) + } + if string(partial) != "hello" { + t.Fatalf("partial = %q, want %q", partial, "hello") + } + + // Advance — must discard remaining 7 content bytes + trailing LF. + size, _, err = cr.Next() + if err != nil { + t.Fatalf("Next binary.bin after partial read: %v", err) + } + + // Verify binary.bin content is intact. + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if content[0] != 0x00 { + t.Errorf("binary.bin first byte = 0x%02x after partial-read skip, want 0x00", content[0]) + } +} + +// TestCatfileReader_PartialReadExactlyOneByteShort reads size-1 bytes +// from a blob. The pending field should be exactly 2 (1 content byte + +// 1 trailing LF). This stresses the boundary between content and LF +// in the discard path. +func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{ + blobs["hello.txt"], // 12 bytes + blobs["binary.bin"], // starts with 0x00 + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _ := cr.Next() + // Read exactly size-1 bytes — leaves 1 content byte + trailing LF. + buf := make([]byte, size-1) + if _, err := io.ReadFull(cr, buf); err != nil { + t.Fatal(err) + } + if string(buf) != "hello world" { // missing final \n + t.Fatalf("partial = %q", buf) + } + + // Advance — pending should be 2 (1 content byte + 1 LF). The + // Discard call must handle this exact boundary correctly. + size, missing, err := cr.Next() + if err != nil { + t.Fatalf("Next after size-1 partial read: %v", err) + } + if missing { + t.Fatal("binary.bin unexpectedly missing") + } + + // Read binary.bin to verify stream integrity. 
+ content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if content[0] != 0x00 { + t.Errorf("binary.bin[0] = 0x%02x after boundary skip, want 0x00", content[0]) + } +} + +// --- Empty / degenerate input tests --- + +// TestCatfileReader_EmptyIds verifies that an empty id slice produces +// immediate EOF without errors. +func TestCatfileReader_EmptyIds(t *testing.T) { + repoDir, _ := createTestRepo(t) + + cr, err := newCatfileReader(repoDir, nil) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected io.EOF for empty ids, got %v", err) + } +} + +// TestCatfileReader_MultipleEmptyBlobs stresses the trailing-LF +// handling for size-0 blobs. Git still outputs a LF after a 0-byte +// blob body. Repeated empty blobs test the pending=1 discard path. +func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) { + repoDir, blobs := createTestRepo(t) + + // Send the empty blob SHA 5 times — git outputs each independently. + emptyID := blobs["empty.txt"] + ids := []plumbing.Hash{emptyID, emptyID, emptyID, emptyID, emptyID} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + for i := range ids { + size, missing, err := cr.Next() + if err != nil { + t.Fatalf("Next #%d: %v", i, err) + } + if missing { + t.Fatalf("#%d unexpectedly missing", i) + } + if size != 0 { + t.Fatalf("#%d size = %d, want 0", i, size) + } + // Don't read — Next should discard the trailing LF for us. + } + + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected EOF after %d empty blobs, got %v", len(ids), err) + } +} + +// TestCatfileReader_EmptyBlobRead verifies that reading a 0-byte blob +// through the io.Reader interface returns 0 bytes and io.EOF, and that +// the trailing LF is consumed transparently. 
+func TestCatfileReader_EmptyBlobRead(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{ + blobs["empty.txt"], // 0 bytes + blobs["hello.txt"], // 12 bytes — sentinel + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _ := cr.Next() + if size != 0 { + t.Fatalf("empty.txt size = %d", size) + } + + // Explicitly Read on the 0-byte blob. + buf := make([]byte, 10) + n, err := cr.Read(buf) + if n != 0 || err != io.EOF { + t.Fatalf("Read empty blob: n=%d err=%v, want n=0 err=io.EOF", n, err) + } + + // The trailing LF must have been consumed. Verify by reading the + // next entry — if the LF leaked, the header parse would fail. + size, _, err = cr.Next() + if err != nil { + t.Fatalf("Next hello.txt after empty blob Read: %v", err) + } + if size != 12 { + t.Fatalf("hello.txt size = %d, want 12", size) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if string(content) != "hello world\n" { + t.Errorf("hello.txt = %q", content) + } +} + +// --- Missing object edge cases --- + +// TestCatfileReader_AllMissing verifies that a sequence of entirely +// missing objects is handled gracefully — no errors, no panics, just +// missing=true for each followed by EOF. 
+func TestCatfileReader_AllMissing(t *testing.T) { + repoDir, _ := createTestRepo(t) + + ids := []plumbing.Hash{ + plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef"), + plumbing.NewHash("1111111111111111111111111111111111111111"), + plumbing.NewHash("2222222222222222222222222222222222222222"), + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + for i, id := range ids { + _, missing, err := cr.Next() + if err != nil { + t.Fatalf("Next #%d (%s): %v", i, id, err) + } + if !missing { + t.Errorf("expected #%d (%s) to be missing", i, id) + } + } + + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected EOF after all missing, got %v", err) + } +} + +// TestCatfileReader_AlternatingMissingPresent interleaves missing and +// present objects, verifying that stream alignment is maintained. +func TestCatfileReader_AlternatingMissingPresent(t *testing.T) { + repoDir, blobs := createTestRepo(t) + + fake1 := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + fake2 := plumbing.NewHash("1111111111111111111111111111111111111111") + + ids := []plumbing.Hash{ + fake1, + blobs["hello.txt"], + fake2, + blobs["empty.txt"], + blobs["binary.bin"], + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + // fake1 — missing + _, missing, err := cr.Next() + if err != nil || !missing { + t.Fatalf("fake1: err=%v missing=%v", err, missing) + } + + // hello.txt — present, read it + size, missing, err := cr.Next() + if err != nil || missing { + t.Fatalf("hello.txt: err=%v missing=%v", err, missing) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if string(content) != "hello world\n" { + t.Errorf("hello.txt = %q", content) + } + + // fake2 — missing + _, missing, err = cr.Next() + if err != nil || !missing { + t.Fatalf("fake2: err=%v missing=%v", err, missing) + } + + // empty.txt — present, skip 
it + size, missing, err = cr.Next() + if err != nil || missing { + t.Fatalf("empty.txt: err=%v missing=%v", err, missing) + } + if size != 0 { + t.Errorf("empty.txt size = %d", size) + } + + // binary.bin — present, read it + size, missing, err = cr.Next() + if err != nil || missing { + t.Fatalf("binary.bin: err=%v missing=%v", err, missing) + } + binContent := make([]byte, size) + if _, err := io.ReadFull(cr, binContent); err != nil { + t.Fatal(err) + } + if binContent[0] != 0x00 { + t.Errorf("binary.bin[0] = 0x%02x, want 0x00", binContent[0]) + } + + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected EOF, got %v", err) + } +} + +// TestCatfileReader_MissingThenSkip verifies that a missing object +// followed by a present but skipped (unread) object doesn't corrupt +// the stream. Missing objects have no content body, so there must be +// no stale pending bytes interfering with the next header read. +func TestCatfileReader_MissingThenSkip(t *testing.T) { + repoDir, blobs := createTestRepo(t) + + fake := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef") + ids := []plumbing.Hash{ + fake, + blobs["large.bin"], // 64KB — skip without reading + blobs["hello.txt"], // sentinel — read to verify integrity + } + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + // missing + _, missing, _ := cr.Next() + if !missing { + t.Fatal("expected missing") + } + + // large.bin — skip + size, missing, err := cr.Next() + if err != nil || missing { + t.Fatalf("large.bin: err=%v missing=%v", err, missing) + } + if size != 64*1024 { + t.Fatalf("large.bin size = %d", size) + } + // deliberately don't read + + // hello.txt — read after missing+skip + size, missing, err = cr.Next() + if err != nil || missing { + t.Fatalf("hello.txt: err=%v missing=%v", err, missing) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if string(content) != "hello world\n" { 
+ t.Errorf("hello.txt = %q", content) + } +} + +// --- Next() edge cases --- + +// TestCatfileReader_RepeatedNextAfterEOF verifies that calling Next +// after EOF keeps returning EOF — not a panic, not a different error. +func TestCatfileReader_RepeatedNextAfterEOF(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["hello.txt"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + // Consume and skip the only entry. + if _, _, err := cr.Next(); err != nil { + t.Fatal(err) + } + + // First EOF. + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("first post-exhaust Next: %v, want io.EOF", err) + } + + // Second and third EOF — must be stable. + for i := 0; i < 2; i++ { + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("Next #%d after EOF: %v, want io.EOF", i+2, err) + } + } +} + +// --- Large blob precision tests --- + +// TestCatfileReader_LargeBlobBytePrecision verifies that a 64KB blob +// is read with byte-exact precision — no off-by-one from trailing LF +// handling, no truncation, no extra bytes. +func TestCatfileReader_LargeBlobBytePrecision(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["large.bin"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, err := cr.Next() + if err != nil { + t.Fatal(err) + } + if size != 64*1024 { + t.Fatalf("size = %d, want %d", size, 64*1024) + } + + // Read the full blob content. + content := make([]byte, size) + n, err := io.ReadFull(cr, content) + if err != nil { + t.Fatalf("ReadFull: %v (read %d of %d)", err, n, size) + } + if n != size { + t.Fatalf("read %d bytes, want %d", n, size) + } + + // Verify git agrees on the content via cat-file -p. 
+ expected, err := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output() + if err != nil { + t.Fatalf("git cat-file -p: %v", err) + } + if !bytes.Equal(content, expected) { + t.Errorf("content mismatch: got %d bytes, git says %d bytes", len(content), len(expected)) + // Find first divergence. + for i := range content { + if i >= len(expected) || content[i] != expected[i] { + t.Errorf("first diff at byte %d: got 0x%02x, want 0x%02x", i, content[i], expected[i]) + break + } + } + } +} + +// TestCatfileReader_LargeBlobChunkedRead reads a 64KB blob in 997-byte +// chunks (a prime number that doesn't align with any power-of-2 buffer) +// to verify no byte is lost or duplicated across read boundaries. +func TestCatfileReader_LargeBlobChunkedRead(t *testing.T) { + repoDir, blobs := createTestRepo(t) + ids := []plumbing.Hash{blobs["large.bin"]} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + size, _, _ := cr.Next() + if size != 64*1024 { + t.Fatalf("size = %d", size) + } + + var result bytes.Buffer + buf := make([]byte, 997) // prime-sized chunks + for { + n, err := cr.Read(buf) + if n > 0 { + result.Write(buf[:n]) + } + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + } + + if result.Len() != size { + t.Fatalf("total read = %d, want %d", result.Len(), size) + } + + // Cross-check with git. + expected, _ := exec.Command("git", "-C", repoDir, "cat-file", "-p", blobs["large.bin"].String()).Output() + if !bytes.Equal(result.Bytes(), expected) { + t.Error("chunked read content differs from git cat-file -p output") + } +} + +// --- Duplicate SHA test --- + +// TestCatfileReader_DuplicateSHAs verifies that requesting the same +// SHA multiple times works — git cat-file --batch outputs the object +// for each request independently. 
+func TestCatfileReader_DuplicateSHAs(t *testing.T) { + repoDir, blobs := createTestRepo(t) + + sha := blobs["hello.txt"] + ids := []plumbing.Hash{sha, sha, sha} + + cr, err := newCatfileReader(repoDir, ids) + if err != nil { + t.Fatal(err) + } + defer cr.Close() + + for i := 0; i < 3; i++ { + size, missing, err := cr.Next() + if err != nil { + t.Fatalf("Next #%d: %v", i, err) + } + if missing { + t.Fatalf("#%d unexpectedly missing", i) + } + if size != 12 { + t.Fatalf("#%d size = %d, want 12", i, size) + } + content := make([]byte, size) + if _, err := io.ReadFull(cr, content); err != nil { + t.Fatal(err) + } + if string(content) != "hello world\n" { + t.Errorf("#%d content = %q", i, content) + } + } + + _, _, err = cr.Next() + if err != io.EOF { + t.Fatalf("expected EOF, got %v", err) + } +} diff --git a/gitindex/catfile_test.go b/gitindex/catfile_test.go new file mode 100644 index 000000000..c871bd7d2 --- /dev/null +++ b/gitindex/catfile_test.go @@ -0,0 +1,230 @@ +package gitindex + +import ( + "io" + "os" + "os/exec" + "path/filepath" + "testing" + + "github.com/go-git/go-git/v5/plumbing" +) + +// createTestRepo creates a git repo with various test files and returns +// the repo path and a map of filename -> blob SHA. 
+func createTestRepo(t *testing.T) (string, map[string]plumbing.Hash) {
+	t.Helper()
+	dir := t.TempDir()
+	repoDir := filepath.Join(dir, "repo")
+
+	script := `
+set -e
+git init -b main repo
+cd repo
+git config user.email "test@test.com"
+git config user.name "Test"
+
+# Normal text file
+echo "hello world" > hello.txt
+
+# Empty file
+touch empty.txt
+
+# Binary file with newlines embedded
+printf '\x00\x01\x02\nhello\nworld\n\x03\x04' > binary.bin
+
+# Large-ish file (64KB of data)
+dd if=/dev/urandom bs=1024 count=64 of=large.bin 2>/dev/null
+
+git add -A
+git commit -m "initial"
+`
+	cmd := exec.Command("/bin/sh", "-c", script)
+	cmd.Dir = dir
+	cmd.Stderr = os.Stderr
+	if err := cmd.Run(); err != nil {
+		t.Fatalf("create test repo: %v", err)
+	}
+
+	// Get blob SHAs for each file.
+	blobs := map[string]plumbing.Hash{}
+	for _, name := range []string{"hello.txt", "empty.txt", "binary.bin", "large.bin"} {
+		out, err := exec.Command("git", "-C", repoDir, "rev-parse", "HEAD:"+name).Output()
+		if err != nil {
+			t.Fatalf("rev-parse %s: %v", name, err)
+		}
+		sha := string(out[:len(out)-1]) // trim newline
+		blobs[name] = plumbing.NewHash(sha)
+	}
+
+	return repoDir, blobs
+}
+
+func TestCatfileReader(t *testing.T) {
+	repoDir, blobs := createTestRepo(t)
+
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],
+		blobs["empty.txt"],
+		blobs["binary.bin"],
+		blobs["large.bin"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids)
+	if err != nil {
+		t.Fatalf("newCatfileReader: %v", err)
+	}
+	defer cr.Close()
+
+	// hello.txt
+	size, missing, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next hello.txt: %v", err)
+	}
+	if missing {
+		t.Fatal("hello.txt unexpectedly missing")
+	}
+	if size != 12 {
+		t.Errorf("hello.txt size = %d, want 12", size)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatalf("ReadFull hello.txt: %v", err)
+	}
+	if string(content) != "hello world\n" {
+		t.Errorf("hello.txt content = %q", content)
+	}
+
+	// empty.txt
+	size, missing, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next empty.txt: %v", err)
+	}
+	// Check the missing flag on every entry (previously only hello.txt was
+	// checked, leaving these assignments unused).
+	if missing {
+		t.Fatal("empty.txt unexpectedly missing")
+	}
+	if size != 0 {
+		t.Errorf("empty.txt size = %d, want 0", size)
+	}
+
+	// binary.bin — read content and verify binary data survives.
+	size, missing, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next binary.bin: %v", err)
+	}
+	if missing {
+		t.Fatal("binary.bin unexpectedly missing")
+	}
+	binContent := make([]byte, size)
+	if _, err := io.ReadFull(cr, binContent); err != nil {
+		t.Fatalf("ReadFull binary.bin: %v", err)
+	}
+	if binContent[0] != 0x00 || binContent[3] != '\n' {
+		t.Errorf("binary.bin unexpected leading bytes: %x", binContent[:5])
+	}
+
+	// large.bin
+	size, missing, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next large.bin: %v", err)
+	}
+	if missing {
+		t.Fatal("large.bin unexpectedly missing")
+	}
+	if size != 64*1024 {
+		t.Errorf("large.bin size = %d, want %d", size, 64*1024)
+	}
+	largeContent := make([]byte, size)
+	if _, err := io.ReadFull(cr, largeContent); err != nil {
+		t.Fatalf("ReadFull large.bin: %v", err)
+	}
+
+	// EOF after all entries.
+	_, _, err = cr.Next()
+	if err != io.EOF {
+		t.Errorf("expected io.EOF after last entry, got %v", err)
+	}
+}
+
+func TestCatfileReader_Skip(t *testing.T) {
+	repoDir, blobs := createTestRepo(t)
+
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],
+		blobs["large.bin"],
+		blobs["binary.bin"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids)
+	if err != nil {
+		t.Fatalf("newCatfileReader: %v", err)
+	}
+	defer cr.Close()
+
+	// Skip hello.txt by calling Next again without reading.
+	_, _, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next hello.txt: %v", err)
+	}
+
+	// Skip large.bin too.
+	size, _, err := cr.Next()
+	if err != nil {
+		t.Fatalf("Next large.bin: %v", err)
+	}
+	if size != 64*1024 {
+		t.Errorf("large.bin size = %d, want %d", size, 64*1024)
+	}
+
+	// Read binary.bin after skipping two entries.
+	// Next() auto-discards large.bin's unread content before returning
+	// binary.bin's header, per the catfileReader contract.
+	size, _, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next binary.bin: %v", err)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatalf("ReadFull binary.bin: %v", err)
+	}
+	if content[0] != 0x00 {
+		t.Errorf("binary.bin first byte = %x, want 0x00", content[0])
+	}
+}
+
+func TestCatfileReader_Missing(t *testing.T) {
+	repoDir, blobs := createTestRepo(t)
+
+	fakeHash := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+	ids := []plumbing.Hash{
+		blobs["hello.txt"],
+		fakeHash,
+		blobs["empty.txt"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids)
+	if err != nil {
+		t.Fatalf("newCatfileReader: %v", err)
+	}
+	defer cr.Close()
+
+	// hello.txt — read normally.
+	size, missing, err := cr.Next()
+	if err != nil || missing {
+		t.Fatalf("Next hello.txt: err=%v missing=%v", err, missing)
+	}
+	content := make([]byte, size)
+	if _, err := io.ReadFull(cr, content); err != nil {
+		t.Fatalf("ReadFull hello.txt: %v", err)
+	}
+	if string(content) != "hello world\n" {
+		t.Errorf("hello.txt = %q", content)
+	}
+
+	// fakeHash — missing.
+	_, missing, err = cr.Next()
+	if err != nil {
+		t.Fatalf("Next fakeHash: %v", err)
+	}
+	if !missing {
+		t.Error("expected fakeHash to be missing")
+	}
+
+	// empty.txt — still works after missing entry.
+	size, missing, err = cr.Next()
+	if err != nil || missing {
+		t.Fatalf("Next empty.txt: err=%v missing=%v", err, missing)
+	}
+	if size != 0 {
+		t.Errorf("empty.txt size = %d, want 0", size)
+	}
+}
diff --git a/gitindex/index.go b/gitindex/index.go
index 5fbeba0d0..595df28cb 100644
--- a/gitindex/index.go
+++ b/gitindex/index.go
@@ -585,28 +585,121 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
 	sort.Strings(names)
 	names = uniq(names)
 
-	log.Printf("attempting to index %d total files", totalFiles)
-	for idx, name := range names {
-		keys := fileKeys[name]
-
-		for _, key := range keys {
-			doc, err := createDocument(key, repos, opts.BuildOptions)
-			if err != nil {
-				return false, err
+	// Separate main-repo keys from submodule keys, collecting blob SHAs
+	// for the main repo so we can stream them via git cat-file --batch.
+	// ZOEKT_DISABLE_CATFILE_BATCH=true falls back to the go-git path for
+	// all files, useful as a kill switch if the cat-file path causes issues.
+	catfileBatchDisabled := cmp.Or(os.Getenv("ZOEKT_DISABLE_CATFILE_BATCH"), "false")
+	useCatfileBatch := true
+	// ParseBool error deliberately ignored: any unparsable value leaves the
+	// cat-file path enabled (the default).
+	if disabled, _ := strconv.ParseBool(catfileBatchDisabled); disabled {
+		useCatfileBatch = false
+		log.Printf("cat-file batch disabled via ZOEKT_DISABLE_CATFILE_BATCH, using go-git")
+	}
+
+	mainRepoKeys := make([]fileKey, 0, totalFiles)
+	mainRepoIDs := make([]plumbing.Hash, 0, totalFiles)
+	var submoduleKeys []fileKey
+
+	// names is sorted above, so mainRepoKeys/mainRepoIDs stay aligned 1:1
+	// and in deterministic order for the cat-file stream.
+	for _, name := range names {
+		for _, key := range fileKeys[name] {
+			if useCatfileBatch && key.SubRepoPath == "" {
+				mainRepoKeys = append(mainRepoKeys, key)
+				mainRepoIDs = append(mainRepoIDs, key.ID)
+			} else {
+				submoduleKeys = append(submoduleKeys, key)
 			}
+		}
+	}
-
-			if err := builder.Add(doc); err != nil {
-				return false, fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err)
-			}
+	log.Printf("attempting to index %d total files (%d via cat-file, %d via go-git)", totalFiles, len(mainRepoIDs), len(submoduleKeys))
-			if idx%10_000 == 0 {
-				builder.CheckMemoryUsage()
-			}
+	// Stream main-repo blobs via pipelined cat-file --batch --buffer.
+	// Large blobs are skipped without reading content into memory.
+	if len(mainRepoIDs) > 0 {
+		cr, err := newCatfileReader(opts.RepoDir, mainRepoIDs)
+		if err != nil {
+			return false, fmt.Errorf("newCatfileReader: %w", err)
+		}
+
+		if err := indexCatfileBlobs(cr, mainRepoKeys, repos, opts, builder); err != nil {
+			return false, err
 		}
 	}
+
+	// Index submodule blobs via go-git.
+	for idx, key := range submoduleKeys {
+		doc, err := createDocument(key, repos, opts.BuildOptions)
+		if err != nil {
+			return false, err
+		}
+
+		if err := builder.Add(doc); err != nil {
+			return false, fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err)
+		}
+
+		if idx%10_000 == 0 {
+			builder.CheckMemoryUsage()
+		}
+	}
+
 	return true, builder.Finish()
 }
 
+// indexCatfileBlobs streams main-repo blobs from the catfileReader into the
+// builder. Large blobs are skipped without reading content into memory.
+// keys must correspond 1:1 (in order) with the ids passed to newCatfileReader.
+// The reader is always closed when this function returns.
+func indexCatfileBlobs(cr *catfileReader, keys []fileKey, repos map[fileKey]BlobLocation, opts Options, builder *index.Builder) error {
+	// NOTE(review): assumes cr.Close surfaces writer-goroutine and process
+	// errors; on the success path nothing checks Close's return — confirm
+	// whether a cat-file failure after the last entry can go unnoticed.
+	defer cr.Close()
+
+	for idx, key := range keys {
+		size, missing, err := cr.Next()
+		if err != nil {
+			return fmt.Errorf("cat-file next for %s: %w", key.FullPath(), err)
+		}
+
+		branches := repos[key].Branches
+		var doc index.Document
+
+		if missing {
+			// Unexpected for local repos — may indicate corruption, shallow
+			// clone, or a race with git gc. Log a warning and skip.
+			log.Printf("warning: blob %s missing for %s", key.ID, key.FullPath())
+			doc = skippedDoc(key, branches, index.SkipReasonMissing)
+		} else {
+			keyFullPath := key.FullPath()
+			if size > opts.BuildOptions.SizeMax && !opts.BuildOptions.IgnoreSizeMax(keyFullPath) {
+				// Skip without reading content into memory.
+				doc = skippedDoc(key, branches, index.SkipReasonTooLarge)
+			} else {
+				// Pre-allocate and read the full blob content in one call.
+				// io.ReadFull is preferred over io.LimitedReader here as it
+				// avoids the intermediate allocation and the size is known.
+				content := make([]byte, size)
+				if _, err := io.ReadFull(cr, content); err != nil {
+					return fmt.Errorf("read blob %s: %w", keyFullPath, err)
+				}
+				doc = index.Document{
+					SubRepositoryPath: key.SubRepoPath,
+					Name:              keyFullPath,
+					Content:           content,
+					Branches:          branches,
+				}
+			}
+		}
+
+		if err := builder.Add(doc); err != nil {
+			return fmt.Errorf("error adding document with name %s: %w", key.FullPath(), err)
+		}
+
+		if idx%10_000 == 0 {
+			builder.CheckMemoryUsage()
+		}
+	}
+
+	return nil
+}
+
 // openRepo opens a git repository in a way that's optimized for indexing.
 //
 // It copies the relevant logic from git.PlainOpen, and tweaks certain filesystem options.
@@ -987,7 +1080,7 @@ func createDocument(key fileKey,
 
 	// We filter out large documents when fetching the repo. So if an object is too large, it will not be found.
 	if errors.Is(err, plumbing.ErrObjectNotFound) {
-		return skippedLargeDoc(key, branches), nil
+		return skippedDoc(key, branches, index.SkipReasonTooLarge), nil
 	}
 
 	if err != nil {
@@ -996,7 +1089,7 @@
 	keyFullPath := key.FullPath()
 	if blob.Size > int64(opts.SizeMax) && !opts.IgnoreSizeMax(keyFullPath) {
-		return skippedLargeDoc(key, branches), nil
+		return skippedDoc(key, branches, index.SkipReasonTooLarge), nil
 	}
 
 	contents, err := blobContents(blob)
@@ -1012,9 +1105,10 @@
 	}, nil
 }
 
-func skippedLargeDoc(key fileKey, branches []string) index.Document {
+// skippedDoc creates a contentless Document placeholder for a blob that was
+// not indexed, recording the given reason in SkipReason.
+func skippedDoc(key fileKey, branches []string, reason index.SkipReason) index.Document {
 	return index.Document{
-		SkipReason:        index.SkipReasonTooLarge,
+		SkipReason:        reason,
 		Name:              key.FullPath(),
 		Branches:          branches,
 		SubRepositoryPath: key.SubRepoPath,
diff --git a/index/document.go b/index/document.go
index 68a5b1e98..25efa461e 100644
--- a/index/document.go
+++ b/index/document.go
@@ -26,6 +26,7 @@ const (
 	SkipReasonTooSmall
 	SkipReasonBinary
 	SkipReasonTooManyTrigrams
+	SkipReasonMissing
 )
 
 func (s SkipReason) explanation() string {
@@ -40,6 +41,8 @@
 	case SkipReasonTooManyTrigrams:
 		return "contains too many trigrams"
+	case SkipReasonMissing:
+		return "object missing from repository"
 	default:
 		return "unknown skip reason"
 	}
diff --git a/index/file_category.go b/index/file_category.go
index 6ca4800af..fa365ccd0 100644
--- a/index/file_category.go
+++ b/index/file_category.go
@@ -35,9 +35,10 @@ func DetermineFileCategory(doc *Document) {
 	name := doc.Name
 	content := doc.Content
 
-	// If this document was skipped because it was too large, just guess the category based on the filename to avoid
-	// examining the contents. Note: passing nil content is allowed by the go-enry contract.
-	if doc.SkipReason == SkipReasonTooLarge || doc.SkipReason == SkipReasonBinary {
+	// If this document was skipped (too large, binary, or missing from the repo),
+	// guess the category based on the filename to avoid examining the contents.
+	// Note: passing nil content is allowed by the go-enry contract.
+	if doc.SkipReason == SkipReasonTooLarge || doc.SkipReason == SkipReasonBinary || doc.SkipReason == SkipReasonMissing {
 		content = nil
 	}