From 5a670b1b0b1878e1981306b7e4eb6bd07c34fda7 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith <keegan.csmith@gmail.com>
Date: Thu, 2 Apr 2026 22:23:16 +0200
Subject: [PATCH 1/2] gitindex: use --unordered for git cat-file

---
 gitindex/catfile.go                | 69 ++++++++++++++++------
 gitindex/catfile_bench_test.go     | 67 ++++++++++++----------
 gitindex/catfile_hardening_test.go | 64 ++++++++++-----------
 gitindex/catfile_test.go           | 92 +++++++++++++++++++++++++-----
 gitindex/index.go                  | 28 +++++++--
 5 files changed, 222 insertions(+), 98 deletions(-)

diff --git a/gitindex/catfile.go b/gitindex/catfile.go
index 1933c337c..308e80f75 100644
--- a/gitindex/catfile.go
+++ b/gitindex/catfile.go
@@ -30,12 +30,15 @@ import (
 
 type catfileReaderOptions struct {
 	filterSpec string
+	unordered  bool
 }
 
 // catfileReader provides streaming access to git blob objects via a pipelined
 // "git cat-file --batch --buffer" process. A writer goroutine feeds all blob
 // SHAs to stdin while the caller reads responses one at a time, similar to
-// archive/tar.Reader.
+// archive/tar.Reader. When the reader is configured with --unordered, git may
+// return objects in pack order instead of request order, so callers must use
+// the object ID returned by Next to correlate responses.
 //
 // The --buffer flag switches git's output from per-object flush (write_or_die)
 // to libc stdio buffering (fwrite), reducing syscalls. After stdin EOF, git
@@ -48,13 +51,14 @@ type catfileReaderOptions struct {
 //	defer cr.Close()
 //
 //	for {
-//	    size, missing, excluded, err := cr.Next()
+//	    id, size, missing, excluded, err := cr.Next()
 //	    if err == io.EOF { break }
 //	    if missing { continue }
 //	    if excluded { continue }
 //	    if size > maxSize { continue } // unread bytes auto-skipped
 //	    content := make([]byte, size)
 //	    io.ReadFull(cr, content)
+//	    _ = id // match back to the requested blob if using --unordered
 //	}
 type catfileReader struct {
 	cmd      *exec.Cmd
@@ -74,6 +78,9 @@ type catfileReader struct {
 // when done. Pass a zero-value catfileReaderOptions when no options are needed.
 func newCatfileReader(repoDir string, ids []plumbing.Hash, opts catfileReaderOptions) (*catfileReader, error) {
 	args := []string{"cat-file", "--batch", "--buffer"}
+	if opts.unordered {
+		args = append(args, "--unordered")
+	}
 	if opts.filterSpec != "" {
 		args = append(args, "--filter="+opts.filterSpec)
 	}
@@ -123,18 +130,18 @@ func newCatfileReader(repoDir string, ids []plumbing.Hash, opts catfileReaderOpt
 	}, nil
 }
 
-// Next advances to the next blob entry. It returns the blob's size and whether
-// it is missing or excluded by the configured filter. Any unread content from
-// the previous entry is automatically discarded. Returns io.EOF when all
-// entries have been consumed.
+// Next advances to the next blob entry. It returns the blob ID, size, and
+// whether it is missing or excluded by the configured filter. Any unread
+// content from the previous entry is automatically discarded. Returns io.EOF
+// when all entries have been consumed.
 //
 // After Next returns successfully with missing=false and excluded=false, call
 // Read to consume the blob content, or call Next again to skip it.
-func (cr *catfileReader) Next() (size int, missing bool, excluded bool, err error) {
+func (cr *catfileReader) Next() (id plumbing.Hash, size int, missing bool, excluded bool, err error) {
 	// Discard unread content from the previous entry.
 	if cr.pending > 0 {
 		if _, err := cr.reader.Discard(cr.pending); err != nil {
-			return 0, false, false, fmt.Errorf("discard pending bytes: %w", err)
+			return plumbing.ZeroHash, 0, false, false, fmt.Errorf("discard pending bytes: %w", err)
 		}
 		cr.pending = 0
 	}
@@ -142,33 +149,57 @@ func (cr *catfileReader) Next() (size int, missing bool, excluded bool, err erro
 	headerBytes, err := cr.reader.ReadBytes('\n')
 	if err != nil {
 		if err == io.EOF {
-			return 0, false, false, io.EOF
+			return plumbing.ZeroHash, 0, false, false, io.EOF
 		}
-		return 0, false, false, fmt.Errorf("read header: %w", err)
+		return plumbing.ZeroHash, 0, false, false, fmt.Errorf("read header: %w", err)
 	}
 	header := headerBytes[:len(headerBytes)-1] // trim \n
+	firstSpace := bytes.IndexByte(header, ' ')
+	if firstSpace == -1 {
+		return plumbing.ZeroHash, 0, false, false, fmt.Errorf("unexpected header: %q", header)
+	}
 
-	if bytes.HasSuffix(header, []byte(" missing")) {
-		return 0, true, false, nil
+	id, err = parseCatfileObjectID(header[:firstSpace])
+	if err != nil {
+		return plumbing.ZeroHash, 0, false, false, fmt.Errorf("parse object id from %q: %w", header, err)
 	}
 
-	if bytes.HasSuffix(header, []byte(" excluded")) {
-		return 0, false, true, nil
+	rest := header[firstSpace+1:]
+	if bytes.Equal(rest, []byte("missing")) {
+		return id, 0, true, false, nil
+	}
+
+	if bytes.Equal(rest, []byte("excluded")) {
+		return id, 0, false, true, nil
 	}
 
-	// Parse size from "<oid> <type> <size>".
 	lastSpace := bytes.LastIndexByte(header, ' ')
-	if lastSpace == -1 {
-		return 0, false, false, fmt.Errorf("unexpected header: %q", header)
+	if lastSpace <= firstSpace {
+		return plumbing.ZeroHash, 0, false, false, fmt.Errorf("unexpected header: %q", header)
 	}
+
+	// Parse size from "<oid> <type> <size>".
 	size, err = strconv.Atoi(string(header[lastSpace+1:]))
 	if err != nil {
-		return 0, false, false, fmt.Errorf("parse size from %q: %w", header, err)
+		return plumbing.ZeroHash, 0, false, false, fmt.Errorf("parse size from %q: %w", header, err)
 	}
 
 	// Track pending bytes: content + trailing LF.
 	cr.pending = size + 1
-	return size, false, false, nil
+	return id, size, false, false, nil
+}
+
+func parseCatfileObjectID(raw []byte) (plumbing.Hash, error) {
+	if len(raw) != 40 {
+		return plumbing.ZeroHash, fmt.Errorf("invalid object id length %d", len(raw))
+	}
+
+	var id plumbing.Hash
+	if _, err := hex.Decode(id[:], raw); err != nil {
+		return plumbing.ZeroHash, err
+	}
+
+	return id, nil
 }
 
 // Read reads from the current blob's content. Implements io.Reader. Returns
diff --git a/gitindex/catfile_bench_test.go b/gitindex/catfile_bench_test.go
index 5b8897380..e383892f6 100644
--- a/gitindex/catfile_bench_test.go
+++ b/gitindex/catfile_bench_test.go
@@ -107,9 +107,10 @@ func BenchmarkBlobRead_GoGit(b *testing.B) {
 	}
 }
 
-// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader approach:
-// all SHAs written to stdin at once via --buffer, responses read one at a time.
-// This is the production path used by indexGitRepo.
+// BenchmarkBlobRead_CatfileReader measures the streaming catfileReader
+// approach: all SHAs written to stdin at once via --buffer, responses read one
+// at a time. It compares the legacy ordered stream with the production
+// unordered mode used by indexGitRepo.
 func BenchmarkBlobRead_CatfileReader(b *testing.B) {
 	repoDir := requireBenchGitRepo(b)
 	files, gitDir := collectBlobKeys(b, repoDir)
@@ -125,35 +126,43 @@ func BenchmarkBlobRead_CatfileReader(b *testing.B) {
 		n = min(n, len(keys))
 		subset := ids[:n]
 
-		b.Run(fmt.Sprintf("files=%d", n), func(b *testing.B) {
-			b.ReportAllocs()
-			var totalBytes int64
-			for b.Loop() {
-				totalBytes = 0
-				cr, err := newCatfileReader(gitDir, subset, catfileReaderOptions{})
-				if err != nil {
-					b.Fatalf("newCatfileReader: %v", err)
-				}
-				for range subset {
-					size, missing, excluded, err := cr.Next()
+		for _, benchMode := range []struct {
+			name      string
+			unordered bool
+		}{
+			{name: "ordered"},
+			{name: "unordered", unordered: true},
+		} {
+			b.Run(fmt.Sprintf("files=%d/mode=%s", n, benchMode.name), func(b *testing.B) {
+				b.ReportAllocs()
+				var totalBytes int64
+				for b.Loop() {
+					totalBytes = 0
+					cr, err := newCatfileReader(gitDir, subset, catfileReaderOptions{unordered: benchMode.unordered})
 					if err != nil {
-						cr.Close()
-						b.Fatalf("Next: %v", err)
+						b.Fatalf("newCatfileReader: %v", err)
 					}
-					if missing || excluded {
-						continue
+					for range subset {
+						_, size, missing, excluded, err := cr.Next()
+						if err != nil {
+							cr.Close()
+							b.Fatalf("Next: %v", err)
+						}
+						if missing || excluded {
+							continue
+						}
+						content := make([]byte, size)
+						if _, err := io.ReadFull(cr, content); err != nil {
+							cr.Close()
+							b.Fatalf("ReadFull: %v", err)
+						}
+						totalBytes += int64(len(content))
 					}
-					content := make([]byte, size)
-					if _, err := io.ReadFull(cr, content); err != nil {
-						cr.Close()
-						b.Fatalf("ReadFull: %v", err)
-					}
-					totalBytes += int64(len(content))
+					cr.Close()
 				}
-				cr.Close()
-			}
-			b.ReportMetric(float64(totalBytes), "content-bytes/op")
-			b.ReportMetric(float64(len(subset)), "files/op")
-		})
+				b.ReportMetric(float64(totalBytes), "content-bytes/op")
+				b.ReportMetric(float64(len(subset)), "files/op")
+			})
+		}
 	}
 }
diff --git a/gitindex/catfile_hardening_test.go b/gitindex/catfile_hardening_test.go
index e1da3faf7..967170c4e 100644
--- a/gitindex/catfile_hardening_test.go
+++ b/gitindex/catfile_hardening_test.go
@@ -30,7 +30,7 @@ func TestCatfileReader_DoubleClose(t *testing.T) {
 	}
 
 	// Consume the entry so the process can exit cleanly.
-	if _, _, _, err := cr.Next(); err != nil {
+	if _, _, _, _, err := cr.Next(); err != nil {
 		t.Fatal(err)
 	}
 
@@ -71,7 +71,7 @@ func TestCatfileReader_ConcurrentClose(t *testing.T) {
 	}
 
 	// Read one entry, leave two unconsumed.
-	if _, _, _, err := cr.Next(); err != nil {
+	if _, _, _, _, err := cr.Next(); err != nil {
 		t.Fatal(err)
 	}
 
@@ -153,7 +153,7 @@ func TestCatfileReader_CloseBeforeExhausted_ManyBlobs(t *testing.T) {
 	}
 
 	// Read only 1 of 200 entries.
-	if _, _, _, err := cr.Next(); err != nil {
+	if _, _, _, _, err := cr.Next(); err != nil {
 		t.Fatal(err)
 	}
 
@@ -261,7 +261,7 @@ func TestCatfileReader_ReadAfterFullConsumption(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 	content := make([]byte, size)
 	if _, err := io.ReadFull(cr, content); err != nil {
 		t.Fatal(err)
@@ -292,7 +292,7 @@ func TestCatfileReader_SmallBufferReads(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 
 	var result []byte
 	buf := make([]byte, 1)
@@ -336,7 +336,7 @@ func TestCatfileReader_PartialReadThenNext(t *testing.T) {
 	defer cr.Close()
 
 	// Read only 5 of 12 bytes from hello.txt.
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 	if size != 12 {
 		t.Fatalf("hello.txt size = %d, want 12", size)
 	}
@@ -349,7 +349,7 @@ func TestCatfileReader_PartialReadThenNext(t *testing.T) {
 	}
 
 	// Advance — must discard remaining 7 content bytes + trailing LF.
-	size, _, _, err = cr.Next()
+	_, size, _, _, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next binary.bin after partial read: %v", err)
 	}
@@ -383,7 +383,7 @@ func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 	// Read exactly size-1 bytes — leaves 1 content byte + trailing LF.
 	buf := make([]byte, size-1)
 	if _, err := io.ReadFull(cr, buf); err != nil {
@@ -395,7 +395,7 @@ func TestCatfileReader_PartialReadExactlyOneByteShort(t *testing.T) {
 
 	// Advance — pending should be 2 (1 content byte + 1 LF). The
 	// Discard call must handle this exact boundary correctly.
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil {
 		t.Fatalf("Next after size-1 partial read: %v", err)
 	}
@@ -428,7 +428,7 @@ func TestCatfileReader_EmptyIds(t *testing.T) {
 	}
 	defer cr.Close()
 
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("expected io.EOF for empty ids, got %v", err)
 	}
@@ -453,7 +453,7 @@ func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) {
 	defer cr.Close()
 
 	for i := range ids {
-		size, missing, excluded, err := cr.Next()
+		_, size, missing, excluded, err := cr.Next()
 		if err != nil {
 			t.Fatalf("Next #%d: %v", i, err)
 		}
@@ -466,7 +466,7 @@ func TestCatfileReader_MultipleEmptyBlobs(t *testing.T) {
 		// Don't read — Next should discard the trailing LF for us.
 	}
 
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("expected EOF after %d empty blobs, got %v", len(ids), err)
 	}
@@ -490,7 +490,7 @@ func TestCatfileReader_EmptyBlobRead(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 	if size != 0 {
 		t.Fatalf("empty.txt size = %d", size)
 	}
@@ -504,7 +504,7 @@ func TestCatfileReader_EmptyBlobRead(t *testing.T) {
 
 	// The trailing LF must have been consumed. Verify by reading the
 	// next entry — if the LF leaked, the header parse would fail.
-	size, _, _, err = cr.Next()
+	_, size, _, _, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next hello.txt after empty blob Read: %v", err)
 	}
@@ -543,7 +543,7 @@ func TestCatfileReader_AllMissing(t *testing.T) {
 	defer cr.Close()
 
 	for i, id := range ids {
-		_, missing, excluded, err := cr.Next()
+		_, _, missing, excluded, err := cr.Next()
 		if err != nil {
 			t.Fatalf("Next #%d (%s): %v", i, id, err)
 		}
@@ -555,7 +555,7 @@ func TestCatfileReader_AllMissing(t *testing.T) {
 		}
 	}
 
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("expected EOF after all missing, got %v", err)
 	}
@@ -586,13 +586,13 @@ func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
 	defer cr.Close()
 
 	// fake1 — missing
-	_, missing, excluded, err := cr.Next()
+	_, _, missing, excluded, err := cr.Next()
 	if err != nil || !missing || excluded {
 		t.Fatalf("fake1: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
 
 	// hello.txt — present, read it
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -605,13 +605,13 @@ func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
 	}
 
 	// fake2 — missing
-	_, missing, excluded, err = cr.Next()
+	_, _, missing, excluded, err = cr.Next()
 	if err != nil || !missing || excluded {
 		t.Fatalf("fake2: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
 
 	// empty.txt — present, skip it
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -620,7 +620,7 @@ func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
 	}
 
 	// binary.bin — present, read it
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("binary.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -632,7 +632,7 @@ func TestCatfileReader_AlternatingMissingPresent(t *testing.T) {
 		t.Errorf("binary.bin[0] = 0x%02x, want 0x00", binContent[0])
 	}
 
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("expected EOF, got %v", err)
 	}
@@ -661,13 +661,13 @@ func TestCatfileReader_MissingThenSkip(t *testing.T) {
 	defer cr.Close()
 
 	// missing
-	_, missing, excluded, _ := cr.Next()
+	_, _, missing, excluded, _ := cr.Next()
 	if !missing || excluded {
 		t.Fatal("expected missing")
 	}
 
 	// large.bin — skip
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("large.bin: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -677,7 +677,7 @@ func TestCatfileReader_MissingThenSkip(t *testing.T) {
 	// deliberately don't read
 
 	// hello.txt — read after missing+skip
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -707,19 +707,19 @@ func TestCatfileReader_RepeatedNextAfterEOF(t *testing.T) {
 	defer cr.Close()
 
 	// Consume and skip the only entry.
-	if _, _, _, err := cr.Next(); err != nil {
+	if _, _, _, _, err := cr.Next(); err != nil {
 		t.Fatal(err)
 	}
 
 	// First EOF.
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("first post-exhaust Next: %v, want io.EOF", err)
 	}
 
 	// Second and third EOF — must be stable.
 	for i := 0; i < 2; i++ {
-		_, _, _, err = cr.Next()
+		_, _, _, _, err = cr.Next()
 		if err != io.EOF {
 			t.Fatalf("Next #%d after EOF: %v, want io.EOF", i+2, err)
 		}
@@ -743,7 +743,7 @@ func TestCatfileReader_LargeBlobBytePrecision(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, err := cr.Next()
+	_, size, _, _, err := cr.Next()
 	if err != nil {
 		t.Fatal(err)
 	}
@@ -793,7 +793,7 @@ func TestCatfileReader_LargeBlobChunkedRead(t *testing.T) {
 	}
 	defer cr.Close()
 
-	size, _, _, _ := cr.Next()
+	_, size, _, _, _ := cr.Next()
 	if size != 64*1024 {
 		t.Fatalf("size = %d", size)
 	}
@@ -844,7 +844,7 @@ func TestCatfileReader_DuplicateSHAs(t *testing.T) {
 	defer cr.Close()
 
 	for i := 0; i < 3; i++ {
-		size, missing, excluded, err := cr.Next()
+		_, size, missing, excluded, err := cr.Next()
 		if err != nil {
 			t.Fatalf("Next #%d: %v", i, err)
 		}
@@ -863,7 +863,7 @@ func TestCatfileReader_DuplicateSHAs(t *testing.T) {
 		}
 	}
 
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Fatalf("expected EOF, got %v", err)
 	}
diff --git a/gitindex/catfile_test.go b/gitindex/catfile_test.go
index 161141273..fbecc9570 100644
--- a/gitindex/catfile_test.go
+++ b/gitindex/catfile_test.go
@@ -74,7 +74,7 @@ func TestCatfileReader(t *testing.T) {
 	defer cr.Close()
 
 	// hello.txt
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil {
 		t.Fatalf("Next hello.txt: %v", err)
 	}
@@ -93,7 +93,7 @@ func TestCatfileReader(t *testing.T) {
 	}
 
 	// empty.txt
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next empty.txt: %v", err)
 	}
@@ -105,7 +105,7 @@ func TestCatfileReader(t *testing.T) {
 	}
 
 	// binary.bin — read content and verify binary data survives.
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next binary.bin: %v", err)
 	}
@@ -121,7 +121,7 @@ func TestCatfileReader(t *testing.T) {
 	}
 
 	// large.bin
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next large.bin: %v", err)
 	}
@@ -137,7 +137,7 @@ func TestCatfileReader(t *testing.T) {
 	}
 
 	// EOF after all entries.
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != io.EOF {
 		t.Errorf("expected io.EOF after last entry, got %v", err)
 	}
@@ -161,13 +161,13 @@ func TestCatfileReader_Skip(t *testing.T) {
 	defer cr.Close()
 
 	// Skip hello.txt by calling Next again without reading.
-	_, _, _, err = cr.Next()
+	_, _, _, _, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next hello.txt: %v", err)
 	}
 
 	// Skip large.bin too.
-	size, _, _, err := cr.Next()
+	_, size, _, _, err := cr.Next()
 	if err != nil {
 		t.Fatalf("Next large.bin: %v", err)
 	}
@@ -176,7 +176,7 @@ func TestCatfileReader_Skip(t *testing.T) {
 	}
 
 	// Read binary.bin after skipping two entries.
-	size, _, _, err = cr.Next()
+	_, size, _, _, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next binary.bin: %v", err)
 	}
@@ -208,7 +208,7 @@ func TestCatfileReader_Missing(t *testing.T) {
 	defer cr.Close()
 
 	// hello.txt — read normally.
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("Next hello.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -221,7 +221,7 @@ func TestCatfileReader_Missing(t *testing.T) {
 	}
 
 	// fakeHash — missing.
-	_, missing, excluded, err = cr.Next()
+	_, _, missing, excluded, err = cr.Next()
 	if err != nil {
 		t.Fatalf("Next fakeHash: %v", err)
 	}
@@ -233,7 +233,7 @@ func TestCatfileReader_Missing(t *testing.T) {
 	}
 
 	// empty.txt — still works after missing entry.
-	size, missing, excluded, err = cr.Next()
+	_, size, missing, excluded, err = cr.Next()
 	if err != nil || missing || excluded {
 		t.Fatalf("Next empty.txt: err=%v missing=%v excluded=%v", err, missing, excluded)
 	}
@@ -258,7 +258,7 @@ func TestCatfileReader_Excluded(t *testing.T) {
 	}
 	defer cr.Close()
 
-	_, missing, excluded, err := cr.Next()
+	_, _, missing, excluded, err := cr.Next()
 	if err != nil {
 		t.Fatalf("Next large.bin: %v", err)
 	}
@@ -269,7 +269,7 @@ func TestCatfileReader_Excluded(t *testing.T) {
 		t.Fatal("large.bin unexpectedly included")
 	}
 
-	size, missing, excluded, err := cr.Next()
+	_, size, missing, excluded, err := cr.Next()
 	if err != nil {
 		t.Fatalf("Next hello.txt: %v", err)
 	}
@@ -284,3 +284,69 @@ func TestCatfileReader_Excluded(t *testing.T) {
 		t.Errorf("hello.txt = %q", content)
 	}
 }
+
+func TestCatfileReader_Unordered(t *testing.T) {
+	t.Parallel()
+
+	repoDir, blobs := createTestRepo(t)
+	fakeHash := plumbing.NewHash("deadbeefdeadbeefdeadbeefdeadbeefdeadbeef")
+
+	ids := []plumbing.Hash{
+		blobs["large.bin"],
+		blobs["hello.txt"],
+		fakeHash,
+		blobs["hello.txt"],
+	}
+
+	cr, err := newCatfileReader(repoDir, ids, catfileReaderOptions{
+		filterSpec: "blob:limit=1k",
+		unordered:  true,
+	})
+	if err != nil {
+		t.Fatalf("newCatfileReader: %v", err)
+	}
+	defer cr.Close()
+
+	counts := map[plumbing.Hash]int{}
+	for range ids {
+		id, size, missing, excluded, err := cr.Next()
+		if err != nil {
+			t.Fatalf("Next: %v", err)
+		}
+		counts[id]++
+
+		switch id {
+		case blobs["large.bin"]:
+			if missing || !excluded {
+				t.Fatalf("large.bin: missing=%v excluded=%v", missing, excluded)
+			}
+		case fakeHash:
+			if !missing || excluded {
+				t.Fatalf("fakeHash: missing=%v excluded=%v", missing, excluded)
+			}
+		case blobs["hello.txt"]:
+			if missing || excluded {
+				t.Fatalf("hello.txt: missing=%v excluded=%v", missing, excluded)
+			}
+			content := make([]byte, size)
+			if _, err := io.ReadFull(cr, content); err != nil {
+				t.Fatalf("ReadFull hello.txt: %v", err)
+			}
+			if !bytes.Equal(content, []byte("hello world\n")) {
+				t.Fatalf("hello.txt content = %q", content)
+			}
+		default:
+			t.Fatalf("unexpected blob id %s", id)
+		}
+	}
+
+	if counts[blobs["hello.txt"]] != 2 {
+		t.Fatalf("hello.txt seen %d times, want 2", counts[blobs["hello.txt"]])
+	}
+	if counts[blobs["large.bin"]] != 1 {
+		t.Fatalf("large.bin seen %d times, want 1", counts[blobs["large.bin"]])
+	}
+	if counts[fakeHash] != 1 {
+		t.Fatalf("fakeHash seen %d times, want 1", counts[fakeHash])
+	}
+}
diff --git a/gitindex/index.go b/gitindex/index.go
index 5b9d5abf1..2b81fb9c4 100644
--- a/gitindex/index.go
+++ b/gitindex/index.go
@@ -648,6 +648,7 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
 	if len(mainRepoIDs) > 0 {
 		crOpts := catfileReaderOptions{
 			filterSpec: catfileFilterSpec(opts),
+			unordered:  true,
 		}
 		cr, err := newCatfileReader(opts.RepoDir, mainRepoIDs, crOpts)
 		if err != nil {
@@ -680,15 +681,32 @@ func indexGitRepo(opts Options, config gitIndexConfig) (bool, error) {
 
 // indexCatfileBlobs streams main-repo blobs from the catfileReader into the
 // builder. Large blobs are skipped without reading content into memory.
-// keys must correspond 1:1 (in order) with the ids passed to newCatfileReader.
-// The reader is always closed when this function returns.
+// The reader may return blobs out of request order, so responses are matched
+// back to the queued file keys by object ID. The reader is always closed when
+// this function returns.
 func indexCatfileBlobs(cr *catfileReader, keys []fileKey, repos map[fileKey]BlobLocation, opts Options, builder *index.Builder) error {
 	defer cr.Close()
 
-	for idx, key := range keys {
-		size, missing, excluded, err := cr.Next()
+	keysByID := make(map[plumbing.Hash][]fileKey, len(keys))
+	for _, key := range keys {
+		keysByID[key.ID] = append(keysByID[key.ID], key)
+	}
+
+	for idx := 0; idx < len(keys); idx++ {
+		id, size, missing, excluded, err := cr.Next()
 		if err != nil {
-			return fmt.Errorf("cat-file next for %s: %w", key.FullPath(), err)
+			return fmt.Errorf("cat-file next for blob %s: %w", id, err)
+		}
+
+		queued := keysByID[id]
+		if len(queued) == 0 {
+			return fmt.Errorf("cat-file returned unexpected blob %s", id)
+		}
+		key := queued[0]
+		if len(queued) == 1 {
+			delete(keysByID, id)
+		} else {
+			keysByID[id] = queued[1:]
 		}
 
 		branches := repos[key].Branches

From aff0f93146973b06542787c69fd82544a47f3473 Mon Sep 17 00:00:00 2001
From: Keegan Carruthers-Smith <keegan.csmith@gmail.com>
Date: Thu, 9 Apr 2026 16:20:18 +0200
Subject: [PATCH 2/2] measure max rss

---
 gitindex/catfile.go            | 27 +++++++++++++++++++++++++++
 gitindex/catfile_bench_test.go | 19 ++++++++++++++++++-
 2 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/gitindex/catfile.go b/gitindex/catfile.go
index 308e80f75..115d5fc2c 100644
--- a/gitindex/catfile.go
+++ b/gitindex/catfile.go
@@ -21,6 +21,7 @@ import (
 	"fmt"
 	"io"
 	"os/exec"
+	"runtime"
 	"strconv"
 	"sync"
 	"syscall"
@@ -69,6 +70,9 @@ type catfileReader struct {
 	// entry. Next() discards any pending bytes before reading the next header.
 	pending int
 
+	peakRSSBytes uint64
+	hasPeakRSS   bool
+
 	closeOnce sync.Once
 	closeErr  error
 }
@@ -255,6 +259,9 @@ func (cr *catfileReader) Close() error {
 		// Wait for writer goroutine (unblocks via broken pipe from Kill).
 		<-cr.writeErr
 		err := cr.cmd.Wait()
+		if ps := cr.cmd.ProcessState; ps != nil {
+			cr.peakRSSBytes, cr.hasPeakRSS = maxRSSBytes(ps.SysUsage())
+		}
 		// Suppress the expected "signal: killed" error from our own Kill().
 		if isKilledErr(err) {
 			err = nil
@@ -264,6 +271,26 @@ func (cr *catfileReader) Close() error {
 	return cr.closeErr
 }
 
+func (cr *catfileReader) maxRSSBytes() (uint64, bool) {
+	return cr.peakRSSBytes, cr.hasPeakRSS
+}
+
+func maxRSSBytes(sysUsage any) (uint64, bool) {
+	rusage, ok := sysUsage.(*syscall.Rusage)
+	if !ok || rusage == nil || rusage.Maxrss < 0 {
+		return 0, false
+	}
+
+	// Darwin reports ru_maxrss in bytes, while Linux reports it in KiB.
+	maxRSS := uint64(rusage.Maxrss)
+	switch runtime.GOOS {
+	case "linux", "android":
+		maxRSS *= 1024
+	}
+
+	return maxRSS, true
+}
+
 // isKilledErr reports whether err is an exec.ExitError caused by SIGKILL.
 func isKilledErr(err error) bool {
 	exitErr, ok := err.(*exec.ExitError)
diff --git a/gitindex/catfile_bench_test.go b/gitindex/catfile_bench_test.go
index e383892f6..2a5a9088f 100644
--- a/gitindex/catfile_bench_test.go
+++ b/gitindex/catfile_bench_test.go
@@ -136,6 +136,8 @@ func BenchmarkBlobRead_CatfileReader(b *testing.B) {
 			b.Run(fmt.Sprintf("files=%d/mode=%s", n, benchMode.name), func(b *testing.B) {
 				b.ReportAllocs()
 				var totalBytes int64
+				var totalPeakRSS uint64
+				var peakRSSSamples int
 				for b.Loop() {
 					totalBytes = 0
 					cr, err := newCatfileReader(gitDir, subset, catfileReaderOptions{unordered: benchMode.unordered})
@@ -158,10 +160,25 @@ func BenchmarkBlobRead_CatfileReader(b *testing.B) {
 						}
 						totalBytes += int64(len(content))
 					}
-					cr.Close()
+					// Force the child to close stdout before Close() so the recorded
+					// rusage reflects the fully-drained cat-file process.
+					if _, _, _, _, err := cr.Next(); err != io.EOF {
+						cr.Close()
+						b.Fatalf("final Next: got %v, want io.EOF", err)
+					}
+					if err := cr.Close(); err != nil {
+						b.Fatalf("Close: %v", err)
+					}
+					if peakRSS, ok := cr.maxRSSBytes(); ok {
+						totalPeakRSS += peakRSS
+						peakRSSSamples++
+					}
 				}
 				b.ReportMetric(float64(totalBytes), "content-bytes/op")
 				b.ReportMetric(float64(len(subset)), "files/op")
+				if peakRSSSamples > 0 {
+					b.ReportMetric(float64(totalPeakRSS)/float64(peakRSSSamples), "git-maxrss-bytes/op")
+				}
 			})
 		}
 	}