From 388517c14ce62e1c52b091af862bbaf28dbabb7a Mon Sep 17 00:00:00 2001
From: Christian Couder <christian.couder@gmail.com>
Date: Mon, 17 Nov 2025 05:34:48 +0100
Subject: [PATCH 01/35] fast-import: refactor finalize_commit_buffer()

In a following commit we are going to finalize commit buffers with or
without signatures in order to check the signatures and possibly drop
them.

To do so easily and without duplication, let's refactor the current
code that finalizes commit buffers into a new finalize_commit_buffer()
function.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/fast-import.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 54d3e592c6e460..493de57ef67bfb 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -2815,6 +2815,18 @@ static void import_one_signature(struct signature_data *sig_sha1,
 		die(_("parse_one_signature() returned unknown hash algo"));
 }
 
+static void finalize_commit_buffer(struct strbuf *new_data,
+				   struct signature_data *sig_sha1,
+				   struct signature_data *sig_sha256,
+				   struct strbuf *msg)
+{
+	add_gpgsig_to_commit(new_data, "gpgsig ", sig_sha1);
+	add_gpgsig_to_commit(new_data, "gpgsig-sha256 ", sig_sha256);
+
+	strbuf_addch(new_data, '\n');
+	strbuf_addbuf(new_data, msg);
+}
+
 static void parse_new_commit(const char *arg)
 {
 	static struct strbuf msg = STRBUF_INIT;
@@ -2950,11 +2962,8 @@ static void parse_new_commit(const char *arg)
 			"encoding %s\n",
 			encoding);
 
-	add_gpgsig_to_commit(&new_data, "gpgsig ", &sig_sha1);
-	add_gpgsig_to_commit(&new_data, "gpgsig-sha256 ", &sig_sha256);
+	finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg);
 
-	strbuf_addch(&new_data, '\n');
-	strbuf_addbuf(&new_data, &msg);
 	free(author);
 	free(committer);
 	free(encoding);

From cb034c020aba54360e7c19faf82021399bf131e7 Mon Sep 17 00:00:00 2001
From: Christian Couder <christian.couder@gmail.com>
Date: Mon, 17 Nov 2025 05:34:49 +0100
Subject: [PATCH 02/35] commit: refactor verify_commit_buffer()

In a following commit, we are going to check commit signatures, but we
won't have a commit yet, only a commit buffer, and we are going to
discard this commit buffer if the signature is invalid. So it would be
wasteful to create a commit that we might discard, just to be able to
check a commit signature.

It would be simpler instead to be able to check commit signatures
using only a commit buffer instead of a commit.

To be able to do that, let's extract some code from the
check_commit_signature() function into a new verify_commit_buffer()
function, and then let's make check_commit_signature() call
verify_commit_buffer().

Note that this doesn't fundamentally change how
check_commit_signature() works. It used to call parse_signed_commit()
which calls repo_get_commit_buffer(), parse_buffer_signed_by_header()
and repo_unuse_commit_buffer(). Now these 3 functions are called
directly by verify_commit_buffer().

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 commit.c | 17 +++++++++++++++--
 commit.h |  7 +++++++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/commit.c b/commit.c
index 16d91b2bfcf291..709c9eed58a790 100644
--- a/commit.c
+++ b/commit.c
@@ -1315,7 +1315,8 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h
 	free(buf);
 }
 
-int check_commit_signature(const struct commit *commit, struct signature_check *sigc)
+int verify_commit_buffer(const char *buffer, size_t size,
+			 struct signature_check *sigc)
 {
 	struct strbuf payload = STRBUF_INIT;
 	struct strbuf signature = STRBUF_INIT;
@@ -1323,7 +1324,8 @@ int check_commit_signature(const struct commit *commit, struct signature_check *
 
 	sigc->result = 'N';
 
-	if (parse_signed_commit(commit, &payload, &signature, the_hash_algo) <= 0)
+	if (parse_buffer_signed_by_header(buffer, size, &payload,
+					  &signature, the_hash_algo) <= 0)
 		goto out;
 
 	sigc->payload_type = SIGNATURE_PAYLOAD_COMMIT;
@@ -1337,6 +1339,17 @@ int check_commit_signature(const struct commit *commit, struct signature_check *
 	return ret;
 }
 
+int check_commit_signature(const struct commit *commit, struct signature_check *sigc)
+{
+	unsigned long size;
+	const char *buffer = repo_get_commit_buffer(the_repository, commit, &size);
+	int ret = verify_commit_buffer(buffer, size, sigc);
+
+	repo_unuse_commit_buffer(the_repository, commit, buffer);
+
+	return ret;
+}
+
 void verify_merge_signature(struct commit *commit, int verbosity,
 			    int check_trust)
 {
diff --git a/commit.h b/commit.h
index 1d6e0c7518b3bb..5406dd266327d4 100644
--- a/commit.h
+++ b/commit.h
@@ -333,6 +333,13 @@ int remove_signature(struct strbuf *buf);
  */
 int check_commit_signature(const struct commit *commit, struct signature_check *sigc);
 
+/*
+ * Same as check_commit_signature() but accepts a commit buffer and
+ * its size, instead of a `struct commit *`.
+ */
+int verify_commit_buffer(const char *buffer, size_t size,
+			 struct signature_check *sigc);
+
 /* record author-date for each commit object */
 struct author_date_slab;
 void record_author_date(struct author_date_slab *author_date,

From c64eb849b14e5a78864f4260ccc12f46052020ec Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Mon, 17 Nov 2025 19:51:26 +0000
Subject: [PATCH 03/35] make strip: include `scalar`

When Scalar was made a canonical part of Git in 7b5c93c6c68 (scalar:
include in standard Git build & installation, 2022-09-02), it was added
to all relevant Makefile targets except for the `strip` target.

Let's correct that.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 70d1543b6b8688..fffb0d66e39842 100644
--- a/Makefile
+++ b/Makefile
@@ -2499,7 +2499,7 @@ please_set_SHELL_PATH_to_a_more_modern_shell:
 
 shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
 
-strip: $(PROGRAMS) git$X
+strip: $(PROGRAMS) git$X scalar$X
 	$(STRIP) $(STRIP_OPTS) $^
 
 ### Target-specific flags and dependencies

From 6971934d9bb4b8176b48658482862169a4582913 Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:13 +0000
Subject: [PATCH 04/35] doc: define unambiguous type mappings across C and Rust

Document other nuances when crossing the FFI boundary. Other language
mappings may be added in the future.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/Makefile                        |   1 +
 Documentation/technical/meson.build           |   1 +
 .../technical/unambiguous-types.adoc          | 224 ++++++++++++++++++
 3 files changed, 226 insertions(+)
 create mode 100644 Documentation/technical/unambiguous-types.adoc

diff --git a/Documentation/Makefile b/Documentation/Makefile
index a3fbd29744bd39..b580bdc98b100e 100644
--- a/Documentation/Makefile
+++ b/Documentation/Makefile
@@ -140,6 +140,7 @@ TECH_DOCS += technical/shallow
 TECH_DOCS += technical/sparse-checkout
 TECH_DOCS += technical/sparse-index
 TECH_DOCS += technical/trivial-merge
+TECH_DOCS += technical/unambiguous-types
 TECH_DOCS += technical/unit-tests
 SP_ARTICLES += $(TECH_DOCS)
 SP_ARTICLES += technical/api-index
diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build
index 858af811a7bcc1..7df9b8acf60ed4 100644
--- a/Documentation/technical/meson.build
+++ b/Documentation/technical/meson.build
@@ -31,6 +31,7 @@ articles = [
   'sparse-checkout.adoc',
   'sparse-index.adoc',
   'trivial-merge.adoc',
+  'unambiguous-types.adoc',
   'unit-tests.adoc',
 ]
 
diff --git a/Documentation/technical/unambiguous-types.adoc b/Documentation/technical/unambiguous-types.adoc
new file mode 100644
index 00000000000000..9a4990847c0e0b
--- /dev/null
+++ b/Documentation/technical/unambiguous-types.adoc
@@ -0,0 +1,224 @@
+= Unambiguous types
+
+Most of these mappings are obvious, but there are some nuances and gotchas with
+Rust FFI (Foreign Function Interface).
+
+This document defines clear, one-to-one mappings between primitive types in C,
+Rust (and possible other languages in the future). Its purpose is to eliminate
+ambiguity in type widths, signedness, and binary representation across
+platforms and languages.
+
+For Git, the only header required to use these unambiguous types in C is
+`git-compat-util.h`.
+
+== Boolean types
+[cols="1,1", options="header"]
+|===
+| C Type | Rust Type
+| bool^1^       | bool
+|===
+
+== Integer types
+
+In C, `<stdint.h>` (or an equivalent) must be included.
+
+[cols="1,1", options="header"]
+|===
+| C Type | Rust Type
+| uint8_t    | u8
+| uint16_t   | u16
+| uint32_t   | u32
+| uint64_t   | u64
+
+| int8_t     | i8
+| int16_t    | i16
+| int32_t    | i32
+| int64_t    | i64
+|===
+
+== Floating-point types
+
+Rust requires IEEE-754 semantics.
+In C, that is typically true, but not guaranteed by the standard.
+
+[cols="1,1", options="header"]
+|===
+| C Type | Rust Type
+| float^2^      | f32
+| double^2^     | f64
+|===
+
+== Size types
+
+These types represent pointer-sized integers and are typically defined in
+`<stddef.h>` or an equivalent header.
+
+Size types should be used any time pointer arithmetic is performed e.g.
+indexing an array, describing the number of elements in memory, etc...
+
+[cols="1,1", options="header"]
+|===
+| C Type | Rust Type
+| size_t^3^     | usize
+| ptrdiff_t^3^  | isize
+|===
+
+== Character types
+
+This is where C and Rust don't have a clean one-to-one mapping.
+
+A C `char` and a Rust `u8` share the same bit width, so any C struct containing
+a `char` will have the same size as the corresponding Rust struct using `u8`.
+In that sense, such structs are safe to pass over the FFI boundary, because
+their fields will be laid out identically. However, beyond bit width, C `char`
+has additional semantics and platform-dependent behavior that can cause
+problems, as discussed below.
+
+The C language leaves the signedness of `char` implementation defined. Because
+our developer build enables -Wsign-compare, comparison of a value of `char`
+type with either signed or unsigned integers may trigger warnings from the
+compiler.
+
+Note: Rust's `char` type is an unsigned 32-bit integer that is used to describe
+Unicode code points.
+
+=== Notes
+^1^ This is only true if stdbool.h (or equivalent) is used. +
+^2^ C does not enforce IEEE-754 compatibility, but Rust expects it. If the
+platform/arch for C does not follow IEEE-754 then this equivalence does not
+hold. Also, it's assumed that `float` is 32 bits and `double` is 64, but
+there may be a strange platform/arch where even this isn't true. +
+^3^ C also defines uintptr_t, ssize_t and intptr_t, but these types are
+discouraged for FFI purposes. For functions like `read()` and `write()` ssize_t
+should be cast to a different, and unambiguous, type before being passed over
+the FFI boundary. +
+
+== Problems with std::ffi::c_* types in Rust
+TL;DR: In practice, Rust's `c_*` types aren't guaranteed to match C types for
+all possible C compilers, platforms, or architectures, because Rust only
+ensures correctness of C types on officially supported targets. These
+definitions have changed over time to match more targets which means that the
+c_* definitions will differ based on which Rust version Git chooses to use.
+
+Current list of safe, Rust side, FFI types in Git: +
+
+* `c_void`
+* `CStr`
+* `CString`
+
+Even then, they should be used sparingly, and only where the semantics match
+exactly.
+
+The std::os::raw::c_* directly inherits the problems of core::ffi, which
+changes over time and seems to make a best guess at the correct definition for
+a given platform/target. This probably isn't a problem for all other platforms
+that Rust supports currently, but can anyone say that Rust got it right for all
+C compilers of all platforms/targets?
+
+To give an example: c_long is defined in
+footnote:[https://doc.rust-lang.org/1.63.0/src/core/ffi/mod.rs.html#175-189[c_long in 1.63.0]]
+footnote:[https://doc.rust-lang.org/1.89.0/src/core/ffi/primitives.rs.html#135-151[c_long in 1.89.0]]
+
+=== Rust version 1.63.0
+
+```
+mod c_long_definition {
+    cfg_if! {
+        if #[cfg(all(target_pointer_width = "64", not(windows)))] {
+            pub type c_long = i64;
+            pub type NonZero_c_long = crate::num::NonZeroI64;
+            pub type c_ulong = u64;
+            pub type NonZero_c_ulong = crate::num::NonZeroU64;
+        } else {
+            // The minimal size of `long` in the C standard is 32 bits
+            pub type c_long = i32;
+            pub type NonZero_c_long = crate::num::NonZeroI32;
+            pub type c_ulong = u32;
+            pub type NonZero_c_ulong = crate::num::NonZeroU32;
+        }
+    }
+}
+```
+
+=== Rust version 1.89.0
+
+```
+mod c_long_definition {
+    crate::cfg_select! {
+        any(
+            all(target_pointer_width = "64", not(windows)),
+            // wasm32 Linux ABI uses 64-bit long
+            all(target_arch = "wasm32", target_os = "linux")
+        ) => {
+            pub(super) type c_long = i64;
+            pub(super) type c_ulong = u64;
+        }
+        _ => {
+            // The minimal size of `long` in the C standard is 32 bits
+            pub(super) type c_long = i32;
+            pub(super) type c_ulong = u32;
+        }
+    }
+}
+```
+
+Even for the cases where C types are correctly mapped to Rust types via
+std::ffi::c_* there are still problems. Let's take c_char for example. On some
+platforms it's u8 on others it's i8.
+
+=== Subtraction underflow in debug mode
+
+The following code will panic in debug on platforms that define c_char as u8,
+but won't if it's an i8.
+
+```
+let mut x: std::ffi::c_char = 0;
+x -= 1;
+```
+
+=== Inconsistent shift behavior
+
+`x` will be 0xC0 for platforms that use i8, but will be 0x40 where it's u8.
+
+```
+let mut x: std::ffi::c_char = 0x80;
+x >>= 1;
+```
+
+=== Equality fails to compile on some platforms
+
+The following will not compile on platforms that define c_char as i8, but will
+if it's u8. You can cast x e.g. `assert_eq!(x as u8, b'a');`, but then you get
+a warning on platforms that use u8 and a clean compilation where i8 is used.
+
+```
+let mut x: std::ffi::c_char = 0x61;
+assert_eq!(x, b'a');
+```
+
+== Enum types
+Rust enum types should not be used as FFI types. Rust enum types are more like
+C union types than C enum's. For something like:
+
+```
+#[repr(C, u8)]
+enum Fruit {
+    Apple,
+    Banana,
+    Cherry,
+}
+```
+
+It's easy enough to make sure the Rust enum matches what C would expect, but a
+more complex type like.
+
+```
+enum HashResult {
+    SHA1([u8; 20]),
+    SHA256([u8; 32]),
+}
+```
+
+The Rust compiler has to add a discriminant to the enum to distinguish between
+the variants. The width, location, and values for that discriminant is up to
+the Rust compiler and is not ABI stable.

From f007f4f4b473565fb2e94780028399030926bacb Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:14 +0000
Subject: [PATCH 05/35] xdiff: use ptrdiff_t for dstart/dend

ptrdiff_t is appropriate for dstart and dend because they both describe
positive or negative offsets relative to a pointer.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xtypes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index f145abba3ea8a3..7a2d429ec5e7ea 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -47,7 +47,7 @@ typedef struct s_xrecord {
 typedef struct s_xdfile {
 	xrecord_t *recs;
 	long nrec;
-	long dstart, dend;
+	ptrdiff_t dstart, dend;
 	bool *changed;
 	long *rindex;
 	long nreff;

From 10f97d6affcb59bdcb74a6878d3d9da0eec81296 Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:15 +0000
Subject: [PATCH 06/35] xdiff: make xrecord_t.ptr a uint8_t instead of char

Make xrecord_t.ptr uint8_t because it's referring to bytes in memory.

In order to avoid a refactor avalanche, many uses of this field were
cast to char* or similar.

Places where casting was unnecessary:
xemit.c:156
xmerge.c:124
xmerge.c:127
xmerge.c:164
xmerge.c:169
xmerge.c:172
xmerge.c:178

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiffi.c    |  8 ++++----
 xdiff/xemit.c     |  6 +++---
 xdiff/xmerge.c    | 14 +++++++-------
 xdiff/xpatience.c |  2 +-
 xdiff/xprepare.c  |  6 +++---
 xdiff/xtypes.h    |  2 +-
 xdiff/xutils.c    |  4 ++--
 7 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 6f3998ee54c01e..95989b6af1d070 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -407,7 +407,7 @@ static int get_indent(xrecord_t *rec)
 	int ret = 0;
 
 	for (i = 0; i < rec->size; i++) {
-		char c = rec->ptr[i];
+		char c = (char) rec->ptr[i];
 
 		if (!XDL_ISSPACE(c))
 			return ret;
@@ -993,11 +993,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
 
 		rec = &xe->xdf1.recs[xch->i1];
 		for (i = 0; i < xch->chg1 && ignore; i++)
-			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);
+			ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags);
 
 		rec = &xe->xdf2.recs[xch->i2];
 		for (i = 0; i < xch->chg2 && ignore; i++)
-			ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags);
+			ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags);
 
 		xch->ignore = ignore;
 	}
@@ -1008,7 +1008,7 @@ static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) {
 	size_t i;
 
 	for (i = 0; i < xpp->ignore_regex_nr; i++)
-		if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1,
+		if (!regexec_buf(xpp->ignore_regex[i], (const char *)rec->ptr, rec->size, 1,
 				 &regmatch, 0))
 			return 1;
 
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index b2f1f30cd36eef..ead930088a5601 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -27,7 +27,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 {
 	xrecord_t *rec = &xdf->recs[ri];
 
-	if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0)
+	if (xdl_emit_diffrec((char const *)rec->ptr, rec->size, pre, strlen(pre), ecb) < 0)
 		return -1;
 
 	return 0;
@@ -113,8 +113,8 @@ static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
 	xrecord_t *rec = &xdf->recs[ri];
 
 	if (!xecfg->find_func)
-		return def_ff(rec->ptr, rec->size, buf, sz);
-	return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv);
+		return def_ff((const char *)rec->ptr, rec->size, buf, sz);
+	return xecfg->find_func((const char *)rec->ptr, rec->size, buf, sz, xecfg->find_func_priv);
 }
 
 static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index fd600cbb5d58a2..75cb3e76a2c8e4 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
 	xrecord_t *rec2 = xe2->xdf2.recs + i2;
 
 	for (i = 0; i < line_count; i++) {
-		int result = xdl_recmatch(rec1[i].ptr, rec1[i].size,
-			rec2[i].ptr, rec2[i].size, flags);
+		int result = xdl_recmatch((const char *)rec1[i].ptr, rec1[i].size,
+			(const char *)rec2[i].ptr, rec2[i].size, flags);
 		if (!result)
 			return -1;
 	}
@@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
 
 static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags)
 {
-	return xdl_recmatch(rec1->ptr, rec1->size,
-			    rec2->ptr, rec2->size, flags);
+	return xdl_recmatch((const char *)rec1->ptr, rec1->size,
+			    (const char *)rec2->ptr, rec2->size, flags);
 }
 
 /*
@@ -382,10 +382,10 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m,
 		 * we have a very simple mmfile structure.
 		 */
 		t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr;
-		t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr
+		t1.size = (char *)xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr
 			+ xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr;
 		t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr;
-		t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr
+		t2.size = (char *)xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr
 			+ xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr;
 		if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0)
 			return -1;
@@ -440,7 +440,7 @@ static int line_contains_alnum(const char *ptr, long size)
 static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
 {
 	for (; chg; chg--, i++)
-		if (line_contains_alnum(xe->xdf2.recs[i].ptr,
+		if (line_contains_alnum((const char *)xe->xdf2.recs[i].ptr,
 				xe->xdf2.recs[i].size))
 			return 1;
 	return 0;
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index 669b653580efe6..bb61354f22a177 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -121,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
 		return;
 	map->entries[index].line1 = line;
 	map->entries[index].hash = record->ha;
-	map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr);
+	map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr);
 	if (!map->first)
 		map->first = map->entries + index;
 	if (map->last) {
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 192334f1b72e63..4c564670764e53 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -99,8 +99,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
 	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
 		if (rcrec->rec.ha == rec->ha &&
-				xdl_recmatch(rcrec->rec.ptr, rcrec->rec.size,
-					rec->ptr, rec->size, cf->flags))
+				xdl_recmatch((const char *)rcrec->rec.ptr, rcrec->rec.size,
+					(const char *)rec->ptr, rec->size, cf->flags))
 			break;
 
 	if (!rcrec) {
@@ -156,7 +156,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 			if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
 				goto abort;
 			crec = &xdf->recs[xdf->nrec++];
-			crec->ptr = prev;
+			crec->ptr = (uint8_t const *)prev;
 			crec->size = (long) (cur - prev);
 			crec->ha = hav;
 			if (xdl_classify_record(pass, cf, crec) < 0)
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 7a2d429ec5e7ea..69727fb29999ef 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -39,7 +39,7 @@ typedef struct s_chastore {
 } chastore_t;
 
 typedef struct s_xrecord {
-	char const *ptr;
+	uint8_t const *ptr;
 	long size;
 	unsigned long ha;
 } xrecord_t;
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 447e66c7198b08..7be063bfb61d72 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -465,10 +465,10 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp,
 	xdfenv_t env;
 
 	subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr;
-	subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr +
+	subfile1.size = (char *)diff_env->xdf1.recs[line1 + count1 - 2].ptr +
 		diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr;
 	subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr;
-	subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr +
+	subfile2.size = (char *)diff_env->xdf2.recs[line2 + count2 - 2].ptr +
 		diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr;
 	if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0)
 		return -1;

From 9bd193253c5e590203fc566ad7cff8f891ec0493 Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:16 +0000
Subject: [PATCH 07/35] xdiff: use size_t for xrecord_t.size

size_t is the appropriate type because size is describing the number of
elements, bytes in this case, in memory.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiffi.c   |  7 +++----
 xdiff/xemit.c    |  8 ++++----
 xdiff/xmerge.c   | 16 ++++++++--------
 xdiff/xprepare.c |  6 +++---
 xdiff/xtypes.h   |  2 +-
 5 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 95989b6af1d070..cb8e412c7b9db6 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -403,10 +403,9 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2)
  */
 static int get_indent(xrecord_t *rec)
 {
-	long i;
 	int ret = 0;
 
-	for (i = 0; i < rec->size; i++) {
+	for (size_t i = 0; i < rec->size; i++) {
 		char c = (char) rec->ptr[i];
 
 		if (!XDL_ISSPACE(c))
@@ -993,11 +992,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags)
 
 		rec = &xe->xdf1.recs[xch->i1];
 		for (i = 0; i < xch->chg1 && ignore; i++)
-			ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags);
+			ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags);
 
 		rec = &xe->xdf2.recs[xch->i2];
 		for (i = 0; i < xch->chg2 && ignore; i++)
-			ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags);
+			ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags);
 
 		xch->ignore = ignore;
 	}
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index ead930088a5601..2f8007753c30f2 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -27,7 +27,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t *
 {
 	xrecord_t *rec = &xdf->recs[ri];
 
-	if (xdl_emit_diffrec((char const *)rec->ptr, rec->size, pre, strlen(pre), ecb) < 0)
+	if (xdl_emit_diffrec((char const *)rec->ptr, (long)rec->size, pre, strlen(pre), ecb) < 0)
 		return -1;
 
 	return 0;
@@ -113,8 +113,8 @@ static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri,
 	xrecord_t *rec = &xdf->recs[ri];
 
 	if (!xecfg->find_func)
-		return def_ff((const char *)rec->ptr, rec->size, buf, sz);
-	return xecfg->find_func((const char *)rec->ptr, rec->size, buf, sz, xecfg->find_func_priv);
+		return def_ff((const char *)rec->ptr, (long)rec->size, buf, sz);
+	return xecfg->find_func((const char *)rec->ptr, (long)rec->size, buf, sz, xecfg->find_func_priv);
 }
 
 static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri)
@@ -151,7 +151,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
 static int is_empty_rec(xdfile_t *xdf, long ri)
 {
 	xrecord_t *rec = &xdf->recs[ri];
-	long i = 0;
+	size_t i = 0;
 
 	for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++);
 
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index 75cb3e76a2c8e4..0dd4558a3280dc 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2,
 	xrecord_t *rec2 = xe2->xdf2.recs + i2;
 
 	for (i = 0; i < line_count; i++) {
-		int result = xdl_recmatch((const char *)rec1[i].ptr, rec1[i].size,
-			(const char *)rec2[i].ptr, rec2[i].size, flags);
+		int result = xdl_recmatch((const char *)rec1[i].ptr, (long)rec1[i].size,
+			(const char *)rec2[i].ptr, (long)rec2[i].size, flags);
 		if (!result)
 			return -1;
 	}
@@ -119,11 +119,11 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee
 	if (count < 1)
 		return 0;
 
-	for (i = 0; i < count; size += recs[i++].size)
+	for (i = 0; i < count; size += (int)recs[i++].size)
 		if (dest)
 			memcpy(dest + size, recs[i].ptr, recs[i].size);
 	if (add_nl) {
-		i = recs[count - 1].size;
+		i = (int)recs[count - 1].size;
 		if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') {
 			if (needs_cr) {
 				if (dest)
@@ -156,7 +156,7 @@ static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_n
  */
 static int is_eol_crlf(xdfile_t *file, int i)
 {
-	long size;
+	size_t size;
 
 	if (i < file->nrec - 1)
 		/* All lines before the last *must* end in LF */
@@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
 
 static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags)
 {
-	return xdl_recmatch((const char *)rec1->ptr, rec1->size,
-			    (const char *)rec2->ptr, rec2->size, flags);
+	return xdl_recmatch((const char *)rec1->ptr, (long)rec1->size,
+			    (const char *)rec2->ptr, (long)rec2->size, flags);
 }
 
 /*
@@ -441,7 +441,7 @@ static int lines_contain_alnum(xdfenv_t *xe, int i, int chg)
 {
 	for (; chg; chg--, i++)
 		if (line_contains_alnum((const char *)xe->xdf2.recs[i].ptr,
-				xe->xdf2.recs[i].size))
+				(long)xe->xdf2.recs[i].size))
 			return 1;
 	return 0;
 }
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 4c564670764e53..b3219aed3e8795 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -99,8 +99,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
 	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
 		if (rcrec->rec.ha == rec->ha &&
-				xdl_recmatch((const char *)rcrec->rec.ptr, rcrec->rec.size,
-					(const char *)rec->ptr, rec->size, cf->flags))
+				xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size,
+					(const char *)rec->ptr, (long)rec->size, cf->flags))
 			break;
 
 	if (!rcrec) {
@@ -157,7 +157,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 				goto abort;
 			crec = &xdf->recs[xdf->nrec++];
 			crec->ptr = (uint8_t const *)prev;
-			crec->size = (long) (cur - prev);
+			crec->size = cur - prev;
 			crec->ha = hav;
 			if (xdl_classify_record(pass, cf, crec) < 0)
 				goto abort;
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 69727fb29999ef..354349b523fbac 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -40,7 +40,7 @@ typedef struct s_chastore {
 
 typedef struct s_xrecord {
 	uint8_t const *ptr;
-	long size;
+	size_t size;
 	unsigned long ha;
 } xrecord_t;
 

From b0d4ae30f5a23fa9da87e9396b78e6442b351ddc Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:17 +0000
Subject: [PATCH 08/35] xdiff: use unambiguous types in xdl_hash_record()

Convert the function signature and body to use unambiguous types. char
is changed to uint8_t because this function processes bytes in memory.
unsigned long to uint64_t so that the hash output is consistent across
platforms. `flags` was changed from long to uint64_t to ensure the
high order bits are not dropped on platforms that treat long as 32
bits.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff-interface.c |  2 +-
 xdiff/xprepare.c  |  6 +++---
 xdiff/xutils.c    | 28 ++++++++++++++--------------
 xdiff/xutils.h    |  6 +++---
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/xdiff-interface.c b/xdiff-interface.c
index 4971f722b3e5f4..1a35556380451a 100644
--- a/xdiff-interface.c
+++ b/xdiff-interface.c
@@ -300,7 +300,7 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg)
 
 unsigned long xdiff_hash_string(const char *s, size_t len, long flags)
 {
-	return xdl_hash_record(&s, s + len, flags);
+	return xdl_hash_record((uint8_t const**)&s, (uint8_t const*)s + len, flags);
 }
 
 int xdiff_compare_lines(const char *l1, long s1,
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index b3219aed3e8795..85e56021daf9e2 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -137,8 +137,8 @@ static void xdl_free_ctx(xdfile_t *xdf)
 static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
 			   xdlclassifier_t *cf, xdfile_t *xdf) {
 	long bsize;
-	unsigned long hav;
-	char const *blk, *cur, *top, *prev;
+	uint64_t hav;
+	uint8_t const *blk, *cur, *top, *prev;
 	xrecord_t *crec;
 
 	xdf->rindex = NULL;
@@ -156,7 +156,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 			if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
 				goto abort;
 			crec = &xdf->recs[xdf->nrec++];
-			crec->ptr = (uint8_t const *)prev;
+			crec->ptr = prev;
 			crec->size = cur - prev;
 			crec->ha = hav;
 			if (xdl_classify_record(pass, cf, crec) < 0)
diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 7be063bfb61d72..77ee1ad9c86875 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -249,11 +249,11 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags)
 	return 1;
 }
 
-unsigned long xdl_hash_record_with_whitespace(char const **data,
-		char const *top, long flags) {
-	unsigned long ha = 5381;
-	char const *ptr = *data;
-	int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
+uint64_t xdl_hash_record_with_whitespace(uint8_t const **data,
+		uint8_t const *top, uint64_t flags) {
+	uint64_t ha = 5381;
+	uint8_t const *ptr = *data;
+	bool cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL;
 
 	for (; ptr < top && *ptr != '\n'; ptr++) {
 		if (cr_at_eol_only) {
@@ -263,8 +263,8 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
 				continue;
 		}
 		else if (XDL_ISSPACE(*ptr)) {
-			const char *ptr2 = ptr;
-			int at_eol;
+			const uint8_t *ptr2 = ptr;
+			bool at_eol;
 			while (ptr + 1 < top && XDL_ISSPACE(ptr[1])
 					&& ptr[1] != '\n')
 				ptr++;
@@ -274,20 +274,20 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
 			else if (flags & XDF_IGNORE_WHITESPACE_CHANGE
 				 && !at_eol) {
 				ha += (ha << 5);
-				ha ^= (unsigned long) ' ';
+				ha ^= (uint64_t) ' ';
 			}
 			else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL
 				 && !at_eol) {
 				while (ptr2 != ptr + 1) {
 					ha += (ha << 5);
-					ha ^= (unsigned long) *ptr2;
+					ha ^= (uint64_t) *ptr2;
 					ptr2++;
 				}
 			}
 			continue;
 		}
 		ha += (ha << 5);
-		ha ^= (unsigned long) *ptr;
+		ha ^= (uint64_t) *ptr;
 	}
 	*data = ptr < top ? ptr + 1: ptr;
 
@@ -304,9 +304,9 @@ unsigned long xdl_hash_record_with_whitespace(char const **data,
 #define REASSOC_FENCE(x, y)
 #endif
 
-unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
-	unsigned long ha = 5381, c0, c1;
-	char const *ptr = *data;
+uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top) {
+	uint64_t ha = 5381, c0, c1;
+	uint8_t const *ptr = *data;
 #if 0
 	/*
 	 * The baseline form of the optimized loop below. This is the djb2
@@ -314,7 +314,7 @@ unsigned long xdl_hash_record_verbatim(char const **data, char const *top) {
 	 */
 	for (; ptr < top && *ptr != '\n'; ptr++) {
 		ha += (ha << 5);
-		ha += (unsigned long) *ptr;
+		ha += (uint64_t) *ptr;
 	}
 	*data = ptr < top ? ptr + 1: ptr;
 #else
diff --git a/xdiff/xutils.h b/xdiff/xutils.h
index 13f68310472a69..615b4a9d355433 100644
--- a/xdiff/xutils.h
+++ b/xdiff/xutils.h
@@ -34,9 +34,9 @@ void *xdl_cha_alloc(chastore_t *cha);
 long xdl_guess_lines(mmfile_t *mf, long sample);
 int xdl_blankline(const char *line, long size, long flags);
 int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags);
-unsigned long xdl_hash_record_verbatim(char const **data, char const *top);
-unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags);
-static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags)
+uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top);
+uint64_t xdl_hash_record_with_whitespace(uint8_t const **data, uint8_t const *top, uint64_t flags);
+static inline uint64_t xdl_hash_record(uint8_t const **data, uint8_t const *top, uint64_t flags)
 {
 	if (flags & XDF_WHITESPACE_FLAGS)
 		return xdl_hash_record_with_whitespace(data, top, flags);

From 6a26019c81faa07ba811541b4cf35be9e8ee1ead Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:18 +0000
Subject: [PATCH 09/35] xdiff: split xrecord_t.ha into line_hash and
 minimal_perfect_hash

The ha field is serving two different purposes, which makes the code
harder to read. At first glance, it looks like many places assume
there could never be hash collisions between lines of the two input
files. In reality, line_hash is used together with xdl_recmatch() to
ensure correct comparisons of lines, even when collisions occur.

To make this clearer, the old ha field has been split:
  * line_hash: a straightforward hash of a line, independent of any
    external context. Its type is uint64_t, as it comes from a fixed
    width hash function.
  * minimal_perfect_hash: Not a new concept, but now a separate
    field. It comes from the classifier's general-purpose hash table,
    which assigns each line a unique and minimal hash across the two
    files. A size_t is used here because it's meant to be used to
    index an array. This also avoids ` as usize` casts on the Rust
    side when using it to index a slice.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiffi.c     |  6 +++---
 xdiff/xhistogram.c |  4 ++--
 xdiff/xpatience.c  | 10 +++++-----
 xdiff/xprepare.c   | 18 +++++++++---------
 xdiff/xtypes.h     |  3 ++-
 5 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index cb8e412c7b9db6..8d96074414f2e9 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -22,9 +22,9 @@
 
 #include "xinclude.h"
 
-static unsigned long get_hash(xdfile_t *xdf, long index)
+static size_t get_hash(xdfile_t *xdf, long index)
 {
-	return xdf->recs[xdf->rindex[index]].ha;
+	return xdf->recs[xdf->rindex[index]].minimal_perfect_hash;
 }
 
 #define XDL_MAX_COST_MIN 256
@@ -385,7 +385,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 
 static int recs_match(xrecord_t *rec1, xrecord_t *rec2)
 {
-	return (rec1->ha == rec2->ha);
+	return rec1->minimal_perfect_hash == rec2->minimal_perfect_hash;
 }
 
 /*
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index 6dc450b1fe1dfc..5ae1282c27568c 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -90,7 +90,7 @@ struct region {
 
 static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
 {
-	return r1->ha == r2->ha;
+	return r1->minimal_perfect_hash == r2->minimal_perfect_hash;
 
 }
 
@@ -98,7 +98,7 @@ static int cmp_recs(xrecord_t *r1, xrecord_t *r2)
 	(cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2)))
 
 #define TABLE_HASH(index, side, line) \
-	XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits)
+	XDL_HASHLONG((REC(index->env, side, line))->minimal_perfect_hash, index->table_bits)
 
 static int scanA(struct histindex *index, int line1, int count1)
 {
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index bb61354f22a177..cc53266f3b8302 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -48,7 +48,7 @@
 struct hashmap {
 	int nr, alloc;
 	struct entry {
-		unsigned long hash;
+		size_t minimal_perfect_hash;
 		/*
 		 * 0 = unused entry, 1 = first line, 2 = second, etc.
 		 * line2 is NON_UNIQUE if the line is not unique
@@ -101,10 +101,10 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
 	 * So we multiply ha by 2 in the hope that the hashing was
 	 * "unique enough".
 	 */
-	int index = (int)((record->ha << 1) % map->alloc);
+	int index = (int)((record->minimal_perfect_hash << 1) % map->alloc);
 
 	while (map->entries[index].line1) {
-		if (map->entries[index].hash != record->ha) {
+		if (map->entries[index].minimal_perfect_hash != record->minimal_perfect_hash) {
 			if (++index >= map->alloc)
 				index = 0;
 			continue;
@@ -120,7 +120,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map,
 	if (pass == 2)
 		return;
 	map->entries[index].line1 = line;
-	map->entries[index].hash = record->ha;
+	map->entries[index].minimal_perfect_hash = record->minimal_perfect_hash;
 	map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr);
 	if (!map->first)
 		map->first = map->entries + index;
@@ -248,7 +248,7 @@ static int match(struct hashmap *map, int line1, int line2)
 {
 	xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1];
 	xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1];
-	return record1->ha == record2->ha;
+	return record1->minimal_perfect_hash == record2->minimal_perfect_hash;
 }
 
 static int patience_diff(xpparam_t const *xpp, xdfenv_t *env,
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 85e56021daf9e2..bea0992b5e4a33 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -93,12 +93,12 @@ static void xdl_free_classifier(xdlclassifier_t *cf) {
 
 
 static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) {
-	long hi;
+	size_t hi;
 	xdlclass_t *rcrec;
 
-	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
+	hi = XDL_HASHLONG(rec->line_hash, cf->hbits);
 	for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next)
-		if (rcrec->rec.ha == rec->ha &&
+		if (rcrec->rec.line_hash == rec->line_hash &&
 				xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size,
 					(const char *)rec->ptr, (long)rec->size, cf->flags))
 			break;
@@ -120,7 +120,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 
 	(pass == 1) ? rcrec->len1++ : rcrec->len2++;
 
-	rec->ha = (unsigned long) rcrec->idx;
+	rec->minimal_perfect_hash = (size_t)rcrec->idx;
 
 	return 0;
 }
@@ -158,7 +158,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 			crec = &xdf->recs[xdf->nrec++];
 			crec->ptr = prev;
 			crec->size = cur - prev;
-			crec->ha = hav;
+			crec->line_hash = hav;
 			if (xdl_classify_record(pass, cf, crec) < 0)
 				goto abort;
 		}
@@ -290,7 +290,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
 		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
-		rcrec = cf->rcrecs[recs->ha];
+		rcrec = cf->rcrecs[recs->minimal_perfect_hash];
 		nm = rcrec ? rcrec->len2 : 0;
 		action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
 	}
@@ -298,7 +298,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
 		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
-		rcrec = cf->rcrecs[recs->ha];
+		rcrec = cf->rcrecs[recs->minimal_perfect_hash];
 		nm = rcrec ? rcrec->len1 : 0;
 		action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
 	}
@@ -350,7 +350,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
 	recs2 = xdf2->recs;
 	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
 	     i++, recs1++, recs2++)
-		if (recs1->ha != recs2->ha)
+		if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash)
 			break;
 
 	xdf1->dstart = xdf2->dstart = i;
@@ -358,7 +358,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
 	recs1 = xdf1->recs + xdf1->nrec - 1;
 	recs2 = xdf2->recs + xdf2->nrec - 1;
 	for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--)
-		if (recs1->ha != recs2->ha)
+		if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash)
 			break;
 
 	xdf1->dend = xdf1->nrec - i - 1;
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 354349b523fbac..d4e9cd2e763616 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -41,7 +41,8 @@ typedef struct s_chastore {
 typedef struct s_xrecord {
 	uint8_t const *ptr;
 	size_t size;
-	unsigned long ha;
+	uint64_t line_hash;
+	size_t minimal_perfect_hash;
 } xrecord_t;
 
 typedef struct s_xdfile {

From 016538780e9f6e83a1d9c7b0ec771fb6c5583c0f Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:19 +0000
Subject: [PATCH 10/35] xdiff: make xdfile_t.nrec a size_t instead of long

size_t is used because nrec describes the number of elements for both
recs, and for 'changed' + 2.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiffi.c    |  8 ++++----
 xdiff/xemit.c     | 20 ++++++++++----------
 xdiff/xmerge.c    |  8 ++++----
 xdiff/xpatience.c |  2 +-
 xdiff/xprepare.c  | 12 ++++++------
 xdiff/xtypes.h    |  2 +-
 6 files changed, 26 insertions(+), 26 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 8d96074414f2e9..21d06bce969765 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -483,7 +483,7 @@ static void measure_split(const xdfile_t *xdf, long split,
 {
 	long i;
 
-	if (split >= xdf->nrec) {
+	if (split >= (long)xdf->nrec) {
 		m->end_of_file = 1;
 		m->indent = -1;
 	} else {
@@ -506,7 +506,7 @@ static void measure_split(const xdfile_t *xdf, long split,
 
 	m->post_blank = 0;
 	m->post_indent = -1;
-	for (i = split + 1; i < xdf->nrec; i++) {
+	for (i = split + 1; i < (long)xdf->nrec; i++) {
 		m->post_indent = get_indent(&xdf->recs[i]);
 		if (m->post_indent != -1)
 			break;
@@ -717,7 +717,7 @@ static void group_init(xdfile_t *xdf, struct xdlgroup *g)
  */
 static inline int group_next(xdfile_t *xdf, struct xdlgroup *g)
 {
-	if (g->end == xdf->nrec)
+	if (g->end == (long)xdf->nrec)
 		return -1;
 
 	g->start = g->end + 1;
@@ -750,7 +750,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g)
  */
 static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g)
 {
-	if (g->end < xdf->nrec &&
+	if (g->end < (long)xdf->nrec &&
 	    recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) {
 		xdf->changed[g->start++] = false;
 		xdf->changed[g->end++] = true;
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 2f8007753c30f2..04f7e9193b61f0 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -137,7 +137,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
 	buf = func_line ? func_line->buf : dummy;
 	size = func_line ? sizeof(func_line->buf) : sizeof(dummy);
 
-	for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) {
+	for (l = start; l != limit && 0 <= l && l < (long)xe->xdf1.nrec; l += step) {
 		long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size);
 		if (len >= 0) {
 			if (func_line)
@@ -179,14 +179,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			long fs1, i1 = xch->i1;
 
 			/* Appended chunk? */
-			if (i1 >= xe->xdf1.nrec) {
+			if (i1 >= (long)xe->xdf1.nrec) {
 				long i2 = xch->i2;
 
 				/*
 				 * We don't need additional context if
 				 * a whole function was added.
 				 */
-				while (i2 < xe->xdf2.nrec) {
+				while (i2 < (long)xe->xdf2.nrec) {
 					if (is_func_rec(&xe->xdf2, xecfg, i2))
 						goto post_context_calculation;
 					i2++;
@@ -196,7 +196,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 				 * Otherwise get more context from the
 				 * pre-image.
 				 */
-				i1 = xe->xdf1.nrec - 1;
+				i1 = (long)xe->xdf1.nrec - 1;
 			}
 
 			fs1 = get_func_line(xe, xecfg, NULL, i1, -1);
@@ -228,8 +228,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 
  post_context_calculation:
 		lctx = xecfg->ctxlen;
-		lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
-		lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));
+		lctx = XDL_MIN(lctx, (long)xe->xdf1.nrec - (xche->i1 + xche->chg1));
+		lctx = XDL_MIN(lctx, (long)xe->xdf2.nrec - (xche->i2 + xche->chg2));
 
 		e1 = xche->i1 + xche->chg1 + lctx;
 		e2 = xche->i2 + xche->chg2 + lctx;
@@ -237,13 +237,13 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
 			long fe1 = get_func_line(xe, xecfg, NULL,
 						 xche->i1 + xche->chg1,
-						 xe->xdf1.nrec);
+						 (long)xe->xdf1.nrec);
 			while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1))
 				fe1--;
 			if (fe1 < 0)
-				fe1 = xe->xdf1.nrec;
+				fe1 = (long)xe->xdf1.nrec;
 			if (fe1 > e1) {
-				e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec);
+				e2 = XDL_MIN(e2 + (fe1 - e1), (long)xe->xdf2.nrec);
 				e1 = fe1;
 			}
 
@@ -254,7 +254,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			 */
 			if (xche->next) {
 				long l = XDL_MIN(xche->next->i1,
-						 xe->xdf1.nrec - 1);
+						 (long)xe->xdf1.nrec - 1);
 				if (l - xecfg->ctxlen <= e1 ||
 				    get_func_line(xe, xecfg, NULL, l, e1) < 0) {
 					xche = xche->next;
diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c
index 0dd4558a3280dc..29dad98c496b07 100644
--- a/xdiff/xmerge.c
+++ b/xdiff/xmerge.c
@@ -158,7 +158,7 @@ static int is_eol_crlf(xdfile_t *file, int i)
 {
 	size_t size;
 
-	if (i < file->nrec - 1)
+	if (i < (long)file->nrec - 1)
 		/* All lines before the last *must* end in LF */
 		return (size = file->recs[i].size) > 1 &&
 			file->recs[i].ptr[size - 2] == '\r';
@@ -317,7 +317,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1,
 			continue;
 		i = m->i1 + m->chg1;
 	}
-	size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0,
+	size += xdl_recs_copy(xe1, i, (int)xe1->xdf2.nrec - i, 0, 0,
 			      dest ? dest + size : NULL);
 	return size;
 }
@@ -622,7 +622,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
 			changes = c;
 		i0 = xscr1->i1;
 		i1 = xscr1->i2;
-		i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec;
+		i2 = xscr1->i1 + (long)xe2->xdf2.nrec - (long)xe2->xdf1.nrec;
 		chg0 = xscr1->chg1;
 		chg1 = xscr1->chg2;
 		chg2 = xscr1->chg1;
@@ -637,7 +637,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1,
 		if (!changes)
 			changes = c;
 		i0 = xscr2->i1;
-		i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec;
+		i1 = xscr2->i1 + (long)xe1->xdf2.nrec - (long)xe1->xdf1.nrec;
 		i2 = xscr2->i2;
 		chg0 = xscr2->chg1;
 		chg1 = xscr2->chg1;
diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index cc53266f3b8302..a0b31eb5d8c1c0 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -370,5 +370,5 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env,
 
 int xdl_do_patience_diff(xpparam_t const *xpp, xdfenv_t *env)
 {
-	return patience_diff(xpp, env, 1, env->xdf1.nrec, 1, env->xdf2.nrec);
+	return patience_diff(xpp, env, 1, (int)env->xdf1.nrec, 1, (int)env->xdf2.nrec);
 }
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index bea0992b5e4a33..705ddd1ae00a36 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -153,7 +153,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 		for (top = blk + bsize; cur < top; ) {
 			prev = cur;
 			hav = xdl_hash_record(&cur, top, xpp->flags);
-			if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
+			if (XDL_ALLOC_GROW(xdf->recs, (long)xdf->nrec + 1, narec))
 				goto abort;
 			crec = &xdf->recs[xdf->nrec++];
 			crec->ptr = prev;
@@ -287,7 +287,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	/*
 	 * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
 	 */
-	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
+	if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT)
 		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
 		rcrec = cf->rcrecs[recs->minimal_perfect_hash];
@@ -295,7 +295,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 		action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
 	}
 
-	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
+	if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT)
 		mlim = XDL_MAX_EQLIMIT;
 	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
 		rcrec = cf->rcrecs[recs->minimal_perfect_hash];
@@ -348,7 +348,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
 
 	recs1 = xdf1->recs;
 	recs2 = xdf2->recs;
-	for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+	for (i = 0, lim = (long)XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
 	     i++, recs1++, recs2++)
 		if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash)
 			break;
@@ -361,8 +361,8 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
 		if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash)
 			break;
 
-	xdf1->dend = xdf1->nrec - i - 1;
-	xdf2->dend = xdf2->nrec - i - 1;
+	xdf1->dend = (long)xdf1->nrec - i - 1;
+	xdf2->dend = (long)xdf2->nrec - i - 1;
 
 	return 0;
 }
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index d4e9cd2e763616..4c4d9bd147ebe8 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -47,7 +47,7 @@ typedef struct s_xrecord {
 
 typedef struct s_xdfile {
 	xrecord_t *recs;
-	long nrec;
+	size_t nrec;
 	ptrdiff_t dstart, dend;
 	bool *changed;
 	long *rindex;

From e35877eadbd9bee473936577c82abca9c8333abd Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:20 +0000
Subject: [PATCH 11/35] xdiff: make xdfile_t.nreff a size_t instead of long

size_t is used because nreff describes the number of elements in memory
for rindex.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xprepare.c | 14 +++++++-------
 xdiff/xtypes.h   |  2 +-
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 705ddd1ae00a36..39fd79d9d46331 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -264,7 +264,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
  * might be potentially discarded if they appear in a run of discardable.
  */
 static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
-	long i, nm, nreff, mlim;
+	long i, nm, mlim;
 	xrecord_t *recs;
 	xdlclass_t *rcrec;
 	uint8_t *action1 = NULL, *action2 = NULL;
@@ -307,29 +307,29 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	 * Use temporary arrays to decide if changed[i] should remain
 	 * false, or become true.
 	 */
-	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
+	xdf1->nreff = 0;
+	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
 	     i <= xdf1->dend; i++, recs++) {
 		if (action1[i] == KEEP ||
 		    (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
-			xdf1->rindex[nreff++] = i;
+			xdf1->rindex[xdf1->nreff++] = i;
 			/* changed[i] remains false, i.e. keep */
 		} else
 			xdf1->changed[i] = true;
 			/* i.e. discard */
 	}
-	xdf1->nreff = nreff;
 
-	for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
+	xdf2->nreff = 0;
+	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
 	     i <= xdf2->dend; i++, recs++) {
 		if (action2[i] == KEEP ||
 		    (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
-			xdf2->rindex[nreff++] = i;
+			xdf2->rindex[xdf2->nreff++] = i;
 			/* changed[i] remains false, i.e. keep */
 		} else
 			xdf2->changed[i] = true;
 			/* i.e. discard */
 	}
-	xdf2->nreff = nreff;
 
 cleanup:
 	xdl_free(action1);
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 4c4d9bd147ebe8..1f495f987f861b 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -51,7 +51,7 @@ typedef struct s_xdfile {
 	ptrdiff_t dstart, dend;
 	bool *changed;
 	long *rindex;
-	long nreff;
+	size_t nreff;
 } xdfile_t;
 
 typedef struct s_xdfenv {

From 5004a8da14e2aa80b5697b0a3a60e594af1c8292 Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:21 +0000
Subject: [PATCH 12/35] xdiff: change rindex from long to size_t in xdfile_t

The field rindex describes an index offset for other arrays. Change it
to size_t.

Changing the type of rindex from long to size_t has no cascading
refactor impact because it is only ever used to directly index other
arrays.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xtypes.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 1f495f987f861b..9074cdadd1118c 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -50,7 +50,7 @@ typedef struct s_xdfile {
 	size_t nrec;
 	ptrdiff_t dstart, dend;
 	bool *changed;
-	long *rindex;
+	size_t *rindex;
 	size_t nreff;
 } xdfile_t;
 

From 22ce0cb6397d3d15c21c217696f262c4b8eb44b3 Mon Sep 17 00:00:00 2001
From: Ezekiel Newren <ezekielnewren@gmail.com>
Date: Tue, 18 Nov 2025 22:34:22 +0000
Subject: [PATCH 13/35] xdiff: rename rindex -> reference_index

The classic diff adds only the lines that it's going to consider,
during the diff, to an array. A mapping between the compacted
array, and the lines of the file that they reference, is
facilitated by this array.

Signed-off-by: Ezekiel Newren <ezekielnewren@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 xdiff/xdiffi.c   |  6 +++---
 xdiff/xprepare.c | 10 +++++-----
 xdiff/xtypes.h   |  2 +-
 3 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 21d06bce969765..4376f943dba539 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -24,7 +24,7 @@
 
 static size_t get_hash(xdfile_t *xdf, long index)
 {
-	return xdf->recs[xdf->rindex[index]].minimal_perfect_hash;
+	return xdf->recs[xdf->reference_index[index]].minimal_perfect_hash;
 }
 
 #define XDL_MAX_COST_MIN 256
@@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1,
 	 */
 	if (off1 == lim1) {
 		for (; off2 < lim2; off2++)
-			xdf2->changed[xdf2->rindex[off2]] = true;
+			xdf2->changed[xdf2->reference_index[off2]] = true;
 	} else if (off2 == lim2) {
 		for (; off1 < lim1; off1++)
-			xdf1->changed[xdf1->rindex[off1]] = true;
+			xdf1->changed[xdf1->reference_index[off1]] = true;
 	} else {
 		xdpsplit_t spl;
 		spl.i1 = spl.i2 = 0;
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index 39fd79d9d46331..34c82e4f8e1626 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -128,7 +128,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
 
 static void xdl_free_ctx(xdfile_t *xdf)
 {
-	xdl_free(xdf->rindex);
+	xdl_free(xdf->reference_index);
 	xdl_free(xdf->changed - 1);
 	xdl_free(xdf->recs);
 }
@@ -141,7 +141,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 	uint8_t const *blk, *cur, *top, *prev;
 	xrecord_t *crec;
 
-	xdf->rindex = NULL;
+	xdf->reference_index = NULL;
 	xdf->changed = NULL;
 	xdf->recs = NULL;
 
@@ -169,7 +169,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
 
 	if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
 	    (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
-		if (!XDL_ALLOC_ARRAY(xdf->rindex, xdf->nrec + 1))
+		if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1))
 			goto abort;
 	}
 
@@ -312,7 +312,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	     i <= xdf1->dend; i++, recs++) {
 		if (action1[i] == KEEP ||
 		    (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
-			xdf1->rindex[xdf1->nreff++] = i;
+			xdf1->reference_index[xdf1->nreff++] = i;
 			/* changed[i] remains false, i.e. keep */
 		} else
 			xdf1->changed[i] = true;
@@ -324,7 +324,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
 	     i <= xdf2->dend; i++, recs++) {
 		if (action2[i] == KEEP ||
 		    (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
-			xdf2->rindex[xdf2->nreff++] = i;
+			xdf2->reference_index[xdf2->nreff++] = i;
 			/* changed[i] remains false, i.e. keep */
 		} else
 			xdf2->changed[i] = true;
diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h
index 9074cdadd1118c..979586f20a6028 100644
--- a/xdiff/xtypes.h
+++ b/xdiff/xtypes.h
@@ -50,7 +50,7 @@ typedef struct s_xdfile {
 	size_t nrec;
 	ptrdiff_t dstart, dend;
 	bool *changed;
-	size_t *rindex;
+	size_t *reference_index;
 	size_t nreff;
 } xdfile_t;
 

From 831e02340b9de46c9ea0a1bbce3894f390f5a45e Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:49 +0100
Subject: [PATCH 14/35] path: move `enter_repo()` into "setup.c"

The function `enter_repo()` is used to enter a repository at a given
path. As such it sits way closer to setting up a repository than it does
with handling paths, but regardless of that it's located in "path.c"
instead of in "setup.c".

Move the function into "setup.c".

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/receive-pack.c   |   2 +-
 builtin/upload-archive.c |   2 +-
 builtin/upload-pack.c    |   2 +-
 http-backend.c           |   1 +
 path.c                   | 100 ---------------------------------------
 path.h                   |  15 ------
 setup.c                  |  81 +++++++++++++++++++++++++++++++
 setup.h                  |  38 +++++++++++++++
 8 files changed, 123 insertions(+), 118 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index c9288a9c7e382b..79a0fd4756665d 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -34,7 +34,6 @@
 #include "object-file.h"
 #include "object-name.h"
 #include "odb.h"
-#include "path.h"
 #include "protocol.h"
 #include "commit-reach.h"
 #include "server-info.h"
@@ -42,6 +41,7 @@
 #include "trace2.h"
 #include "worktree.h"
 #include "shallow.h"
+#include "setup.h"
 #include "parse-options.h"
 
 static const char * const receive_pack_usage[] = {
diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c
index 97d7c9522f9868..25312bb2a52887 100644
--- a/builtin/upload-archive.c
+++ b/builtin/upload-archive.c
@@ -4,8 +4,8 @@
 #define USE_THE_REPOSITORY_VARIABLE
 #include "builtin.h"
 #include "archive.h"
-#include "path.h"
 #include "pkt-line.h"
+#include "setup.h"
 #include "sideband.h"
 #include "run-command.h"
 #include "strvec.h"
diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c
index c2bbc035ab0c91..30498fafea3a8b 100644
--- a/builtin/upload-pack.c
+++ b/builtin/upload-pack.c
@@ -5,11 +5,11 @@
 #include "gettext.h"
 #include "pkt-line.h"
 #include "parse-options.h"
-#include "path.h"
 #include "protocol.h"
 #include "replace-object.h"
 #include "upload-pack.h"
 #include "serve.h"
+#include "setup.h"
 #include "commit.h"
 #include "environment.h"
 
diff --git a/http-backend.c b/http-backend.c
index 52f0483dd309d7..e9d1ef92bd8dc1 100644
--- a/http-backend.c
+++ b/http-backend.c
@@ -16,6 +16,7 @@
 #include "run-command.h"
 #include "string-list.h"
 #include "url.h"
+#include "setup.h"
 #include "strvec.h"
 #include "packfile.h"
 #include "odb.h"
diff --git a/path.c b/path.c
index 7f56eaf9930374..d726537622cda6 100644
--- a/path.c
+++ b/path.c
@@ -738,106 +738,6 @@ char *interpolate_path(const char *path, int real_home)
 	return NULL;
 }
 
-/*
- * First, one directory to try is determined by the following algorithm.
- *
- * (0) If "strict" is given, the path is used as given and no DWIM is
- *     done. Otherwise:
- * (1) "~/path" to mean path under the running user's home directory;
- * (2) "~user/path" to mean path under named user's home directory;
- * (3) "relative/path" to mean cwd relative directory; or
- * (4) "/absolute/path" to mean absolute directory.
- *
- * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git"
- * in this order. We select the first one that is a valid git repository, and
- * chdir() to it. If none match, or we fail to chdir, we return NULL.
- *
- * If all goes well, we return the directory we used to chdir() (but
- * before ~user is expanded), avoiding getcwd() resolving symbolic
- * links.  User relative paths are also returned as they are given,
- * except DWIM suffixing.
- */
-const char *enter_repo(const char *path, unsigned flags)
-{
-	static struct strbuf validated_path = STRBUF_INIT;
-	static struct strbuf used_path = STRBUF_INIT;
-
-	if (!path)
-		return NULL;
-
-	if (!(flags & ENTER_REPO_STRICT)) {
-		static const char *suffix[] = {
-			"/.git", "", ".git/.git", ".git", NULL,
-		};
-		const char *gitfile;
-		int len = strlen(path);
-		int i;
-		while ((1 < len) && (path[len-1] == '/'))
-			len--;
-
-		/*
-		 * We can handle arbitrary-sized buffers, but this remains as a
-		 * sanity check on untrusted input.
-		 */
-		if (PATH_MAX <= len)
-			return NULL;
-
-		strbuf_reset(&used_path);
-		strbuf_reset(&validated_path);
-		strbuf_add(&used_path, path, len);
-		strbuf_add(&validated_path, path, len);
-
-		if (used_path.buf[0] == '~') {
-			char *newpath = interpolate_path(used_path.buf, 0);
-			if (!newpath)
-				return NULL;
-			strbuf_attach(&used_path, newpath, strlen(newpath),
-				      strlen(newpath));
-		}
-		for (i = 0; suffix[i]; i++) {
-			struct stat st;
-			size_t baselen = used_path.len;
-			strbuf_addstr(&used_path, suffix[i]);
-			if (!stat(used_path.buf, &st) &&
-			    (S_ISREG(st.st_mode) ||
-			    (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) {
-				strbuf_addstr(&validated_path, suffix[i]);
-				break;
-			}
-			strbuf_setlen(&used_path, baselen);
-		}
-		if (!suffix[i])
-			return NULL;
-		gitfile = read_gitfile(used_path.buf);
-		if (!(flags & ENTER_REPO_ANY_OWNER_OK))
-			die_upon_dubious_ownership(gitfile, NULL, used_path.buf);
-		if (gitfile) {
-			strbuf_reset(&used_path);
-			strbuf_addstr(&used_path, gitfile);
-		}
-		if (chdir(used_path.buf))
-			return NULL;
-		path = validated_path.buf;
-	}
-	else {
-		const char *gitfile = read_gitfile(path);
-		if (!(flags & ENTER_REPO_ANY_OWNER_OK))
-			die_upon_dubious_ownership(gitfile, NULL, path);
-		if (gitfile)
-			path = gitfile;
-		if (chdir(path))
-			return NULL;
-	}
-
-	if (is_git_directory(".")) {
-		set_git_dir(".", 0);
-		check_repository_format(NULL);
-		return path;
-	}
-
-	return NULL;
-}
-
 int calc_shared_perm(struct repository *repo,
 		     int mode)
 {
diff --git a/path.h b/path.h
index e67348f25397cc..0ec95a0b079c90 100644
--- a/path.h
+++ b/path.h
@@ -146,21 +146,6 @@ int adjust_shared_perm(struct repository *repo, const char *path);
 
 char *interpolate_path(const char *path, int real_home);
 
-/* The bits are as follows:
- *
- * - ENTER_REPO_STRICT: callers that require exact paths (as opposed
- *   to allowing known suffixes like ".git", ".git/.git" to be
- *   omitted) can set this bit.
- *
- * - ENTER_REPO_ANY_OWNER_OK: callers that are willing to run without
- *   ownership check can set this bit.
- */
-enum {
-	ENTER_REPO_STRICT = (1<<0),
-	ENTER_REPO_ANY_OWNER_OK = (1<<1),
-};
-
-const char *enter_repo(const char *path, unsigned flags);
 const char *remove_leading_path(const char *in, const char *prefix);
 const char *relative_path(const char *in, const char *prefix, struct strbuf *sb);
 int normalize_path_copy_len(char *dst, const char *src, int *prefix_len);
diff --git a/setup.c b/setup.c
index 7086741e6c2d1f..98c6fd8ee4c02d 100644
--- a/setup.c
+++ b/setup.c
@@ -1703,6 +1703,87 @@ void set_git_dir(const char *path, int make_realpath)
 	strbuf_release(&realpath);
 }
 
+const char *enter_repo(const char *path, unsigned flags)
+{
+	static struct strbuf validated_path = STRBUF_INIT;
+	static struct strbuf used_path = STRBUF_INIT;
+
+	if (!path)
+		return NULL;
+
+	if (!(flags & ENTER_REPO_STRICT)) {
+		static const char *suffix[] = {
+			"/.git", "", ".git/.git", ".git", NULL,
+		};
+		const char *gitfile;
+		int len = strlen(path);
+		int i;
+		while ((1 < len) && (path[len-1] == '/'))
+			len--;
+
+		/*
+		 * We can handle arbitrary-sized buffers, but this remains as a
+		 * sanity check on untrusted input.
+		 */
+		if (PATH_MAX <= len)
+			return NULL;
+
+		strbuf_reset(&used_path);
+		strbuf_reset(&validated_path);
+		strbuf_add(&used_path, path, len);
+		strbuf_add(&validated_path, path, len);
+
+		if (used_path.buf[0] == '~') {
+			char *newpath = interpolate_path(used_path.buf, 0);
+			if (!newpath)
+				return NULL;
+			strbuf_attach(&used_path, newpath, strlen(newpath),
+				      strlen(newpath));
+		}
+		for (i = 0; suffix[i]; i++) {
+			struct stat st;
+			size_t baselen = used_path.len;
+			strbuf_addstr(&used_path, suffix[i]);
+			if (!stat(used_path.buf, &st) &&
+			    (S_ISREG(st.st_mode) ||
+			    (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) {
+				strbuf_addstr(&validated_path, suffix[i]);
+				break;
+			}
+			strbuf_setlen(&used_path, baselen);
+		}
+		if (!suffix[i])
+			return NULL;
+		gitfile = read_gitfile(used_path.buf);
+		if (!(flags & ENTER_REPO_ANY_OWNER_OK))
+			die_upon_dubious_ownership(gitfile, NULL, used_path.buf);
+		if (gitfile) {
+			strbuf_reset(&used_path);
+			strbuf_addstr(&used_path, gitfile);
+		}
+		if (chdir(used_path.buf))
+			return NULL;
+		path = validated_path.buf;
+	}
+	else {
+		const char *gitfile = read_gitfile(path);
+		if (!(flags & ENTER_REPO_ANY_OWNER_OK))
+			die_upon_dubious_ownership(gitfile, NULL, path);
+		if (gitfile)
+			path = gitfile;
+		if (chdir(path))
+			return NULL;
+	}
+
+	if (is_git_directory(".")) {
+		set_git_dir(".", 0);
+		check_repository_format(NULL);
+		return path;
+	}
+
+	return NULL;
+}
+
 static int git_work_tree_initialized;
 
 /*
diff --git a/setup.h b/setup.h
index 8522fa8575da71..bfea199bcd8769 100644
--- a/setup.h
+++ b/setup.h
@@ -97,6 +97,44 @@ static inline int discover_git_directory(struct strbuf *commondir,
 void set_git_dir(const char *path, int make_realpath);
 void set_git_work_tree(const char *tree);
 
+/* Flags that can be passed to `enter_repo()`. */
+enum {
+	/*
+	 * Callers that require exact paths (as opposed to allowing known
+	 * suffixes like ".git", ".git/.git" to be omitted) can set this bit.
+	 */
+	ENTER_REPO_STRICT = (1<<0),
+
+	/*
+	 * Callers that are willing to run without ownership check can set this
+	 * bit.
+	 */
+	ENTER_REPO_ANY_OWNER_OK = (1<<1),
+};
+
+/*
+ * Discover and enter a repository.
+ *
+ * First, one directory to try is determined by the following algorithm.
+ *
+ * (0) If "strict" is given, the path is used as given and no DWIM is
+ *     done. Otherwise:
+ * (1) "~/path" to mean path under the running user's home directory;
+ * (2) "~user/path" to mean path under named user's home directory;
+ * (3) "relative/path" to mean cwd relative directory; or
+ * (4) "/absolute/path" to mean absolute directory.
+ *
+ * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git"
+ * in this order. We select the first one that is a valid git repository, and
+ * chdir() to it. If none match, or we fail to chdir, we return NULL.
+ *
+ * If all goes well, we return the directory we used to chdir() (but
+ * before ~user is expanded), avoiding getcwd() resolving symbolic
+ * links.  User relative paths are also returned as they are given,
+ * except DWIM suffixing.
+ */
+const char *enter_repo(const char *path, unsigned flags);
+
 const char *setup_git_directory_gently(int *);
 const char *setup_git_directory(void);
 char *prefix_path(const char *prefix, int len, const char *path);

From 7c188a9e45405ff911b81a5dd9029f4e91fb338e Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:50 +0100
Subject: [PATCH 15/35] setup: convert `set_git_dir()` to have file scope

We don't have any external callers of `set_git_dir()` anymore now that
`enter_repo()` has been moved into "setup.c". Remove the declaration and
mark the function as static.

Note that this change requires us to move the implementation around so
that we can avoid adding any new forward declarations.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 setup.c | 80 ++++++++++++++++++++++++++++-----------------------------
 setup.h |  1 -
 2 files changed, 40 insertions(+), 41 deletions(-)

diff --git a/setup.c b/setup.c
index 98c6fd8ee4c02d..8bf52df71663a3 100644
--- a/setup.c
+++ b/setup.c
@@ -1002,6 +1002,46 @@ const char *read_gitfile_gently(const char *path, int *return_error_code)
 	return error_code ? NULL : path;
 }
 
+static void set_git_dir_1(const char *path)
+{
+	xsetenv(GIT_DIR_ENVIRONMENT, path, 1);
+	setup_git_env(path);
+}
+
+static void update_relative_gitdir(const char *name UNUSED,
+				   const char *old_cwd,
+				   const char *new_cwd,
+				   void *data UNUSED)
+{
+	char *path = reparent_relative_path(old_cwd, new_cwd,
+					    repo_get_git_dir(the_repository));
+	struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
+
+	trace_printf_key(&trace_setup_key,
+			 "setup: move $GIT_DIR to '%s'",
+			 path);
+	set_git_dir_1(path);
+	if (tmp_objdir)
+		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
+	free(path);
+}
+
+static void set_git_dir(const char *path, int make_realpath)
+{
+	struct strbuf realpath = STRBUF_INIT;
+
+	if (make_realpath) {
+		strbuf_realpath(&realpath, path, 1);
+		path = realpath.buf;
+	}
+
+	set_git_dir_1(path);
+	if (!is_absolute_path(path))
+		chdir_notify_register(NULL, update_relative_gitdir, NULL);
+
+	strbuf_release(&realpath);
+}
+
 static const char *setup_explicit_git_dir(const char *gitdirenv,
 					  struct strbuf *cwd,
 					  struct repository_format *repo_fmt,
@@ -1663,46 +1703,6 @@ void setup_git_env(const char *git_dir)
 		fetch_if_missing = 0;
 }
 
-static void set_git_dir_1(const char *path)
-{
-	xsetenv(GIT_DIR_ENVIRONMENT, path, 1);
-	setup_git_env(path);
-}
-
-static void update_relative_gitdir(const char *name UNUSED,
-				   const char *old_cwd,
-				   const char *new_cwd,
-				   void *data UNUSED)
-{
-	char *path = reparent_relative_path(old_cwd, new_cwd,
-					    repo_get_git_dir(the_repository));
-	struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
-
-	trace_printf_key(&trace_setup_key,
-			 "setup: move $GIT_DIR to '%s'",
-			 path);
-	set_git_dir_1(path);
-	if (tmp_objdir)
-		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
-	free(path);
-}
-
-void set_git_dir(const char *path, int make_realpath)
-{
-	struct strbuf realpath = STRBUF_INIT;
-
-	if (make_realpath) {
-		strbuf_realpath(&realpath, path, 1);
-		path = realpath.buf;
-	}
-
-	set_git_dir_1(path);
-	if (!is_absolute_path(path))
-		chdir_notify_register(NULL, update_relative_gitdir, NULL);
-
-	strbuf_release(&realpath);
-}
-
 const char *enter_repo(const char *path, unsigned flags)
 {
 	static struct strbuf validated_path = STRBUF_INIT;
diff --git a/setup.h b/setup.h
index bfea199bcd8769..d55dcc66086308 100644
--- a/setup.h
+++ b/setup.h
@@ -94,7 +94,6 @@ static inline int discover_git_directory(struct strbuf *commondir,
 	return 0;
 }
 
-void set_git_dir(const char *path, int make_realpath);
 void set_git_work_tree(const char *tree);
 
 /* Flags that can be passed to `enter_repo()`. */

From 9aaba579932781c74f67d6cecddaad59f0daaaef Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:51 +0100
Subject: [PATCH 16/35] odb: adopt logic to close object databases

The logic to close an object database is currently contained in the
packfile subsystem. That choice is somewhat relatable, as most of the
logic really is to close resources associated with the packfile store
itself. But we also end up handling object sources and commit graphs,
which certainly is not related to packfiles.

Move the function into the object database subsystem and rename it to
`odb_close()`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/clone.c  |  2 +-
 builtin/gc.c     |  2 +-
 builtin/repack.c |  2 +-
 midx-write.c     |  2 +-
 odb.c            | 18 +++++++++++++++++-
 odb.h            |  7 +++++++
 packfile.c       | 15 ---------------
 packfile.h       |  1 -
 run-command.c    |  2 +-
 scalar.c         |  2 +-
 10 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index c990f398ef6f37..b19b302b065467 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -1617,7 +1617,7 @@ int cmd_clone(int argc,
 	transport_disconnect(transport);
 
 	if (option_dissociate) {
-		close_object_store(the_repository->objects);
+		odb_close(the_repository->objects);
 		dissociate_from_references();
 	}
 
diff --git a/builtin/gc.c b/builtin/gc.c
index d212cbb9b84781..961fa343c4b180 100644
--- a/builtin/gc.c
+++ b/builtin/gc.c
@@ -1048,7 +1048,7 @@ int cmd_gc(int argc,
 	report_garbage = report_pack_garbage;
 	odb_reprepare(the_repository->objects);
 	if (pack_garbage.nr > 0) {
-		close_object_store(the_repository->objects);
+		odb_close(the_repository->objects);
 		clean_pack_garbage();
 	}
 
diff --git a/builtin/repack.c b/builtin/repack.c
index cfdb4c0920b191..d9012141f699c9 100644
--- a/builtin/repack.c
+++ b/builtin/repack.c
@@ -488,7 +488,7 @@ int cmd_repack(int argc,
 
 	string_list_sort(&names);
 
-	close_object_store(repo->objects);
+	odb_close(repo->objects);
 
 	/*
 	 * Ok we have prepared all new packfiles.
diff --git a/midx-write.c b/midx-write.c
index c73010df6d3a4f..60497586fdf2f4 100644
--- a/midx-write.c
+++ b/midx-write.c
@@ -1459,7 +1459,7 @@ static int write_midx_internal(struct odb_source *source,
 	}
 
 	if (ctx.m || ctx.base_midx)
-		close_object_store(ctx.repo->objects);
+		odb_close(ctx.repo->objects);
 
 	if (commit_lock_file(&lk) < 0)
 		die_errno(_("could not write multi-pack-index"));
diff --git a/odb.c b/odb.c
index 3ec21ef24e16bb..bcefa5cede60b5 100644
--- a/odb.c
+++ b/odb.c
@@ -9,6 +9,7 @@
 #include "khash.h"
 #include "lockfile.h"
 #include "loose.h"
+#include "midx.h"
 #include "object-file-convert.h"
 #include "object-file.h"
 #include "odb.h"
@@ -1044,6 +1045,21 @@ struct object_database *odb_new(struct repository *repo)
 	return o;
 }
 
+void odb_close(struct object_database *o)
+{
+	struct odb_source *source;
+
+	packfile_store_close(o->packfiles);
+
+	for (source = o->sources; source; source = source->next) {
+		if (source->midx)
+			close_midx(source->midx);
+		source->midx = NULL;
+	}
+
+	close_commit_graph(o);
+}
+
 static void odb_free_sources(struct object_database *o)
 {
 	while (o->sources) {
@@ -1076,7 +1092,7 @@ void odb_clear(struct object_database *o)
 		free((char *) o->cached_objects[i].value.buf);
 	FREE_AND_NULL(o->cached_objects);
 
-	close_object_store(o);
+	odb_close(o);
 	packfile_store_free(o->packfiles);
 	o->packfiles = NULL;
 
diff --git a/odb.h b/odb.h
index 9bb28008b1d953..71b4897c82f3a8 100644
--- a/odb.h
+++ b/odb.h
@@ -169,6 +169,13 @@ struct object_database {
 struct object_database *odb_new(struct repository *repo);
 void odb_clear(struct object_database *o);
 
+/*
+ * Close the object database and all of its sources so that any held resources
+ * will be released. The database can still be used after closing it, in which
+ * case these resources may be reallocated.
+ */
+void odb_close(struct object_database *o);
+
 /*
  * Clear caches, reload alternates and then reload object sources so that new
  * objects may become accessible.
diff --git a/packfile.c b/packfile.c
index 40f733dd234900..af71eaf7e34461 100644
--- a/packfile.c
+++ b/packfile.c
@@ -359,21 +359,6 @@ void close_pack(struct packed_git *p)
 	oidset_clear(&p->bad_objects);
 }
 
-void close_object_store(struct object_database *o)
-{
-	struct odb_source *source;
-
-	packfile_store_close(o->packfiles);
-
-	for (source = o->sources; source; source = source->next) {
-		if (source->midx)
-			close_midx(source->midx);
-		source->midx = NULL;
-	}
-
-	close_commit_graph(o);
-}
-
 void unlink_pack_path(const char *pack_name, int force_delete)
 {
 	static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"};
diff --git a/packfile.h b/packfile.h
index 58fcc88e20224b..d9226a072ac96d 100644
--- a/packfile.h
+++ b/packfile.h
@@ -279,7 +279,6 @@ struct object_database;
 unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *);
 void close_pack_windows(struct packed_git *);
 void close_pack(struct packed_git *);
-void close_object_store(struct object_database *o);
 void unuse_pack(struct pack_window **);
 void clear_delta_base_cache(void);
 struct packed_git *add_packed_git(struct repository *r, const char *path,
diff --git a/run-command.c b/run-command.c
index ed9575bd6a8cbb..e3e02475ccec50 100644
--- a/run-command.c
+++ b/run-command.c
@@ -743,7 +743,7 @@ int start_command(struct child_process *cmd)
 	fflush(NULL);
 
 	if (cmd->close_object_store)
-		close_object_store(the_repository->objects);
+		odb_close(the_repository->objects);
 
 #ifndef GIT_WINDOWS_NATIVE
 {
diff --git a/scalar.c b/scalar.c
index f7543116272b77..2aeb191cc89b72 100644
--- a/scalar.c
+++ b/scalar.c
@@ -931,7 +931,7 @@ static int cmd_delete(int argc, const char **argv)
 	if (dir_inside_of(cwd, enlistment.buf) >= 0)
 		res = error(_("refusing to delete current working directory"));
 	else {
-		close_object_store(the_repository->objects);
+		odb_close(the_repository->objects);
 		res = delete_enlistment(&enlistment);
 	}
 	strbuf_release(&enlistment);

From f8bdf3127ab7df8a8f3039f41889b35eefe029a3 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:52 +0100
Subject: [PATCH 17/35] odb: refactor `odb_clear()` to `odb_free()`

The function `odb_clear()` releases all resources allocated to an object
database and ensures that all fields become zero'd out. Despite its
naming though it doesn't really clear the object database so that it
becomes ready for reuse afterwards again -- the caller would first have
to reinitialize it, and that contradicts the terminology of "clearing"
as we have defined it in our coding guidelines.

There isn't really only a reason to have "clearing" semantics, either.
There's only a single caller of `odb_clear()`, and that caller also ends
up freeing the object database structure itself.

Refactor the function to have "freeing" semantics instead, so that the
structure itself is also freed, which allows us to drop some useless
boilerplate to zero out the structure's members.

This refactoring reveals that we're trying to close the commit graph
multiple times: once directly via `free_commit_graph()`, and once via
`odb_close()`. Drop the former call.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c        | 19 ++++++++-----------
 odb.h        |  4 +++-
 repository.c |  4 ++--
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/odb.c b/odb.c
index bcefa5cede60b5..29cf6496c5e50a 100644
--- a/odb.c
+++ b/odb.c
@@ -1073,30 +1073,27 @@ static void odb_free_sources(struct object_database *o)
 	o->source_by_path = NULL;
 }
 
-void odb_clear(struct object_database *o)
+void odb_free(struct object_database *o)
 {
-	FREE_AND_NULL(o->alternate_db);
+	if (!o)
+		return;
+
+	free(o->alternate_db);
 
 	oidmap_clear(&o->replace_map, 1);
 	pthread_mutex_destroy(&o->replace_mutex);
 
-	free_commit_graph(o->commit_graph);
-	o->commit_graph = NULL;
-	o->commit_graph_attempted = 0;
-
 	odb_free_sources(o);
-	o->sources_tail = NULL;
-	o->loaded_alternates = 0;
 
 	for (size_t i = 0; i < o->cached_object_nr; i++)
 		free((char *) o->cached_objects[i].value.buf);
-	FREE_AND_NULL(o->cached_objects);
+	free(o->cached_objects);
 
 	odb_close(o);
 	packfile_store_free(o->packfiles);
-	o->packfiles = NULL;
-
 	string_list_clear(&o->submodule_source_paths, 0);
+
+	free(o);
 }
 
 void odb_reprepare(struct object_database *o)
diff --git a/odb.h b/odb.h
index 71b4897c82f3a8..77b313b784cad3 100644
--- a/odb.h
+++ b/odb.h
@@ -167,7 +167,9 @@ struct object_database {
 };
 
 struct object_database *odb_new(struct repository *repo);
-void odb_clear(struct object_database *o);
+
+/* Free the object database and release all resources. */
+void odb_free(struct object_database *o);
 
 /*
  * Close the object database and all of its sources so that any held resources
diff --git a/repository.c b/repository.c
index 6aaa7ba00869bf..3c8b3813b00af0 100644
--- a/repository.c
+++ b/repository.c
@@ -382,8 +382,8 @@ void repo_clear(struct repository *repo)
 	FREE_AND_NULL(repo->worktree);
 	FREE_AND_NULL(repo->submodule_prefix);
 
-	odb_clear(repo->objects);
-	FREE_AND_NULL(repo->objects);
+	odb_free(repo->objects);
+	repo->objects = NULL;
 
 	parsed_object_pool_clear(repo->parsed_objects);
 	FREE_AND_NULL(repo->parsed_objects);

From fbf3d0669f830b4492070aa33f57dbf2c43fa4c8 Mon Sep 17 00:00:00 2001
From: Kristoffer Haugsbakk <code@khaugsbakk.name>
Date: Thu, 20 Nov 2025 17:26:49 +0100
Subject: [PATCH 18/35] doc: warn against --committer-date-is-author-date
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This option could create a commit history which violates the assumption
that commits have non-decreasing commit timestamps. Warn against that in
both git-am(1) and git-rebase(1).

The genesis of this option is from git-am(1) and was added in
3f01ad66 (am: Add --committer-date-is-author-date option,
2009-01-22). The commit message doesn’t give us an example
of a use case, but the thread starter does:[1]

    I've a big set of patches in a mbox file: there's sufficient info
    inside for git-am to work.

    Yet, each time I do import these, my sha1sums are changing because of
    different commit dates.

    I'd like to force the commit date to match the info/date from the time
    I received the email (and therefore always get back the right
    sha1sums).

[1]: https://lore.kernel.org/git/46d6db660901221441q60eb90bdge601a7a250c3a247@mail.gmail.com/

So the motivation was to treat git-am(1) as an import command that
creates the same commit IDs.

Putting aside the question of whether you should be using git-am(1) for
importing commits, this approach is problematic:

• you still need to apply the commits to the same base if you want the
  same hashes; and
• you need the same committer.

And if you expect the same committer, why is this person applying the
same patches multiple times with the goal of making *identical* commits?

That was all for git-am(1).

It was added to git-rebase(1) in 570ccad3 (rebase: add options passed to
git-am, 2009-03-18)[2] in order to plug options that could not be sent
on to git-am(1). At this point the utility of the option graduated to
making no sense; a use case for `git rebase --committer-date-is-author-
date` is still yet to be found.

Just warn against using this option on both commands and remind the user
to consider whether they really need it.

† 2: See also 7573cec5 (rebase -i: support
     --committer-date-is-author-date, 2020-08-17) for the commit for the
     merge backend

Suggested-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Kristoffer Haugsbakk <code@khaugsbakk.name>
Acked-by: Johannes Sixt <j6t@kdbg.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-am.adoc     | 7 +++++++
 Documentation/git-rebase.adoc | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc
index 221070de481227..264d21a7de7801 100644
--- a/Documentation/git-am.adoc
+++ b/Documentation/git-am.adoc
@@ -161,6 +161,13 @@ Valid <action> for the `--whitespace` option are:
 	commit creation as the committer date. This allows the
 	user to lie about the committer date by using the same
 	value as the author date.
++
+WARNING: The history walking machinery assumes that commits have
+non-decreasing commit timestamps. You should consider if you really need
+to use this option. Then you should only use this option to override the
+committer date when applying commits on top of a base which commit is
+older (in terms of the commit date) than the oldest patch you are
+applying.
 
 --ignore-date::
 	By default the command records the date from the e-mail
diff --git a/Documentation/git-rebase.adoc b/Documentation/git-rebase.adoc
index 956d3048f5a618..0f808c82b2877b 100644
--- a/Documentation/git-rebase.adoc
+++ b/Documentation/git-rebase.adoc
@@ -507,6 +507,13 @@ See also INCOMPATIBLE OPTIONS below.
 	Instead of using the current time as the committer date, use
 	the author date of the commit being rebased as the committer
 	date. This option implies `--force-rebase`.
++
+WARNING: The history walking machinery assumes that commits have
+non-decreasing commit timestamps. You should consider if you really need
+to use this option. Then you should only use this option to override the
+committer date when rebasing commits on top of a base which commit is
+older (in terms of the commit date) than the oldest commit you are
+applying (in terms of the author date).
 
 --ignore-date::
 --reset-author-date::

From 770afe443784b3ec2c72d68aa509e48064942348 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Thu, 20 Nov 2025 11:32:45 -0800
Subject: [PATCH 19/35] config: mark otherwise unused function as file-scope
 static

git_configset_get_pathname() is only used once inside config.c; we do
not have to expose it as a public function.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 config.c | 2 +-
 config.h | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/config.c b/config.c
index 73fc74c8fa1a35..6552e5b0b80202 100644
--- a/config.c
+++ b/config.c
@@ -1954,7 +1954,7 @@ int git_configset_get_maybe_bool(struct config_set *set, const char *key, int *d
 		return 1;
 }
 
-int git_configset_get_pathname(struct config_set *set, const char *key, char **dest)
+static int git_configset_get_pathname(struct config_set *set, const char *key, char **dest)
 {
 	const char *value;
 	if (!git_configset_get_value(set, key, &value, NULL))
diff --git a/config.h b/config.h
index 19c87fc0bc1a2a..ba426a960af9f4 100644
--- a/config.h
+++ b/config.h
@@ -564,7 +564,6 @@ int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned lon
 int git_configset_get_bool(struct config_set *cs, const char *key, int *dest);
 int git_configset_get_bool_or_int(struct config_set *cs, const char *key, int *is_bool, int *dest);
 int git_configset_get_maybe_bool(struct config_set *cs, const char *key, int *dest);
-int git_configset_get_pathname(struct config_set *cs, const char *key, char **dest);
 
 /**
  * Run only the discover part of the repo_config_get_*() functions

From df963f0df4756fa751bfbb39e104d004e3f7d60b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Mon, 24 Nov 2025 21:33:24 +0100
Subject: [PATCH 20/35] config: fix suggestion for failed set of multi-valued
 option
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The command "git config set <name> <value>" fails for an option that has
multiple values.  List the "git config set" flags that can be used,
instead of old-style "git config" actions.

Reported-by: Paul Wintz <pwintz@ucsc.edu>
Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/config.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/config.c b/builtin/config.c
index 59fb113b073926..ef1c1a9cf2c0c8 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -974,7 +974,7 @@ static int cmd_config_set(int argc, const char **argv, const char *prefix,
 						     argv[0], comment, value);
 		if (ret == CONFIG_NOTHING_SET)
 			error(_("cannot overwrite multiple values with a single value\n"
-			"       Use a regexp, --add or --replace-all to change %s."), argv[0]);
+			"       Use --value=<pattern>, --append or --all to change %s."), argv[0]);
 	}
 
 	location_options_release(&location_opts);

From 18bf67b7537f8ff0cd772847aa03f9cc319b1346 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= <l.s.r@web.de>
Date: Mon, 24 Nov 2025 22:00:05 +0100
Subject: [PATCH 21/35] config: fix short help of unset flags
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The flags --all and --value of "git config unset" don't make the command
"replace" or "show" anything, they are about selecting what to unset.
Change their help text accordingly.

Signed-off-by: René Scharfe <l.s.r@web.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/config.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/config.c b/builtin/config.c
index f70d6354772259..f1aa4e21ffd51f 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -992,8 +992,8 @@ static int cmd_config_unset(int argc, const char **argv, const char *prefix,
 	struct option opts[] = {
 		CONFIG_LOCATION_OPTIONS(location_opts),
 		OPT_GROUP(N_("Filter")),
-		OPT_BIT(0, "all", &flags, N_("replace multi-valued config option with new value"), CONFIG_FLAGS_MULTI_REPLACE),
-		OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("show config with values matching the pattern")),
+		OPT_BIT(0, "all", &flags, N_("unset all multi-valued config options"), CONFIG_FLAGS_MULTI_REPLACE),
+		OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("unset multi-valued config options with matching values")),
 		OPT_BIT(0, "fixed-value", &flags, N_("use string equality when comparing values to value pattern"), CONFIG_FLAGS_FIXED_VALUE),
 		OPT_END(),
 	};

From ce1a5a22a5beefac8a52da518855b5aecc562874 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Thu, 20 Nov 2025 11:34:50 -0800
Subject: [PATCH 22/35] config: really pretend missing :(optional) value is not
 there

Earlier we added support for a value spelled as ":(optional)path"
for configuration variables whose values are of type "path", with
the documented semantics "if the path is missing, behave as if such
a variable definition is not even there."

This has worked OK for code paths that reads configuration files and
stores the configured value as a string, where NULL in such a string
is treated as if the setting is not there, left as the default.

However, there are other code paths that do not _ignore_ such NULL
values and misbehave.  "git config get --path" is one of them.

When git_config_pathname() helper function finds that the value of
the variable is an optional path *and* the path is missing, it
leaves the destination pointer intact (which usually is left to
NULL) and returns 0 to signal a success.  format_config() helper
however assumed that the destination pointer always gets a string,
which no longer is the case, and segfaulted.

Make sure that git_config_pathname() clears the destination pointer
in such a case, and teach format_config() to react to the condition
by returning 1 (which is different from 0 that is a normal success
and negative that is an error) to its callers.  Adjust the callers
to react to this new return value that tells them to pretend as if
they did not even see this partcular <key, value> pair.

Reported-by: Han Jiang <jhcarl0814@gmail.com>
Helped-by: Jeff King <peff@peff.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/config.c           | 45 ++++++++++++++++++++++++++++++--------
 config.c                   |  1 +
 t/meson.build              |  1 +
 t/t1311-config-optional.sh | 38 ++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 9 deletions(-)
 create mode 100755 t/t1311-config-optional.sh

diff --git a/builtin/config.c b/builtin/config.c
index 59fb113b073926..2b36eb7d1cd204 100644
--- a/builtin/config.c
+++ b/builtin/config.c
@@ -261,6 +261,12 @@ struct strbuf_list {
 	int alloc;
 };
 
+/*
+ * Format the configuration key-value pair (`key_`, `value_`) and
+ * append it into strbuf `buf`.  Returns a negative value on failure,
+ * 0 on success, 1 on a missing optional value (i.e., telling the
+ * caller to pretend that <key_,value_> did not exist).
+ */
 static int format_config(const struct config_display_options *opts,
 			 struct strbuf *buf, const char *key_,
 			 const char *value_, const struct key_value_info *kvi)
@@ -299,7 +305,10 @@ static int format_config(const struct config_display_options *opts,
 			char *v;
 			if (git_config_pathname(&v, key_, value_) < 0)
 				return -1;
-			strbuf_addstr(buf, v);
+			if (v)
+				strbuf_addstr(buf, v);
+			else
+				return 1; /* :(optional)no-such-file */
 			free((char *)v);
 		} else if (opts->type == TYPE_EXPIRY_DATE) {
 			timestamp_t t;
@@ -344,6 +353,7 @@ static int collect_config(const char *key_, const char *value_,
 	struct collect_config_data *data = cb;
 	struct strbuf_list *values = data->values;
 	const struct key_value_info *kvi = ctx->kvi;
+	int status;
 
 	if (!(data->get_value_flags & GET_VALUE_KEY_REGEXP) &&
 	    strcmp(key_, data->key))
@@ -361,8 +371,15 @@ static int collect_config(const char *key_, const char *value_,
 	ALLOC_GROW(values->items, values->nr + 1, values->alloc);
 	strbuf_init(&values->items[values->nr], 0);
 
-	return format_config(data->display_opts, &values->items[values->nr++],
-			     key_, value_, kvi);
+	status = format_config(data->display_opts, &values->items[values->nr++],
+			       key_, value_, kvi);
+	if (status < 0)
+		return status;
+	if (status) {
+		strbuf_release(&values->items[--values->nr]);
+		status = 0;
+	}
+	return status;
 }
 
 static int get_value(const struct config_location_options *opts,
@@ -438,15 +455,23 @@ static int get_value(const struct config_location_options *opts,
 	if (!values.nr && display_opts->default_value) {
 		struct key_value_info kvi = KVI_INIT;
 		struct strbuf *item;
+		int status;
 
 		kvi_from_param(&kvi);
 		ALLOC_GROW(values.items, values.nr + 1, values.alloc);
 		item = &values.items[values.nr++];
 		strbuf_init(item, 0);
-		if (format_config(display_opts, item, key_,
-				  display_opts->default_value, &kvi) < 0)
+
+		status = format_config(display_opts, item, key_,
+				       display_opts->default_value, &kvi);
+		if (status < 0)
 			die(_("failed to format default config value: %s"),
 			    display_opts->default_value);
+		if (status) {
+			/* default was a missing optional value */
+			values.nr--;
+			strbuf_release(item);
+		}
 	}
 
 	ret = !values.nr;
@@ -706,11 +731,13 @@ static int get_urlmatch(const struct config_location_options *opts,
 	for_each_string_list_item(item, &values) {
 		struct urlmatch_current_candidate_value *matched = item->util;
 		struct strbuf buf = STRBUF_INIT;
+		int status;
 
-		format_config(&display_opts, &buf, item->string,
-			      matched->value_is_null ? NULL : matched->value.buf,
-			      &matched->kvi);
-		fwrite(buf.buf, 1, buf.len, stdout);
+		status = format_config(&display_opts, &buf, item->string,
+				       matched->value_is_null ? NULL : matched->value.buf,
+				       &matched->kvi);
+		if (!status)
+			fwrite(buf.buf, 1, buf.len, stdout);
 		strbuf_release(&buf);
 
 		strbuf_release(&matched->value);
diff --git a/config.c b/config.c
index 6552e5b0b80202..c6ef290011b752 100644
--- a/config.c
+++ b/config.c
@@ -1292,6 +1292,7 @@ int git_config_pathname(char **dest, const char *var, const char *value)
 
 	if (is_optional && is_missing_file(path)) {
 		free(path);
+		*dest = NULL;
 		return 0;
 	}
 
diff --git a/t/meson.build b/t/meson.build
index bbeba1a8d50e1b..137c0caea0e1b5 100644
--- a/t/meson.build
+++ b/t/meson.build
@@ -182,6 +182,7 @@ integration_tests = [
   't1308-config-set.sh',
   't1309-early-config.sh',
   't1310-config-default.sh',
+  't1311-config-optional.sh',
   't1350-config-hooks-path.sh',
   't1400-update-ref.sh',
   't1401-symbolic-ref.sh',
diff --git a/t/t1311-config-optional.sh b/t/t1311-config-optional.sh
new file mode 100755
index 00000000000000..fbbacfc67b368d
--- /dev/null
+++ b/t/t1311-config-optional.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+#
+# Copyright (c) 2025 Google LLC
+#
+
+test_description=':(optional) paths'
+
+. ./test-lib.sh
+
+test_expect_success 'var=:(optional)path-exists' '
+	test_config a.path ":(optional)path-exists" &&
+	>path-exists &&
+	echo path-exists >expect &&
+
+	git config get --path a.path >actual &&
+	test_cmp expect actual
+'
+
+test_expect_success 'missing optional value is ignored' '
+	test_config a.path ":(optional)no-such-path" &&
+	# Using --show-scope ensures we skip writing not only the value
+	# but also any meta-information about the ignored key.
+	test_must_fail git config get --show-scope --path a.path >actual &&
+	test_line_count = 0 actual
+'
+
+test_expect_success 'missing optional value is ignored in multi-value config' '
+	test_when_finished "git config unset --all a.path" &&
+	git config set --append a.path ":(optional)path-exists" &&
+	git config set --append a.path ":(optional)no-such-path" &&
+	>path-exists &&
+	echo path-exists >expect &&
+
+	git config --get --path a.path >actual &&
+	test_cmp expect actual
+'
+
+test_done

From 0bd16856ffb3968de73699ad0555d1fae6c45406 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Thu, 20 Nov 2025 11:45:35 -0800
Subject: [PATCH 23/35] config: really treat missing optional path as not
 configured

These callers expect that git_config_pathname() that returns 0 is a
signal that the variable they passed has a string they need to act
on.  But with the introduction of ":(optional)path" earlier, that is
no longer the case.  If the path specified by the configuration
variable is missing, their variable will get a NULL in it, and they
need to act on it (often, just refraining from copying it elsewhere).

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/blame.c        |  3 ++-
 builtin/receive-pack.c |  5 +++--
 fetch-pack.c           |  5 +++--
 fsck.c                 | 12 +++++++-----
 gpg-interface.c        | 10 +++++++++-
 setup.c                |  2 +-
 6 files changed, 25 insertions(+), 12 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 5b10e84b664228..10928341442e0f 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -739,7 +739,8 @@ static int git_blame_config(const char *var, const char *value,
 		ret = git_config_pathname(&str, var, value);
 		if (ret)
 			return ret;
-		string_list_insert(&ignore_revs_file_list, str);
+		if (str)
+			string_list_insert(&ignore_revs_file_list, str);
 		free(str);
 		return 0;
 	}
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 1113137a6f0b3f..471857335429f8 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -177,8 +177,9 @@ static int receive_pack_config(const char *var, const char *value,
 
 		if (git_config_pathname(&path, var, value))
 			return -1;
-		strbuf_addf(&fsck_msg_types, "%cskiplist=%s",
-			fsck_msg_types.len ? ',' : '=', path);
+		if (path)
+			strbuf_addf(&fsck_msg_types, "%cskiplist=%s",
+				    fsck_msg_types.len ? ',' : '=', path);
 		free(path);
 		return 0;
 	}
diff --git a/fetch-pack.c b/fetch-pack.c
index 46c39f85c4ca9e..33a3f20bfc4d27 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1872,8 +1872,9 @@ int fetch_pack_fsck_config(const char *var, const char *value,
 
 		if (git_config_pathname(&path, var, value))
 			return -1;
-		strbuf_addf(msg_types, "%cskiplist=%s",
-			msg_types->len ? ',' : '=', path);
+		if (path)
+			strbuf_addf(msg_types, "%cskiplist=%s",
+				    msg_types->len ? ',' : '=', path);
 		free(path);
 		return 0;
 	}
diff --git a/fsck.c b/fsck.c
index 171b424dd57de1..0c287699d28267 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1351,14 +1351,16 @@ int git_fsck_config(const char *var, const char *value,
 
 	if (strcmp(var, "fsck.skiplist") == 0) {
 		char *path;
-		struct strbuf sb = STRBUF_INIT;
 
 		if (git_config_pathname(&path, var, value))
 			return -1;
-		strbuf_addf(&sb, "skiplist=%s", path);
-		free(path);
-		fsck_set_msg_types(options, sb.buf);
-		strbuf_release(&sb);
+		if (path) {
+			struct strbuf sb = STRBUF_INIT;
+			strbuf_addf(&sb, "skiplist=%s", path);
+			free(path);
+			fsck_set_msg_types(options, sb.buf);
+			strbuf_release(&sb);
+		}
 		return 0;
 	}
 
diff --git a/gpg-interface.c b/gpg-interface.c
index 06e7fb50603d22..8b91a11a430a35 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -794,8 +794,16 @@ static int git_gpg_config(const char *var, const char *value,
 		fmtname = "ssh";
 
 	if (fmtname) {
+		char *program;
+		int status;
+
 		fmt = get_format_by_name(fmtname);
-		return git_config_pathname((char **) &fmt->program, var, value);
+		status = git_config_pathname(&program, var, value);
+		if (status)
+			return status;
+		if (program)
+			fmt->program = program;
+		return status;
 	}
 
 	return 0;
diff --git a/setup.c b/setup.c
index 98ddbf377f923b..18301b5907dc20 100644
--- a/setup.c
+++ b/setup.c
@@ -1248,7 +1248,7 @@ static int safe_directory_cb(const char *key, const char *value,
 	} else {
 		char *allowed = NULL;
 
-		if (!git_config_pathname(&allowed, key, value)) {
+		if (!git_config_pathname(&allowed, key, value) && allowed) {
 			char *normalized = NULL;
 
 			/*

From b67b2d9fb7ccfaa72446d76abc8c36849d2e0685 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:53 +0100
Subject: [PATCH 24/35] odb: move logic to disable ref updates into repo

Our object database sources have a field `disable_ref_updates`. This
field can obviously be set to disable reference updates, but it is
somewhat curious that this logic is hosted by the object database.

The reason for this is that it was primarily added to keep us from
accidentally updating references while an ODB transaction is ongoing.
Any objects part of the transaction have not yet been committed to disk,
so new references that point to them might get corrupted in case we
never end up committing the transaction. As such, whenever we create a
new transaction we set up a new temporary ODB source and mark it as
disabling reference updates.

This has one (and only one?) upside: once we have committed the
transaction, the temporary source will be dropped and thus we clean up
the disabled reference updates automatically. But other than that, it's
somewhat misdesigned:

  - We can have multiple ODB sources, but only the currently active
    source inhibits reference updates.

  - We're mixing concerns of the refdb with the ODB.

Arguably, the decision of whether we can update references or not should
be handled by the refdb. But that wouldn't be a great fit either, as
there can be one refdb per worktree. So we'd again have the same problem
that a "global" intent becomes localized to a specific instance.

Instead, move the setting into the repository. While at it, convert it
into a boolean.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c        | 3 ++-
 odb.h        | 7 -------
 refs.c       | 2 +-
 repository.c | 2 +-
 repository.h | 9 ++++++++-
 setup.c      | 2 +-
 6 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/odb.c b/odb.c
index 29cf6496c5e50a..ccc6e999e7ae25 100644
--- a/odb.c
+++ b/odb.c
@@ -360,7 +360,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb,
 	 * Disable ref updates while a temporary odb is active, since
 	 * the objects in the database may roll back.
 	 */
-	source->disable_ref_updates = 1;
+	odb->repo->disable_ref_updates = true;
 	source->will_destroy = will_destroy;
 	source->next = odb->sources;
 	odb->sources = source;
@@ -387,6 +387,7 @@ void odb_restore_primary_source(struct object_database *odb,
 	if (cur_source->next != restore_source)
 		BUG("we expect the old primary object store to be the first alternate");
 
+	odb->repo->disable_ref_updates = false;
 	odb->sources = restore_source;
 	odb_source_free(cur_source);
 }
diff --git a/odb.h b/odb.h
index 77b313b784cad3..99c4d489729459 100644
--- a/odb.h
+++ b/odb.h
@@ -66,13 +66,6 @@ struct odb_source {
 	 */
 	bool local;
 
-	/*
-	 * This is a temporary object store created by the tmp_objdir
-	 * facility. Disable ref updates since the objects in the store
-	 * might be discarded on rollback.
-	 */
-	int disable_ref_updates;
-
 	/*
 	 * This object store is ephemeral, so there is no need to fsync.
 	 */
diff --git a/refs.c b/refs.c
index 965381367e0e53..6c7283d9eb2aa8 100644
--- a/refs.c
+++ b/refs.c
@@ -2491,7 +2491,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction,
 		break;
 	}
 
-	if (refs->repo->objects->sources->disable_ref_updates) {
+	if (refs->repo->disable_ref_updates) {
 		strbuf_addstr(err,
 			      _("ref updates forbidden inside quarantine environment"));
 		return -1;
diff --git a/repository.c b/repository.c
index 3c8b3813b00af0..455c2d279fb8ab 100644
--- a/repository.c
+++ b/repository.c
@@ -179,7 +179,7 @@ void repo_set_gitdir(struct repository *repo,
 		repo->objects->sources->path = objects_path;
 	}
 
-	repo->objects->sources->disable_ref_updates = o->disable_ref_updates;
+	repo->disable_ref_updates = o->disable_ref_updates;
 
 	free(repo->objects->alternate_db);
 	repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
diff --git a/repository.h b/repository.h
index 5808a5d610846a..614649413b68bc 100644
--- a/repository.h
+++ b/repository.h
@@ -71,6 +71,13 @@ struct repository {
 	 */
 	struct ref_store *refs_private;
 
+	/*
+	 * Disable ref updates. This is especially used in contexts where
+	 * transactions may still be rolled back so that we don't start to
+	 * reference objects that may vanish.
+	 */
+	bool disable_ref_updates;
+
 	/*
 	 * A strmap of ref_stores, stored by submodule name, accessible via
 	 * `repo_get_submodule_ref_store()`.
@@ -187,7 +194,7 @@ struct set_gitdir_args {
 	const char *graft_file;
 	const char *index_file;
 	const char *alternate_db;
-	int disable_ref_updates;
+	bool disable_ref_updates;
 };
 
 void repo_set_gitdir(struct repository *repo, const char *root,
diff --git a/setup.c b/setup.c
index 8bf52df71663a3..a752e9fc8476a0 100644
--- a/setup.c
+++ b/setup.c
@@ -1682,7 +1682,7 @@ void setup_git_env(const char *git_dir)
 	args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT);
 	args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT);
 	if (getenv(GIT_QUARANTINE_ENVIRONMENT)) {
-		args.disable_ref_updates = 1;
+		args.disable_ref_updates = true;
 	}
 
 	repo_set_gitdir(the_repository, git_dir, &args);

From 5d795b34dcbf46039c3dda028bb8df8d75a5a9d0 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:54 +0100
Subject: [PATCH 25/35] oidset: introduce `oidset_equal()`

Introduce a new function that allows the caller to verify whether two
oidsets contain the exact same object IDs.

Note that this change requires us to change `oidset_iter_init()` to
accept a `const struct oidset`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 oidset.c | 16 ++++++++++++++++
 oidset.h |  9 +++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/oidset.c b/oidset.c
index 8d36aef8dca4fc..c8ff0b385c58ac 100644
--- a/oidset.c
+++ b/oidset.c
@@ -16,6 +16,22 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid)
 	return pos != kh_end(&set->set);
 }
 
+bool oidset_equal(const struct oidset *a, const struct oidset *b)
+{
+	struct oidset_iter iter;
+	struct object_id *a_oid;
+
+	if (oidset_size(a) != oidset_size(b))
+		return false;
+
+	oidset_iter_init(a, &iter);
+	while ((a_oid = oidset_iter_next(&iter)))
+		if (!oidset_contains(b, a_oid))
+			return false;
+
+	return true;
+}
+
 int oidset_insert(struct oidset *set, const struct object_id *oid)
 {
 	int added;
diff --git a/oidset.h b/oidset.h
index 0106b6f2787f0e..e0f1a6ff4ff203 100644
--- a/oidset.h
+++ b/oidset.h
@@ -38,6 +38,11 @@ void oidset_init(struct oidset *set, size_t initial_size);
  */
 int oidset_contains(const struct oidset *set, const struct object_id *oid);
 
+/**
+ * Returns true iff `a` and `b` contain the exact same OIDs.
+ */
+bool oidset_equal(const struct oidset *a, const struct oidset *b);
+
 /**
  * Insert the oid into the set; a copy is made, so "oid" does not need
  * to persist after this function is called.
@@ -94,11 +99,11 @@ void oidset_parse_file_carefully(struct oidset *set, const char *path,
 				 oidset_parse_tweak_fn fn, void *cbdata);
 
 struct oidset_iter {
-	kh_oid_set_t *set;
+	const kh_oid_set_t *set;
 	khiter_t iter;
 };
 
-static inline void oidset_iter_init(struct oidset *set,
+static inline void oidset_iter_init(const struct oidset *set,
 				    struct oidset_iter *iter)
 {
 	iter->set = &set->set;

From 8dc22e87f000a092b62b4fb08e2542433f1ae192 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:55 +0100
Subject: [PATCH 26/35] builtin/index-pack: fix deferred fsck outside repos

When asked to perform object consistency checks via the `--fsck-objects`
flag we verify that each object part of the pack is valid. In general,
this check can even be performed outside of a Git repository: we don't
need an initialized object database as we simply read the object from
the packfile directly.

But there's one exception: a subset of the object checks may be deferred
to a later point in time. For now, this only concerns ".gitmodules" and
".gitattributes" files: whenever we see a tree referencing these files
we queue them for a deferred check. This is done because we need to do
some extra checks for those files to ensure that they are well-formed,
and these checks need to be done regardless of whether the corresponding
blobs are part of the packfile or not.

This works inside a repository, but unfortunately the logic leads to a
segfault when running outside of one. This is because we eventually call
`odb_read_object()`, which will crash because the object database has
not been initialized.

There's multiple options here:

  - We could in theory create a purely in-memory database with only a
    packfile store that contains the single packfile. We don't really
    have the infrastructure for this yet though, and it would end up
    being quite hacky.

  - We could refuse to perform consistency checks outside of a
    repository. But most of the checks work alright, so this would be a
    regression.

  - We can skip the finalizing consistency checks when running outside
    of a repository. This is not as invasive as skipping all checks,
    but it's not great to randomly skip a subset of tests, either.

None of these options really feel perfect. The first one would be the
obvious choice if easily possible.

There's another option though: instead of skipping the final object
checks, we can die if there are any queued object checks. With this
change we now die exactly if and only if we would have previously
segfaulted. Like this we ensure that objects that _may_ fail the
consistency checks won't be silently skipped, and at the same time we
give users a much better error message.

Refactor the code accordingly and add a test that would have triggered
the segfault. Note that we also move down the logic to add the packfile
to the store. There is no point doing this any earlier than right before
we execute `fsck_finish()`, and it ensures that the logic to set up and
perform the consistency check is self-contained.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/index-pack.c  | 21 ++++++++++++++++++---
 fsck.c                |  6 ++++++
 fsck.h                |  7 +++++++
 t/t5302-pack-index.sh | 16 ++++++++++++++++
 4 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index 2b78ba7fe4d14a..699fe678cd60b0 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -1640,7 +1640,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name,
 	rename_tmp_packfile(&final_index_name, curr_index_name, &index_name,
 			    hash, "idx", 1);
 
-	if (do_fsck_object)
+	if (do_fsck_object && startup_info->have_repository)
 		packfile_store_load_pack(the_repository->objects->packfiles,
 					 final_index_name, 0);
 
@@ -2110,8 +2110,23 @@ int cmd_index_pack(int argc,
 	else
 		close(input_fd);
 
-	if (do_fsck_object && fsck_finish(&fsck_options))
-		die(_("fsck error in pack objects"));
+	if (do_fsck_object) {
+		/*
+		 * We cannot perform queued consistency checks when running
+		 * outside of a repository because those require us to read
+		 * from the object database, which is uninitialized.
+		 *
+		 * TODO: we may eventually set up an in-memory object database,
+		 * which would allow us to perform these queued checks.
+		 */
+		if (!startup_info->have_repository &&
+		    fsck_has_queued_checks(&fsck_options))
+			die(_("cannot perform queued object checks outside "
+			      "of a repository"));
+
+		if (fsck_finish(&fsck_options))
+			die(_("fsck error in pack objects"));
+	}
 
 	free(opts.anomaly);
 	free(objects);
diff --git a/fsck.c b/fsck.c
index 341e100d24ece0..8e1565fe6d0133 100644
--- a/fsck.c
+++ b/fsck.c
@@ -1350,6 +1350,12 @@ int fsck_finish(struct fsck_options *options)
 	return ret;
 }
 
+bool fsck_has_queued_checks(struct fsck_options *options)
+{
+	return !oidset_equal(&options->gitmodules_found, &options->gitmodules_done) ||
+	       !oidset_equal(&options->gitattributes_found, &options->gitattributes_done);
+}
+
 void fsck_options_clear(struct fsck_options *options)
 {
 	free(options->msg_type);
diff --git a/fsck.h b/fsck.h
index cb6ef32f4f3aaa..336917c0451aac 100644
--- a/fsck.h
+++ b/fsck.h
@@ -248,6 +248,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer,
  */
 int fsck_finish(struct fsck_options *options);
 
+/*
+ * Check whether there are any checks that have been queued up and that still
+ * need to be run. Returns `false` iff `fsck_finish()` wouldn't perform any
+ * actions, `true` otherwise.
+ */
+bool fsck_has_queued_checks(struct fsck_options *options);
+
 /*
  * Clear the fsck_options struct, freeing any allocated memory.
  */
diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh
index 413c99274c8f30..9697448cb27634 100755
--- a/t/t5302-pack-index.sh
+++ b/t/t5302-pack-index.sh
@@ -293,4 +293,20 @@ test_expect_success 'too-large packs report the breach' '
 	grep "maximum allowed size (20 bytes)" err
 '
 
+# git-index-pack(1) uses the default hash algorithm outside of the repository,
+# and it has no way to tell it otherwise. So we can only run this test with the
+# default hash algorithm, as it would otherwise fail to parse the tree.
+test_expect_success DEFAULT_HASH_ALGORITHM 'index-pack --fsck-objects outside of a repo' '
+	test_when_finished "rm -rf repo" &&
+	git init repo &&
+	(
+		cd repo &&
+		printf "100644 blob $(test_oid 001)\t.gitattributes\n" >tree &&
+		git mktree --missing <tree >tree-oid &&
+		git pack-objects <tree-oid pack &&
+		test_must_fail nongit git index-pack --fsck-objects "$(pwd)"/pack-*.pack 2>err &&
+		test_grep "cannot perform queued object checks outside of a repository" err
+	)
+'
+
 test_done

From eea83c010cf431325a05f06cc7c64029538c8495 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:56 +0100
Subject: [PATCH 27/35] t/helper: stop setting up `the_repository` repeatedly

The "repository" test helper sets up `the_repository` twice. In fact
though, we don't even have to set it up even once: all we need is to set
up its hash algorithm, because we still depend on some subsystems that
aren't free of `the_repository`.

Refactor the code accordingly. This prepares for a subsequent change,
where setting up the repository repeatedly will lead to a `BUG()`.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 t/helper/test-repository.c | 16 ++--------------
 1 file changed, 2 insertions(+), 14 deletions(-)

diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c
index 63c37de33d22f1..9ba94cdffa4c47 100644
--- a/t/helper/test-repository.c
+++ b/t/helper/test-repository.c
@@ -17,10 +17,6 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree,
 	struct commit *c;
 	struct commit_list *parent;
 
-	setup_git_env(gitdir);
-
-	repo_clear(the_repository);
-
 	if (repo_init(&r, gitdir, worktree))
 		die("Couldn't init repo");
 
@@ -47,10 +43,6 @@ static void test_get_commit_tree_in_graph(const char *gitdir,
 	struct commit *c;
 	struct tree *tree;
 
-	setup_git_env(gitdir);
-
-	repo_clear(the_repository);
-
 	if (repo_init(&r, gitdir, worktree))
 		die("Couldn't init repo");
 
@@ -75,24 +67,20 @@ static void test_get_commit_tree_in_graph(const char *gitdir,
 
 int cmd__repository(int argc, const char **argv)
 {
-	int nongit_ok = 0;
-
-	setup_git_directory_gently(&nongit_ok);
-
 	if (argc < 2)
 		die("must have at least 2 arguments");
 	if (!strcmp(argv[1], "parse_commit_in_graph")) {
 		struct object_id oid;
 		if (argc < 5)
 			die("not enough arguments");
-		if (parse_oid_hex(argv[4], &oid, &argv[4]))
+		if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN)
 			die("cannot parse oid '%s'", argv[4]);
 		test_parse_commit_in_graph(argv[2], argv[3], &oid);
 	} else if (!strcmp(argv[1], "get_commit_tree_in_graph")) {
 		struct object_id oid;
 		if (argc < 5)
 			die("not enough arguments");
-		if (parse_oid_hex(argv[4], &oid, &argv[4]))
+		if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN)
 			die("cannot parse oid '%s'", argv[4]);
 		test_get_commit_tree_in_graph(argv[2], argv[3], &oid);
 	} else {

From c257bd59165e2f55dfa2c97b0ca1e39131513654 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:57 +0100
Subject: [PATCH 28/35] http-push: stop setting up `the_repository` for each
 reference

When pushing references via HTTP we call `repo_init_revisions()` in a
loop for each reference that we're about to push. As third argument we
pass the result of `setup_git_directory()`, which causes us to
reinitialize the repository every single time.

This is an obvious waste of compute, as the repository that we're
working in will never change across any of the initializations. The only
reason that we do this is to retrieve the directory of the repository.
Furthermore, this is about to create issues in a subsequent commit,
where reinitializing the repository will cause a `BUG()`.

Address this by storing the Git directory in a variable instead so that
we don't have to call the function repeatedly.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 http-push.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/http-push.c b/http-push.c
index a1c01e3b9b93a3..a48ca237996567 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1725,6 +1725,7 @@ int cmd_main(int argc, const char **argv)
 	int i;
 	int new_refs;
 	struct ref *ref, *local_refs = NULL;
+	const char *gitdir;
 
 	CALLOC_ARRAY(repo, 1);
 
@@ -1787,7 +1788,7 @@ int cmd_main(int argc, const char **argv)
 	if (delete_branch && rs.nr != 1)
 		die("You must specify only one branch name when deleting a remote branch");
 
-	setup_git_directory();
+	gitdir = setup_git_directory();
 
 	memset(remote_dir_exists, -1, 256);
 
@@ -1941,7 +1942,7 @@ int cmd_main(int argc, const char **argv)
 		if (!push_all && !is_null_oid(&ref->old_oid))
 			strvec_pushf(&commit_argv, "^%s",
 				     oid_to_hex(&ref->old_oid));
-		repo_init_revisions(the_repository, &revs, setup_git_directory());
+		repo_init_revisions(the_repository, &revs, gitdir);
 		setup_revisions_from_strvec(&commit_argv, &revs, NULL);
 		revs.edge_hint = 0; /* just in case */
 

From 35d9fc65edc0a5df9f714d02afaa2c942fb28570 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:58 +0100
Subject: [PATCH 29/35] odb: handle initialization of sources in `odb_new()`

The logic to set up a new object database is currently distributed
across two functions in "repository.c":

  - In `initialize_repository()` we initialize an empty object database.
    This object database is not fully initialized and doesn't have any
    sources attached to it.

  - The primary object database source is then created in
    `repo_set_gitdir()`.

Ideally though, the logic should be entirely self-contained so that we
can iterate more readily on how exactly the sources themselves get set
up.

Refactor `odb_new()` to handle both allocation and setup of the object
database. This ensures that the object database is always initialized
and ready for use, and it allows us to change how the sources get set up
eventually.

Note that `repo_set_gitdir()` still reaches into the sources when the
function gets called with an already-initialized object database. This
will be fixed in the next commit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c        | 14 +++++++++++++-
 odb.h        | 15 ++++++++++++++-
 repository.c | 20 ++++++++------------
 3 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/odb.c b/odb.c
index ccc6e999e7ae25..88b40c81c0060c 100644
--- a/odb.c
+++ b/odb.c
@@ -1034,15 +1034,27 @@ int odb_write_object_stream(struct object_database *odb,
 	return odb_source_loose_write_stream(odb->sources, stream, len, oid);
 }
 
-struct object_database *odb_new(struct repository *repo)
+struct object_database *odb_new(struct repository *repo,
+				const char *primary_source,
+				const char *secondary_sources)
 {
 	struct object_database *o = xmalloc(sizeof(*o));
+	char *to_free = NULL;
 
 	memset(o, 0, sizeof(*o));
 	o->repo = repo;
 	o->packfiles = packfile_store_new(o);
 	pthread_mutex_init(&o->replace_mutex, NULL);
 	string_list_init_dup(&o->submodule_source_paths);
+
+	if (!primary_source)
+		primary_source = to_free = xstrfmt("%s/objects", repo->commondir);
+	o->sources = odb_source_new(o, primary_source, true);
+	o->sources_tail = &o->sources->next;
+	o->alternate_db = xstrdup_or_null(secondary_sources);
+
+	free(to_free);
+
 	return o;
 }
 
diff --git a/odb.h b/odb.h
index 99c4d489729459..41b3c03027f4d8 100644
--- a/odb.h
+++ b/odb.h
@@ -159,7 +159,20 @@ struct object_database {
 	struct string_list submodule_source_paths;
 };
 
-struct object_database *odb_new(struct repository *repo);
+/*
+ * Create a new object database for the given repository.
+ *
+ * If the primary source parameter is set it will override the usual primary
+ * object directory derived from the repository's common directory. The
+ * alternate sources are expected to be a PATH_SEP-separated list of secondary
+ * sources. Note that these alternate sources will be added in addition to, not
+ * instead of, the alternates identified by the primary source.
+ *
+ * Returns the newly created object database.
+ */
+struct object_database *odb_new(struct repository *repo,
+				const char *primary_source,
+				const char *alternate_sources);
 
 /* Free the object database and release all resources. */
 void odb_free(struct object_database *o);
diff --git a/repository.c b/repository.c
index 455c2d279fb8ab..5975c8f341c8cb 100644
--- a/repository.c
+++ b/repository.c
@@ -52,7 +52,6 @@ static void set_default_hash_algo(struct repository *repo)
 
 void initialize_repository(struct repository *repo)
 {
-	repo->objects = odb_new(repo);
 	repo->remote_state = remote_state_new();
 	repo->parsed_objects = parsed_object_pool_new(repo);
 	ALLOC_ARRAY(repo->index, 1);
@@ -160,29 +159,26 @@ void repo_set_gitdir(struct repository *repo,
 	 * until after xstrdup(root). Then we can free it.
 	 */
 	char *old_gitdir = repo->gitdir;
-	char *objects_path = NULL;
 
 	repo->gitdir = xstrdup(gitfile ? gitfile : root);
 	free(old_gitdir);
 
 	repo_set_commondir(repo, o->commondir);
-	expand_base_dir(&objects_path, o->object_dir,
-			repo->commondir, "objects");
-
-	if (!repo->objects->sources) {
-		repo->objects->sources = odb_source_new(repo->objects,
-							objects_path, true);
-		repo->objects->sources_tail = &repo->objects->sources->next;
-		free(objects_path);
+
+	if (!repo->objects) {
+		repo->objects = odb_new(repo, o->object_dir, o->alternate_db);
 	} else {
+		char *objects_path = NULL;
+		expand_base_dir(&objects_path, o->object_dir,
+				repo->commondir, "objects");
 		free(repo->objects->sources->path);
 		repo->objects->sources->path = objects_path;
+		free(repo->objects->alternate_db);
+		repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
 	}
 
 	repo->disable_ref_updates = o->disable_ref_updates;
 
-	free(repo->objects->alternate_db);
-	repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
 	expand_base_dir(&repo->graft_file, o->graft_file,
 			repo->commondir, "info/grafts");
 	expand_base_dir(&repo->index_file, o->index_file,

From 2574c617362a0c67d15fa01e01cdbd0f6bcdbc93 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:50:59 +0100
Subject: [PATCH 30/35] chdir-notify: add function to unregister listeners

While we (obviously) have a way to register new listeners that get
called whenever we chdir(3p), we don't have an equivalent that can be
used to unregister such a listener again.

Add one, as it will be required in a subsequent commit.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 chdir-notify.c | 18 ++++++++++++++++++
 chdir-notify.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/chdir-notify.c b/chdir-notify.c
index 0d7bc0460747b2..f8bfe3cbef9aba 100644
--- a/chdir-notify.c
+++ b/chdir-notify.c
@@ -25,6 +25,24 @@ void chdir_notify_register(const char *name,
 	list_add_tail(&e->list, &chdir_notify_entries);
 }
 
+void chdir_notify_unregister(const char *name, chdir_notify_callback cb,
+			     void *data)
+{
+	struct list_head *pos, *p;
+
+	list_for_each_safe(pos, p, &chdir_notify_entries) {
+		struct chdir_notify_entry *e =
+			list_entry(pos, struct chdir_notify_entry, list);
+
+		if (e->cb != cb || e->data != data || !e->name != !name ||
+		    (e->name && strcmp(e->name, name)))
+			continue;
+
+		list_del(pos);
+		free(e);
+	}
+}
+
 static void reparent_cb(const char *name,
 			const char *old_cwd,
 			const char *new_cwd,
diff --git a/chdir-notify.h b/chdir-notify.h
index 366e4c1ee9908c..81eb69d846e45d 100644
--- a/chdir-notify.h
+++ b/chdir-notify.h
@@ -41,6 +41,8 @@ typedef void (*chdir_notify_callback)(const char *name,
 				      const char *new_cwd,
 				      void *data);
 void chdir_notify_register(const char *name, chdir_notify_callback cb, void *data);
+void chdir_notify_unregister(const char *name, chdir_notify_callback cb,
+			     void *data);
 void chdir_notify_reparent(const char *name, char **path);
 
 /*

From 2816b748e5c300afda559a09426f8342c235c29d Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:51:00 +0100
Subject: [PATCH 31/35] odb: handle changing a repository's commondir

The function `repo_set_gitdir()` is called in two situations:

  - To initialize the repository with its discovered location. As part
    of this we also set up the new object database.

  - To update the repository's discovered location in case the process
    changes its working directory so that we update relative paths. This
    means we also have to update any relative paths that are potentially
    used in the object database.

In the context of the object database we ideally wouldn't ever have to
worry about the second case: if all paths used by our object database
sources were absolute, then we wouldn't have to update them. But
unfortunately, the paths aren't only used to locate files owned by the
given source, but we also use them for reporting purposes. One such
example is `repo_get_object_directory()`, where we cannot just change
semantics to always return absolute paths, as that is likely to break
tooling out there.

One solution to this would be to have both a "display path" and an
"internal path". This would allow us to use internal paths for all
internal matters, but continue to use the potentially-relative display
paths so that we don't break compatibility. But converting the codebase
to honor this split is quite a messy endeavour, and it wouldn't even
help us with the goal to get rid of the need to update the display path
on chdir(3p).

Another solution would be to rework "setup.c" so that we never have to
update paths in the first place. In that case, we'd only initialize the
repository once we have figured out final locations for all directories.
This would be a significant simplification of that subsystem indeed, but
the current logic is so messy that it would take significant investments
to get there.

Meanwhile though, while object sources may still use relative paths, the
best thing we can do is to handle the reparenting of the object source
paths in the object database itself. This can be done by registering one
callback for each object database so that we get notified whenever the
current working directory changes, and we then perform the reparenting
ourselves.

Ideally, this wouldn't even happen on the object database level, but
instead handled by each object database source. But we don't yet have
proper pluggable object database sources, so this will need to be
handled at a later point in time.

The logic itself is rather simple:

  - We register the callback when creating the object database.

  - We unregister the callback when releasing it again.

  - We split up `set_git_dir_1()` so that it becomes possible to skip
    recreating the object database. This is required because the
    function is called both when the current working directory changes,
    but also when we set up the repository. Calling this function
    without skipping creation of the ODB will result in a bug in case
    it's already created.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c        | 37 +++++++++++++++++++++--
 odb.h        |  4 ---
 repository.c | 13 ++------
 repository.h |  1 +
 setup.c      | 84 ++++++++++++++++++++++++++++------------------------
 5 files changed, 83 insertions(+), 56 deletions(-)

diff --git a/odb.c b/odb.c
index 88b40c81c0060c..70665fb7f48f0f 100644
--- a/odb.c
+++ b/odb.c
@@ -1,5 +1,6 @@
 #include "git-compat-util.h"
 #include "abspath.h"
+#include "chdir-notify.h"
 #include "commit-graph.h"
 #include "config.h"
 #include "dir.h"
@@ -142,9 +143,9 @@ static void read_info_alternates(struct object_database *odb,
 				 const char *relative_base,
 				 int depth);
 
-struct odb_source *odb_source_new(struct object_database *odb,
-				  const char *path,
-				  bool local)
+static struct odb_source *odb_source_new(struct object_database *odb,
+					 const char *path,
+					 bool local)
 {
 	struct odb_source *source;
 
@@ -1034,6 +1035,32 @@ int odb_write_object_stream(struct object_database *odb,
 	return odb_source_loose_write_stream(odb->sources, stream, len, oid);
 }
 
+static void odb_update_commondir(const char *name UNUSED,
+				 const char *old_cwd,
+				 const char *new_cwd,
+				 void *cb_data)
+{
+	struct object_database *odb = cb_data;
+	struct odb_source *source;
+
+	/*
+	 * In theory, we only have to do this for the primary object source, as
+	 * alternates' paths are always resolved to an absolute path.
+	 */
+	for (source = odb->sources; source; source = source->next) {
+		char *path;
+
+		if (is_absolute_path(source->path))
+			continue;
+
+		path = reparent_relative_path(old_cwd, new_cwd,
+					      source->path);
+
+		free(source->path);
+		source->path = path;
+	}
+}
+
 struct object_database *odb_new(struct repository *repo,
 				const char *primary_source,
 				const char *secondary_sources)
@@ -1055,6 +1082,8 @@ struct object_database *odb_new(struct repository *repo,
 
 	free(to_free);
 
+	chdir_notify_register(NULL, odb_update_commondir, o);
+
 	return o;
 }
 
@@ -1106,6 +1135,8 @@ void odb_free(struct object_database *o)
 	packfile_store_free(o->packfiles);
 	string_list_clear(&o->submodule_source_paths, 0);
 
+	chdir_notify_unregister(NULL, odb_update_commondir, o);
+
 	free(o);
 }
 
diff --git a/odb.h b/odb.h
index 41b3c03027f4d8..014cd9585a2f6e 100644
--- a/odb.h
+++ b/odb.h
@@ -78,10 +78,6 @@ struct odb_source {
 	char *path;
 };
 
-struct odb_source *odb_source_new(struct object_database *odb,
-				  const char *path,
-				  bool local);
-
 struct packed_git;
 struct packfile_store;
 struct cached_object_entry;
diff --git a/repository.c b/repository.c
index 5975c8f341c8cb..863f24411b7bf9 100644
--- a/repository.c
+++ b/repository.c
@@ -165,17 +165,10 @@ void repo_set_gitdir(struct repository *repo,
 
 	repo_set_commondir(repo, o->commondir);
 
-	if (!repo->objects) {
+	if (!repo->objects)
 		repo->objects = odb_new(repo, o->object_dir, o->alternate_db);
-	} else {
-		char *objects_path = NULL;
-		expand_base_dir(&objects_path, o->object_dir,
-				repo->commondir, "objects");
-		free(repo->objects->sources->path);
-		repo->objects->sources->path = objects_path;
-		free(repo->objects->alternate_db);
-		repo->objects->alternate_db = xstrdup_or_null(o->alternate_db);
-	}
+	else if (!o->skip_initializing_odb)
+		BUG("cannot reinitialize an already-initialized object directory");
 
 	repo->disable_ref_updates = o->disable_ref_updates;
 
diff --git a/repository.h b/repository.h
index 614649413b68bc..6063c4b846d031 100644
--- a/repository.h
+++ b/repository.h
@@ -195,6 +195,7 @@ struct set_gitdir_args {
 	const char *index_file;
 	const char *alternate_db;
 	bool disable_ref_updates;
+	bool skip_initializing_odb;
 };
 
 void repo_set_gitdir(struct repository *repo, const char *root,
diff --git a/setup.c b/setup.c
index a752e9fc8476a0..a625f9fbc8b1b7 100644
--- a/setup.c
+++ b/setup.c
@@ -1002,10 +1002,51 @@ const char *read_gitfile_gently(const char *path, int *return_error_code)
 	return error_code ? NULL : path;
 }
 
-static void set_git_dir_1(const char *path)
+static void setup_git_env_internal(const char *git_dir,
+				   bool skip_initializing_odb)
+{
+	char *git_replace_ref_base;
+	const char *shallow_file;
+	const char *replace_ref_base;
+	struct set_gitdir_args args = { NULL };
+	struct strvec to_free = STRVEC_INIT;
+
+	args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT);
+	args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT);
+	args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT);
+	args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT);
+	args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT);
+	if (getenv(GIT_QUARANTINE_ENVIRONMENT))
+		args.disable_ref_updates = true;
+	args.skip_initializing_odb = skip_initializing_odb;
+
+	repo_set_gitdir(the_repository, git_dir, &args);
+	strvec_clear(&to_free);
+
+	if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT))
+		disable_replace_refs();
+	replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT);
+	git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base
+							  : "refs/replace/");
+	update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base);
+
+	shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT);
+	if (shallow_file)
+		set_alternate_shallow_file(the_repository, shallow_file, 0);
+
+	if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0))
+		fetch_if_missing = 0;
+}
+
+void setup_git_env(const char *git_dir)
+{
+	setup_git_env_internal(git_dir, false);
+}
+
+static void set_git_dir_1(const char *path, bool skip_initializing_odb)
 {
 	xsetenv(GIT_DIR_ENVIRONMENT, path, 1);
-	setup_git_env(path);
+	setup_git_env_internal(path, skip_initializing_odb);
 }
 
 static void update_relative_gitdir(const char *name UNUSED,
@@ -1020,7 +1061,7 @@ static void update_relative_gitdir(const char *name UNUSED,
 	trace_printf_key(&trace_setup_key,
 			 "setup: move $GIT_DIR to '%s'",
 			 path);
-	set_git_dir_1(path);
+	set_git_dir_1(path, true);
 	if (tmp_objdir)
 		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
 	free(path);
@@ -1035,7 +1076,7 @@ static void set_git_dir(const char *path, int make_realpath)
 		path = realpath.buf;
 	}
 
-	set_git_dir_1(path);
+	set_git_dir_1(path, false);
 	if (!is_absolute_path(path))
 		chdir_notify_register(NULL, update_relative_gitdir, NULL);
 
@@ -1668,41 +1709,6 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir,
 	return result;
 }
 
-void setup_git_env(const char *git_dir)
-{
-	char *git_replace_ref_base;
-	const char *shallow_file;
-	const char *replace_ref_base;
-	struct set_gitdir_args args = { NULL };
-	struct strvec to_free = STRVEC_INIT;
-
-	args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT);
-	args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT);
-	args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT);
-	args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT);
-	args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT);
-	if (getenv(GIT_QUARANTINE_ENVIRONMENT)) {
-		args.disable_ref_updates = true;
-	}
-
-	repo_set_gitdir(the_repository, git_dir, &args);
-	strvec_clear(&to_free);
-
-	if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT))
-		disable_replace_refs();
-	replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT);
-	git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base
-							  : "refs/replace/");
-	update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base);
-
-	shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT);
-	if (shallow_file)
-		set_alternate_shallow_file(the_repository, shallow_file, 0);
-
-	if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0))
-		fetch_if_missing = 0;
-}
-
 const char *enter_repo(const char *path, unsigned flags)
 {
 	static struct strbuf validated_path = STRBUF_INIT;

From ac65c70663b092e823b0d3de1c1cfdee0a4fbc8e Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Wed, 19 Nov 2025 08:51:01 +0100
Subject: [PATCH 32/35] odb: handle recreation of quarantine directories

In the preceding commit we have moved the logic that reparents object
database sources on chdir(3p) from "setup.c" into "odb.c". Let's also do
the same for any temporary quarantine directories so that the complete
reparenting logic is self-contained in "odb.c".

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 odb.c   | 7 +++++++
 setup.c | 5 -----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/odb.c b/odb.c
index 70665fb7f48f0f..dc8f292f3d9645 100644
--- a/odb.c
+++ b/odb.c
@@ -24,6 +24,7 @@
 #include "strbuf.h"
 #include "strvec.h"
 #include "submodule.h"
+#include "tmp-objdir.h"
 #include "trace2.h"
 #include "write-or-die.h"
 
@@ -1041,8 +1042,11 @@ static void odb_update_commondir(const char *name UNUSED,
 				 void *cb_data)
 {
 	struct object_database *odb = cb_data;
+	struct tmp_objdir *tmp_objdir;
 	struct odb_source *source;
 
+	tmp_objdir = tmp_objdir_unapply_primary_odb();
+
 	/*
 	 * In theory, we only have to do this for the primary object source, as
 	 * alternates' paths are always resolved to an absolute path.
@@ -1059,6 +1063,9 @@ static void odb_update_commondir(const char *name UNUSED,
 		free(source->path);
 		source->path = path;
 	}
+
+	if (tmp_objdir)
+		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
 }
 
 struct object_database *odb_new(struct repository *repo,
diff --git a/setup.c b/setup.c
index a625f9fbc8b1b7..ae66188af3f6f4 100644
--- a/setup.c
+++ b/setup.c
@@ -22,7 +22,6 @@
 #include "chdir-notify.h"
 #include "path.h"
 #include "quote.h"
-#include "tmp-objdir.h"
 #include "trace.h"
 #include "trace2.h"
 #include "worktree.h"
@@ -1056,14 +1055,10 @@ static void update_relative_gitdir(const char *name UNUSED,
 {
 	char *path = reparent_relative_path(old_cwd, new_cwd,
 					    repo_get_git_dir(the_repository));
-	struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb();
-
 	trace_printf_key(&trace_setup_key,
 			 "setup: move $GIT_DIR to '%s'",
 			 path);
 	set_git_dir_1(path, true);
-	if (tmp_objdir)
-		tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd);
 	free(path);
 }
 

From c20f112e5149d1bd0d4741c4b28a65f81318309a Mon Sep 17 00:00:00 2001
From: Christian Couder <christian.couder@gmail.com>
Date: Mon, 17 Nov 2025 05:34:50 +0100
Subject: [PATCH 33/35] fast-import: add 'strip-if-invalid' mode to
 --signed-commits=<mode>

Tools like `git filter-repo`[1] use `git fast-export` and
`git fast-import` to rewrite repository history. When rewriting
history using one such tool though, commit signatures might become
invalid because the commits they sign changed due to the changes
in the repository history made by the tool between the fast-export
and the fast-import steps.

Note that as far as signature handling goes:

  * Since fast-export doesn't know what changes filter-repo may make
to the stream, it can't know whether the signatures will still be
valid.

  * Since filter-repo doesn't know what history canonicalizations
fast-export performed (and it performs a few), it can't know whether
the signatures will still be valid.

  * Therefore, fast-import is the only process in the pipeline that
can know whether a specified signature remains valid.

Having invalid signatures in a rewritten repository could be
confusing, so users rewritting history might prefer to simply
discard signatures that are invalid at the fast-import step.

For example a common use case is to rewrite only "recent" history.
While specifying commit ranges corresponding to "recent" commits
could work, users worry about getting it wrong and want to just
automatically rewrite everything, expecting older commit signatures
to be untouched.

To let them do that, let's add a new 'strip-if-invalid' mode to the
`--signed-commits=<mode>` option of `git fast-import`.

It would be interesting for the `--signed-tags=<mode>` option to
have this mode too, but we leave that for a future improvement.

It might also be possible for `git fast-export` to have such a mode
in its `--signed-commits=<mode>` and `--signed-tags=<mode>`
options, but the use cases for it are much less clear, so we also
leave that for possible future improvements.

For now let's just die() if 'strip-if-invalid' is passed to these
options where it hasn't been implemented yet.

[1]: https://github.com/newren/git-filter-repo

Helped-by: Elijah Newren <newren@gmail.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-fast-import.adoc | 29 +++++++++----
 builtin/fast-export.c              | 38 ++++++++++++----
 builtin/fast-import.c              | 59 ++++++++++++++++++++++---
 gpg-interface.c                    |  2 +
 gpg-interface.h                    |  1 +
 t/t9305-fast-import-signatures.sh  | 69 +++++++++++++++++++++++++++++-
 6 files changed, 174 insertions(+), 24 deletions(-)

diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc
index b74179a6c891d5..479c4081da8f27 100644
--- a/Documentation/git-fast-import.adoc
+++ b/Documentation/git-fast-import.adoc
@@ -66,15 +66,26 @@ fast-import stream! This option is enabled automatically for
 remote-helpers that use the `import` capability, as they are
 already trusted to run their own code.
 
---signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)::
-	Specify how to handle signed tags.  Behaves in the same way
-	as the same option in linkgit:git-fast-export[1], except that
-	default is 'verbatim' (instead of 'abort').
-
---signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort)::
-	Specify how to handle signed commits.  Behaves in the same way
-	as the same option in linkgit:git-fast-export[1], except that
-	default is 'verbatim' (instead of 'abort').
+`--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)`::
+	Specify how to handle signed tags. Behaves in the same way as
+	the `--signed-commits=<mode>` below, except that the
+	`strip-if-invalid` mode is not yet supported. Like for signed
+	commits, the default mode is `verbatim`.
+
+`--signed-commits=<mode>`::
+	Specify how to handle signed commits. The following <mode>s
+	are supported:
++
+* `verbatim`, which is the default, will silently import commit
+  signatures.
+* `warn-verbatim` will import them, but will display a warning.
+* `abort` will make this program die when encountering a signed
+  commit.
+* `strip` will silently make the commits unsigned.
+* `warn-strip` will make them unsigned, but will display a warning.
+* `strip-if-invalid` will check signatures and, if they are invalid,
+  will strip them and display a warning. The validation is performed
+  in the same way as linkgit:git-verify-commit[1] does it.
 
 Options for Frontends
 ~~~~~~~~~~~~~~~~~~~~~
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 7adbc55f0dccb1..a839a8f9acefde 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -797,10 +797,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 	       (int)(committer_end - committer), committer);
 	if (signatures.nr) {
 		switch (signed_commit_mode) {
-		case SIGN_ABORT:
-			die("encountered signed commit %s; use "
-			    "--signed-commits=<mode> to handle it",
-			    oid_to_hex(&commit->object.oid));
+
+		/* Exporting modes */
 		case SIGN_WARN_VERBATIM:
 			warning("exporting %"PRIuMAX" signature(s) for commit %s",
 				(uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid));
@@ -811,12 +809,25 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 				print_signature(item->string, item->util);
 			}
 			break;
+
+		/* Stripping modes */
 		case SIGN_WARN_STRIP:
 			warning("stripping signature(s) from commit %s",
 				oid_to_hex(&commit->object.oid));
 			/* fallthru */
 		case SIGN_STRIP:
 			break;
+
+		/* Aborting modes */
+		case SIGN_ABORT:
+			die(_("encountered signed commit %s; use "
+			      "--signed-commits=<mode> to handle it"),
+			    oid_to_hex(&commit->object.oid));
+		case SIGN_STRIP_IF_INVALID:
+			die(_("'strip-if-invalid' is not a valid mode for "
+			      "git fast-export with --signed-commits=<mode>"));
+		default:
+			BUG("invalid signed_commit_mode value %d", signed_commit_mode);
 		}
 		string_list_clear(&signatures, 0);
 	}
@@ -934,16 +945,16 @@ static void handle_tag(const char *name, struct tag *tag)
 		size_t sig_offset = parse_signed_buffer(message, message_size);
 		if (sig_offset < message_size)
 			switch (signed_tag_mode) {
-			case SIGN_ABORT:
-				die("encountered signed tag %s; use "
-				    "--signed-tags=<mode> to handle it",
-				    oid_to_hex(&tag->object.oid));
+
+			/* Exporting modes */
 			case SIGN_WARN_VERBATIM:
 				warning("exporting signed tag %s",
 					oid_to_hex(&tag->object.oid));
 				/* fallthru */
 			case SIGN_VERBATIM:
 				break;
+
+			/* Stripping modes */
 			case SIGN_WARN_STRIP:
 				warning("stripping signature from tag %s",
 					oid_to_hex(&tag->object.oid));
@@ -951,6 +962,17 @@ static void handle_tag(const char *name, struct tag *tag)
 			case SIGN_STRIP:
 				message_size = sig_offset;
 				break;
+
+			/* Aborting modes */
+			case SIGN_ABORT:
+				die(_("encountered signed tag %s; use "
+				      "--signed-tags=<mode> to handle it"),
+				    oid_to_hex(&tag->object.oid));
+			case SIGN_STRIP_IF_INVALID:
+				die(_("'strip-if-invalid' is not a valid mode for "
+				      "git fast-export with --signed-tags=<mode>"));
+			default:
+				BUG("invalid signed_commit_mode value %d", signed_commit_mode);
 			}
 	}
 
diff --git a/builtin/fast-import.c b/builtin/fast-import.c
index 493de57ef67bfb..e2c6894461044b 100644
--- a/builtin/fast-import.c
+++ b/builtin/fast-import.c
@@ -2772,7 +2772,7 @@ static void add_gpgsig_to_commit(struct strbuf *commit_data,
 {
 	struct string_list siglines = STRING_LIST_INIT_NODUP;
 
-	if (!sig->hash_algo)
+	if (!sig || !sig->hash_algo)
 		return;
 
 	strbuf_addstr(commit_data, header);
@@ -2827,6 +2827,45 @@ static void finalize_commit_buffer(struct strbuf *new_data,
 	strbuf_addbuf(new_data, msg);
 }
 
+static void handle_strip_if_invalid(struct strbuf *new_data,
+				    struct signature_data *sig_sha1,
+				    struct signature_data *sig_sha256,
+				    struct strbuf *msg)
+{
+	struct strbuf tmp_buf = STRBUF_INIT;
+	struct signature_check signature_check = { 0 };
+	int ret;
+
+	/* Check signature in a temporary commit buffer */
+	strbuf_addbuf(&tmp_buf, new_data);
+	finalize_commit_buffer(&tmp_buf, sig_sha1, sig_sha256, msg);
+	ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check);
+
+	if (ret) {
+		const char *signer = signature_check.signer ?
+			signature_check.signer : _("unknown");
+		const char *subject;
+		int subject_len = find_commit_subject(msg->buf, &subject);
+
+		if (subject_len > 100)
+			warning(_("stripping invalid signature for commit '%.100s...'\n"
+				  "  allegedly by %s"), subject, signer);
+		else if (subject_len > 0)
+			warning(_("stripping invalid signature for commit '%.*s'\n"
+				  "  allegedly by %s"), subject_len, subject, signer);
+		else
+			warning(_("stripping invalid signature for commit\n"
+				  "  allegedly by %s"), signer);
+
+		finalize_commit_buffer(new_data, NULL, NULL, msg);
+	} else {
+		strbuf_swap(new_data, &tmp_buf);
+	}
+
+	signature_check_clear(&signature_check);
+	strbuf_release(&tmp_buf);
+}
+
 static void parse_new_commit(const char *arg)
 {
 	static struct strbuf msg = STRBUF_INIT;
@@ -2878,6 +2917,7 @@ static void parse_new_commit(const char *arg)
 			warning(_("importing a commit signature verbatim"));
 			/* fallthru */
 		case SIGN_VERBATIM:
+		case SIGN_STRIP_IF_INVALID:
 			import_one_signature(&sig_sha1, &sig_sha256, v);
 			break;
 
@@ -2962,7 +3002,11 @@ static void parse_new_commit(const char *arg)
 			"encoding %s\n",
 			encoding);
 
-	finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg);
+	if (signed_commit_mode == SIGN_STRIP_IF_INVALID &&
+	    (sig_sha1.hash_algo || sig_sha256.hash_algo))
+		handle_strip_if_invalid(&new_data, &sig_sha1, &sig_sha256, &msg);
+	else
+		finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg);
 
 	free(author);
 	free(committer);
@@ -2984,9 +3028,6 @@ static void handle_tag_signature(struct strbuf *msg, const char *name)
 	switch (signed_tag_mode) {
 
 	/* First, modes that don't change anything */
-	case SIGN_ABORT:
-		die(_("encountered signed tag; use "
-		      "--signed-tags=<mode> to handle it"));
 	case SIGN_WARN_VERBATIM:
 		warning(_("importing a tag signature verbatim for tag '%s'"), name);
 		/* fallthru */
@@ -3003,7 +3044,13 @@ static void handle_tag_signature(struct strbuf *msg, const char *name)
 		strbuf_setlen(msg, sig_offset);
 		break;
 
-	/* Third, BUG */
+	/* Third, aborting modes */
+	case SIGN_ABORT:
+		die(_("encountered signed tag; use "
+		      "--signed-tags=<mode> to handle it"));
+	case SIGN_STRIP_IF_INVALID:
+		die(_("'strip-if-invalid' is not a valid mode for "
+		      "git fast-import with --signed-tags=<mode>"));
 	default:
 		BUG("invalid signed_tag_mode value %d from tag '%s'",
 		    signed_tag_mode, name);
diff --git a/gpg-interface.c b/gpg-interface.c
index d1e88da8c1bfde..fe653b246433b8 100644
--- a/gpg-interface.c
+++ b/gpg-interface.c
@@ -1146,6 +1146,8 @@ int parse_sign_mode(const char *arg, enum sign_mode *mode)
 		*mode = SIGN_WARN_STRIP;
 	else if (!strcmp(arg, "strip"))
 		*mode = SIGN_STRIP;
+	else if (!strcmp(arg, "strip-if-invalid"))
+		*mode = SIGN_STRIP_IF_INVALID;
 	else
 		return -1;
 	return 0;
diff --git a/gpg-interface.h b/gpg-interface.h
index 50487aa1483274..71dde8cb807437 100644
--- a/gpg-interface.h
+++ b/gpg-interface.h
@@ -111,6 +111,7 @@ enum sign_mode {
 	SIGN_VERBATIM,
 	SIGN_WARN_STRIP,
 	SIGN_STRIP,
+	SIGN_STRIP_IF_INVALID,
 };
 
 /*
diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh
index c2b427165862d3..022dae02e48177 100755
--- a/t/t9305-fast-import-signatures.sh
+++ b/t/t9305-fast-import-signatures.sh
@@ -79,7 +79,7 @@ test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA-
 	echo B >explicit-sha256/B &&
 	git -C explicit-sha256 add B &&
 	test_tick &&
-	git -C explicit-sha256 commit -S -m "signed" B &&
+	git -C explicit-sha256 commit -S -m "signed commit" B &&
 	SHA256_B=$(git -C explicit-sha256 rev-parse dual-signed) &&
 
 	# Create the corresponding SHA-1 commit
@@ -103,4 +103,71 @@ test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=war
 	test_line_count = 2 out
 '
 
+test_expect_success GPG 'import commit with no signature with --signed-commits=strip-if-invalid' '
+	git fast-export main >output &&
+	git -C new fast-import --quiet --signed-commits=strip-if-invalid <output >log 2>&1 &&
+	test_must_be_empty log
+'
+
+test_expect_success GPG 'keep valid OpenPGP signature with --signed-commits=strip-if-invalid' '
+	rm -rf new &&
+	git init new &&
+
+	git fast-export --signed-commits=verbatim openpgp-signing >output &&
+	git -C new fast-import --quiet --signed-commits=strip-if-invalid <output >log 2>&1 &&
+	IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) &&
+	test $OPENPGP_SIGNING = $IMPORTED &&
+	git -C new cat-file commit "$IMPORTED" >actual &&
+	test_grep -E "^gpgsig(-sha256)? " actual &&
+	test_must_be_empty log
+'
+
+test_expect_success GPG 'strip signature invalidated by message change with --signed-commits=strip-if-invalid' '
+	rm -rf new &&
+	git init new &&
+
+	git fast-export --signed-commits=verbatim openpgp-signing >output &&
+
+	# Change the commit message, which invalidates the signature.
+	# The commit message length should not change though, otherwise the
+	# corresponding `data <length>` command would have to be changed too.
+	sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified &&
+
+	git -C new fast-import --quiet --signed-commits=strip-if-invalid <modified >log 2>&1 &&
+
+	IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) &&
+	test $OPENPGP_SIGNING != $IMPORTED &&
+	git -C new cat-file commit "$IMPORTED" >actual &&
+	test_grep ! -E "^gpgsig" actual &&
+	test_grep "stripping invalid signature" log
+'
+
+test_expect_success GPGSM 'keep valid X.509 signature with --signed-commits=strip-if-invalid' '
+	rm -rf new &&
+	git init new &&
+
+	git fast-export --signed-commits=verbatim x509-signing >output &&
+	git -C new fast-import --quiet --signed-commits=strip-if-invalid <output >log 2>&1 &&
+	IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) &&
+	test $X509_SIGNING = $IMPORTED &&
+	git -C new cat-file commit "$IMPORTED" >actual &&
+	test_grep -E "^gpgsig(-sha256)? " actual &&
+	test_must_be_empty log
+'
+
+test_expect_success GPGSSH 'keep valid SSH signature with --signed-commits=strip-if-invalid' '
+	rm -rf new &&
+	git init new &&
+
+	test_config -C new gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" &&
+
+	git fast-export --signed-commits=verbatim ssh-signing >output &&
+	git -C new fast-import --quiet --signed-commits=strip-if-invalid <output >log 2>&1 &&
+	IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) &&
+	test $SSH_SIGNING = $IMPORTED &&
+	git -C new cat-file commit "$IMPORTED" >actual &&
+	test_grep -E "^gpgsig(-sha256)? " actual &&
+	test_must_be_empty log
+'
+
 test_done

From 0458e8b85440f77d4a4aec28d09112ba06cee05a Mon Sep 17 00:00:00 2001
From: Johannes Schindelin <johannes.schindelin@gmx.de>
Date: Mon, 17 Nov 2025 17:04:24 +0000
Subject: [PATCH 34/35] ci(dockerized): do show the result of failing tests
 again

The quality of tests and test suites is most apparent not when
everything passes, but in how quickly bugs can be identified,
analyzed, and resolved after test failures occur.

As such, it is an unfortunate side effect of 2a21098b98a (github: adapt
containerized jobs to be rootless, 2025-01-10) that the output of failed
test cases, which was shown before that change directly in the build
logs, is now no longer shown at all.

The reason is a side effect of trying to run the build and the tests
with permissions other than the `root` user, but without providing the
prerequisite permissions to signal what tests failed and whose output
hence needs to be included in the logs.

The way this signaling works is for the workflow to write into
special-purpose files whose path is specific to the current workflow
step and which can be accessed via the `$GITHUB_ENV` environment
variable, which differs between workflow steps. It is this file that is
missing write permission for the `builder` user that was introduced in
above-mentioned commit.

The solution is simple: make the file world-writable.

Technically, this write permission should be removed after the step has
completed, if proper security practices were to be upheld, but since
nothing uses that file again, it does not matter, and the fix is more
succinct this way.

This commit is best viewed with `--color-words`.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
[jc: squashed Elijah's rewrite of the first paragraph of the log message]
[jc: updated chmod to match "world-writable" in the log message]
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 1c8260ecb68b76..a2c06f9272e532 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -417,7 +417,7 @@ jobs:
     - run: ci/install-dependencies.sh
     - run: useradd builder --create-home
     - run: chown -R builder .
-    - run: sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh
+    - run: chmod a+w $GITHUB_ENV && sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh
     - name: print test failures
       if: failure() && env.FAILED_TEST_ARTIFACTS != ''
       run: sudo --preserve-env --set-home --user=builder ci/print-test-failures.sh

From bdc5341ff65278a3cc80b2e8a02a2f02aa1fac06 Mon Sep 17 00:00:00 2001
From: Junio C Hamano <gitster@pobox.com>
Date: Fri, 5 Dec 2025 14:49:13 +0900
Subject: [PATCH 35/35] The sixth batch

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/RelNotes/2.53.0.adoc | 31 ++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc
index c4dfeb1c23b406..9e896bd4244389 100644
--- a/Documentation/RelNotes/2.53.0.adoc
+++ b/Documentation/RelNotes/2.53.0.adoc
@@ -20,6 +20,9 @@ UI, Workflows & Features
 
  * Add a new manual that describes the data model.
 
+ * "git fast-import" learns "--strip-if-invalid" option to drop
+   invalid cryptographic signature from objects.
+
 
 Performance, Internal Implementation, Development Support etc.
 --------------------------------------------------------------
@@ -38,6 +41,13 @@ Performance, Internal Implementation, Development Support etc.
  * A part of code paths that deals with loose objects has been cleaned
    up.
 
+ * "make strip" has been taught to strip "scalar" as well as "git".
+
+ * Dockerised jobs at the GitHub Actions CI have been taught to show
+   more details of failed tests.
+
+ * Code refactoring around object database sources.
+
 
 Fixes since v2.52
 -----------------
@@ -105,8 +115,29 @@ Fixes since v2.52
  * Various issues detected by Asan have been corrected.
    (merge a031b6181a jk/asan-bonanza later to maint).
 
+ * "git config get --path" segfaulted on an ":(optional)path" that
+   does not exist, which has been corrected.
+   (merge 0bd16856ff jc/optional-path later to maint).
+
+ * The "--committer-date-is-author-date" option of "git am/rebase" is
+   a misguided one.  The documentation is updated to discourage its
+   use.
+   (merge fbf3d0669f kh/doc-committer-date-is-author-date later to maint).
+
+ * The option help text given by "git config unset -h" described
+   the "--all" option to "replace", not "unset", multiple variables,
+   which has been corrected.
+   (merge 18bf67b753 rs/config-unset-opthelp-fix later to maint).
+
+ * The error message given by "git config set", when the variable
+   being updated has more than one values defined, used old style "git
+   config" syntax with an incorrect option in its hint, both of which
+   have been corrected.
+   (merge df963f0df4 rs/config-set-multi-error-message-fix later to maint).
+
  * Other code cleanup, docfix, build fix, etc.
    (merge 46207a54cc qj/doc-http-bad-want-response later to maint).
    (merge df90eccd93 kh/doc-commit-extra-references later to maint).
    (merge f18aa68861 rs/xmkstemp-simplify later to maint).
    (merge fddba8f737 ja/doc-synopsis-style later to maint).
+   (merge 22ce0cb639 en/xdiff-cleanup-2 later to maint).