From 388517c14ce62e1c52b091af862bbaf28dbabb7a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 17 Nov 2025 05:34:48 +0100 Subject: [PATCH 01/35] fast-import: refactor finalize_commit_buffer() In a following commit we are going to finalize commit buffers with or without signatures in order to check the signatures and possibly drop them. To do so easily and without duplication, let's refactor the current code that finalizes commit buffers into a new finalize_commit_buffer() function. Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- builtin/fast-import.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 54d3e592c6e460..493de57ef67bfb 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2815,6 +2815,18 @@ static void import_one_signature(struct signature_data *sig_sha1, die(_("parse_one_signature() returned unknown hash algo")); } +static void finalize_commit_buffer(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg) +{ + add_gpgsig_to_commit(new_data, "gpgsig ", sig_sha1); + add_gpgsig_to_commit(new_data, "gpgsig-sha256 ", sig_sha256); + + strbuf_addch(new_data, '\n'); + strbuf_addbuf(new_data, msg); +} + static void parse_new_commit(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -2950,11 +2962,8 @@ static void parse_new_commit(const char *arg) "encoding %s\n", encoding); - add_gpgsig_to_commit(&new_data, "gpgsig ", &sig_sha1); - add_gpgsig_to_commit(&new_data, "gpgsig-sha256 ", &sig_sha256); + finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); - strbuf_addch(&new_data, '\n'); - strbuf_addbuf(&new_data, &msg); free(author); free(committer); free(encoding); From cb034c020aba54360e7c19faf82021399bf131e7 Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 17 Nov 2025 05:34:49 +0100 Subject: [PATCH 02/35] commit: refactor verify_commit_buffer() In a following commit, we are going to check commit signatures, but we won't have a commit yet, only a commit buffer, and we are going to discard this commit buffer if the signature is invalid. So it would be wasteful to create a commit that we might discard, just to be able to check a commit signature. It would be simpler instead to be able to check commit signatures using only a commit buffer instead of a commit. To be able to do that, let's extract some code from the check_commit_signature() function into a new verify_commit_buffer() function, and then let's make check_commit_signature() call verify_commit_buffer(). Note that this doesn't fundamentally change how check_commit_signature() works. It used to call parse_signed_commit() which calls repo_get_commit_buffer(), parse_buffer_signed_by_header() and repo_unuse_commit_buffer(). Now these 3 functions are called directly by verify_commit_buffer(). Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- commit.c | 17 +++++++++++++++-- commit.h | 7 +++++++ 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/commit.c b/commit.c index 16d91b2bfcf291..709c9eed58a790 100644 --- a/commit.c +++ b/commit.c @@ -1315,7 +1315,8 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h free(buf); } -int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc) { struct strbuf payload = STRBUF_INIT; struct strbuf signature = STRBUF_INIT; @@ -1323,7 +1324,8 @@ int check_commit_signature(const struct commit *commit, struct signature_check * sigc->result = 'N'; - if (parse_signed_commit(commit, &payload, &signature, the_hash_algo) <= 0) + if (parse_buffer_signed_by_header(buffer, size, &payload, + &signature, the_hash_algo) <= 0) goto out; sigc->payload_type = SIGNATURE_PAYLOAD_COMMIT; @@ -1337,6 +1339,17 @@ int check_commit_signature(const struct commit *commit, struct signature_check * return ret; } +int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +{ + unsigned long size; + const char *buffer = repo_get_commit_buffer(the_repository, commit, &size); + int ret = verify_commit_buffer(buffer, size, sigc); + + repo_unuse_commit_buffer(the_repository, commit, buffer); + + return ret; +} + void verify_merge_signature(struct commit *commit, int verbosity, int check_trust) { diff --git a/commit.h b/commit.h index 1d6e0c7518b3bb..5406dd266327d4 100644 --- a/commit.h +++ b/commit.h @@ -333,6 +333,13 @@ int remove_signature(struct strbuf *buf); */ int check_commit_signature(const struct commit *commit, struct signature_check *sigc); +/* + * Same as check_commit_signature() but accepts a commit buffer and + * its size, instead of a `struct commit *`. + */ +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc); + /* record author-date for each commit object */ struct author_date_slab; void record_author_date(struct author_date_slab *author_date, From c64eb849b14e5a78864f4260ccc12f46052020ec Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 Nov 2025 19:51:26 +0000 Subject: [PATCH 03/35] make strip: include `scalar` When Scalar was made a canonical part of Git in 7b5c93c6c68 (scalar: include in standard Git build & installation, 2022-09-02), it was added to all relevant Makefile targets except for the `strip` target. Let's correct that. Signed-off-by: Johannes Schindelin Signed-off-by: Junio C Hamano --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 70d1543b6b8688..fffb0d66e39842 100644 --- a/Makefile +++ b/Makefile @@ -2499,7 +2499,7 @@ please_set_SHELL_PATH_to_a_more_modern_shell: shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell -strip: $(PROGRAMS) git$X +strip: $(PROGRAMS) git$X scalar$X $(STRIP) $(STRIP_OPTS) $^ ### Target-specific flags and dependencies From 6971934d9bb4b8176b48658482862169a4582913 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:13 +0000 Subject: [PATCH 04/35] doc: define unambiguous type mappings across C and Rust Document other nuances when crossing the FFI boundary. Other language mappings may be added in the future. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- Documentation/Makefile | 1 + Documentation/technical/meson.build | 1 + .../technical/unambiguous-types.adoc | 224 ++++++++++++++++++ 3 files changed, 226 insertions(+) create mode 100644 Documentation/technical/unambiguous-types.adoc diff --git a/Documentation/Makefile b/Documentation/Makefile index a3fbd29744bd39..b580bdc98b100e 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -140,6 +140,7 @@ TECH_DOCS += technical/shallow TECH_DOCS += technical/sparse-checkout TECH_DOCS += technical/sparse-index TECH_DOCS += technical/trivial-merge +TECH_DOCS += technical/unambiguous-types TECH_DOCS += technical/unit-tests SP_ARTICLES += $(TECH_DOCS) SP_ARTICLES += technical/api-index diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build index 858af811a7bcc1..7df9b8acf60ed4 100644 --- a/Documentation/technical/meson.build +++ b/Documentation/technical/meson.build @@ -31,6 +31,7 @@ articles = [ 'sparse-checkout.adoc', 'sparse-index.adoc', 'trivial-merge.adoc', + 'unambiguous-types.adoc', 'unit-tests.adoc', ] diff --git a/Documentation/technical/unambiguous-types.adoc b/Documentation/technical/unambiguous-types.adoc new file mode 100644 index 00000000000000..9a4990847c0e0b --- /dev/null +++ b/Documentation/technical/unambiguous-types.adoc @@ -0,0 +1,224 @@ += Unambiguous types + +Most of these mappings are obvious, but there are some nuances and gotchas with +Rust FFI (Foreign Function Interface). + +This document defines clear, one-to-one mappings between primitive types in C, +Rust (and possible other languages in the future). Its purpose is to eliminate +ambiguity in type widths, signedness, and binary representation across +platforms and languages. + +For Git, the only header required to use these unambiguous types in C is +`git-compat-util.h`. + +== Boolean types +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| bool^1^ | bool +|=== + +== Integer types + +In C, `` (or an equivalent) must be included. + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| uint8_t | u8 +| uint16_t | u16 +| uint32_t | u32 +| uint64_t | u64 + +| int8_t | i8 +| int16_t | i16 +| int32_t | i32 +| int64_t | i64 +|=== + +== Floating-point types + +Rust requires IEEE-754 semantics. +In C, that is typically true, but not guaranteed by the standard. + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| float^2^ | f32 +| double^2^ | f64 +|=== + +== Size types + +These types represent pointer-sized integers and are typically defined in +`` or an equivalent header. + +Size types should be used any time pointer arithmetic is performed e.g. +indexing an array, describing the number of elements in memory, etc... + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| size_t^3^ | usize +| ptrdiff_t^3^ | isize +|=== + +== Character types + +This is where C and Rust don't have a clean one-to-one mapping. + +A C `char` and a Rust `u8` share the same bit width, so any C struct containing +a `char` will have the same size as the corresponding Rust struct using `u8`. +In that sense, such structs are safe to pass over the FFI boundary, because +their fields will be laid out identically. However, beyond bit width, C `char` +has additional semantics and platform-dependent behavior that can cause +problems, as discussed below. + +The C language leaves the signedness of `char` implementation defined. Because +our developer build enables -Wsign-compare, comparison of a value of `char` +type with either signed or unsigned integers may trigger warnings from the +compiler. + +Note: Rust's `char` type is an unsigned 32-bit integer that is used to describe +Unicode code points. + +=== Notes +^1^ This is only true if stdbool.h (or equivalent) is used. + +^2^ C does not enforce IEEE-754 compatibility, but Rust expects it. If the +platform/arch for C does not follow IEEE-754 then this equivalence does not +hold. Also, it's assumed that `float` is 32 bits and `double` is 64, but +there may be a strange platform/arch where even this isn't true. + +^3^ C also defines uintptr_t, ssize_t and intptr_t, but these types are +discouraged for FFI purposes. For functions like `read()` and `write()` ssize_t +should be cast to a different, and unambiguous, type before being passed over +the FFI boundary. + + +== Problems with std::ffi::c_* types in Rust +TL;DR: In practice, Rust's `c_*` types aren't guaranteed to match C types for +all possible C compilers, platforms, or architectures, because Rust only +ensures correctness of C types on officially supported targets. These +definitions have changed over time to match more targets which means that the +c_* definitions will differ based on which Rust version Git chooses to use. + +Current list of safe, Rust side, FFI types in Git: + + +* `c_void` +* `CStr` +* `CString` + +Even then, they should be used sparingly, and only where the semantics match +exactly. + +The std::os::raw::c_* directly inherits the problems of core::ffi, which +changes over time and seems to make a best guess at the correct definition for +a given platform/target. This probably isn't a problem for all other platforms +that Rust supports currently, but can anyone say that Rust got it right for all +C compilers of all platforms/targets? + +To give an example: c_long is defined in +footnote:[https://doc.rust-lang.org/1.63.0/src/core/ffi/mod.rs.html#175-189[c_long in 1.63.0]] +footnote:[https://doc.rust-lang.org/1.89.0/src/core/ffi/primitives.rs.html#135-151[c_long in 1.89.0]] + +=== Rust version 1.63.0 + +``` +mod c_long_definition { + cfg_if! { + if #[cfg(all(target_pointer_width = "64", not(windows)))] { + pub type c_long = i64; + pub type NonZero_c_long = crate::num::NonZeroI64; + pub type c_ulong = u64; + pub type NonZero_c_ulong = crate::num::NonZeroU64; + } else { + // The minimal size of `long` in the C standard is 32 bits + pub type c_long = i32; + pub type NonZero_c_long = crate::num::NonZeroI32; + pub type c_ulong = u32; + pub type NonZero_c_ulong = crate::num::NonZeroU32; + } + } +} +``` + +=== Rust version 1.89.0 + +``` +mod c_long_definition { + crate::cfg_select! { + any( + all(target_pointer_width = "64", not(windows)), + // wasm32 Linux ABI uses 64-bit long + all(target_arch = "wasm32", target_os = "linux") + ) => { + pub(super) type c_long = i64; + pub(super) type c_ulong = u64; + } + _ => { + // The minimal size of `long` in the C standard is 32 bits + pub(super) type c_long = i32; + pub(super) type c_ulong = u32; + } + } +} +``` + +Even for the cases where C types are correctly mapped to Rust types via +std::ffi::c_* there are still problems. Let's take c_char for example. On some +platforms it's u8 on others it's i8. + +=== Subtraction underflow in debug mode + +The following code will panic in debug on platforms that define c_char as u8, +but won't if it's an i8. + +``` +let mut x: std::ffi::c_char = 0; +x -= 1; +``` + +=== Inconsistent shift behavior + +`x` will be 0xC0 for platforms that use i8, but will be 0x40 where it's u8. + +``` +let mut x: std::ffi::c_char = 0x80; +x >>= 1; +``` + +=== Equality fails to compile on some platforms + +The following will not compile on platforms that define c_char as i8, but will +if it's u8. You can cast x e.g. `assert_eq!(x as u8, b'a');`, but then you get +a warning on platforms that use u8 and a clean compilation where i8 is used. + +``` +let mut x: std::ffi::c_char = 0x61; +assert_eq!(x, b'a'); +``` + +== Enum types +Rust enum types should not be used as FFI types. Rust enum types are more like +C union types than C enum's. For something like: + +``` +#[repr(C, u8)] +enum Fruit { + Apple, + Banana, + Cherry, +} +``` + +It's easy enough to make sure the Rust enum matches what C would expect, but a +more complex type like. + +``` +enum HashResult { + SHA1([u8; 20]), + SHA256([u8; 32]), +} +``` + +The Rust compiler has to add a discriminant to the enum to distinguish between +the variants. The width, location, and values for that discriminant is up to +the Rust compiler and is not ABI stable. From f007f4f4b473565fb2e94780028399030926bacb Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:14 +0000 Subject: [PATCH 05/35] xdiff: use ptrdiff_t for dstart/dend ptrdiff_t is appropriate for dstart and dend because they both describe positive or negative offsets relative to a pointer. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xtypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index f145abba3ea8a3..7a2d429ec5e7ea 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -47,7 +47,7 @@ typedef struct s_xrecord { typedef struct s_xdfile { xrecord_t *recs; long nrec; - long dstart, dend; + ptrdiff_t dstart, dend; bool *changed; long *rindex; long nreff; From 10f97d6affcb59bdcb74a6878d3d9da0eec81296 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:15 +0000 Subject: [PATCH 06/35] xdiff: make xrecord_t.ptr a uint8_t instead of char Make xrecord_t.ptr uint8_t because it's referring to bytes in memory. In order to avoid a refactor avalanche, many uses of this field were cast to char* or similar. Places where casting was unnecessary: xemit.c:156 xmerge.c:124 xmerge.c:127 xmerge.c:164 xmerge.c:169 xmerge.c:172 xmerge.c:178 Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 8 ++++---- xdiff/xemit.c | 6 +++--- xdiff/xmerge.c | 14 +++++++------- xdiff/xpatience.c | 2 +- xdiff/xprepare.c | 6 +++--- xdiff/xtypes.h | 2 +- xdiff/xutils.c | 4 ++-- 7 files changed, 21 insertions(+), 21 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6f3998ee54c01e..95989b6af1d070 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -407,7 +407,7 @@ static int get_indent(xrecord_t *rec) int ret = 0; for (i = 0; i < rec->size; i++) { - char c = rec->ptr[i]; + char c = (char) rec->ptr[i]; if (!XDL_ISSPACE(c)) return ret; @@ -993,11 +993,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) rec = &xe->xdf1.recs[xch->i1]; for (i = 0; i < xch->chg1 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags); rec = &xe->xdf2.recs[xch->i2]; for (i = 0; i < xch->chg2 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags); xch->ignore = ignore; } @@ -1008,7 +1008,7 @@ static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) { size_t i; for (i = 0; i < xpp->ignore_regex_nr; i++) - if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1, + if (!regexec_buf(xpp->ignore_regex[i], (const char *)rec->ptr, rec->size, 1, ®match, 0)) return 1; diff --git a/xdiff/xemit.c b/xdiff/xemit.c index b2f1f30cd36eef..ead930088a5601 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -27,7 +27,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t * { xrecord_t *rec = &xdf->recs[ri]; - if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) + if (xdl_emit_diffrec((char const *)rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) return -1; return 0; @@ -113,8 +113,8 @@ static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, xrecord_t *rec = &xdf->recs[ri]; if (!xecfg->find_func) - return def_ff(rec->ptr, rec->size, buf, sz); - return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); + return def_ff((const char *)rec->ptr, rec->size, buf, sz); + return xecfg->find_func((const char *)rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); } static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index fd600cbb5d58a2..75cb3e76a2c8e4 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, xrecord_t *rec2 = xe2->xdf2.recs + i2; for (i = 0; i < line_count; i++) { - int result = xdl_recmatch(rec1[i].ptr, rec1[i].size, - rec2[i].ptr, rec2[i].size, flags); + int result = xdl_recmatch((const char *)rec1[i].ptr, rec1[i].size, + (const char *)rec2[i].ptr, rec2[i].size, flags); if (!result) return -1; } @@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags) { - return xdl_recmatch(rec1->ptr, rec1->size, - rec2->ptr, rec2->size, flags); + return xdl_recmatch((const char *)rec1->ptr, rec1->size, + (const char *)rec2->ptr, rec2->size, flags); } /* @@ -382,10 +382,10 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, * we have a very simple mmfile structure. */ t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr; - t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr + t1.size = (char *)xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr + xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr; t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr; - t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr + t2.size = (char *)xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr + xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr; if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) return -1; @@ -440,7 +440,7 @@ static int line_contains_alnum(const char *ptr, long size) static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) { for (; chg; chg--, i++) - if (line_contains_alnum(xe->xdf2.recs[i].ptr, + if (line_contains_alnum((const char *)xe->xdf2.recs[i].ptr, xe->xdf2.recs[i].size)) return 1; return 0; diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index 669b653580efe6..bb61354f22a177 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -121,7 +121,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, return; map->entries[index].line1 = line; map->entries[index].hash = record->ha; - map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); + map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr); if (!map->first) map->first = map->entries + index; if (map->last) { diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 192334f1b72e63..4c564670764e53 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -99,8 +99,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) if (rcrec->rec.ha == rec->ha && - xdl_recmatch(rcrec->rec.ptr, rcrec->rec.size, - rec->ptr, rec->size, cf->flags)) + xdl_recmatch((const char *)rcrec->rec.ptr, rcrec->rec.size, + (const char *)rec->ptr, rec->size, cf->flags)) break; if (!rcrec) { @@ -156,7 +156,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) goto abort; crec = &xdf->recs[xdf->nrec++]; - crec->ptr = prev; + crec->ptr = (uint8_t const *)prev; crec->size = (long) (cur - prev); crec->ha = hav; if (xdl_classify_record(pass, cf, crec) < 0) diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 7a2d429ec5e7ea..69727fb29999ef 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -39,7 +39,7 @@ typedef struct s_chastore { } chastore_t; typedef struct s_xrecord { - char const *ptr; + uint8_t const *ptr; long size; unsigned long ha; } xrecord_t; diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 447e66c7198b08..7be063bfb61d72 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -465,10 +465,10 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, xdfenv_t env; subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr; - subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr + + subfile1.size = (char *)diff_env->xdf1.recs[line1 + count1 - 2].ptr + diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr; subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr; - subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr + + subfile2.size = (char *)diff_env->xdf2.recs[line2 + count2 - 2].ptr + diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr; if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) return -1; From 9bd193253c5e590203fc566ad7cff8f891ec0493 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:16 +0000 Subject: [PATCH 07/35] xdiff: use size_t for xrecord_t.size size_t is the appropriate type because size is describing the number of elements, bytes in this case, in memory. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 7 +++---- xdiff/xemit.c | 8 ++++---- xdiff/xmerge.c | 16 ++++++++-------- xdiff/xprepare.c | 6 +++--- xdiff/xtypes.h | 2 +- 5 files changed, 19 insertions(+), 20 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 95989b6af1d070..cb8e412c7b9db6 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -403,10 +403,9 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2) */ static int get_indent(xrecord_t *rec) { - long i; int ret = 0; - for (i = 0; i < rec->size; i++) { + for (size_t i = 0; i < rec->size; i++) { char c = (char) rec->ptr[i]; if (!XDL_ISSPACE(c)) @@ -993,11 +992,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) rec = &xe->xdf1.recs[xch->i1]; for (i = 0; i < xch->chg1 && ignore; i++) - ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); rec = &xe->xdf2.recs[xch->i2]; for (i = 0; i < xch->chg2 && ignore; i++) - ignore = xdl_blankline((const char *)rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); xch->ignore = ignore; } diff --git a/xdiff/xemit.c b/xdiff/xemit.c index ead930088a5601..2f8007753c30f2 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -27,7 +27,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t * { xrecord_t *rec = &xdf->recs[ri]; - if (xdl_emit_diffrec((char const *)rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) + if (xdl_emit_diffrec((char const *)rec->ptr, (long)rec->size, pre, strlen(pre), ecb) < 0) return -1; return 0; @@ -113,8 +113,8 @@ static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, xrecord_t *rec = &xdf->recs[ri]; if (!xecfg->find_func) - return def_ff((const char *)rec->ptr, rec->size, buf, sz); - return xecfg->find_func((const char *)rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); + return def_ff((const char *)rec->ptr, (long)rec->size, buf, sz); + return xecfg->find_func((const char *)rec->ptr, (long)rec->size, buf, sz, xecfg->find_func_priv); } static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) @@ -151,7 +151,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, static int is_empty_rec(xdfile_t *xdf, long ri) { xrecord_t *rec = &xdf->recs[ri]; - long i = 0; + size_t i = 0; for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index 75cb3e76a2c8e4..0dd4558a3280dc 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, xrecord_t *rec2 = xe2->xdf2.recs + i2; for (i = 0; i < line_count; i++) { - int result = xdl_recmatch((const char *)rec1[i].ptr, rec1[i].size, - (const char *)rec2[i].ptr, rec2[i].size, flags); + int result = xdl_recmatch((const char *)rec1[i].ptr, (long)rec1[i].size, + (const char *)rec2[i].ptr, (long)rec2[i].size, flags); if (!result) return -1; } @@ -119,11 +119,11 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee if (count < 1) return 0; - for (i = 0; i < count; size += recs[i++].size) + for (i = 0; i < count; size += (int)recs[i++].size) if (dest) memcpy(dest + size, recs[i].ptr, recs[i].size); if (add_nl) { - i = recs[count - 1].size; + i = (int)recs[count - 1].size; if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') { if (needs_cr) { if (dest) @@ -156,7 +156,7 @@ static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_n */ static int is_eol_crlf(xdfile_t *file, int i) { - long size; + size_t size; if (i < file->nrec - 1) /* All lines before the last *must* end in LF */ @@ -324,8 +324,8 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags) { - return xdl_recmatch((const char *)rec1->ptr, rec1->size, - (const char *)rec2->ptr, rec2->size, flags); + return xdl_recmatch((const char *)rec1->ptr, (long)rec1->size, + (const char *)rec2->ptr, (long)rec2->size, flags); } /* @@ -441,7 +441,7 @@ static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) { for (; chg; chg--, i++) if (line_contains_alnum((const char *)xe->xdf2.recs[i].ptr, - xe->xdf2.recs[i].size)) + (long)xe->xdf2.recs[i].size)) return 1; return 0; } diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 4c564670764e53..b3219aed3e8795 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -99,8 +99,8 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) if (rcrec->rec.ha == rec->ha && - xdl_recmatch((const char *)rcrec->rec.ptr, rcrec->rec.size, - (const char *)rec->ptr, rec->size, cf->flags)) + xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, + (const char *)rec->ptr, (long)rec->size, cf->flags)) break; if (!rcrec) { @@ -157,7 +157,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ goto abort; crec = &xdf->recs[xdf->nrec++]; crec->ptr = (uint8_t const *)prev; - crec->size = (long) (cur - prev); + crec->size = cur - prev; crec->ha = hav; if (xdl_classify_record(pass, cf, crec) < 0) goto abort; diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 69727fb29999ef..354349b523fbac 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -40,7 +40,7 @@ typedef struct s_chastore { typedef struct s_xrecord { uint8_t const *ptr; - long size; + size_t size; unsigned long ha; } xrecord_t; From b0d4ae30f5a23fa9da87e9396b78e6442b351ddc Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:17 +0000 Subject: [PATCH 08/35] xdiff: use unambiguous types in xdl_hash_record() Convert the function signature and body to use unambiguous types. char is changed to uint8_t because this function processes bytes in memory. unsigned long to uint64_t so that the hash output is consistent across platforms. `flags` was changed from long to uint64_t to ensure the high order bits are not dropped on platforms that treat long as 32 bits. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff-interface.c | 2 +- xdiff/xprepare.c | 6 +++--- xdiff/xutils.c | 28 ++++++++++++++-------------- xdiff/xutils.h | 6 +++--- 4 files changed, 21 insertions(+), 21 deletions(-) diff --git a/xdiff-interface.c b/xdiff-interface.c index 4971f722b3e5f4..1a35556380451a 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -300,7 +300,7 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg) unsigned long xdiff_hash_string(const char *s, size_t len, long flags) { - return xdl_hash_record(&s, s + len, flags); + return xdl_hash_record((uint8_t const**)&s, (uint8_t const*)s + len, flags); } int xdiff_compare_lines(const char *l1, long s1, diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index b3219aed3e8795..85e56021daf9e2 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -137,8 +137,8 @@ static void xdl_free_ctx(xdfile_t *xdf) static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { long bsize; - unsigned long hav; - char const *blk, *cur, *top, *prev; + uint64_t hav; + uint8_t const *blk, *cur, *top, *prev; xrecord_t *crec; xdf->rindex = NULL; @@ -156,7 +156,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) goto abort; crec = &xdf->recs[xdf->nrec++]; - crec->ptr = (uint8_t const *)prev; + crec->ptr = prev; crec->size = cur - prev; crec->ha = hav; if (xdl_classify_record(pass, cf, crec) < 0) diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 7be063bfb61d72..77ee1ad9c86875 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -249,11 +249,11 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) return 1; } -unsigned long xdl_hash_record_with_whitespace(char const **data, - char const *top, long flags) { - unsigned long ha = 5381; - char const *ptr = *data; - int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; +uint64_t xdl_hash_record_with_whitespace(uint8_t const **data, + uint8_t const *top, uint64_t flags) { + uint64_t ha = 5381; + uint8_t const *ptr = *data; + bool cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; for (; ptr < top && *ptr != '\n'; ptr++) { if (cr_at_eol_only) { @@ -263,8 +263,8 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, continue; } else if (XDL_ISSPACE(*ptr)) { - const char *ptr2 = ptr; - int at_eol; + const uint8_t *ptr2 = ptr; + bool at_eol; while (ptr + 1 < top && XDL_ISSPACE(ptr[1]) && ptr[1] != '\n') ptr++; @@ -274,20 +274,20 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, else if (flags & XDF_IGNORE_WHITESPACE_CHANGE && !at_eol) { ha += (ha << 5); - ha ^= (unsigned long) ' '; + ha ^= (uint64_t) ' '; } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL && !at_eol) { while (ptr2 != ptr + 1) { ha += (ha << 5); - ha ^= (unsigned long) *ptr2; + ha ^= (uint64_t) *ptr2; ptr2++; } } continue; } ha += (ha << 5); - ha ^= (unsigned long) *ptr; + ha ^= (uint64_t) *ptr; } *data = ptr < top ? ptr + 1: ptr; @@ -304,9 +304,9 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, #define REASSOC_FENCE(x, y) #endif -unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { - unsigned long ha = 5381, c0, c1; - char const *ptr = *data; +uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top) { + uint64_t ha = 5381, c0, c1; + uint8_t const *ptr = *data; #if 0 /* * The baseline form of the optimized loop below. This is the djb2 @@ -314,7 +314,7 @@ unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { */ for (; ptr < top && *ptr != '\n'; ptr++) { ha += (ha << 5); - ha += (unsigned long) *ptr; + ha += (uint64_t) *ptr; } *data = ptr < top ? ptr + 1: ptr; #else diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 13f68310472a69..615b4a9d355433 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -34,9 +34,9 @@ void *xdl_cha_alloc(chastore_t *cha); long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_blankline(const char *line, long size, long flags); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); -unsigned long xdl_hash_record_verbatim(char const **data, char const *top); -unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags); -static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags) +uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top); +uint64_t xdl_hash_record_with_whitespace(uint8_t const **data, uint8_t const *top, uint64_t flags); +static inline uint64_t xdl_hash_record(uint8_t const **data, uint8_t const *top, uint64_t flags) { if (flags & XDF_WHITESPACE_FLAGS) return xdl_hash_record_with_whitespace(data, top, flags); From 6a26019c81faa07ba811541b4cf35be9e8ee1ead Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:18 +0000 Subject: [PATCH 09/35] xdiff: split xrecord_t.ha into line_hash and minimal_perfect_hash The ha field is serving two different purposes, which makes the code harder to read. At first glance, it looks like many places assume there could never be hash collisions between lines of the two input files. In reality, line_hash is used together with xdl_recmatch() to ensure correct comparisons of lines, even when collisions occur. To make this clearer, the old ha field has been split: * line_hash: a straightforward hash of a line, independent of any external context. Its type is uint64_t, as it comes from a fixed width hash function. * minimal_perfect_hash: Not a new concept, but now a separate field. It comes from the classifier's general-purpose hash table, which assigns each line a unique and minimal hash across the two files. A size_t is used here because it's meant to be used to index an array. This also avoids ` as usize` casts on the Rust side when using it to index a slice. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 6 +++--- xdiff/xhistogram.c | 4 ++-- xdiff/xpatience.c | 10 +++++----- xdiff/xprepare.c | 18 +++++++++--------- xdiff/xtypes.h | 3 ++- 5 files changed, 21 insertions(+), 20 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index cb8e412c7b9db6..8d96074414f2e9 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -22,9 +22,9 @@ #include "xinclude.h" -static unsigned long get_hash(xdfile_t *xdf, long index) +static size_t get_hash(xdfile_t *xdf, long index) { - return xdf->recs[xdf->rindex[index]].ha; + return xdf->recs[xdf->rindex[index]].minimal_perfect_hash; } #define XDL_MAX_COST_MIN 256 @@ -385,7 +385,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, static int recs_match(xrecord_t *rec1, xrecord_t *rec2) { - return (rec1->ha == rec2->ha); + return rec1->minimal_perfect_hash == rec2->minimal_perfect_hash; } /* diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 6dc450b1fe1dfc..5ae1282c27568c 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -90,7 +90,7 @@ struct region { static int cmp_recs(xrecord_t *r1, xrecord_t *r2) { - return r1->ha == r2->ha; + return r1->minimal_perfect_hash == r2->minimal_perfect_hash; } @@ -98,7 +98,7 @@ static int cmp_recs(xrecord_t *r1, xrecord_t *r2) (cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2))) #define TABLE_HASH(index, side, line) \ - XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) + XDL_HASHLONG((REC(index->env, side, line))->minimal_perfect_hash, index->table_bits) static int scanA(struct histindex *index, int line1, int count1) { diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index bb61354f22a177..cc53266f3b8302 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -48,7 +48,7 @@ struct hashmap { int nr, alloc; struct entry { - unsigned long hash; + size_t minimal_perfect_hash; /* * 0 = unused entry, 1 = first line, 2 = second, etc. * line2 is NON_UNIQUE if the line is not unique @@ -101,10 +101,10 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, * So we multiply ha by 2 in the hope that the hashing was * "unique enough". */ - int index = (int)((record->ha << 1) % map->alloc); + int index = (int)((record->minimal_perfect_hash << 1) % map->alloc); while (map->entries[index].line1) { - if (map->entries[index].hash != record->ha) { + if (map->entries[index].minimal_perfect_hash != record->minimal_perfect_hash) { if (++index >= map->alloc) index = 0; continue; @@ -120,7 +120,7 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, if (pass == 2) return; map->entries[index].line1 = line; - map->entries[index].hash = record->ha; + map->entries[index].minimal_perfect_hash = record->minimal_perfect_hash; map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr); if (!map->first) map->first = map->entries + index; @@ -248,7 +248,7 @@ static int match(struct hashmap *map, int line1, int line2) { xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; - return record1->ha == record2->ha; + return record1->minimal_perfect_hash == record2->minimal_perfect_hash; } static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 85e56021daf9e2..bea0992b5e4a33 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -93,12 +93,12 @@ static void xdl_free_classifier(xdlclassifier_t *cf) { static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) { - long hi; + size_t hi; xdlclass_t *rcrec; - hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); + hi = XDL_HASHLONG(rec->line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) - if (rcrec->rec.ha == rec->ha && + if (rcrec->rec.line_hash == rec->line_hash && xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, (const char *)rec->ptr, (long)rec->size, cf->flags)) break; @@ -120,7 +120,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t (pass == 1) ? rcrec->len1++ : rcrec->len2++; - rec->ha = (unsigned long) rcrec->idx; + rec->minimal_perfect_hash = (size_t)rcrec->idx; return 0; } @@ -158,7 +158,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ crec = &xdf->recs[xdf->nrec++]; crec->ptr = prev; crec->size = cur - prev; - crec->ha = hav; + crec->line_hash = hav; if (xdl_classify_record(pass, cf, crec) < 0) goto abort; } @@ -290,7 +290,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->ha]; + rcrec = cf->rcrecs[recs->minimal_perfect_hash]; nm = rcrec ? rcrec->len2 : 0; action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } @@ -298,7 +298,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->ha]; + rcrec = cf->rcrecs[recs->minimal_perfect_hash]; nm = rcrec ? rcrec->len1 : 0; action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } @@ -350,7 +350,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { recs2 = xdf2->recs; for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; i++, recs1++, recs2++) - if (recs1->ha != recs2->ha) + if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; xdf1->dstart = xdf2->dstart = i; @@ -358,7 +358,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { recs1 = xdf1->recs + xdf1->nrec - 1; recs2 = xdf2->recs + xdf2->nrec - 1; for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) - if (recs1->ha != recs2->ha) + if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; xdf1->dend = xdf1->nrec - i - 1; diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 354349b523fbac..d4e9cd2e763616 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -41,7 +41,8 @@ typedef struct s_chastore { typedef struct s_xrecord { uint8_t const *ptr; size_t size; - unsigned long ha; + uint64_t line_hash; + size_t minimal_perfect_hash; } xrecord_t; typedef struct s_xdfile { From 016538780e9f6e83a1d9c7b0ec771fb6c5583c0f Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:19 +0000 Subject: [PATCH 10/35] xdiff: make xdfile_t.nrec a size_t instead of long size_t is used because nrec describes the number of elements for both recs, and for 'changed' + 2. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 8 ++++---- xdiff/xemit.c | 20 ++++++++++---------- xdiff/xmerge.c | 8 ++++---- xdiff/xpatience.c | 2 +- xdiff/xprepare.c | 12 ++++++------ xdiff/xtypes.h | 2 +- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 8d96074414f2e9..21d06bce969765 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -483,7 +483,7 @@ static void measure_split(const xdfile_t *xdf, long split, { long i; - if (split >= xdf->nrec) { + if (split >= (long)xdf->nrec) { m->end_of_file = 1; m->indent = -1; } else { @@ -506,7 +506,7 @@ static void measure_split(const xdfile_t *xdf, long split, m->post_blank = 0; m->post_indent = -1; - for (i = split + 1; i < xdf->nrec; i++) { + for (i = split + 1; i < (long)xdf->nrec; i++) { m->post_indent = get_indent(&xdf->recs[i]); if (m->post_indent != -1) break; @@ -717,7 +717,7 @@ static void group_init(xdfile_t *xdf, struct xdlgroup *g) */ static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end == xdf->nrec) + if (g->end == (long)xdf->nrec) return -1; g->start = g->end + 1; @@ -750,7 +750,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) */ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end < xdf->nrec && + if (g->end < (long)xdf->nrec && recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { xdf->changed[g->start++] = false; xdf->changed[g->end++] = true; diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 2f8007753c30f2..04f7e9193b61f0 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -137,7 +137,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, buf = func_line ? func_line->buf : dummy; size = func_line ? sizeof(func_line->buf) : sizeof(dummy); - for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) { + for (l = start; l != limit && 0 <= l && l < (long)xe->xdf1.nrec; l += step) { long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size); if (len >= 0) { if (func_line) @@ -179,14 +179,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, long fs1, i1 = xch->i1; /* Appended chunk? */ - if (i1 >= xe->xdf1.nrec) { + if (i1 >= (long)xe->xdf1.nrec) { long i2 = xch->i2; /* * We don't need additional context if * a whole function was added. */ - while (i2 < xe->xdf2.nrec) { + while (i2 < (long)xe->xdf2.nrec) { if (is_func_rec(&xe->xdf2, xecfg, i2)) goto post_context_calculation; i2++; @@ -196,7 +196,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, * Otherwise get more context from the * pre-image. */ - i1 = xe->xdf1.nrec - 1; + i1 = (long)xe->xdf1.nrec - 1; } fs1 = get_func_line(xe, xecfg, NULL, i1, -1); @@ -228,8 +228,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, post_context_calculation: lctx = xecfg->ctxlen; - lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); - lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); + lctx = XDL_MIN(lctx, (long)xe->xdf1.nrec - (xche->i1 + xche->chg1)); + lctx = XDL_MIN(lctx, (long)xe->xdf2.nrec - (xche->i2 + xche->chg2)); e1 = xche->i1 + xche->chg1 + lctx; e2 = xche->i2 + xche->chg2 + lctx; @@ -237,13 +237,13 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { long fe1 = get_func_line(xe, xecfg, NULL, xche->i1 + xche->chg1, - xe->xdf1.nrec); + (long)xe->xdf1.nrec); while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1)) fe1--; if (fe1 < 0) - fe1 = xe->xdf1.nrec; + fe1 = (long)xe->xdf1.nrec; if (fe1 > e1) { - e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec); + e2 = XDL_MIN(e2 + (fe1 - e1), (long)xe->xdf2.nrec); e1 = fe1; } @@ -254,7 +254,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, */ if (xche->next) { long l = XDL_MIN(xche->next->i1, - xe->xdf1.nrec - 1); + (long)xe->xdf1.nrec - 1); if (l - xecfg->ctxlen <= e1 || get_func_line(xe, xecfg, NULL, l, e1) < 0) { xche = xche->next; diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index 0dd4558a3280dc..29dad98c496b07 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -158,7 +158,7 @@ static int is_eol_crlf(xdfile_t *file, int i) { size_t size; - if (i < file->nrec - 1) + if (i < (long)file->nrec - 1) /* All lines before the last *must* end in LF */ return (size = file->recs[i].size) > 1 && file->recs[i].ptr[size - 2] == '\r'; @@ -317,7 +317,7 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, continue; i = m->i1 + m->chg1; } - size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0, + size += xdl_recs_copy(xe1, i, (int)xe1->xdf2.nrec - i, 0, 0, dest ? dest + size : NULL); return size; } @@ -622,7 +622,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, changes = c; i0 = xscr1->i1; i1 = xscr1->i2; - i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec; + i2 = xscr1->i1 + (long)xe2->xdf2.nrec - (long)xe2->xdf1.nrec; chg0 = xscr1->chg1; chg1 = xscr1->chg2; chg2 = xscr1->chg1; @@ -637,7 +637,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, if (!changes) changes = c; i0 = xscr2->i1; - i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec; + i1 = xscr2->i1 + (long)xe1->xdf2.nrec - (long)xe1->xdf1.nrec; i2 = xscr2->i2; chg0 = xscr2->chg1; chg1 = xscr2->chg1; diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index cc53266f3b8302..a0b31eb5d8c1c0 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -370,5 +370,5 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, int xdl_do_patience_diff(xpparam_t const *xpp, xdfenv_t *env) { - return patience_diff(xpp, env, 1, env->xdf1.nrec, 1, env->xdf2.nrec); + return patience_diff(xpp, env, 1, (int)env->xdf1.nrec, 1, (int)env->xdf2.nrec); } diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index bea0992b5e4a33..705ddd1ae00a36 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -153,7 +153,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ for (top = blk + bsize; cur < top; ) { prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); - if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) + if (XDL_ALLOC_GROW(xdf->recs, (long)xdf->nrec + 1, narec)) goto abort; crec = &xdf->recs[xdf->nrec++]; crec->ptr = prev; @@ -287,7 +287,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { rcrec = cf->rcrecs[recs->minimal_perfect_hash]; @@ -295,7 +295,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } - if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { rcrec = cf->rcrecs[recs->minimal_perfect_hash]; @@ -348,7 +348,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { recs1 = xdf1->recs; recs2 = xdf2->recs; - for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; + for (i = 0, lim = (long)XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; i++, recs1++, recs2++) if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; @@ -361,8 +361,8 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; - xdf1->dend = xdf1->nrec - i - 1; - xdf2->dend = xdf2->nrec - i - 1; + xdf1->dend = (long)xdf1->nrec - i - 1; + xdf2->dend = (long)xdf2->nrec - i - 1; return 0; } diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index d4e9cd2e763616..4c4d9bd147ebe8 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -47,7 +47,7 @@ typedef struct s_xrecord { typedef struct s_xdfile { xrecord_t *recs; - long nrec; + size_t nrec; ptrdiff_t dstart, dend; bool *changed; long *rindex; From e35877eadbd9bee473936577c82abca9c8333abd Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:20 +0000 Subject: [PATCH 11/35] xdiff: make xdfile_t.nreff a size_t instead of long size_t is used because nreff describes the number of elements in memory for rindex. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xprepare.c | 14 +++++++------- xdiff/xtypes.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 705ddd1ae00a36..39fd79d9d46331 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -264,7 +264,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, nreff, mlim; + long i, nm, mlim; xrecord_t *recs; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; @@ -307,29 +307,29 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * Use temporary arrays to decide if changed[i] should remain * false, or become true. */ - for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; + xdf1->nreff = 0; + for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { if (action1[i] == KEEP || (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { - xdf1->rindex[nreff++] = i; + xdf1->rindex[xdf1->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf1->changed[i] = true; /* i.e. discard */ } - xdf1->nreff = nreff; - for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; + xdf2->nreff = 0; + for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { if (action2[i] == KEEP || (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { - xdf2->rindex[nreff++] = i; + xdf2->rindex[xdf2->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf2->changed[i] = true; /* i.e. discard */ } - xdf2->nreff = nreff; cleanup: xdl_free(action1); diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 4c4d9bd147ebe8..1f495f987f861b 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -51,7 +51,7 @@ typedef struct s_xdfile { ptrdiff_t dstart, dend; bool *changed; long *rindex; - long nreff; + size_t nreff; } xdfile_t; typedef struct s_xdfenv { From 5004a8da14e2aa80b5697b0a3a60e594af1c8292 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:21 +0000 Subject: [PATCH 12/35] xdiff: change rindex from long to size_t in xdfile_t The field rindex describes an index offset for other arrays. Change it to size_t. Changing the type of rindex from long to size_t has no cascading refactor impact because it is only ever used to directly index other arrays. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xtypes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 1f495f987f861b..9074cdadd1118c 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -50,7 +50,7 @@ typedef struct s_xdfile { size_t nrec; ptrdiff_t dstart, dend; bool *changed; - long *rindex; + size_t *rindex; size_t nreff; } xdfile_t; From 22ce0cb6397d3d15c21c217696f262c4b8eb44b3 Mon Sep 17 00:00:00 2001 From: Ezekiel Newren Date: Tue, 18 Nov 2025 22:34:22 +0000 Subject: [PATCH 13/35] xdiff: rename rindex -> reference_index The classic diff adds only the lines that it's going to consider, during the diff, to an array. A mapping between the compacted array, and the lines of the file that they reference, is facilitated by this array. Signed-off-by: Ezekiel Newren Signed-off-by: Junio C Hamano --- xdiff/xdiffi.c | 6 +++--- xdiff/xprepare.c | 10 +++++----- xdiff/xtypes.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 21d06bce969765..4376f943dba539 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -24,7 +24,7 @@ static size_t get_hash(xdfile_t *xdf, long index) { - return xdf->recs[xdf->rindex[index]].minimal_perfect_hash; + return xdf->recs[xdf->reference_index[index]].minimal_perfect_hash; } #define XDL_MAX_COST_MIN 256 @@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, */ if (off1 == lim1) { for (; off2 < lim2; off2++) - xdf2->changed[xdf2->rindex[off2]] = true; + xdf2->changed[xdf2->reference_index[off2]] = true; } else if (off2 == lim2) { for (; off1 < lim1; off1++) - xdf1->changed[xdf1->rindex[off1]] = true; + xdf1->changed[xdf1->reference_index[off1]] = true; } else { xdpsplit_t spl; spl.i1 = spl.i2 = 0; diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 39fd79d9d46331..34c82e4f8e1626 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -128,7 +128,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t static void xdl_free_ctx(xdfile_t *xdf) { - xdl_free(xdf->rindex); + xdl_free(xdf->reference_index); xdl_free(xdf->changed - 1); xdl_free(xdf->recs); } @@ -141,7 +141,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ uint8_t const *blk, *cur, *top, *prev; xrecord_t *crec; - xdf->rindex = NULL; + xdf->reference_index = NULL; xdf->changed = NULL; xdf->recs = NULL; @@ -169,7 +169,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { - if (!XDL_ALLOC_ARRAY(xdf->rindex, xdf->nrec + 1)) + if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1)) goto abort; } @@ -312,7 +312,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd i <= xdf1->dend; i++, recs++) { if (action1[i] == KEEP || (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { - xdf1->rindex[xdf1->nreff++] = i; + xdf1->reference_index[xdf1->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf1->changed[i] = true; @@ -324,7 +324,7 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd i <= xdf2->dend; i++, recs++) { if (action2[i] == KEEP || (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { - xdf2->rindex[xdf2->nreff++] = i; + xdf2->reference_index[xdf2->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf2->changed[i] = true; diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index 9074cdadd1118c..979586f20a6028 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -50,7 +50,7 @@ typedef struct s_xdfile { size_t nrec; ptrdiff_t dstart, dend; bool *changed; - size_t *rindex; + size_t *reference_index; size_t nreff; } xdfile_t; From 831e02340b9de46c9ea0a1bbce3894f390f5a45e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:49 +0100 Subject: [PATCH 14/35] path: move `enter_repo()` into "setup.c" The function `enter_repo()` is used to enter a repository at a given path. As such it sits way closer to setting up a repository than it does with handling paths, but regardless of that it's located in "path.c" instead of in "setup.c". Move the function into "setup.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/receive-pack.c | 2 +- builtin/upload-archive.c | 2 +- builtin/upload-pack.c | 2 +- http-backend.c | 1 + path.c | 100 --------------------------------------- path.h | 15 ------ setup.c | 81 +++++++++++++++++++++++++++++++ setup.h | 38 +++++++++++++++ 8 files changed, 123 insertions(+), 118 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index c9288a9c7e382b..79a0fd4756665d 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -34,7 +34,6 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" -#include "path.h" #include "protocol.h" #include "commit-reach.h" #include "server-info.h" @@ -42,6 +41,7 @@ #include "trace2.h" #include "worktree.h" #include "shallow.h" +#include "setup.h" #include "parse-options.h" static const char * const receive_pack_usage[] = { diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 97d7c9522f9868..25312bb2a52887 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -4,8 +4,8 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "archive.h" -#include "path.h" #include "pkt-line.h" +#include "setup.h" #include "sideband.h" #include "run-command.h" #include "strvec.h" diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index c2bbc035ab0c91..30498fafea3a8b 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -5,11 +5,11 @@ #include "gettext.h" #include "pkt-line.h" #include "parse-options.h" -#include "path.h" #include "protocol.h" #include "replace-object.h" #include "upload-pack.h" #include "serve.h" +#include "setup.h" #include "commit.h" #include "environment.h" diff --git a/http-backend.c b/http-backend.c index 52f0483dd309d7..e9d1ef92bd8dc1 100644 --- a/http-backend.c +++ b/http-backend.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "string-list.h" #include "url.h" +#include "setup.h" #include "strvec.h" #include "packfile.h" #include "odb.h" diff --git a/path.c b/path.c index 7f56eaf9930374..d726537622cda6 100644 --- a/path.c +++ b/path.c @@ -738,106 +738,6 @@ char *interpolate_path(const char *path, int real_home) return NULL; } -/* - * First, one directory to try is determined by the following algorithm. - * - * (0) If "strict" is given, the path is used as given and no DWIM is - * done. Otherwise: - * (1) "~/path" to mean path under the running user's home directory; - * (2) "~user/path" to mean path under named user's home directory; - * (3) "relative/path" to mean cwd relative directory; or - * (4) "/absolute/path" to mean absolute directory. - * - * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" - * in this order. We select the first one that is a valid git repository, and - * chdir() to it. If none match, or we fail to chdir, we return NULL. - * - * If all goes well, we return the directory we used to chdir() (but - * before ~user is expanded), avoiding getcwd() resolving symbolic - * links. User relative paths are also returned as they are given, - * except DWIM suffixing. - */ -const char *enter_repo(const char *path, unsigned flags) -{ - static struct strbuf validated_path = STRBUF_INIT; - static struct strbuf used_path = STRBUF_INIT; - - if (!path) - return NULL; - - if (!(flags & ENTER_REPO_STRICT)) { - static const char *suffix[] = { - "/.git", "", ".git/.git", ".git", NULL, - }; - const char *gitfile; - int len = strlen(path); - int i; - while ((1 < len) && (path[len-1] == '/')) - len--; - - /* - * We can handle arbitrary-sized buffers, but this remains as a - * sanity check on untrusted input. - */ - if (PATH_MAX <= len) - return NULL; - - strbuf_reset(&used_path); - strbuf_reset(&validated_path); - strbuf_add(&used_path, path, len); - strbuf_add(&validated_path, path, len); - - if (used_path.buf[0] == '~') { - char *newpath = interpolate_path(used_path.buf, 0); - if (!newpath) - return NULL; - strbuf_attach(&used_path, newpath, strlen(newpath), - strlen(newpath)); - } - for (i = 0; suffix[i]; i++) { - struct stat st; - size_t baselen = used_path.len; - strbuf_addstr(&used_path, suffix[i]); - if (!stat(used_path.buf, &st) && - (S_ISREG(st.st_mode) || - (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { - strbuf_addstr(&validated_path, suffix[i]); - break; - } - strbuf_setlen(&used_path, baselen); - } - if (!suffix[i]) - return NULL; - gitfile = read_gitfile(used_path.buf); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, used_path.buf); - if (gitfile) { - strbuf_reset(&used_path); - strbuf_addstr(&used_path, gitfile); - } - if (chdir(used_path.buf)) - return NULL; - path = validated_path.buf; - } - else { - const char *gitfile = read_gitfile(path); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, path); - if (gitfile) - path = gitfile; - if (chdir(path)) - return NULL; - } - - if (is_git_directory(".")) { - set_git_dir(".", 0); - check_repository_format(NULL); - return path; - } - - return NULL; -} - int calc_shared_perm(struct repository *repo, int mode) { diff --git a/path.h b/path.h index e67348f25397cc..0ec95a0b079c90 100644 --- a/path.h +++ b/path.h @@ -146,21 +146,6 @@ int adjust_shared_perm(struct repository *repo, const char *path); char *interpolate_path(const char *path, int real_home); -/* The bits are as follows: - * - * - ENTER_REPO_STRICT: callers that require exact paths (as opposed - * to allowing known suffixes like ".git", ".git/.git" to be - * omitted) can set this bit. - * - * - ENTER_REPO_ANY_OWNER_OK: callers that are willing to run without - * ownership check can set this bit. - */ -enum { - ENTER_REPO_STRICT = (1<<0), - ENTER_REPO_ANY_OWNER_OK = (1<<1), -}; - -const char *enter_repo(const char *path, unsigned flags); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); diff --git a/setup.c b/setup.c index 7086741e6c2d1f..98c6fd8ee4c02d 100644 --- a/setup.c +++ b/setup.c @@ -1703,6 +1703,87 @@ void set_git_dir(const char *path, int make_realpath) strbuf_release(&realpath); } +const char *enter_repo(const char *path, unsigned flags) +{ + static struct strbuf validated_path = STRBUF_INIT; + static struct strbuf used_path = STRBUF_INIT; + + if (!path) + return NULL; + + if (!(flags & ENTER_REPO_STRICT)) { + static const char *suffix[] = { + "/.git", "", ".git/.git", ".git", NULL, + }; + const char *gitfile; + int len = strlen(path); + int i; + while ((1 < len) && (path[len-1] == '/')) + len--; + + /* + * We can handle arbitrary-sized buffers, but this remains as a + * sanity check on untrusted input. + */ + if (PATH_MAX <= len) + return NULL; + + strbuf_reset(&used_path); + strbuf_reset(&validated_path); + strbuf_add(&used_path, path, len); + strbuf_add(&validated_path, path, len); + + if (used_path.buf[0] == '~') { + char *newpath = interpolate_path(used_path.buf, 0); + if (!newpath) + return NULL; + strbuf_attach(&used_path, newpath, strlen(newpath), + strlen(newpath)); + } + for (i = 0; suffix[i]; i++) { + struct stat st; + size_t baselen = used_path.len; + strbuf_addstr(&used_path, suffix[i]); + if (!stat(used_path.buf, &st) && + (S_ISREG(st.st_mode) || + (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { + strbuf_addstr(&validated_path, suffix[i]); + break; + } + strbuf_setlen(&used_path, baselen); + } + if (!suffix[i]) + return NULL; + gitfile = read_gitfile(used_path.buf); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); + if (gitfile) { + strbuf_reset(&used_path); + strbuf_addstr(&used_path, gitfile); + } + if (chdir(used_path.buf)) + return NULL; + path = validated_path.buf; + } + else { + const char *gitfile = read_gitfile(path); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; + } + + if (is_git_directory(".")) { + set_git_dir(".", 0); + check_repository_format(NULL); + return path; + } + + return NULL; +} + static int git_work_tree_initialized; /* diff --git a/setup.h b/setup.h index 8522fa8575da71..bfea199bcd8769 100644 --- a/setup.h +++ b/setup.h @@ -97,6 +97,44 @@ static inline int discover_git_directory(struct strbuf *commondir, void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); +/* Flags that can be passed to `enter_repo()`. */ +enum { + /* + * Callers that require exact paths (as opposed to allowing known + * suffixes like ".git", ".git/.git" to be omitted) can set this bit. + */ + ENTER_REPO_STRICT = (1<<0), + + /* + * Callers that are willing to run without ownership check can set this + * bit. + */ + ENTER_REPO_ANY_OWNER_OK = (1<<1), +}; + +/* + * Discover and enter a repository. + * + * First, one directory to try is determined by the following algorithm. + * + * (0) If "strict" is given, the path is used as given and no DWIM is + * done. Otherwise: + * (1) "~/path" to mean path under the running user's home directory; + * (2) "~user/path" to mean path under named user's home directory; + * (3) "relative/path" to mean cwd relative directory; or + * (4) "/absolute/path" to mean absolute directory. + * + * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" + * in this order. We select the first one that is a valid git repository, and + * chdir() to it. If none match, or we fail to chdir, we return NULL. + * + * If all goes well, we return the directory we used to chdir() (but + * before ~user is expanded), avoiding getcwd() resolving symbolic + * links. User relative paths are also returned as they are given, + * except DWIM suffixing. + */ +const char *enter_repo(const char *path, unsigned flags); + const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); char *prefix_path(const char *prefix, int len, const char *path); From 7c188a9e45405ff911b81a5dd9029f4e91fb338e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:50 +0100 Subject: [PATCH 15/35] setup: convert `set_git_dir()` to have file scope We don't have any external callers of `set_git_dir()` anymore now that `enter_repo()` has been moved into "setup.c". Remove the declaration and mark the function as static. Note that this change requires us to move the implementation around so that we can avoid adding any new forward declarations. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- setup.c | 80 ++++++++++++++++++++++++++++----------------------------- setup.h | 1 - 2 files changed, 40 insertions(+), 41 deletions(-) diff --git a/setup.c b/setup.c index 98c6fd8ee4c02d..8bf52df71663a3 100644 --- a/setup.c +++ b/setup.c @@ -1002,6 +1002,46 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? NULL : path; } +static void set_git_dir_1(const char *path) +{ + xsetenv(GIT_DIR_ENVIRONMENT, path, 1); + setup_git_env(path); +} + +static void update_relative_gitdir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *data UNUSED) +{ + char *path = reparent_relative_path(old_cwd, new_cwd, + repo_get_git_dir(the_repository)); + struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); + + trace_printf_key(&trace_setup_key, + "setup: move $GIT_DIR to '%s'", + path); + set_git_dir_1(path); + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); + free(path); +} + +static void set_git_dir(const char *path, int make_realpath) +{ + struct strbuf realpath = STRBUF_INIT; + + if (make_realpath) { + strbuf_realpath(&realpath, path, 1); + path = realpath.buf; + } + + set_git_dir_1(path); + if (!is_absolute_path(path)) + chdir_notify_register(NULL, update_relative_gitdir, NULL); + + strbuf_release(&realpath); +} + static const char *setup_explicit_git_dir(const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, @@ -1663,46 +1703,6 @@ void setup_git_env(const char *git_dir) fetch_if_missing = 0; } -static void set_git_dir_1(const char *path) -{ - xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); -} - -static void update_relative_gitdir(const char *name UNUSED, - const char *old_cwd, - const char *new_cwd, - void *data UNUSED) -{ - char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - - trace_printf_key(&trace_setup_key, - "setup: move $GIT_DIR to '%s'", - path); - set_git_dir_1(path); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); - free(path); -} - -void set_git_dir(const char *path, int make_realpath) -{ - struct strbuf realpath = STRBUF_INIT; - - if (make_realpath) { - strbuf_realpath(&realpath, path, 1); - path = realpath.buf; - } - - set_git_dir_1(path); - if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); - - strbuf_release(&realpath); -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; diff --git a/setup.h b/setup.h index bfea199bcd8769..d55dcc66086308 100644 --- a/setup.h +++ b/setup.h @@ -94,7 +94,6 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); /* Flags that can be passed to `enter_repo()`. */ From 9aaba579932781c74f67d6cecddaad59f0daaaef Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:51 +0100 Subject: [PATCH 16/35] odb: adopt logic to close object databases The logic to close an object database is currently contained in the packfile subsystem. That choice is somewhat relatable, as most of the logic really is to close resources associated with the packfile store itself. But we also end up handling object sources and commit graphs, which certainly is not related to packfiles. Move the function into the object database subsystem and rename it to `odb_close()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/clone.c | 2 +- builtin/gc.c | 2 +- builtin/repack.c | 2 +- midx-write.c | 2 +- odb.c | 18 +++++++++++++++++- odb.h | 7 +++++++ packfile.c | 15 --------------- packfile.h | 1 - run-command.c | 2 +- scalar.c | 2 +- 10 files changed, 30 insertions(+), 23 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index c990f398ef6f37..b19b302b065467 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1617,7 +1617,7 @@ int cmd_clone(int argc, transport_disconnect(transport); if (option_dissociate) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); dissociate_from_references(); } diff --git a/builtin/gc.c b/builtin/gc.c index d212cbb9b84781..961fa343c4b180 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1048,7 +1048,7 @@ int cmd_gc(int argc, report_garbage = report_pack_garbage; odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); clean_pack_garbage(); } diff --git a/builtin/repack.c b/builtin/repack.c index cfdb4c0920b191..d9012141f699c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -488,7 +488,7 @@ int cmd_repack(int argc, string_list_sort(&names); - close_object_store(repo->objects); + odb_close(repo->objects); /* * Ok we have prepared all new packfiles. diff --git a/midx-write.c b/midx-write.c index c73010df6d3a4f..60497586fdf2f4 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1459,7 +1459,7 @@ static int write_midx_internal(struct odb_source *source, } if (ctx.m || ctx.base_midx) - close_object_store(ctx.repo->objects); + odb_close(ctx.repo->objects); if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); diff --git a/odb.c b/odb.c index 3ec21ef24e16bb..bcefa5cede60b5 100644 --- a/odb.c +++ b/odb.c @@ -9,6 +9,7 @@ #include "khash.h" #include "lockfile.h" #include "loose.h" +#include "midx.h" #include "object-file-convert.h" #include "object-file.h" #include "odb.h" @@ -1044,6 +1045,21 @@ struct object_database *odb_new(struct repository *repo) return o; } +void odb_close(struct object_database *o) +{ + struct odb_source *source; + + packfile_store_close(o->packfiles); + + for (source = o->sources; source; source = source->next) { + if (source->midx) + close_midx(source->midx); + source->midx = NULL; + } + + close_commit_graph(o); +} + static void odb_free_sources(struct object_database *o) { while (o->sources) { @@ -1076,7 +1092,7 @@ void odb_clear(struct object_database *o) free((char *) o->cached_objects[i].value.buf); FREE_AND_NULL(o->cached_objects); - close_object_store(o); + odb_close(o); packfile_store_free(o->packfiles); o->packfiles = NULL; diff --git a/odb.h b/odb.h index 9bb28008b1d953..71b4897c82f3a8 100644 --- a/odb.h +++ b/odb.h @@ -169,6 +169,13 @@ struct object_database { struct object_database *odb_new(struct repository *repo); void odb_clear(struct object_database *o); +/* + * Close the object database and all of its sources so that any held resources + * will be released. The database can still be used after closing it, in which + * case these resources may be reallocated. + */ +void odb_close(struct object_database *o); + /* * Clear caches, reload alternates and then reload object sources so that new * objects may become accessible. diff --git a/packfile.c b/packfile.c index 40f733dd234900..af71eaf7e34461 100644 --- a/packfile.c +++ b/packfile.c @@ -359,21 +359,6 @@ void close_pack(struct packed_git *p) oidset_clear(&p->bad_objects); } -void close_object_store(struct object_database *o) -{ - struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - - close_commit_graph(o); -} - void unlink_pack_path(const char *pack_name, int force_delete) { static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; diff --git a/packfile.h b/packfile.h index 58fcc88e20224b..d9226a072ac96d 100644 --- a/packfile.h +++ b/packfile.h @@ -279,7 +279,6 @@ struct object_database; unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); void close_pack_windows(struct packed_git *); void close_pack(struct packed_git *); -void close_object_store(struct object_database *o); void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(struct repository *r, const char *path, diff --git a/run-command.c b/run-command.c index ed9575bd6a8cbb..e3e02475ccec50 100644 --- a/run-command.c +++ b/run-command.c @@ -743,7 +743,7 @@ int start_command(struct child_process *cmd) fflush(NULL); if (cmd->close_object_store) - close_object_store(the_repository->objects); + odb_close(the_repository->objects); #ifndef GIT_WINDOWS_NATIVE { diff --git a/scalar.c b/scalar.c index f7543116272b77..2aeb191cc89b72 100644 --- a/scalar.c +++ b/scalar.c @@ -931,7 +931,7 @@ static int cmd_delete(int argc, const char **argv) if (dir_inside_of(cwd, enlistment.buf) >= 0) res = error(_("refusing to delete current working directory")); else { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); res = delete_enlistment(&enlistment); } strbuf_release(&enlistment); From f8bdf3127ab7df8a8f3039f41889b35eefe029a3 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:52 +0100 Subject: [PATCH 17/35] odb: refactor `odb_clear()` to `odb_free()` The function `odb_clear()` releases all resources allocated to an object database and ensures that all fields become zero'd out. Despite its naming though it doesn't really clear the object database so that it becomes ready for reuse afterwards again -- the caller would first have to reinitialize it, and that contradicts the terminology of "clearing" as we have defined it in our coding guidelines. There isn't really only a reason to have "clearing" semantics, either. There's only a single caller of `odb_clear()`, and that caller also ends up freeing the object database structure itself. Refactor the function to have "freeing" semantics instead, so that the structure itself is also freed, which allows us to drop some useless boilerplate to zero out the structure's members. This refactoring reveals that we're trying to close the commit graph multiple times: once directly via `free_commit_graph()`, and once via `odb_close()`. Drop the former call. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 19 ++++++++----------- odb.h | 4 +++- repository.c | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index bcefa5cede60b5..29cf6496c5e50a 100644 --- a/odb.c +++ b/odb.c @@ -1073,30 +1073,27 @@ static void odb_free_sources(struct object_database *o) o->source_by_path = NULL; } -void odb_clear(struct object_database *o) +void odb_free(struct object_database *o) { - FREE_AND_NULL(o->alternate_db); + if (!o) + return; + + free(o->alternate_db); oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); - free_commit_graph(o->commit_graph); - o->commit_graph = NULL; - o->commit_graph_attempted = 0; - odb_free_sources(o); - o->sources_tail = NULL; - o->loaded_alternates = 0; for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); - FREE_AND_NULL(o->cached_objects); + free(o->cached_objects); odb_close(o); packfile_store_free(o->packfiles); - o->packfiles = NULL; - string_list_clear(&o->submodule_source_paths, 0); + + free(o); } void odb_reprepare(struct object_database *o) diff --git a/odb.h b/odb.h index 71b4897c82f3a8..77b313b784cad3 100644 --- a/odb.h +++ b/odb.h @@ -167,7 +167,9 @@ struct object_database { }; struct object_database *odb_new(struct repository *repo); -void odb_clear(struct object_database *o); + +/* Free the object database and release all resources. */ +void odb_free(struct object_database *o); /* * Close the object database and all of its sources so that any held resources diff --git a/repository.c b/repository.c index 6aaa7ba00869bf..3c8b3813b00af0 100644 --- a/repository.c +++ b/repository.c @@ -382,8 +382,8 @@ void repo_clear(struct repository *repo) FREE_AND_NULL(repo->worktree); FREE_AND_NULL(repo->submodule_prefix); - odb_clear(repo->objects); - FREE_AND_NULL(repo->objects); + odb_free(repo->objects); + repo->objects = NULL; parsed_object_pool_clear(repo->parsed_objects); FREE_AND_NULL(repo->parsed_objects); From fbf3d0669f830b4492070aa33f57dbf2c43fa4c8 Mon Sep 17 00:00:00 2001 From: Kristoffer Haugsbakk Date: Thu, 20 Nov 2025 17:26:49 +0100 Subject: [PATCH 18/35] doc: warn against --committer-date-is-author-date MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This option could create a commit history which violates the assumption that commits have non-decreasing commit timestamps. Warn against that in both git-am(1) and git-rebase(1). The genesis of this option is from git-am(1) and was added in 3f01ad66 (am: Add --committer-date-is-author-date option, 2009-01-22). The commit message doesn’t give us an example of a use case, but the thread starter does:[1] I've a big set of patches in a mbox file: there's sufficient info inside for git-am to work. Yet, each time I do import these, my sha1sums are changing because of different commit dates. I'd like to force the commit date to match the info/date from the time I received the email (and therefore always get back the right sha1sums). [1]: https://lore.kernel.org/git/46d6db660901221441q60eb90bdge601a7a250c3a247@mail.gmail.com/ So the motivation was to treat git-am(1) as an import command that creates the same commit IDs. Putting aside the question of whether you should be using git-am(1) for importing commits, this approach is problematic: • you still need to apply the commits to the same base if you want the same hashes; and • you need the same committer. And if you expect the same committer, why is this person applying the same patches multiple times with the goal of making *identical* commits? That was all for git-am(1). It was added to git-rebase(1) in 570ccad3 (rebase: add options passed to git-am, 2009-03-18)[2] in order to plug options that could not be sent on to git-am(1). At this point the utility of the option graduated to making no sense; a use case for `git rebase --committer-date-is-author- date` is still yet to be found. Just warn against using this option on both commands and remind the user to consider whether they really need it. † 2: See also 7573cec5 (rebase -i: support --committer-date-is-author-date, 2020-08-17) for the commit for the merge backend Suggested-by: Johannes Sixt Signed-off-by: Kristoffer Haugsbakk Acked-by: Johannes Sixt Signed-off-by: Junio C Hamano --- Documentation/git-am.adoc | 7 +++++++ Documentation/git-rebase.adoc | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index 221070de481227..264d21a7de7801 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -161,6 +161,13 @@ Valid for the `--whitespace` option are: commit creation as the committer date. This allows the user to lie about the committer date by using the same value as the author date. ++ +WARNING: The history walking machinery assumes that commits have +non-decreasing commit timestamps. You should consider if you really need +to use this option. Then you should only use this option to override the +committer date when applying commits on top of a base which commit is +older (in terms of the commit date) than the oldest patch you are +applying. --ignore-date:: By default the command records the date from the e-mail diff --git a/Documentation/git-rebase.adoc b/Documentation/git-rebase.adoc index 956d3048f5a618..0f808c82b2877b 100644 --- a/Documentation/git-rebase.adoc +++ b/Documentation/git-rebase.adoc @@ -507,6 +507,13 @@ See also INCOMPATIBLE OPTIONS below. Instead of using the current time as the committer date, use the author date of the commit being rebased as the committer date. This option implies `--force-rebase`. ++ +WARNING: The history walking machinery assumes that commits have +non-decreasing commit timestamps. You should consider if you really need +to use this option. Then you should only use this option to override the +committer date when rebasing commits on top of a base which commit is +older (in terms of the commit date) than the oldest commit you are +applying (in terms of the author date). --ignore-date:: --reset-author-date:: From 770afe443784b3ec2c72d68aa509e48064942348 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 20 Nov 2025 11:32:45 -0800 Subject: [PATCH 19/35] config: mark otherwise unused function as file-scope static git_configset_get_pathname() is only used once inside config.c; we do not have to expose it as a public function. Signed-off-by: Junio C Hamano --- config.c | 2 +- config.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/config.c b/config.c index 73fc74c8fa1a35..6552e5b0b80202 100644 --- a/config.c +++ b/config.c @@ -1954,7 +1954,7 @@ int git_configset_get_maybe_bool(struct config_set *set, const char *key, int *d return 1; } -int git_configset_get_pathname(struct config_set *set, const char *key, char **dest) +static int git_configset_get_pathname(struct config_set *set, const char *key, char **dest) { const char *value; if (!git_configset_get_value(set, key, &value, NULL)) diff --git a/config.h b/config.h index 19c87fc0bc1a2a..ba426a960af9f4 100644 --- a/config.h +++ b/config.h @@ -564,7 +564,6 @@ int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned lon int git_configset_get_bool(struct config_set *cs, const char *key, int *dest); int git_configset_get_bool_or_int(struct config_set *cs, const char *key, int *is_bool, int *dest); int git_configset_get_maybe_bool(struct config_set *cs, const char *key, int *dest); -int git_configset_get_pathname(struct config_set *cs, const char *key, char **dest); /** * Run only the discover part of the repo_config_get_*() functions From df963f0df4756fa751bfbb39e104d004e3f7d60b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Nov 2025 21:33:24 +0100 Subject: [PATCH 20/35] config: fix suggestion for failed set of multi-valued option MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The command "git config set " fails for an option that has multiple values. List the "git config set" flags that can be used, instead of old-style "git config" actions. Reported-by: Paul Wintz Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/config.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/builtin/config.c b/builtin/config.c index 59fb113b073926..ef1c1a9cf2c0c8 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -974,7 +974,7 @@ static int cmd_config_set(int argc, const char **argv, const char *prefix, argv[0], comment, value); if (ret == CONFIG_NOTHING_SET) error(_("cannot overwrite multiple values with a single value\n" - " Use a regexp, --add or --replace-all to change %s."), argv[0]); + " Use --value=, --append or --all to change %s."), argv[0]); } location_options_release(&location_opts); From 18bf67b7537f8ff0cd772847aa03f9cc319b1346 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ren=C3=A9=20Scharfe?= Date: Mon, 24 Nov 2025 22:00:05 +0100 Subject: [PATCH 21/35] config: fix short help of unset flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The flags --all and --value of "git config unset" don't make the command "replace" or "show" anything, they are about selecting what to unset. Change their help text accordingly. Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- builtin/config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/config.c b/builtin/config.c index f70d6354772259..f1aa4e21ffd51f 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -992,8 +992,8 @@ static int cmd_config_unset(int argc, const char **argv, const char *prefix, struct option opts[] = { CONFIG_LOCATION_OPTIONS(location_opts), OPT_GROUP(N_("Filter")), - OPT_BIT(0, "all", &flags, N_("replace multi-valued config option with new value"), CONFIG_FLAGS_MULTI_REPLACE), - OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("show config with values matching the pattern")), + OPT_BIT(0, "all", &flags, N_("unset all multi-valued config options"), CONFIG_FLAGS_MULTI_REPLACE), + OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("unset multi-valued config options with matching values")), OPT_BIT(0, "fixed-value", &flags, N_("use string equality when comparing values to value pattern"), CONFIG_FLAGS_FIXED_VALUE), OPT_END(), }; From ce1a5a22a5beefac8a52da518855b5aecc562874 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 20 Nov 2025 11:34:50 -0800 Subject: [PATCH 22/35] config: really pretend missing :(optional) value is not there Earlier we added support for a value spelled as ":(optional)path" for configuration variables whose values are of type "path", with the documented semantics "if the path is missing, behave as if such a variable definition is not even there." This has worked OK for code paths that reads configuration files and stores the configured value as a string, where NULL in such a string is treated as if the setting is not there, left as the default. However, there are other code paths that do not _ignore_ such NULL values and misbehave. "git config get --path" is one of them. When git_config_pathname() helper function finds that the value of the variable is an optional path *and* the path is missing, it leaves the destination pointer intact (which usually is left to NULL) and returns 0 to signal a success. format_config() helper however assumed that the destination pointer always gets a string, which no longer is the case, and segfaulted. Make sure that git_config_pathname() clears the destination pointer in such a case, and teach format_config() to react to the condition by returning 1 (which is different from 0 that is a normal success and negative that is an error) to its callers. Adjust the callers to react to this new return value that tells them to pretend as if they did not even see this partcular pair. Reported-by: Han Jiang Helped-by: Jeff King Signed-off-by: Junio C Hamano --- builtin/config.c | 45 ++++++++++++++++++++++++++++++-------- config.c | 1 + t/meson.build | 1 + t/t1311-config-optional.sh | 38 ++++++++++++++++++++++++++++++++ 4 files changed, 76 insertions(+), 9 deletions(-) create mode 100755 t/t1311-config-optional.sh diff --git a/builtin/config.c b/builtin/config.c index 59fb113b073926..2b36eb7d1cd204 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -261,6 +261,12 @@ struct strbuf_list { int alloc; }; +/* + * Format the configuration key-value pair (`key_`, `value_`) and + * append it into strbuf `buf`. Returns a negative value on failure, + * 0 on success, 1 on a missing optional value (i.e., telling the + * caller to pretend that did not exist). + */ static int format_config(const struct config_display_options *opts, struct strbuf *buf, const char *key_, const char *value_, const struct key_value_info *kvi) @@ -299,7 +305,10 @@ static int format_config(const struct config_display_options *opts, char *v; if (git_config_pathname(&v, key_, value_) < 0) return -1; - strbuf_addstr(buf, v); + if (v) + strbuf_addstr(buf, v); + else + return 1; /* :(optional)no-such-file */ free((char *)v); } else if (opts->type == TYPE_EXPIRY_DATE) { timestamp_t t; @@ -344,6 +353,7 @@ static int collect_config(const char *key_, const char *value_, struct collect_config_data *data = cb; struct strbuf_list *values = data->values; const struct key_value_info *kvi = ctx->kvi; + int status; if (!(data->get_value_flags & GET_VALUE_KEY_REGEXP) && strcmp(key_, data->key)) @@ -361,8 +371,15 @@ static int collect_config(const char *key_, const char *value_, ALLOC_GROW(values->items, values->nr + 1, values->alloc); strbuf_init(&values->items[values->nr], 0); - return format_config(data->display_opts, &values->items[values->nr++], - key_, value_, kvi); + status = format_config(data->display_opts, &values->items[values->nr++], + key_, value_, kvi); + if (status < 0) + return status; + if (status) { + strbuf_release(&values->items[--values->nr]); + status = 0; + } + return status; } static int get_value(const struct config_location_options *opts, @@ -438,15 +455,23 @@ static int get_value(const struct config_location_options *opts, if (!values.nr && display_opts->default_value) { struct key_value_info kvi = KVI_INIT; struct strbuf *item; + int status; kvi_from_param(&kvi); ALLOC_GROW(values.items, values.nr + 1, values.alloc); item = &values.items[values.nr++]; strbuf_init(item, 0); - if (format_config(display_opts, item, key_, - display_opts->default_value, &kvi) < 0) + + status = format_config(display_opts, item, key_, + display_opts->default_value, &kvi); + if (status < 0) die(_("failed to format default config value: %s"), display_opts->default_value); + if (status) { + /* default was a missing optional value */ + values.nr--; + strbuf_release(item); + } } ret = !values.nr; @@ -706,11 +731,13 @@ static int get_urlmatch(const struct config_location_options *opts, for_each_string_list_item(item, &values) { struct urlmatch_current_candidate_value *matched = item->util; struct strbuf buf = STRBUF_INIT; + int status; - format_config(&display_opts, &buf, item->string, - matched->value_is_null ? NULL : matched->value.buf, - &matched->kvi); - fwrite(buf.buf, 1, buf.len, stdout); + status = format_config(&display_opts, &buf, item->string, + matched->value_is_null ? NULL : matched->value.buf, + &matched->kvi); + if (!status) + fwrite(buf.buf, 1, buf.len, stdout); strbuf_release(&buf); strbuf_release(&matched->value); diff --git a/config.c b/config.c index 6552e5b0b80202..c6ef290011b752 100644 --- a/config.c +++ b/config.c @@ -1292,6 +1292,7 @@ int git_config_pathname(char **dest, const char *var, const char *value) if (is_optional && is_missing_file(path)) { free(path); + *dest = NULL; return 0; } diff --git a/t/meson.build b/t/meson.build index bbeba1a8d50e1b..137c0caea0e1b5 100644 --- a/t/meson.build +++ b/t/meson.build @@ -182,6 +182,7 @@ integration_tests = [ 't1308-config-set.sh', 't1309-early-config.sh', 't1310-config-default.sh', + 't1311-config-optional.sh', 't1350-config-hooks-path.sh', 't1400-update-ref.sh', 't1401-symbolic-ref.sh', diff --git a/t/t1311-config-optional.sh b/t/t1311-config-optional.sh new file mode 100755 index 00000000000000..fbbacfc67b368d --- /dev/null +++ b/t/t1311-config-optional.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# Copyright (c) 2025 Google LLC +# + +test_description=':(optional) paths' + +. ./test-lib.sh + +test_expect_success 'var=:(optional)path-exists' ' + test_config a.path ":(optional)path-exists" && + >path-exists && + echo path-exists >expect && + + git config get --path a.path >actual && + test_cmp expect actual +' + +test_expect_success 'missing optional value is ignored' ' + test_config a.path ":(optional)no-such-path" && + # Using --show-scope ensures we skip writing not only the value + # but also any meta-information about the ignored key. + test_must_fail git config get --show-scope --path a.path >actual && + test_line_count = 0 actual +' + +test_expect_success 'missing optional value is ignored in multi-value config' ' + test_when_finished "git config unset --all a.path" && + git config set --append a.path ":(optional)path-exists" && + git config set --append a.path ":(optional)no-such-path" && + >path-exists && + echo path-exists >expect && + + git config --get --path a.path >actual && + test_cmp expect actual +' + +test_done From 0bd16856ffb3968de73699ad0555d1fae6c45406 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Thu, 20 Nov 2025 11:45:35 -0800 Subject: [PATCH 23/35] config: really treat missing optional path as not configured These callers expect that git_config_pathname() that returns 0 is a signal that the variable they passed has a string they need to act on. But with the introduction of ":(optional)path" earlier, that is no longer the case. If the path specified by the configuration variable is missing, their variable will get a NULL in it, and they need to act on it (often, just refraining from copying it elsewhere). Signed-off-by: Junio C Hamano --- builtin/blame.c | 3 ++- builtin/receive-pack.c | 5 +++-- fetch-pack.c | 5 +++-- fsck.c | 12 +++++++----- gpg-interface.c | 10 +++++++++- setup.c | 2 +- 6 files changed, 25 insertions(+), 12 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 5b10e84b664228..10928341442e0f 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -739,7 +739,8 @@ static int git_blame_config(const char *var, const char *value, ret = git_config_pathname(&str, var, value); if (ret) return ret; - string_list_insert(&ignore_revs_file_list, str); + if (str) + string_list_insert(&ignore_revs_file_list, str); free(str); return 0; } diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 1113137a6f0b3f..471857335429f8 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -177,8 +177,9 @@ static int receive_pack_config(const char *var, const char *value, if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(&fsck_msg_types, "%cskiplist=%s", - fsck_msg_types.len ? ',' : '=', path); + if (path) + strbuf_addf(&fsck_msg_types, "%cskiplist=%s", + fsck_msg_types.len ? ',' : '=', path); free(path); return 0; } diff --git a/fetch-pack.c b/fetch-pack.c index 46c39f85c4ca9e..33a3f20bfc4d27 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1872,8 +1872,9 @@ int fetch_pack_fsck_config(const char *var, const char *value, if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(msg_types, "%cskiplist=%s", - msg_types->len ? ',' : '=', path); + if (path) + strbuf_addf(msg_types, "%cskiplist=%s", + msg_types->len ? ',' : '=', path); free(path); return 0; } diff --git a/fsck.c b/fsck.c index 171b424dd57de1..0c287699d28267 100644 --- a/fsck.c +++ b/fsck.c @@ -1351,14 +1351,16 @@ int git_fsck_config(const char *var, const char *value, if (strcmp(var, "fsck.skiplist") == 0) { char *path; - struct strbuf sb = STRBUF_INIT; if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(&sb, "skiplist=%s", path); - free(path); - fsck_set_msg_types(options, sb.buf); - strbuf_release(&sb); + if (path) { + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, "skiplist=%s", path); + free(path); + fsck_set_msg_types(options, sb.buf); + strbuf_release(&sb); + } return 0; } diff --git a/gpg-interface.c b/gpg-interface.c index 06e7fb50603d22..8b91a11a430a35 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -794,8 +794,16 @@ static int git_gpg_config(const char *var, const char *value, fmtname = "ssh"; if (fmtname) { + char *program; + int status; + fmt = get_format_by_name(fmtname); - return git_config_pathname((char **) &fmt->program, var, value); + status = git_config_pathname(&program, var, value); + if (status) + return status; + if (program) + fmt->program = program; + return status; } return 0; diff --git a/setup.c b/setup.c index 98ddbf377f923b..18301b5907dc20 100644 --- a/setup.c +++ b/setup.c @@ -1248,7 +1248,7 @@ static int safe_directory_cb(const char *key, const char *value, } else { char *allowed = NULL; - if (!git_config_pathname(&allowed, key, value)) { + if (!git_config_pathname(&allowed, key, value) && allowed) { char *normalized = NULL; /* From b67b2d9fb7ccfaa72446d76abc8c36849d2e0685 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:53 +0100 Subject: [PATCH 24/35] odb: move logic to disable ref updates into repo Our object database sources have a field `disable_ref_updates`. This field can obviously be set to disable reference updates, but it is somewhat curious that this logic is hosted by the object database. The reason for this is that it was primarily added to keep us from accidentally updating references while an ODB transaction is ongoing. Any objects part of the transaction have not yet been committed to disk, so new references that point to them might get corrupted in case we never end up committing the transaction. As such, whenever we create a new transaction we set up a new temporary ODB source and mark it as disabling reference updates. This has one (and only one?) upside: once we have committed the transaction, the temporary source will be dropped and thus we clean up the disabled reference updates automatically. But other than that, it's somewhat misdesigned: - We can have multiple ODB sources, but only the currently active source inhibits reference updates. - We're mixing concerns of the refdb with the ODB. Arguably, the decision of whether we can update references or not should be handled by the refdb. But that wouldn't be a great fit either, as there can be one refdb per worktree. So we'd again have the same problem that a "global" intent becomes localized to a specific instance. Instead, move the setting into the repository. While at it, convert it into a boolean. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 3 ++- odb.h | 7 ------- refs.c | 2 +- repository.c | 2 +- repository.h | 9 ++++++++- setup.c | 2 +- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/odb.c b/odb.c index 29cf6496c5e50a..ccc6e999e7ae25 100644 --- a/odb.c +++ b/odb.c @@ -360,7 +360,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Disable ref updates while a temporary odb is active, since * the objects in the database may roll back. */ - source->disable_ref_updates = 1; + odb->repo->disable_ref_updates = true; source->will_destroy = will_destroy; source->next = odb->sources; odb->sources = source; @@ -387,6 +387,7 @@ void odb_restore_primary_source(struct object_database *odb, if (cur_source->next != restore_source) BUG("we expect the old primary object store to be the first alternate"); + odb->repo->disable_ref_updates = false; odb->sources = restore_source; odb_source_free(cur_source); } diff --git a/odb.h b/odb.h index 77b313b784cad3..99c4d489729459 100644 --- a/odb.h +++ b/odb.h @@ -66,13 +66,6 @@ struct odb_source { */ bool local; - /* - * This is a temporary object store created by the tmp_objdir - * facility. Disable ref updates since the objects in the store - * might be discarded on rollback. - */ - int disable_ref_updates; - /* * This object store is ephemeral, so there is no need to fsync. */ diff --git a/refs.c b/refs.c index 965381367e0e53..6c7283d9eb2aa8 100644 --- a/refs.c +++ b/refs.c @@ -2491,7 +2491,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, break; } - if (refs->repo->objects->sources->disable_ref_updates) { + if (refs->repo->disable_ref_updates) { strbuf_addstr(err, _("ref updates forbidden inside quarantine environment")); return -1; diff --git a/repository.c b/repository.c index 3c8b3813b00af0..455c2d279fb8ab 100644 --- a/repository.c +++ b/repository.c @@ -179,7 +179,7 @@ void repo_set_gitdir(struct repository *repo, repo->objects->sources->path = objects_path; } - repo->objects->sources->disable_ref_updates = o->disable_ref_updates; + repo->disable_ref_updates = o->disable_ref_updates; free(repo->objects->alternate_db); repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); diff --git a/repository.h b/repository.h index 5808a5d610846a..614649413b68bc 100644 --- a/repository.h +++ b/repository.h @@ -71,6 +71,13 @@ struct repository { */ struct ref_store *refs_private; + /* + * Disable ref updates. This is especially used in contexts where + * transactions may still be rolled back so that we don't start to + * reference objects that may vanish. + */ + bool disable_ref_updates; + /* * A strmap of ref_stores, stored by submodule name, accessible via * `repo_get_submodule_ref_store()`. @@ -187,7 +194,7 @@ struct set_gitdir_args { const char *graft_file; const char *index_file; const char *alternate_db; - int disable_ref_updates; + bool disable_ref_updates; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index 8bf52df71663a3..a752e9fc8476a0 100644 --- a/setup.c +++ b/setup.c @@ -1682,7 +1682,7 @@ void setup_git_env(const char *git_dir) args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = 1; + args.disable_ref_updates = true; } repo_set_gitdir(the_repository, git_dir, &args); From 5d795b34dcbf46039c3dda028bb8df8d75a5a9d0 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:54 +0100 Subject: [PATCH 25/35] oidset: introduce `oidset_equal()` Introduce a new function that allows the caller to verify whether two oidsets contain the exact same object IDs. Note that this change requires us to change `oidset_iter_init()` to accept a `const struct oidset`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- oidset.c | 16 ++++++++++++++++ oidset.h | 9 +++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/oidset.c b/oidset.c index 8d36aef8dca4fc..c8ff0b385c58ac 100644 --- a/oidset.c +++ b/oidset.c @@ -16,6 +16,22 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid) return pos != kh_end(&set->set); } +bool oidset_equal(const struct oidset *a, const struct oidset *b) +{ + struct oidset_iter iter; + struct object_id *a_oid; + + if (oidset_size(a) != oidset_size(b)) + return false; + + oidset_iter_init(a, &iter); + while ((a_oid = oidset_iter_next(&iter))) + if (!oidset_contains(b, a_oid)) + return false; + + return true; +} + int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; diff --git a/oidset.h b/oidset.h index 0106b6f2787f0e..e0f1a6ff4ff203 100644 --- a/oidset.h +++ b/oidset.h @@ -38,6 +38,11 @@ void oidset_init(struct oidset *set, size_t initial_size); */ int oidset_contains(const struct oidset *set, const struct object_id *oid); +/** + * Returns true iff `a` and `b` contain the exact same OIDs. + */ +bool oidset_equal(const struct oidset *a, const struct oidset *b); + /** * Insert the oid into the set; a copy is made, so "oid" does not need * to persist after this function is called. @@ -94,11 +99,11 @@ void oidset_parse_file_carefully(struct oidset *set, const char *path, oidset_parse_tweak_fn fn, void *cbdata); struct oidset_iter { - kh_oid_set_t *set; + const kh_oid_set_t *set; khiter_t iter; }; -static inline void oidset_iter_init(struct oidset *set, +static inline void oidset_iter_init(const struct oidset *set, struct oidset_iter *iter) { iter->set = &set->set; From 8dc22e87f000a092b62b4fb08e2542433f1ae192 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:55 +0100 Subject: [PATCH 26/35] builtin/index-pack: fix deferred fsck outside repos When asked to perform object consistency checks via the `--fsck-objects` flag we verify that each object part of the pack is valid. In general, this check can even be performed outside of a Git repository: we don't need an initialized object database as we simply read the object from the packfile directly. But there's one exception: a subset of the object checks may be deferred to a later point in time. For now, this only concerns ".gitmodules" and ".gitattributes" files: whenever we see a tree referencing these files we queue them for a deferred check. This is done because we need to do some extra checks for those files to ensure that they are well-formed, and these checks need to be done regardless of whether the corresponding blobs are part of the packfile or not. This works inside a repository, but unfortunately the logic leads to a segfault when running outside of one. This is because we eventually call `odb_read_object()`, which will crash because the object database has not been initialized. There's multiple options here: - We could in theory create a purely in-memory database with only a packfile store that contains the single packfile. We don't really have the infrastructure for this yet though, and it would end up being quite hacky. - We could refuse to perform consistency checks outside of a repository. But most of the checks work alright, so this would be a regression. - We can skip the finalizing consistency checks when running outside of a repository. This is not as invasive as skipping all checks, but it's not great to randomly skip a subset of tests, either. None of these options really feel perfect. The first one would be the obvious choice if easily possible. There's another option though: instead of skipping the final object checks, we can die if there are any queued object checks. With this change we now die exactly if and only if we would have previously segfaulted. Like this we ensure that objects that _may_ fail the consistency checks won't be silently skipped, and at the same time we give users a much better error message. Refactor the code accordingly and add a test that would have triggered the segfault. Note that we also move down the logic to add the packfile to the store. There is no point doing this any earlier than right before we execute `fsck_finish()`, and it ensures that the logic to set up and perform the consistency check is self-contained. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- builtin/index-pack.c | 21 ++++++++++++++++++--- fsck.c | 6 ++++++ fsck.h | 7 +++++++ t/t5302-pack-index.sh | 16 ++++++++++++++++ 4 files changed, 47 insertions(+), 3 deletions(-) diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4d14a..699fe678cd60b0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,7 +1640,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) + if (do_fsck_object && startup_info->have_repository) packfile_store_load_pack(the_repository->objects->packfiles, final_index_name, 0); @@ -2110,8 +2110,23 @@ int cmd_index_pack(int argc, else close(input_fd); - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { + /* + * We cannot perform queued consistency checks when running + * outside of a repository because those require us to read + * from the object database, which is uninitialized. + * + * TODO: we may eventually set up an in-memory object database, + * which would allow us to perform these queued checks. + */ + if (!startup_info->have_repository && + fsck_has_queued_checks(&fsck_options)) + die(_("cannot perform queued object checks outside " + "of a repository")); + + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } free(opts.anomaly); free(objects); diff --git a/fsck.c b/fsck.c index 341e100d24ece0..8e1565fe6d0133 100644 --- a/fsck.c +++ b/fsck.c @@ -1350,6 +1350,12 @@ int fsck_finish(struct fsck_options *options) return ret; } +bool fsck_has_queued_checks(struct fsck_options *options) +{ + return !oidset_equal(&options->gitmodules_found, &options->gitmodules_done) || + !oidset_equal(&options->gitattributes_found, &options->gitattributes_done); +} + void fsck_options_clear(struct fsck_options *options) { free(options->msg_type); diff --git a/fsck.h b/fsck.h index cb6ef32f4f3aaa..336917c0451aac 100644 --- a/fsck.h +++ b/fsck.h @@ -248,6 +248,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Check whether there are any checks that have been queued up and that still + * need to be run. Returns `false` iff `fsck_finish()` wouldn't perform any + * actions, `true` otherwise. + */ +bool fsck_has_queued_checks(struct fsck_options *options); + /* * Clear the fsck_options struct, freeing any allocated memory. */ diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 413c99274c8f30..9697448cb27634 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -293,4 +293,20 @@ test_expect_success 'too-large packs report the breach' ' grep "maximum allowed size (20 bytes)" err ' +# git-index-pack(1) uses the default hash algorithm outside of the repository, +# and it has no way to tell it otherwise. So we can only run this test with the +# default hash algorithm, as it would otherwise fail to parse the tree. +test_expect_success DEFAULT_HASH_ALGORITHM 'index-pack --fsck-objects outside of a repo' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + printf "100644 blob $(test_oid 001)\t.gitattributes\n" >tree && + git mktree --missing tree-oid && + git pack-objects err && + test_grep "cannot perform queued object checks outside of a repository" err + ) +' + test_done From eea83c010cf431325a05f06cc7c64029538c8495 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:56 +0100 Subject: [PATCH 27/35] t/helper: stop setting up `the_repository` repeatedly The "repository" test helper sets up `the_repository` twice. In fact though, we don't even have to set it up even once: all we need is to set up its hash algorithm, because we still depend on some subsystems that aren't free of `the_repository`. Refactor the code accordingly. This prepares for a subsequent change, where setting up the repository repeatedly will lead to a `BUG()`. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- t/helper/test-repository.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d22f1..9ba94cdffa4c47 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -17,10 +17,6 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, struct commit *c; struct commit_list *parent; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -47,10 +43,6 @@ static void test_get_commit_tree_in_graph(const char *gitdir, struct commit *c; struct tree *tree; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -75,24 +67,20 @@ static void test_get_commit_tree_in_graph(const char *gitdir, int cmd__repository(int argc, const char **argv) { - int nongit_ok = 0; - - setup_git_directory_gently(&nongit_ok); - if (argc < 2) die("must have at least 2 arguments"); if (!strcmp(argv[1], "parse_commit_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_parse_commit_in_graph(argv[2], argv[3], &oid); } else if (!strcmp(argv[1], "get_commit_tree_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_get_commit_tree_in_graph(argv[2], argv[3], &oid); } else { From c257bd59165e2f55dfa2c97b0ca1e39131513654 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:57 +0100 Subject: [PATCH 28/35] http-push: stop setting up `the_repository` for each reference When pushing references via HTTP we call `repo_init_revisions()` in a loop for each reference that we're about to push. As third argument we pass the result of `setup_git_directory()`, which causes us to reinitialize the repository every single time. This is an obvious waste of compute, as the repository that we're working in will never change across any of the initializations. The only reason that we do this is to retrieve the directory of the repository. Furthermore, this is about to create issues in a subsequent commit, where reinitializing the repository will cause a `BUG()`. Address this by storing the Git directory in a variable instead so that we don't have to call the function repeatedly. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- http-push.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/http-push.c b/http-push.c index a1c01e3b9b93a3..a48ca237996567 100644 --- a/http-push.c +++ b/http-push.c @@ -1725,6 +1725,7 @@ int cmd_main(int argc, const char **argv) int i; int new_refs; struct ref *ref, *local_refs = NULL; + const char *gitdir; CALLOC_ARRAY(repo, 1); @@ -1787,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - setup_git_directory(); + gitdir = setup_git_directory(); memset(remote_dir_exists, -1, 256); @@ -1941,7 +1942,7 @@ int cmd_main(int argc, const char **argv) if (!push_all && !is_null_oid(&ref->old_oid)) strvec_pushf(&commit_argv, "^%s", oid_to_hex(&ref->old_oid)); - repo_init_revisions(the_repository, &revs, setup_git_directory()); + repo_init_revisions(the_repository, &revs, gitdir); setup_revisions_from_strvec(&commit_argv, &revs, NULL); revs.edge_hint = 0; /* just in case */ From 35d9fc65edc0a5df9f714d02afaa2c942fb28570 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:58 +0100 Subject: [PATCH 29/35] odb: handle initialization of sources in `odb_new()` The logic to set up a new object database is currently distributed across two functions in "repository.c": - In `initialize_repository()` we initialize an empty object database. This object database is not fully initialized and doesn't have any sources attached to it. - The primary object database source is then created in `repo_set_gitdir()`. Ideally though, the logic should be entirely self-contained so that we can iterate more readily on how exactly the sources themselves get set up. Refactor `odb_new()` to handle both allocation and setup of the object database. This ensures that the object database is always initialized and ready for use, and it allows us to change how the sources get set up eventually. Note that `repo_set_gitdir()` still reaches into the sources when the function gets called with an already-initialized object database. This will be fixed in the next commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 14 +++++++++++++- odb.h | 15 ++++++++++++++- repository.c | 20 ++++++++------------ 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/odb.c b/odb.c index ccc6e999e7ae25..88b40c81c0060c 100644 --- a/odb.c +++ b/odb.c @@ -1034,15 +1034,27 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } -struct object_database *odb_new(struct repository *repo) +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *secondary_sources) { struct object_database *o = xmalloc(sizeof(*o)); + char *to_free = NULL; memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + + if (!primary_source) + primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); + o->sources_tail = &o->sources->next; + o->alternate_db = xstrdup_or_null(secondary_sources); + + free(to_free); + return o; } diff --git a/odb.h b/odb.h index 99c4d489729459..41b3c03027f4d8 100644 --- a/odb.h +++ b/odb.h @@ -159,7 +159,20 @@ struct object_database { struct string_list submodule_source_paths; }; -struct object_database *odb_new(struct repository *repo); +/* + * Create a new object database for the given repository. + * + * If the primary source parameter is set it will override the usual primary + * object directory derived from the repository's common directory. The + * alternate sources are expected to be a PATH_SEP-separated list of secondary + * sources. Note that these alternate sources will be added in addition to, not + * instead of, the alternates identified by the primary source. + * + * Returns the newly created object database. + */ +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *alternate_sources); /* Free the object database and release all resources. */ void odb_free(struct object_database *o); diff --git a/repository.c b/repository.c index 455c2d279fb8ab..5975c8f341c8cb 100644 --- a/repository.c +++ b/repository.c @@ -52,7 +52,6 @@ static void set_default_hash_algo(struct repository *repo) void initialize_repository(struct repository *repo) { - repo->objects = odb_new(repo); repo->remote_state = remote_state_new(); repo->parsed_objects = parsed_object_pool_new(repo); ALLOC_ARRAY(repo->index, 1); @@ -160,29 +159,26 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; - char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - - if (!repo->objects->sources) { - repo->objects->sources = odb_source_new(repo->objects, - objects_path, true); - repo->objects->sources_tail = &repo->objects->sources->next; - free(objects_path); + + if (!repo->objects) { + repo->objects = odb_new(repo, o->object_dir, o->alternate_db); } else { + char *objects_path = NULL; + expand_base_dir(&objects_path, o->object_dir, + repo->commondir, "objects"); free(repo->objects->sources->path); repo->objects->sources->path = objects_path; + free(repo->objects->alternate_db); + repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); } repo->disable_ref_updates = o->disable_ref_updates; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); expand_base_dir(&repo->graft_file, o->graft_file, repo->commondir, "info/grafts"); expand_base_dir(&repo->index_file, o->index_file, From 2574c617362a0c67d15fa01e01cdbd0f6bcdbc93 Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:50:59 +0100 Subject: [PATCH 30/35] chdir-notify: add function to unregister listeners While we (obviously) have a way to register new listeners that get called whenever we chdir(3p), we don't have an equivalent that can be used to unregister such a listener again. Add one, as it will be required in a subsequent commit. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- chdir-notify.c | 18 ++++++++++++++++++ chdir-notify.h | 2 ++ 2 files changed, 20 insertions(+) diff --git a/chdir-notify.c b/chdir-notify.c index 0d7bc0460747b2..f8bfe3cbef9aba 100644 --- a/chdir-notify.c +++ b/chdir-notify.c @@ -25,6 +25,24 @@ void chdir_notify_register(const char *name, list_add_tail(&e->list, &chdir_notify_entries); } +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data) +{ + struct list_head *pos, *p; + + list_for_each_safe(pos, p, &chdir_notify_entries) { + struct chdir_notify_entry *e = + list_entry(pos, struct chdir_notify_entry, list); + + if (e->cb != cb || e->data != data || !e->name != !name || + (e->name && strcmp(e->name, name))) + continue; + + list_del(pos); + free(e); + } +} + static void reparent_cb(const char *name, const char *old_cwd, const char *new_cwd, diff --git a/chdir-notify.h b/chdir-notify.h index 366e4c1ee9908c..81eb69d846e45d 100644 --- a/chdir-notify.h +++ b/chdir-notify.h @@ -41,6 +41,8 @@ typedef void (*chdir_notify_callback)(const char *name, const char *new_cwd, void *data); void chdir_notify_register(const char *name, chdir_notify_callback cb, void *data); +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data); void chdir_notify_reparent(const char *name, char **path); /* From 2816b748e5c300afda559a09426f8342c235c29d Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:51:00 +0100 Subject: [PATCH 31/35] odb: handle changing a repository's commondir The function `repo_set_gitdir()` is called in two situations: - To initialize the repository with its discovered location. As part of this we also set up the new object database. - To update the repository's discovered location in case the process changes its working directory so that we update relative paths. This means we also have to update any relative paths that are potentially used in the object database. In the context of the object database we ideally wouldn't ever have to worry about the second case: if all paths used by our object database sources were absolute, then we wouldn't have to update them. But unfortunately, the paths aren't only used to locate files owned by the given source, but we also use them for reporting purposes. One such example is `repo_get_object_directory()`, where we cannot just change semantics to always return absolute paths, as that is likely to break tooling out there. One solution to this would be to have both a "display path" and an "internal path". This would allow us to use internal paths for all internal matters, but continue to use the potentially-relative display paths so that we don't break compatibility. But converting the codebase to honor this split is quite a messy endeavour, and it wouldn't even help us with the goal to get rid of the need to update the display path on chdir(3p). Another solution would be to rework "setup.c" so that we never have to update paths in the first place. In that case, we'd only initialize the repository once we have figured out final locations for all directories. This would be a significant simplification of that subsystem indeed, but the current logic is so messy that it would take significant investments to get there. Meanwhile though, while object sources may still use relative paths, the best thing we can do is to handle the reparenting of the object source paths in the object database itself. This can be done by registering one callback for each object database so that we get notified whenever the current working directory changes, and we then perform the reparenting ourselves. Ideally, this wouldn't even happen on the object database level, but instead handled by each object database source. But we don't yet have proper pluggable object database sources, so this will need to be handled at a later point in time. The logic itself is rather simple: - We register the callback when creating the object database. - We unregister the callback when releasing it again. - We split up `set_git_dir_1()` so that it becomes possible to skip recreating the object database. This is required because the function is called both when the current working directory changes, but also when we set up the repository. Calling this function without skipping creation of the ODB will result in a bug in case it's already created. Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 37 +++++++++++++++++++++-- odb.h | 4 --- repository.c | 13 ++------ repository.h | 1 + setup.c | 84 ++++++++++++++++++++++++++++------------------------ 5 files changed, 83 insertions(+), 56 deletions(-) diff --git a/odb.c b/odb.c index 88b40c81c0060c..70665fb7f48f0f 100644 --- a/odb.c +++ b/odb.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "abspath.h" +#include "chdir-notify.h" #include "commit-graph.h" #include "config.h" #include "dir.h" @@ -142,9 +143,9 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) +static struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source *source; @@ -1034,6 +1035,32 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } +static void odb_update_commondir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct object_database *odb = cb_data; + struct odb_source *source; + + /* + * In theory, we only have to do this for the primary object source, as + * alternates' paths are always resolved to an absolute path. + */ + for (source = odb->sources; source; source = source->next) { + char *path; + + if (is_absolute_path(source->path)) + continue; + + path = reparent_relative_path(old_cwd, new_cwd, + source->path); + + free(source->path); + source->path = path; + } +} + struct object_database *odb_new(struct repository *repo, const char *primary_source, const char *secondary_sources) @@ -1055,6 +1082,8 @@ struct object_database *odb_new(struct repository *repo, free(to_free); + chdir_notify_register(NULL, odb_update_commondir, o); + return o; } @@ -1106,6 +1135,8 @@ void odb_free(struct object_database *o) packfile_store_free(o->packfiles); string_list_clear(&o->submodule_source_paths, 0); + chdir_notify_unregister(NULL, odb_update_commondir, o); + free(o); } diff --git a/odb.h b/odb.h index 41b3c03027f4d8..014cd9585a2f6e 100644 --- a/odb.h +++ b/odb.h @@ -78,10 +78,6 @@ struct odb_source { char *path; }; -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local); - struct packed_git; struct packfile_store; struct cached_object_entry; diff --git a/repository.c b/repository.c index 5975c8f341c8cb..863f24411b7bf9 100644 --- a/repository.c +++ b/repository.c @@ -165,17 +165,10 @@ void repo_set_gitdir(struct repository *repo, repo_set_commondir(repo, o->commondir); - if (!repo->objects) { + if (!repo->objects) repo->objects = odb_new(repo, o->object_dir, o->alternate_db); - } else { - char *objects_path = NULL; - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - free(repo->objects->sources->path); - repo->objects->sources->path = objects_path; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); - } + else if (!o->skip_initializing_odb) + BUG("cannot reinitialize an already-initialized object directory"); repo->disable_ref_updates = o->disable_ref_updates; diff --git a/repository.h b/repository.h index 614649413b68bc..6063c4b846d031 100644 --- a/repository.h +++ b/repository.h @@ -195,6 +195,7 @@ struct set_gitdir_args { const char *index_file; const char *alternate_db; bool disable_ref_updates; + bool skip_initializing_odb; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/setup.c b/setup.c index a752e9fc8476a0..a625f9fbc8b1b7 100644 --- a/setup.c +++ b/setup.c @@ -1002,10 +1002,51 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? NULL : path; } -static void set_git_dir_1(const char *path) +static void setup_git_env_internal(const char *git_dir, + bool skip_initializing_odb) +{ + char *git_replace_ref_base; + const char *shallow_file; + const char *replace_ref_base; + struct set_gitdir_args args = { NULL }; + struct strvec to_free = STRVEC_INIT; + + args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); + args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); + args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); + args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); + args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); + if (getenv(GIT_QUARANTINE_ENVIRONMENT)) + args.disable_ref_updates = true; + args.skip_initializing_odb = skip_initializing_odb; + + repo_set_gitdir(the_repository, git_dir, &args); + strvec_clear(&to_free); + + if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) + disable_replace_refs(); + replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); + git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base + : "refs/replace/"); + update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); + + shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); + if (shallow_file) + set_alternate_shallow_file(the_repository, shallow_file, 0); + + if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) + fetch_if_missing = 0; +} + +void setup_git_env(const char *git_dir) +{ + setup_git_env_internal(git_dir, false); +} + +static void set_git_dir_1(const char *path, bool skip_initializing_odb) { xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); + setup_git_env_internal(path, skip_initializing_odb); } static void update_relative_gitdir(const char *name UNUSED, @@ -1020,7 +1061,7 @@ static void update_relative_gitdir(const char *name UNUSED, trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); - set_git_dir_1(path); + set_git_dir_1(path, true); if (tmp_objdir) tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); @@ -1035,7 +1076,7 @@ static void set_git_dir(const char *path, int make_realpath) path = realpath.buf; } - set_git_dir_1(path); + set_git_dir_1(path, false); if (!is_absolute_path(path)) chdir_notify_register(NULL, update_relative_gitdir, NULL); @@ -1668,41 +1709,6 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -void setup_git_env(const char *git_dir) -{ - char *git_replace_ref_base; - const char *shallow_file; - const char *replace_ref_base; - struct set_gitdir_args args = { NULL }; - struct strvec to_free = STRVEC_INIT; - - args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); - args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); - args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); - args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); - args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); - if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = true; - } - - repo_set_gitdir(the_repository, git_dir, &args); - strvec_clear(&to_free); - - if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) - disable_replace_refs(); - replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); - git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base - : "refs/replace/"); - update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); - - shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); - if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); - - if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) - fetch_if_missing = 0; -} - const char *enter_repo(const char *path, unsigned flags) { static struct strbuf validated_path = STRBUF_INIT; From ac65c70663b092e823b0d3de1c1cfdee0a4fbc8e Mon Sep 17 00:00:00 2001 From: Patrick Steinhardt Date: Wed, 19 Nov 2025 08:51:01 +0100 Subject: [PATCH 32/35] odb: handle recreation of quarantine directories In the preceding commit we have moved the logic that reparents object database sources on chdir(3p) from "setup.c" into "odb.c". Let's also do the same for any temporary quarantine directories so that the complete reparenting logic is self-contained in "odb.c". Signed-off-by: Patrick Steinhardt Signed-off-by: Junio C Hamano --- odb.c | 7 +++++++ setup.c | 5 ----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/odb.c b/odb.c index 70665fb7f48f0f..dc8f292f3d9645 100644 --- a/odb.c +++ b/odb.c @@ -24,6 +24,7 @@ #include "strbuf.h" #include "strvec.h" #include "submodule.h" +#include "tmp-objdir.h" #include "trace2.h" #include "write-or-die.h" @@ -1041,8 +1042,11 @@ static void odb_update_commondir(const char *name UNUSED, void *cb_data) { struct object_database *odb = cb_data; + struct tmp_objdir *tmp_objdir; struct odb_source *source; + tmp_objdir = tmp_objdir_unapply_primary_odb(); + /* * In theory, we only have to do this for the primary object source, as * alternates' paths are always resolved to an absolute path. @@ -1059,6 +1063,9 @@ static void odb_update_commondir(const char *name UNUSED, free(source->path); source->path = path; } + + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); } struct object_database *odb_new(struct repository *repo, diff --git a/setup.c b/setup.c index a625f9fbc8b1b7..ae66188af3f6f4 100644 --- a/setup.c +++ b/setup.c @@ -22,7 +22,6 @@ #include "chdir-notify.h" #include "path.h" #include "quote.h" -#include "tmp-objdir.h" #include "trace.h" #include "trace2.h" #include "worktree.h" @@ -1056,14 +1055,10 @@ static void update_relative_gitdir(const char *name UNUSED, { char *path = reparent_relative_path(old_cwd, new_cwd, repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); - trace_printf_key(&trace_setup_key, "setup: move $GIT_DIR to '%s'", path); set_git_dir_1(path, true); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); free(path); } From c20f112e5149d1bd0d4741c4b28a65f81318309a Mon Sep 17 00:00:00 2001 From: Christian Couder Date: Mon, 17 Nov 2025 05:34:50 +0100 Subject: [PATCH 33/35] fast-import: add 'strip-if-invalid' mode to --signed-commits= Tools like `git filter-repo`[1] use `git fast-export` and `git fast-import` to rewrite repository history. When rewriting history using one such tool though, commit signatures might become invalid because the commits they sign changed due to the changes in the repository history made by the tool between the fast-export and the fast-import steps. Note that as far as signature handling goes: * Since fast-export doesn't know what changes filter-repo may make to the stream, it can't know whether the signatures will still be valid. * Since filter-repo doesn't know what history canonicalizations fast-export performed (and it performs a few), it can't know whether the signatures will still be valid. * Therefore, fast-import is the only process in the pipeline that can know whether a specified signature remains valid. Having invalid signatures in a rewritten repository could be confusing, so users rewritting history might prefer to simply discard signatures that are invalid at the fast-import step. For example a common use case is to rewrite only "recent" history. While specifying commit ranges corresponding to "recent" commits could work, users worry about getting it wrong and want to just automatically rewrite everything, expecting older commit signatures to be untouched. To let them do that, let's add a new 'strip-if-invalid' mode to the `--signed-commits=` option of `git fast-import`. It would be interesting for the `--signed-tags=` option to have this mode too, but we leave that for a future improvement. It might also be possible for `git fast-export` to have such a mode in its `--signed-commits=` and `--signed-tags=` options, but the use cases for it are much less clear, so we also leave that for possible future improvements. For now let's just die() if 'strip-if-invalid' is passed to these options where it hasn't been implemented yet. [1]: https://github.com/newren/git-filter-repo Helped-by: Elijah Newren Signed-off-by: Christian Couder Signed-off-by: Junio C Hamano --- Documentation/git-fast-import.adoc | 29 +++++++++---- builtin/fast-export.c | 38 ++++++++++++---- builtin/fast-import.c | 59 ++++++++++++++++++++++--- gpg-interface.c | 2 + gpg-interface.h | 1 + t/t9305-fast-import-signatures.sh | 69 +++++++++++++++++++++++++++++- 6 files changed, 174 insertions(+), 24 deletions(-) diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index b74179a6c891d5..479c4081da8f27 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -66,15 +66,26 @@ fast-import stream! This option is enabled automatically for remote-helpers that use the `import` capability, as they are already trusted to run their own code. ---signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort):: - Specify how to handle signed tags. Behaves in the same way - as the same option in linkgit:git-fast-export[1], except that - default is 'verbatim' (instead of 'abort'). - ---signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort):: - Specify how to handle signed commits. Behaves in the same way - as the same option in linkgit:git-fast-export[1], except that - default is 'verbatim' (instead of 'abort'). +`--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)`:: + Specify how to handle signed tags. Behaves in the same way as + the `--signed-commits=` below, except that the + `strip-if-invalid` mode is not yet supported. Like for signed + commits, the default mode is `verbatim`. + +`--signed-commits=`:: + Specify how to handle signed commits. The following s + are supported: ++ +* `verbatim`, which is the default, will silently import commit + signatures. +* `warn-verbatim` will import them, but will display a warning. +* `abort` will make this program die when encountering a signed + commit. +* `strip` will silently make the commits unsigned. +* `warn-strip` will make them unsigned, but will display a warning. +* `strip-if-invalid` will check signatures and, if they are invalid, + will strip them and display a warning. The validation is performed + in the same way as linkgit:git-verify-commit[1] does it. Options for Frontends ~~~~~~~~~~~~~~~~~~~~~ diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 7adbc55f0dccb1..a839a8f9acefde 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -797,10 +797,8 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, (int)(committer_end - committer), committer); if (signatures.nr) { switch (signed_commit_mode) { - case SIGN_ABORT: - die("encountered signed commit %s; use " - "--signed-commits= to handle it", - oid_to_hex(&commit->object.oid)); + + /* Exporting modes */ case SIGN_WARN_VERBATIM: warning("exporting %"PRIuMAX" signature(s) for commit %s", (uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid)); @@ -811,12 +809,25 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, print_signature(item->string, item->util); } break; + + /* Stripping modes */ case SIGN_WARN_STRIP: warning("stripping signature(s) from commit %s", oid_to_hex(&commit->object.oid)); /* fallthru */ case SIGN_STRIP: break; + + /* Aborting modes */ + case SIGN_ABORT: + die(_("encountered signed commit %s; use " + "--signed-commits= to handle it"), + oid_to_hex(&commit->object.oid)); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-export with --signed-commits=")); + default: + BUG("invalid signed_commit_mode value %d", signed_commit_mode); } string_list_clear(&signatures, 0); } @@ -934,16 +945,16 @@ static void handle_tag(const char *name, struct tag *tag) size_t sig_offset = parse_signed_buffer(message, message_size); if (sig_offset < message_size) switch (signed_tag_mode) { - case SIGN_ABORT: - die("encountered signed tag %s; use " - "--signed-tags= to handle it", - oid_to_hex(&tag->object.oid)); + + /* Exporting modes */ case SIGN_WARN_VERBATIM: warning("exporting signed tag %s", oid_to_hex(&tag->object.oid)); /* fallthru */ case SIGN_VERBATIM: break; + + /* Stripping modes */ case SIGN_WARN_STRIP: warning("stripping signature from tag %s", oid_to_hex(&tag->object.oid)); @@ -951,6 +962,17 @@ static void handle_tag(const char *name, struct tag *tag) case SIGN_STRIP: message_size = sig_offset; break; + + /* Aborting modes */ + case SIGN_ABORT: + die(_("encountered signed tag %s; use " + "--signed-tags= to handle it"), + oid_to_hex(&tag->object.oid)); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-export with --signed-tags=")); + default: + BUG("invalid signed_commit_mode value %d", signed_commit_mode); } } diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 493de57ef67bfb..e2c6894461044b 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2772,7 +2772,7 @@ static void add_gpgsig_to_commit(struct strbuf *commit_data, { struct string_list siglines = STRING_LIST_INIT_NODUP; - if (!sig->hash_algo) + if (!sig || !sig->hash_algo) return; strbuf_addstr(commit_data, header); @@ -2827,6 +2827,45 @@ static void finalize_commit_buffer(struct strbuf *new_data, strbuf_addbuf(new_data, msg); } +static void handle_strip_if_invalid(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg) +{ + struct strbuf tmp_buf = STRBUF_INIT; + struct signature_check signature_check = { 0 }; + int ret; + + /* Check signature in a temporary commit buffer */ + strbuf_addbuf(&tmp_buf, new_data); + finalize_commit_buffer(&tmp_buf, sig_sha1, sig_sha256, msg); + ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check); + + if (ret) { + const char *signer = signature_check.signer ? + signature_check.signer : _("unknown"); + const char *subject; + int subject_len = find_commit_subject(msg->buf, &subject); + + if (subject_len > 100) + warning(_("stripping invalid signature for commit '%.100s...'\n" + " allegedly by %s"), subject, signer); + else if (subject_len > 0) + warning(_("stripping invalid signature for commit '%.*s'\n" + " allegedly by %s"), subject_len, subject, signer); + else + warning(_("stripping invalid signature for commit\n" + " allegedly by %s"), signer); + + finalize_commit_buffer(new_data, NULL, NULL, msg); + } else { + strbuf_swap(new_data, &tmp_buf); + } + + signature_check_clear(&signature_check); + strbuf_release(&tmp_buf); +} + static void parse_new_commit(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -2878,6 +2917,7 @@ static void parse_new_commit(const char *arg) warning(_("importing a commit signature verbatim")); /* fallthru */ case SIGN_VERBATIM: + case SIGN_STRIP_IF_INVALID: import_one_signature(&sig_sha1, &sig_sha256, v); break; @@ -2962,7 +3002,11 @@ static void parse_new_commit(const char *arg) "encoding %s\n", encoding); - finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); + if (signed_commit_mode == SIGN_STRIP_IF_INVALID && + (sig_sha1.hash_algo || sig_sha256.hash_algo)) + handle_strip_if_invalid(&new_data, &sig_sha1, &sig_sha256, &msg); + else + finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); free(author); free(committer); @@ -2984,9 +3028,6 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) switch (signed_tag_mode) { /* First, modes that don't change anything */ - case SIGN_ABORT: - die(_("encountered signed tag; use " - "--signed-tags= to handle it")); case SIGN_WARN_VERBATIM: warning(_("importing a tag signature verbatim for tag '%s'"), name); /* fallthru */ @@ -3003,7 +3044,13 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) strbuf_setlen(msg, sig_offset); break; - /* Third, BUG */ + /* Third, aborting modes */ + case SIGN_ABORT: + die(_("encountered signed tag; use " + "--signed-tags= to handle it")); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-import with --signed-tags=")); default: BUG("invalid signed_tag_mode value %d from tag '%s'", signed_tag_mode, name); diff --git a/gpg-interface.c b/gpg-interface.c index d1e88da8c1bfde..fe653b246433b8 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -1146,6 +1146,8 @@ int parse_sign_mode(const char *arg, enum sign_mode *mode) *mode = SIGN_WARN_STRIP; else if (!strcmp(arg, "strip")) *mode = SIGN_STRIP; + else if (!strcmp(arg, "strip-if-invalid")) + *mode = SIGN_STRIP_IF_INVALID; else return -1; return 0; diff --git a/gpg-interface.h b/gpg-interface.h index 50487aa1483274..71dde8cb807437 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -111,6 +111,7 @@ enum sign_mode { SIGN_VERBATIM, SIGN_WARN_STRIP, SIGN_STRIP, + SIGN_STRIP_IF_INVALID, }; /* diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh index c2b427165862d3..022dae02e48177 100755 --- a/t/t9305-fast-import-signatures.sh +++ b/t/t9305-fast-import-signatures.sh @@ -79,7 +79,7 @@ test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA- echo B >explicit-sha256/B && git -C explicit-sha256 add B && test_tick && - git -C explicit-sha256 commit -S -m "signed" B && + git -C explicit-sha256 commit -S -m "signed commit" B && SHA256_B=$(git -C explicit-sha256 rev-parse dual-signed) && # Create the corresponding SHA-1 commit @@ -103,4 +103,71 @@ test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=war test_line_count = 2 out ' +test_expect_success GPG 'import commit with no signature with --signed-commits=strip-if-invalid' ' + git fast-export main >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + test_must_be_empty log +' + +test_expect_success GPG 'keep valid OpenPGP signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + +test_expect_success GPG 'strip signature invalidated by message change with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + + # Change the commit message, which invalidates the signature. + # The commit message length should not change though, otherwise the + # corresponding `data ` command would have to be changed too. + sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified && + + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING != $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep ! -E "^gpgsig" actual && + test_grep "stripping invalid signature" log +' + +test_expect_success GPGSM 'keep valid X.509 signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim x509-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) && + test $X509_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + +test_expect_success GPGSSH 'keep valid SSH signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + test_config -C new gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + + git fast-export --signed-commits=verbatim ssh-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) && + test $SSH_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + test_done From 0458e8b85440f77d4a4aec28d09112ba06cee05a Mon Sep 17 00:00:00 2001 From: Johannes Schindelin Date: Mon, 17 Nov 2025 17:04:24 +0000 Subject: [PATCH 34/35] ci(dockerized): do show the result of failing tests again The quality of tests and test suites is most apparent not when everything passes, but in how quickly bugs can be identified, analyzed, and resolved after test failures occur. As such, it is an unfortunate side effect of 2a21098b98a (github: adapt containerized jobs to be rootless, 2025-01-10) that the output of failed test cases, which was shown before that change directly in the build logs, is now no longer shown at all. The reason is a side effect of trying to run the build and the tests with permissions other than the `root` user, but without providing the prerequisite permissions to signal what tests failed and whose output hence needs to be included in the logs. The way this signaling works is for the workflow to write into special-purpose files whose path is specific to the current workflow step and which can be accessed via the `$GITHUB_ENV` environment variable, which differs between workflow steps. It is this file that is missing write permission for the `builder` user that was introduced in above-mentioned commit. The solution is simple: make the file world-writable. Technically, this write permission should be removed after the step has completed, if proper security practices were to be upheld, but since nothing uses that file again, it does not matter, and the fix is more succinct this way. This commit is best viewed with `--color-words`. Signed-off-by: Johannes Schindelin [jc: squashed Elijah's rewrite of the first paragraph of the log message] [jc: updated chmod to match "world-writable" in the log message] Signed-off-by: Junio C Hamano --- .github/workflows/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 1c8260ecb68b76..a2c06f9272e532 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -417,7 +417,7 @@ jobs: - run: ci/install-dependencies.sh - run: useradd builder --create-home - run: chown -R builder . - - run: sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh + - run: chmod a+w $GITHUB_ENV && sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh - name: print test failures if: failure() && env.FAILED_TEST_ARTIFACTS != '' run: sudo --preserve-env --set-home --user=builder ci/print-test-failures.sh From bdc5341ff65278a3cc80b2e8a02a2f02aa1fac06 Mon Sep 17 00:00:00 2001 From: Junio C Hamano Date: Fri, 5 Dec 2025 14:49:13 +0900 Subject: [PATCH 35/35] The sixth batch Signed-off-by: Junio C Hamano --- Documentation/RelNotes/2.53.0.adoc | 31 ++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index c4dfeb1c23b406..9e896bd4244389 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -20,6 +20,9 @@ UI, Workflows & Features * Add a new manual that describes the data model. + * "git fast-import" learns "--strip-if-invalid" option to drop + invalid cryptographic signature from objects. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -38,6 +41,13 @@ Performance, Internal Implementation, Development Support etc. * A part of code paths that deals with loose objects has been cleaned up. + * "make strip" has been taught to strip "scalar" as well as "git". + + * Dockerised jobs at the GitHub Actions CI have been taught to show + more details of failed tests. + + * Code refactoring around object database sources. + Fixes since v2.52 ----------------- @@ -105,8 +115,29 @@ Fixes since v2.52 * Various issues detected by Asan have been corrected. (merge a031b6181a jk/asan-bonanza later to maint). + * "git config get --path" segfaulted on an ":(optional)path" that + does not exist, which has been corrected. + (merge 0bd16856ff jc/optional-path later to maint). + + * The "--committer-date-is-author-date" option of "git am/rebase" is + a misguided one. The documentation is updated to discourage its + use. + (merge fbf3d0669f kh/doc-committer-date-is-author-date later to maint). + + * The option help text given by "git config unset -h" described + the "--all" option to "replace", not "unset", multiple variables, + which has been corrected. + (merge 18bf67b753 rs/config-unset-opthelp-fix later to maint). + + * The error message given by "git config set", when the variable + being updated has more than one values defined, used old style "git + config" syntax with an incorrect option in its hint, both of which + have been corrected. + (merge df963f0df4 rs/config-set-multi-error-message-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint). (merge f18aa68861 rs/xmkstemp-simplify later to maint). (merge fddba8f737 ja/doc-synopsis-style later to maint). + (merge 22ce0cb639 en/xdiff-cleanup-2 later to maint).