diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 27ebf2c8cc2e64..f2e93f54611b62 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -443,7 +443,7 @@ jobs: - run: ci/install-dependencies.sh - run: useradd builder --create-home - run: chown -R builder . - - run: sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh + - run: chmod a+w $GITHUB_ENV && sudo --preserve-env --set-home --user=builder ci/run-build-and-tests.sh - name: print test failures if: failure() && env.FAILED_TEST_ARTIFACTS != '' run: sudo --preserve-env --set-home --user=builder ci/print-test-failures.sh diff --git a/Documentation/Makefile b/Documentation/Makefile index 47208269a2e124..2699f0b24af192 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -143,6 +143,7 @@ TECH_DOCS += technical/shallow TECH_DOCS += technical/sparse-checkout TECH_DOCS += technical/sparse-index TECH_DOCS += technical/trivial-merge +TECH_DOCS += technical/unambiguous-types TECH_DOCS += technical/unit-tests SP_ARTICLES += $(TECH_DOCS) SP_ARTICLES += technical/api-index diff --git a/Documentation/RelNotes/2.53.0.adoc b/Documentation/RelNotes/2.53.0.adoc index c4dfeb1c23b406..9e896bd4244389 100644 --- a/Documentation/RelNotes/2.53.0.adoc +++ b/Documentation/RelNotes/2.53.0.adoc @@ -20,6 +20,9 @@ UI, Workflows & Features * Add a new manual that describes the data model. + * "git fast-import" learns "--strip-if-invalid" option to drop + invalid cryptographic signature from objects. + Performance, Internal Implementation, Development Support etc. -------------------------------------------------------------- @@ -38,6 +41,13 @@ Performance, Internal Implementation, Development Support etc. * A part of code paths that deals with loose objects has been cleaned up. + * "make strip" has been taught to strip "scalar" as well as "git". + + * Dockerised jobs at the GitHub Actions CI have been taught to show + more details of failed tests. + + * Code refactoring around object database sources. + Fixes since v2.52 ----------------- @@ -105,8 +115,29 @@ Fixes since v2.52 * Various issues detected by Asan have been corrected. (merge a031b6181a jk/asan-bonanza later to maint). + * "git config get --path" segfaulted on an ":(optional)path" that + does not exist, which has been corrected. + (merge 0bd16856ff jc/optional-path later to maint). + + * The "--committer-date-is-author-date" option of "git am/rebase" is + a misguided one. The documentation is updated to discourage its + use. + (merge fbf3d0669f kh/doc-committer-date-is-author-date later to maint). + + * The option help text given by "git config unset -h" described + the "--all" option to "replace", not "unset", multiple variables, + which has been corrected. + (merge 18bf67b753 rs/config-unset-opthelp-fix later to maint). + + * The error message given by "git config set", when the variable + being updated has more than one values defined, used old style "git + config" syntax with an incorrect option in its hint, both of which + have been corrected. + (merge df963f0df4 rs/config-set-multi-error-message-fix later to maint). + * Other code cleanup, docfix, build fix, etc. (merge 46207a54cc qj/doc-http-bad-want-response later to maint). (merge df90eccd93 kh/doc-commit-extra-references later to maint). (merge f18aa68861 rs/xmkstemp-simplify later to maint). (merge fddba8f737 ja/doc-synopsis-style later to maint). + (merge 22ce0cb639 en/xdiff-cleanup-2 later to maint). diff --git a/Documentation/git-am.adoc b/Documentation/git-am.adoc index b23b4fba2013c2..0c94776e296981 100644 --- a/Documentation/git-am.adoc +++ b/Documentation/git-am.adoc @@ -162,6 +162,13 @@ Valid for the `--whitespace` option are: commit creation as the committer date. This allows the user to lie about the committer date by using the same value as the author date. ++ +WARNING: The history walking machinery assumes that commits have +non-decreasing commit timestamps. You should consider if you really need +to use this option. Then you should only use this option to override the +committer date when applying commits on top of a base which commit is +older (in terms of the commit date) than the oldest patch you are +applying. --ignore-date:: By default the command records the date from the e-mail diff --git a/Documentation/git-fast-import.adoc b/Documentation/git-fast-import.adoc index b74179a6c891d5..479c4081da8f27 100644 --- a/Documentation/git-fast-import.adoc +++ b/Documentation/git-fast-import.adoc @@ -66,15 +66,26 @@ fast-import stream! This option is enabled automatically for remote-helpers that use the `import` capability, as they are already trusted to run their own code. ---signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort):: - Specify how to handle signed tags. Behaves in the same way - as the same option in linkgit:git-fast-export[1], except that - default is 'verbatim' (instead of 'abort'). - ---signed-commits=(verbatim|warn-verbatim|warn-strip|strip|abort):: - Specify how to handle signed commits. Behaves in the same way - as the same option in linkgit:git-fast-export[1], except that - default is 'verbatim' (instead of 'abort'). +`--signed-tags=(verbatim|warn-verbatim|warn-strip|strip|abort)`:: + Specify how to handle signed tags. Behaves in the same way as + the `--signed-commits=` below, except that the + `strip-if-invalid` mode is not yet supported. Like for signed + commits, the default mode is `verbatim`. + +`--signed-commits=`:: + Specify how to handle signed commits. The following s + are supported: ++ +* `verbatim`, which is the default, will silently import commit + signatures. +* `warn-verbatim` will import them, but will display a warning. +* `abort` will make this program die when encountering a signed + commit. +* `strip` will silently make the commits unsigned. +* `warn-strip` will make them unsigned, but will display a warning. +* `strip-if-invalid` will check signatures and, if they are invalid, + will strip them and display a warning. The validation is performed + in the same way as linkgit:git-verify-commit[1] does it. Options for Frontends ~~~~~~~~~~~~~~~~~~~~~ diff --git a/Documentation/git-rebase.adoc b/Documentation/git-rebase.adoc index 005caf61646ec7..9742833ebe4207 100644 --- a/Documentation/git-rebase.adoc +++ b/Documentation/git-rebase.adoc @@ -474,6 +474,13 @@ See also INCOMPATIBLE OPTIONS below. Instead of using the current time as the committer date, use the author date of the commit being rebased as the committer date. This option implies `--force-rebase`. ++ +WARNING: The history walking machinery assumes that commits have +non-decreasing commit timestamps. You should consider if you really need +to use this option. Then you should only use this option to override the +committer date when rebasing commits on top of a base which commit is +older (in terms of the commit date) than the oldest commit you are +applying (in terms of the author date). --ignore-date:: --reset-author-date:: diff --git a/Documentation/technical/meson.build b/Documentation/technical/meson.build index faff3964a9bef5..ec07088c57617f 100644 --- a/Documentation/technical/meson.build +++ b/Documentation/technical/meson.build @@ -32,6 +32,7 @@ articles = [ 'sparse-checkout.adoc', 'sparse-index.adoc', 'trivial-merge.adoc', + 'unambiguous-types.adoc', 'unit-tests.adoc', ] diff --git a/Documentation/technical/unambiguous-types.adoc b/Documentation/technical/unambiguous-types.adoc new file mode 100644 index 00000000000000..9a4990847c0e0b --- /dev/null +++ b/Documentation/technical/unambiguous-types.adoc @@ -0,0 +1,224 @@ += Unambiguous types + +Most of these mappings are obvious, but there are some nuances and gotchas with +Rust FFI (Foreign Function Interface). + +This document defines clear, one-to-one mappings between primitive types in C, +Rust (and possible other languages in the future). Its purpose is to eliminate +ambiguity in type widths, signedness, and binary representation across +platforms and languages. + +For Git, the only header required to use these unambiguous types in C is +`git-compat-util.h`. + +== Boolean types +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| bool^1^ | bool +|=== + +== Integer types + +In C, `` (or an equivalent) must be included. + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| uint8_t | u8 +| uint16_t | u16 +| uint32_t | u32 +| uint64_t | u64 + +| int8_t | i8 +| int16_t | i16 +| int32_t | i32 +| int64_t | i64 +|=== + +== Floating-point types + +Rust requires IEEE-754 semantics. +In C, that is typically true, but not guaranteed by the standard. + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| float^2^ | f32 +| double^2^ | f64 +|=== + +== Size types + +These types represent pointer-sized integers and are typically defined in +`` or an equivalent header. + +Size types should be used any time pointer arithmetic is performed e.g. +indexing an array, describing the number of elements in memory, etc... + +[cols="1,1", options="header"] +|=== +| C Type | Rust Type +| size_t^3^ | usize +| ptrdiff_t^3^ | isize +|=== + +== Character types + +This is where C and Rust don't have a clean one-to-one mapping. + +A C `char` and a Rust `u8` share the same bit width, so any C struct containing +a `char` will have the same size as the corresponding Rust struct using `u8`. +In that sense, such structs are safe to pass over the FFI boundary, because +their fields will be laid out identically. However, beyond bit width, C `char` +has additional semantics and platform-dependent behavior that can cause +problems, as discussed below. + +The C language leaves the signedness of `char` implementation defined. Because +our developer build enables -Wsign-compare, comparison of a value of `char` +type with either signed or unsigned integers may trigger warnings from the +compiler. + +Note: Rust's `char` type is an unsigned 32-bit integer that is used to describe +Unicode code points. + +=== Notes +^1^ This is only true if stdbool.h (or equivalent) is used. + +^2^ C does not enforce IEEE-754 compatibility, but Rust expects it. If the +platform/arch for C does not follow IEEE-754 then this equivalence does not +hold. Also, it's assumed that `float` is 32 bits and `double` is 64, but +there may be a strange platform/arch where even this isn't true. + +^3^ C also defines uintptr_t, ssize_t and intptr_t, but these types are +discouraged for FFI purposes. For functions like `read()` and `write()` ssize_t +should be cast to a different, and unambiguous, type before being passed over +the FFI boundary. + + +== Problems with std::ffi::c_* types in Rust +TL;DR: In practice, Rust's `c_*` types aren't guaranteed to match C types for +all possible C compilers, platforms, or architectures, because Rust only +ensures correctness of C types on officially supported targets. These +definitions have changed over time to match more targets which means that the +c_* definitions will differ based on which Rust version Git chooses to use. + +Current list of safe, Rust side, FFI types in Git: + + +* `c_void` +* `CStr` +* `CString` + +Even then, they should be used sparingly, and only where the semantics match +exactly. + +The std::os::raw::c_* directly inherits the problems of core::ffi, which +changes over time and seems to make a best guess at the correct definition for +a given platform/target. This probably isn't a problem for all other platforms +that Rust supports currently, but can anyone say that Rust got it right for all +C compilers of all platforms/targets? + +To give an example: c_long is defined in +footnote:[https://doc.rust-lang.org/1.63.0/src/core/ffi/mod.rs.html#175-189[c_long in 1.63.0]] +footnote:[https://doc.rust-lang.org/1.89.0/src/core/ffi/primitives.rs.html#135-151[c_long in 1.89.0]] + +=== Rust version 1.63.0 + +``` +mod c_long_definition { + cfg_if! { + if #[cfg(all(target_pointer_width = "64", not(windows)))] { + pub type c_long = i64; + pub type NonZero_c_long = crate::num::NonZeroI64; + pub type c_ulong = u64; + pub type NonZero_c_ulong = crate::num::NonZeroU64; + } else { + // The minimal size of `long` in the C standard is 32 bits + pub type c_long = i32; + pub type NonZero_c_long = crate::num::NonZeroI32; + pub type c_ulong = u32; + pub type NonZero_c_ulong = crate::num::NonZeroU32; + } + } +} +``` + +=== Rust version 1.89.0 + +``` +mod c_long_definition { + crate::cfg_select! { + any( + all(target_pointer_width = "64", not(windows)), + // wasm32 Linux ABI uses 64-bit long + all(target_arch = "wasm32", target_os = "linux") + ) => { + pub(super) type c_long = i64; + pub(super) type c_ulong = u64; + } + _ => { + // The minimal size of `long` in the C standard is 32 bits + pub(super) type c_long = i32; + pub(super) type c_ulong = u32; + } + } +} +``` + +Even for the cases where C types are correctly mapped to Rust types via +std::ffi::c_* there are still problems. Let's take c_char for example. On some +platforms it's u8 on others it's i8. + +=== Subtraction underflow in debug mode + +The following code will panic in debug on platforms that define c_char as u8, +but won't if it's an i8. + +``` +let mut x: std::ffi::c_char = 0; +x -= 1; +``` + +=== Inconsistent shift behavior + +`x` will be 0xC0 for platforms that use i8, but will be 0x40 where it's u8. + +``` +let mut x: std::ffi::c_char = 0x80; +x >>= 1; +``` + +=== Equality fails to compile on some platforms + +The following will not compile on platforms that define c_char as i8, but will +if it's u8. You can cast x e.g. `assert_eq!(x as u8, b'a');`, but then you get +a warning on platforms that use u8 and a clean compilation where i8 is used. + +``` +let mut x: std::ffi::c_char = 0x61; +assert_eq!(x, b'a'); +``` + +== Enum types +Rust enum types should not be used as FFI types. Rust enum types are more like +C union types than C enum's. For something like: + +``` +#[repr(C, u8)] +enum Fruit { + Apple, + Banana, + Cherry, +} +``` + +It's easy enough to make sure the Rust enum matches what C would expect, but a +more complex type like. + +``` +enum HashResult { + SHA1([u8; 20]), + SHA256([u8; 32]), +} +``` + +The Rust compiler has to add a discriminant to the enum to distinguish between +the variants. The width, location, and values for that discriminant is up to +the Rust compiler and is not ABI stable. diff --git a/Makefile b/Makefile index 237b56fc9d98c9..6fc322ff88184d 100644 --- a/Makefile +++ b/Makefile @@ -2567,7 +2567,7 @@ please_set_SHELL_PATH_to_a_more_modern_shell: shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell -strip: $(PROGRAMS) git$X +strip: $(PROGRAMS) git$X scalar$X $(STRIP) $(STRIP_OPTS) $^ ### Target-specific flags and dependencies diff --git a/builtin/blame.c b/builtin/blame.c index 27b513d27faddd..6044973462a173 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -739,7 +739,8 @@ static int git_blame_config(const char *var, const char *value, ret = git_config_pathname(&str, var, value); if (ret) return ret; - string_list_insert(&ignore_revs_file_list, str); + if (str) + string_list_insert(&ignore_revs_file_list, str); free(str); return 0; } diff --git a/builtin/clone.c b/builtin/clone.c index c990f398ef6f37..b19b302b065467 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -1617,7 +1617,7 @@ int cmd_clone(int argc, transport_disconnect(transport); if (option_dissociate) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); dissociate_from_references(); } diff --git a/builtin/config.c b/builtin/config.c index 75852bd79d5059..288ebdfdaaab1c 100644 --- a/builtin/config.c +++ b/builtin/config.c @@ -261,6 +261,12 @@ struct strbuf_list { int alloc; }; +/* + * Format the configuration key-value pair (`key_`, `value_`) and + * append it into strbuf `buf`. Returns a negative value on failure, + * 0 on success, 1 on a missing optional value (i.e., telling the + * caller to pretend that did not exist). + */ static int format_config(const struct config_display_options *opts, struct strbuf *buf, const char *key_, const char *value_, const struct key_value_info *kvi) @@ -299,7 +305,10 @@ static int format_config(const struct config_display_options *opts, char *v; if (git_config_pathname(&v, key_, value_) < 0) return -1; - strbuf_addstr(buf, v); + if (v) + strbuf_addstr(buf, v); + else + return 1; /* :(optional)no-such-file */ free((char *)v); } else if (opts->type == TYPE_EXPIRY_DATE) { timestamp_t t; @@ -344,6 +353,7 @@ static int collect_config(const char *key_, const char *value_, struct collect_config_data *data = cb; struct strbuf_list *values = data->values; const struct key_value_info *kvi = ctx->kvi; + int status; if (!(data->get_value_flags & GET_VALUE_KEY_REGEXP) && strcmp(key_, data->key)) @@ -361,8 +371,15 @@ static int collect_config(const char *key_, const char *value_, ALLOC_GROW(values->items, values->nr + 1, values->alloc); strbuf_init(&values->items[values->nr], 0); - return format_config(data->display_opts, &values->items[values->nr++], - key_, value_, kvi); + status = format_config(data->display_opts, &values->items[values->nr++], + key_, value_, kvi); + if (status < 0) + return status; + if (status) { + strbuf_release(&values->items[--values->nr]); + status = 0; + } + return status; } static int get_value(const struct config_location_options *opts, @@ -438,15 +455,23 @@ static int get_value(const struct config_location_options *opts, if (!values.nr && display_opts->default_value) { struct key_value_info kvi = KVI_INIT; struct strbuf *item; + int status; kvi_from_param(&kvi); ALLOC_GROW(values.items, values.nr + 1, values.alloc); item = &values.items[values.nr++]; strbuf_init(item, 0); - if (format_config(display_opts, item, key_, - display_opts->default_value, &kvi) < 0) + + status = format_config(display_opts, item, key_, + display_opts->default_value, &kvi); + if (status < 0) die(_("failed to format default config value: %s"), display_opts->default_value); + if (status) { + /* default was a missing optional value */ + values.nr--; + strbuf_release(item); + } } ret = !values.nr; @@ -714,11 +739,13 @@ static int get_urlmatch(const struct config_location_options *opts, for_each_string_list_item(item, &values) { struct urlmatch_current_candidate_value *matched = item->util; struct strbuf buf = STRBUF_INIT; + int status; - format_config(&display_opts, &buf, item->string, - matched->value_is_null ? NULL : matched->value.buf, - &matched->kvi); - fwrite(buf.buf, 1, buf.len, stdout); + status = format_config(&display_opts, &buf, item->string, + matched->value_is_null ? NULL : matched->value.buf, + &matched->kvi); + if (!status) + fwrite(buf.buf, 1, buf.len, stdout); strbuf_release(&buf); strbuf_release(&matched->value); @@ -985,7 +1012,7 @@ static int cmd_config_set(int argc, const char **argv, const char *prefix, argv[0], comment, value); if (ret == CONFIG_NOTHING_SET) error(_("cannot overwrite multiple values with a single value\n" - " Use a regexp, --add or --replace-all to change %s."), argv[0]); + " Use --value=, --append or --all to change %s."), argv[0]); } location_options_release(&location_opts); @@ -1003,8 +1030,8 @@ static int cmd_config_unset(int argc, const char **argv, const char *prefix, struct option opts[] = { CONFIG_LOCATION_OPTIONS(location_opts), OPT_GROUP(N_("Filter")), - OPT_BIT(0, "all", &flags, N_("replace multi-valued config option with new value"), CONFIG_FLAGS_MULTI_REPLACE), - OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("show config with values matching the pattern")), + OPT_BIT(0, "all", &flags, N_("unset all multi-valued config options"), CONFIG_FLAGS_MULTI_REPLACE), + OPT_STRING(0, "value", &value_pattern, N_("pattern"), N_("unset multi-valued config options with matching values")), OPT_BIT(0, "fixed-value", &flags, N_("use string equality when comparing values to value pattern"), CONFIG_FLAGS_FIXED_VALUE), OPT_END(), }; diff --git a/builtin/fast-export.c b/builtin/fast-export.c index 0421360ab7d7d6..b90da5e616e0ef 100644 --- a/builtin/fast-export.c +++ b/builtin/fast-export.c @@ -797,10 +797,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, (int)(committer_end - committer), committer); if (signatures.nr) { switch (signed_commit_mode) { - case SIGN_ABORT: - die(_("encountered signed commit %s; use " - "--signed-commits= to handle it"), - oid_to_hex(&commit->object.oid)); + /* Exporting modes */ case SIGN_WARN_VERBATIM: warning(_("exporting %"PRIuMAX" signature(s) for commit %s"), (uintmax_t)signatures.nr, oid_to_hex(&commit->object.oid)); @@ -811,12 +808,25 @@ static void handle_commit(struct commit *commit, struct rev_info *rev, print_signature(item->string, item->util); } break; + + /* Stripping modes */ case SIGN_WARN_STRIP: warning(_("stripping signature(s) from commit %s"), oid_to_hex(&commit->object.oid)); /* fallthru */ case SIGN_STRIP: break; + + /* Aborting modes */ + case SIGN_ABORT: + die(_("encountered signed commit %s; use " + "--signed-commits= to handle it"), + oid_to_hex(&commit->object.oid)); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-export with --signed-commits=")); + default: + BUG("invalid signed_commit_mode value %d", signed_commit_mode); } string_list_clear(&signatures, 0); } @@ -935,16 +945,15 @@ static void handle_tag(const char *name, struct tag *tag) size_t sig_offset = parse_signed_buffer(message, message_size); if (sig_offset < message_size) switch (signed_tag_mode) { - case SIGN_ABORT: - die(_("encountered signed tag %s; use " - "--signed-tags= to handle it"), - oid_to_hex(&tag->object.oid)); + /* Exporting modes */ case SIGN_WARN_VERBATIM: warning(_("exporting signed tag %s"), oid_to_hex(&tag->object.oid)); /* fallthru */ case SIGN_VERBATIM: break; + + /* Stripping modes */ case SIGN_WARN_STRIP: warning(_("stripping signature from tag %s"), oid_to_hex(&tag->object.oid)); @@ -952,6 +961,17 @@ static void handle_tag(const char *name, struct tag *tag) case SIGN_STRIP: message_size = sig_offset; break; + + /* Aborting modes */ + case SIGN_ABORT: + die(_("encountered signed tag %s; use " + "--signed-tags= to handle it"), + oid_to_hex(&tag->object.oid)); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-export with --signed-tags=")); + default: + BUG("invalid signed_commit_mode value %d", signed_commit_mode); } } diff --git a/builtin/fast-import.c b/builtin/fast-import.c index 4cd0b079b61687..7849005ccb15ff 100644 --- a/builtin/fast-import.c +++ b/builtin/fast-import.c @@ -2772,7 +2772,7 @@ static void add_gpgsig_to_commit(struct strbuf *commit_data, { struct string_list siglines = STRING_LIST_INIT_NODUP; - if (!sig->hash_algo) + if (!sig || !sig->hash_algo) return; strbuf_addstr(commit_data, header); @@ -2815,6 +2815,57 @@ static void import_one_signature(struct signature_data *sig_sha1, die(_("parse_one_signature() returned unknown hash algo")); } +static void finalize_commit_buffer(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg) +{ + add_gpgsig_to_commit(new_data, "gpgsig ", sig_sha1); + add_gpgsig_to_commit(new_data, "gpgsig-sha256 ", sig_sha256); + + strbuf_addch(new_data, '\n'); + strbuf_addbuf(new_data, msg); +} + +static void handle_strip_if_invalid(struct strbuf *new_data, + struct signature_data *sig_sha1, + struct signature_data *sig_sha256, + struct strbuf *msg) +{ + struct strbuf tmp_buf = STRBUF_INIT; + struct signature_check signature_check = { 0 }; + int ret; + + /* Check signature in a temporary commit buffer */ + strbuf_addbuf(&tmp_buf, new_data); + finalize_commit_buffer(&tmp_buf, sig_sha1, sig_sha256, msg); + ret = verify_commit_buffer(tmp_buf.buf, tmp_buf.len, &signature_check); + + if (ret) { + const char *signer = signature_check.signer ? + signature_check.signer : _("unknown"); + const char *subject; + int subject_len = find_commit_subject(msg->buf, &subject); + + if (subject_len > 100) + warning(_("stripping invalid signature for commit '%.100s...'\n" + " allegedly by %s"), subject, signer); + else if (subject_len > 0) + warning(_("stripping invalid signature for commit '%.*s'\n" + " allegedly by %s"), subject_len, subject, signer); + else + warning(_("stripping invalid signature for commit\n" + " allegedly by %s"), signer); + + finalize_commit_buffer(new_data, NULL, NULL, msg); + } else { + strbuf_swap(new_data, &tmp_buf); + } + + signature_check_clear(&signature_check); + strbuf_release(&tmp_buf); +} + static void parse_new_commit(const char *arg) { static struct strbuf msg = STRBUF_INIT; @@ -2866,6 +2917,7 @@ static void parse_new_commit(const char *arg) warning(_("importing a commit signature verbatim")); /* fallthru */ case SIGN_VERBATIM: + case SIGN_STRIP_IF_INVALID: import_one_signature(&sig_sha1, &sig_sha256, v); break; @@ -2950,11 +3002,12 @@ static void parse_new_commit(const char *arg) "encoding %s\n", encoding); - add_gpgsig_to_commit(&new_data, "gpgsig ", &sig_sha1); - add_gpgsig_to_commit(&new_data, "gpgsig-sha256 ", &sig_sha256); + if (signed_commit_mode == SIGN_STRIP_IF_INVALID && + (sig_sha1.hash_algo || sig_sha256.hash_algo)) + handle_strip_if_invalid(&new_data, &sig_sha1, &sig_sha256, &msg); + else + finalize_commit_buffer(&new_data, &sig_sha1, &sig_sha256, &msg); - strbuf_addch(&new_data, '\n'); - strbuf_addbuf(&new_data, &msg); free(author); free(committer); free(encoding); @@ -2975,9 +3028,6 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) switch (signed_tag_mode) { /* First, modes that don't change anything */ - case SIGN_ABORT: - die(_("encountered signed tag; use " - "--signed-tags= to handle it")); case SIGN_WARN_VERBATIM: warning(_("importing a tag signature verbatim for tag '%s'"), name); /* fallthru */ @@ -2994,7 +3044,13 @@ static void handle_tag_signature(struct strbuf *msg, const char *name) strbuf_setlen(msg, sig_offset); break; - /* Third, BUG */ + /* Third, aborting modes */ + case SIGN_ABORT: + die(_("encountered signed tag; use " + "--signed-tags= to handle it")); + case SIGN_STRIP_IF_INVALID: + die(_("'strip-if-invalid' is not a valid mode for " + "git fast-import with --signed-tags=")); default: BUG("invalid signed_tag_mode value %d from tag '%s'", signed_tag_mode, name); diff --git a/builtin/gc.c b/builtin/gc.c index e9a76243aa633d..92c6e7b954faff 100644 --- a/builtin/gc.c +++ b/builtin/gc.c @@ -1063,7 +1063,7 @@ int cmd_gc(int argc, report_garbage = report_pack_garbage; odb_reprepare(the_repository->objects); if (pack_garbage.nr > 0) { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); clean_pack_garbage(); } diff --git a/builtin/index-pack.c b/builtin/index-pack.c index 2b78ba7fe4d14a..699fe678cd60b0 100644 --- a/builtin/index-pack.c +++ b/builtin/index-pack.c @@ -1640,7 +1640,7 @@ static void final(const char *final_pack_name, const char *curr_pack_name, rename_tmp_packfile(&final_index_name, curr_index_name, &index_name, hash, "idx", 1); - if (do_fsck_object) + if (do_fsck_object && startup_info->have_repository) packfile_store_load_pack(the_repository->objects->packfiles, final_index_name, 0); @@ -2110,8 +2110,23 @@ int cmd_index_pack(int argc, else close(input_fd); - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { + /* + * We cannot perform queued consistency checks when running + * outside of a repository because those require us to read + * from the object database, which is uninitialized. + * + * TODO: we may eventually set up an in-memory object database, + * which would allow us to perform these queued checks. + */ + if (!startup_info->have_repository && + fsck_has_queued_checks(&fsck_options)) + die(_("cannot perform queued object checks outside " + "of a repository")); + + if (fsck_finish(&fsck_options)) + die(_("fsck error in pack objects")); + } free(opts.anomaly); free(objects); diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e8ee0e73217aae..9c491746168a6f 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -34,7 +34,6 @@ #include "object-file.h" #include "object-name.h" #include "odb.h" -#include "path.h" #include "protocol.h" #include "commit-reach.h" #include "server-info.h" @@ -42,6 +41,7 @@ #include "trace2.h" #include "worktree.h" #include "shallow.h" +#include "setup.h" #include "parse-options.h" static const char * const receive_pack_usage[] = { @@ -177,8 +177,9 @@ static int receive_pack_config(const char *var, const char *value, if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(&fsck_msg_types, "%cskiplist=%s", - fsck_msg_types.len ? ',' : '=', path); + if (path) + strbuf_addf(&fsck_msg_types, "%cskiplist=%s", + fsck_msg_types.len ? ',' : '=', path); free(path); return 0; } diff --git a/builtin/repack.c b/builtin/repack.c index cfdb4c0920b191..d9012141f699c9 100644 --- a/builtin/repack.c +++ b/builtin/repack.c @@ -488,7 +488,7 @@ int cmd_repack(int argc, string_list_sort(&names); - close_object_store(repo->objects); + odb_close(repo->objects); /* * Ok we have prepared all new packfiles. diff --git a/builtin/upload-archive.c b/builtin/upload-archive.c index 97d7c9522f9868..25312bb2a52887 100644 --- a/builtin/upload-archive.c +++ b/builtin/upload-archive.c @@ -4,8 +4,8 @@ #define USE_THE_REPOSITORY_VARIABLE #include "builtin.h" #include "archive.h" -#include "path.h" #include "pkt-line.h" +#include "setup.h" #include "sideband.h" #include "run-command.h" #include "strvec.h" diff --git a/builtin/upload-pack.c b/builtin/upload-pack.c index c2bbc035ab0c91..30498fafea3a8b 100644 --- a/builtin/upload-pack.c +++ b/builtin/upload-pack.c @@ -5,11 +5,11 @@ #include "gettext.h" #include "pkt-line.h" #include "parse-options.h" -#include "path.h" #include "protocol.h" #include "replace-object.h" #include "upload-pack.h" #include "serve.h" +#include "setup.h" #include "commit.h" #include "environment.h" diff --git a/chdir-notify.c b/chdir-notify.c index 0d7bc0460747b2..f8bfe3cbef9aba 100644 --- a/chdir-notify.c +++ b/chdir-notify.c @@ -25,6 +25,24 @@ void chdir_notify_register(const char *name, list_add_tail(&e->list, &chdir_notify_entries); } +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data) +{ + struct list_head *pos, *p; + + list_for_each_safe(pos, p, &chdir_notify_entries) { + struct chdir_notify_entry *e = + list_entry(pos, struct chdir_notify_entry, list); + + if (e->cb != cb || e->data != data || !e->name != !name || + (e->name && strcmp(e->name, name))) + continue; + + list_del(pos); + free(e); + } +} + static void reparent_cb(const char *name, const char *old_cwd, const char *new_cwd, diff --git a/chdir-notify.h b/chdir-notify.h index 366e4c1ee9908c..81eb69d846e45d 100644 --- a/chdir-notify.h +++ b/chdir-notify.h @@ -41,6 +41,8 @@ typedef void (*chdir_notify_callback)(const char *name, const char *new_cwd, void *data); void chdir_notify_register(const char *name, chdir_notify_callback cb, void *data); +void chdir_notify_unregister(const char *name, chdir_notify_callback cb, + void *data); void chdir_notify_reparent(const char *name, char **path); /* diff --git a/commit.c b/commit.c index 16d91b2bfcf291..709c9eed58a790 100644 --- a/commit.c +++ b/commit.c @@ -1315,7 +1315,8 @@ static void handle_signed_tag(const struct commit *parent, struct commit_extra_h free(buf); } -int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc) { struct strbuf payload = STRBUF_INIT; struct strbuf signature = STRBUF_INIT; @@ -1323,7 +1324,8 @@ int check_commit_signature(const struct commit *commit, struct signature_check * sigc->result = 'N'; - if (parse_signed_commit(commit, &payload, &signature, the_hash_algo) <= 0) + if (parse_buffer_signed_by_header(buffer, size, &payload, + &signature, the_hash_algo) <= 0) goto out; sigc->payload_type = SIGNATURE_PAYLOAD_COMMIT; @@ -1337,6 +1339,17 @@ int check_commit_signature(const struct commit *commit, struct signature_check * return ret; } +int check_commit_signature(const struct commit *commit, struct signature_check *sigc) +{ + unsigned long size; + const char *buffer = repo_get_commit_buffer(the_repository, commit, &size); + int ret = verify_commit_buffer(buffer, size, sigc); + + repo_unuse_commit_buffer(the_repository, commit, buffer); + + return ret; +} + void verify_merge_signature(struct commit *commit, int verbosity, int check_trust) { diff --git a/commit.h b/commit.h index 1d6e0c7518b3bb..5406dd266327d4 100644 --- a/commit.h +++ b/commit.h @@ -333,6 +333,13 @@ int remove_signature(struct strbuf *buf); */ int check_commit_signature(const struct commit *commit, struct signature_check *sigc); +/* + * Same as check_commit_signature() but accepts a commit buffer and + * its size, instead of a `struct commit *`. + */ +int verify_commit_buffer(const char *buffer, size_t size, + struct signature_check *sigc); + /* record author-date for each commit object */ struct author_date_slab; void record_author_date(struct author_date_slab *author_date, diff --git a/config.c b/config.c index f1def0dcfbacba..1738c0cb0de5c6 100644 --- a/config.c +++ b/config.c @@ -1291,6 +1291,7 @@ int git_config_pathname(char **dest, const char *var, const char *value) if (is_optional && is_missing_file(path)) { free(path); + *dest = NULL; return 0; } @@ -1953,7 +1954,7 @@ int git_configset_get_maybe_bool(struct config_set *set, const char *key, int *d return 1; } -int git_configset_get_pathname(struct config_set *set, const char *key, char **dest) +static int git_configset_get_pathname(struct config_set *set, const char *key, char **dest) { const char *value; if (!git_configset_get_value(set, key, &value, NULL)) diff --git a/config.h b/config.h index 19c87fc0bc1a2a..ba426a960af9f4 100644 --- a/config.h +++ b/config.h @@ -564,7 +564,6 @@ int git_configset_get_ulong(struct config_set *cs, const char *key, unsigned lon int git_configset_get_bool(struct config_set *cs, const char *key, int *dest); int git_configset_get_bool_or_int(struct config_set *cs, const char *key, int *is_bool, int *dest); int git_configset_get_maybe_bool(struct config_set *cs, const char *key, int *dest); -int git_configset_get_pathname(struct config_set *cs, const char *key, char **dest); /** * Run only the discover part of the repo_config_get_*() functions diff --git a/fetch-pack.c b/fetch-pack.c index 78c45d4a155c89..40316c9a348f23 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -1865,8 +1865,9 @@ int fetch_pack_fsck_config(const char *var, const char *value, if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(msg_types, "%cskiplist=%s", - msg_types->len ? ',' : '=', path); + if (path) + strbuf_addf(msg_types, "%cskiplist=%s", + msg_types->len ? ',' : '=', path); free(path); return 0; } diff --git a/fsck.c b/fsck.c index 8e8083e7c6e42a..138fffded935c4 100644 --- a/fsck.c +++ b/fsck.c @@ -1379,6 +1379,12 @@ int fsck_finish(struct fsck_options *options) return ret; } +bool fsck_has_queued_checks(struct fsck_options *options) +{ + return !oidset_equal(&options->gitmodules_found, &options->gitmodules_done) || + !oidset_equal(&options->gitattributes_found, &options->gitattributes_done); +} + void fsck_options_clear(struct fsck_options *options) { free(options->msg_type); @@ -1398,14 +1404,16 @@ int git_fsck_config(const char *var, const char *value, if (strcmp(var, "fsck.skiplist") == 0) { char *path; - struct strbuf sb = STRBUF_INIT; if (git_config_pathname(&path, var, value)) return -1; - strbuf_addf(&sb, "skiplist=%s", path); - free(path); - fsck_set_msg_types(options, sb.buf); - strbuf_release(&sb); + if (path) { + struct strbuf sb = STRBUF_INIT; + strbuf_addf(&sb, "skiplist=%s", path); + free(path); + fsck_set_msg_types(options, sb.buf); + strbuf_release(&sb); + } return 0; } diff --git a/fsck.h b/fsck.h index cb6ef32f4f3aaa..336917c0451aac 100644 --- a/fsck.h +++ b/fsck.h @@ -248,6 +248,13 @@ int fsck_tag_standalone(const struct object_id *oid, const char *buffer, */ int fsck_finish(struct fsck_options *options); +/* + * Check whether there are any checks that have been queued up and that still + * need to be run. Returns `false` iff `fsck_finish()` wouldn't perform any + * actions, `true` otherwise. + */ +bool fsck_has_queued_checks(struct fsck_options *options); + /* * Clear the fsck_options struct, freeing any allocated memory. */ diff --git a/gpg-interface.c b/gpg-interface.c index f680ed38c0fc20..47222bf31b6ef2 100644 --- a/gpg-interface.c +++ b/gpg-interface.c @@ -794,8 +794,16 @@ static int git_gpg_config(const char *var, const char *value, fmtname = "ssh"; if (fmtname) { + char *program; + int status; + fmt = get_format_by_name(fmtname); - return git_config_pathname((char **) &fmt->program, var, value); + status = git_config_pathname(&program, var, value); + if (status) + return status; + if (program) + fmt->program = program; + return status; } return 0; @@ -1146,6 +1154,8 @@ int parse_sign_mode(const char *arg, enum sign_mode *mode) *mode = SIGN_WARN_STRIP; else if (!strcmp(arg, "strip")) *mode = SIGN_STRIP; + else if (!strcmp(arg, "strip-if-invalid")) + *mode = SIGN_STRIP_IF_INVALID; else return -1; return 0; diff --git a/gpg-interface.h b/gpg-interface.h index ead1ed69670219..789d1ffac47850 100644 --- a/gpg-interface.h +++ b/gpg-interface.h @@ -111,6 +111,7 @@ enum sign_mode { SIGN_VERBATIM, SIGN_WARN_STRIP, SIGN_STRIP, + SIGN_STRIP_IF_INVALID, }; /* diff --git a/http-backend.c b/http-backend.c index 273ed7266fbff1..24f0dc119ab8e6 100644 --- a/http-backend.c +++ b/http-backend.c @@ -16,6 +16,7 @@ #include "run-command.h" #include "string-list.h" #include "url.h" +#include "setup.h" #include "strvec.h" #include "packfile.h" #include "odb.h" diff --git a/http-push.c b/http-push.c index d86ce771198206..60a9b756209071 100644 --- a/http-push.c +++ b/http-push.c @@ -1725,6 +1725,7 @@ int cmd_main(int argc, const char **argv) int i; int new_refs; struct ref *ref, *local_refs = NULL; + const char *gitdir; CALLOC_ARRAY(repo, 1); @@ -1787,7 +1788,7 @@ int cmd_main(int argc, const char **argv) if (delete_branch && rs.nr != 1) die("You must specify only one branch name when deleting a remote branch"); - setup_git_directory(); + gitdir = setup_git_directory(); memset(remote_dir_exists, -1, 256); @@ -1941,7 +1942,7 @@ int cmd_main(int argc, const char **argv) if (!push_all && !is_null_oid(&ref->old_oid)) strvec_pushf(&commit_argv, "^%s", oid_to_hex(&ref->old_oid)); - repo_init_revisions(the_repository, &revs, setup_git_directory()); + repo_init_revisions(the_repository, &revs, gitdir); setup_revisions_from_strvec(&commit_argv, &revs, NULL); revs.edge_hint = 0; /* just in case */ diff --git a/midx-write.c b/midx-write.c index 23e61cb0001428..e3e9be6d03cd6f 100644 --- a/midx-write.c +++ b/midx-write.c @@ -1458,7 +1458,7 @@ static int write_midx_internal(struct odb_source *source, } if (ctx.m || ctx.base_midx) - close_object_store(ctx.repo->objects); + odb_close(ctx.repo->objects); if (commit_lock_file(&lk) < 0) die_errno(_("could not write multi-pack-index")); diff --git a/odb.c b/odb.c index 3ec21ef24e16bb..dc8f292f3d9645 100644 --- a/odb.c +++ b/odb.c @@ -1,5 +1,6 @@ #include "git-compat-util.h" #include "abspath.h" +#include "chdir-notify.h" #include "commit-graph.h" #include "config.h" #include "dir.h" @@ -9,6 +10,7 @@ #include "khash.h" #include "lockfile.h" #include "loose.h" +#include "midx.h" #include "object-file-convert.h" #include "object-file.h" #include "odb.h" @@ -22,6 +24,7 @@ #include "strbuf.h" #include "strvec.h" #include "submodule.h" +#include "tmp-objdir.h" #include "trace2.h" #include "write-or-die.h" @@ -141,9 +144,9 @@ static void read_info_alternates(struct object_database *odb, const char *relative_base, int depth); -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local) +static struct odb_source *odb_source_new(struct object_database *odb, + const char *path, + bool local) { struct odb_source *source; @@ -359,7 +362,7 @@ struct odb_source *odb_set_temporary_primary_source(struct object_database *odb, * Disable ref updates while a temporary odb is active, since * the objects in the database may roll back. */ - source->disable_ref_updates = 1; + odb->repo->disable_ref_updates = true; source->will_destroy = will_destroy; source->next = odb->sources; odb->sources = source; @@ -386,6 +389,7 @@ void odb_restore_primary_source(struct object_database *odb, if (cur_source->next != restore_source) BUG("we expect the old primary object store to be the first alternate"); + odb->repo->disable_ref_updates = false; odb->sources = restore_source; odb_source_free(cur_source); } @@ -1032,18 +1036,79 @@ int odb_write_object_stream(struct object_database *odb, return odb_source_loose_write_stream(odb->sources, stream, len, oid); } -struct object_database *odb_new(struct repository *repo) +static void odb_update_commondir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *cb_data) +{ + struct object_database *odb = cb_data; + struct tmp_objdir *tmp_objdir; + struct odb_source *source; + + tmp_objdir = tmp_objdir_unapply_primary_odb(); + + /* + * In theory, we only have to do this for the primary object source, as + * alternates' paths are always resolved to an absolute path. + */ + for (source = odb->sources; source; source = source->next) { + char *path; + + if (is_absolute_path(source->path)) + continue; + + path = reparent_relative_path(old_cwd, new_cwd, + source->path); + + free(source->path); + source->path = path; + } + + if (tmp_objdir) + tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); +} + +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *secondary_sources) { struct object_database *o = xmalloc(sizeof(*o)); + char *to_free = NULL; memset(o, 0, sizeof(*o)); o->repo = repo; o->packfiles = packfile_store_new(o); pthread_mutex_init(&o->replace_mutex, NULL); string_list_init_dup(&o->submodule_source_paths); + + if (!primary_source) + primary_source = to_free = xstrfmt("%s/objects", repo->commondir); + o->sources = odb_source_new(o, primary_source, true); + o->sources_tail = &o->sources->next; + o->alternate_db = xstrdup_or_null(secondary_sources); + + free(to_free); + + chdir_notify_register(NULL, odb_update_commondir, o); + return o; } +void odb_close(struct object_database *o) +{ + struct odb_source *source; + + packfile_store_close(o->packfiles); + + for (source = o->sources; source; source = source->next) { + if (source->midx) + close_midx(source->midx); + source->midx = NULL; + } + + close_commit_graph(o); +} + static void odb_free_sources(struct object_database *o) { while (o->sources) { @@ -1057,30 +1122,29 @@ static void odb_free_sources(struct object_database *o) o->source_by_path = NULL; } -void odb_clear(struct object_database *o) +void odb_free(struct object_database *o) { - FREE_AND_NULL(o->alternate_db); + if (!o) + return; + + free(o->alternate_db); oidmap_clear(&o->replace_map, 1); pthread_mutex_destroy(&o->replace_mutex); - free_commit_graph(o->commit_graph); - o->commit_graph = NULL; - o->commit_graph_attempted = 0; - odb_free_sources(o); - o->sources_tail = NULL; - o->loaded_alternates = 0; for (size_t i = 0; i < o->cached_object_nr; i++) free((char *) o->cached_objects[i].value.buf); - FREE_AND_NULL(o->cached_objects); + free(o->cached_objects); - close_object_store(o); + odb_close(o); packfile_store_free(o->packfiles); - o->packfiles = NULL; - string_list_clear(&o->submodule_source_paths, 0); + + chdir_notify_unregister(NULL, odb_update_commondir, o); + + free(o); } void odb_reprepare(struct object_database *o) diff --git a/odb.h b/odb.h index 9bb28008b1d953..014cd9585a2f6e 100644 --- a/odb.h +++ b/odb.h @@ -66,13 +66,6 @@ struct odb_source { */ bool local; - /* - * This is a temporary object store created by the tmp_objdir - * facility. Disable ref updates since the objects in the store - * might be discarded on rollback. - */ - int disable_ref_updates; - /* * This object store is ephemeral, so there is no need to fsync. */ @@ -85,10 +78,6 @@ struct odb_source { char *path; }; -struct odb_source *odb_source_new(struct object_database *odb, - const char *path, - bool local); - struct packed_git; struct packfile_store; struct cached_object_entry; @@ -166,8 +155,30 @@ struct object_database { struct string_list submodule_source_paths; }; -struct object_database *odb_new(struct repository *repo); -void odb_clear(struct object_database *o); +/* + * Create a new object database for the given repository. + * + * If the primary source parameter is set it will override the usual primary + * object directory derived from the repository's common directory. The + * alternate sources are expected to be a PATH_SEP-separated list of secondary + * sources. Note that these alternate sources will be added in addition to, not + * instead of, the alternates identified by the primary source. + * + * Returns the newly created object database. + */ +struct object_database *odb_new(struct repository *repo, + const char *primary_source, + const char *alternate_sources); + +/* Free the object database and release all resources. */ +void odb_free(struct object_database *o); + +/* + * Close the object database and all of its sources so that any held resources + * will be released. The database can still be used after closing it, in which + * case these resources may be reallocated. + */ +void odb_close(struct object_database *o); /* * Clear caches, reload alternates and then reload object sources so that new diff --git a/oidset.c b/oidset.c index 8d36aef8dca4fc..c8ff0b385c58ac 100644 --- a/oidset.c +++ b/oidset.c @@ -16,6 +16,22 @@ int oidset_contains(const struct oidset *set, const struct object_id *oid) return pos != kh_end(&set->set); } +bool oidset_equal(const struct oidset *a, const struct oidset *b) +{ + struct oidset_iter iter; + struct object_id *a_oid; + + if (oidset_size(a) != oidset_size(b)) + return false; + + oidset_iter_init(a, &iter); + while ((a_oid = oidset_iter_next(&iter))) + if (!oidset_contains(b, a_oid)) + return false; + + return true; +} + int oidset_insert(struct oidset *set, const struct object_id *oid) { int added; diff --git a/oidset.h b/oidset.h index 0106b6f2787f0e..e0f1a6ff4ff203 100644 --- a/oidset.h +++ b/oidset.h @@ -38,6 +38,11 @@ void oidset_init(struct oidset *set, size_t initial_size); */ int oidset_contains(const struct oidset *set, const struct object_id *oid); +/** + * Returns true iff `a` and `b` contain the exact same OIDs. + */ +bool oidset_equal(const struct oidset *a, const struct oidset *b); + /** * Insert the oid into the set; a copy is made, so "oid" does not need * to persist after this function is called. @@ -94,11 +99,11 @@ void oidset_parse_file_carefully(struct oidset *set, const char *path, oidset_parse_tweak_fn fn, void *cbdata); struct oidset_iter { - kh_oid_set_t *set; + const kh_oid_set_t *set; khiter_t iter; }; -static inline void oidset_iter_init(struct oidset *set, +static inline void oidset_iter_init(const struct oidset *set, struct oidset_iter *iter) { iter->set = &set->set; diff --git a/packfile.c b/packfile.c index 9cc11b6dc56225..3d8b994a617e81 100644 --- a/packfile.c +++ b/packfile.c @@ -443,21 +443,6 @@ void close_pack(struct packed_git *p) oidset_clear(&p->bad_objects); } -void close_object_store(struct object_database *o) -{ - struct odb_source *source; - - packfile_store_close(o->packfiles); - - for (source = o->sources; source; source = source->next) { - if (source->midx) - close_midx(source->midx); - source->midx = NULL; - } - - close_commit_graph(o); -} - void unlink_pack_path(const char *pack_name, int force_delete) { static const char *exts[] = {".idx", ".pack", ".rev", ".keep", ".bitmap", ".promisor", ".mtimes"}; diff --git a/packfile.h b/packfile.h index 0e178fb279fe93..20e1cf17b26083 100644 --- a/packfile.h +++ b/packfile.h @@ -299,7 +299,6 @@ struct object_database; unsigned char *use_pack(struct packed_git *, struct pack_window **, off_t, unsigned long *); void close_pack_windows(struct packed_git *); void close_pack(struct packed_git *); -void close_object_store(struct object_database *o); void unuse_pack(struct pack_window **); void clear_delta_base_cache(void); struct packed_git *add_packed_git(struct repository *r, const char *path, diff --git a/path.c b/path.c index 7f56eaf9930374..d726537622cda6 100644 --- a/path.c +++ b/path.c @@ -738,106 +738,6 @@ char *interpolate_path(const char *path, int real_home) return NULL; } -/* - * First, one directory to try is determined by the following algorithm. - * - * (0) If "strict" is given, the path is used as given and no DWIM is - * done. Otherwise: - * (1) "~/path" to mean path under the running user's home directory; - * (2) "~user/path" to mean path under named user's home directory; - * (3) "relative/path" to mean cwd relative directory; or - * (4) "/absolute/path" to mean absolute directory. - * - * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" - * in this order. We select the first one that is a valid git repository, and - * chdir() to it. If none match, or we fail to chdir, we return NULL. - * - * If all goes well, we return the directory we used to chdir() (but - * before ~user is expanded), avoiding getcwd() resolving symbolic - * links. User relative paths are also returned as they are given, - * except DWIM suffixing. - */ -const char *enter_repo(const char *path, unsigned flags) -{ - static struct strbuf validated_path = STRBUF_INIT; - static struct strbuf used_path = STRBUF_INIT; - - if (!path) - return NULL; - - if (!(flags & ENTER_REPO_STRICT)) { - static const char *suffix[] = { - "/.git", "", ".git/.git", ".git", NULL, - }; - const char *gitfile; - int len = strlen(path); - int i; - while ((1 < len) && (path[len-1] == '/')) - len--; - - /* - * We can handle arbitrary-sized buffers, but this remains as a - * sanity check on untrusted input. - */ - if (PATH_MAX <= len) - return NULL; - - strbuf_reset(&used_path); - strbuf_reset(&validated_path); - strbuf_add(&used_path, path, len); - strbuf_add(&validated_path, path, len); - - if (used_path.buf[0] == '~') { - char *newpath = interpolate_path(used_path.buf, 0); - if (!newpath) - return NULL; - strbuf_attach(&used_path, newpath, strlen(newpath), - strlen(newpath)); - } - for (i = 0; suffix[i]; i++) { - struct stat st; - size_t baselen = used_path.len; - strbuf_addstr(&used_path, suffix[i]); - if (!stat(used_path.buf, &st) && - (S_ISREG(st.st_mode) || - (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { - strbuf_addstr(&validated_path, suffix[i]); - break; - } - strbuf_setlen(&used_path, baselen); - } - if (!suffix[i]) - return NULL; - gitfile = read_gitfile(used_path.buf); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, used_path.buf); - if (gitfile) { - strbuf_reset(&used_path); - strbuf_addstr(&used_path, gitfile); - } - if (chdir(used_path.buf)) - return NULL; - path = validated_path.buf; - } - else { - const char *gitfile = read_gitfile(path); - if (!(flags & ENTER_REPO_ANY_OWNER_OK)) - die_upon_dubious_ownership(gitfile, NULL, path); - if (gitfile) - path = gitfile; - if (chdir(path)) - return NULL; - } - - if (is_git_directory(".")) { - set_git_dir(".", 0); - check_repository_format(NULL); - return path; - } - - return NULL; -} - int calc_shared_perm(struct repository *repo, int mode) { diff --git a/path.h b/path.h index e67348f25397cc..0ec95a0b079c90 100644 --- a/path.h +++ b/path.h @@ -146,21 +146,6 @@ int adjust_shared_perm(struct repository *repo, const char *path); char *interpolate_path(const char *path, int real_home); -/* The bits are as follows: - * - * - ENTER_REPO_STRICT: callers that require exact paths (as opposed - * to allowing known suffixes like ".git", ".git/.git" to be - * omitted) can set this bit. - * - * - ENTER_REPO_ANY_OWNER_OK: callers that are willing to run without - * ownership check can set this bit. - */ -enum { - ENTER_REPO_STRICT = (1<<0), - ENTER_REPO_ANY_OWNER_OK = (1<<1), -}; - -const char *enter_repo(const char *path, unsigned flags); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); diff --git a/refs.c b/refs.c index 5583f6e09d7c76..046b695bb20f54 100644 --- a/refs.c +++ b/refs.c @@ -2508,7 +2508,7 @@ int ref_transaction_prepare(struct ref_transaction *transaction, break; } - if (refs->repo->objects->sources->disable_ref_updates) { + if (refs->repo->disable_ref_updates) { strbuf_addstr(err, _("ref updates forbidden inside quarantine environment")); return -1; diff --git a/repository.c b/repository.c index 6aaa7ba00869bf..863f24411b7bf9 100644 --- a/repository.c +++ b/repository.c @@ -52,7 +52,6 @@ static void set_default_hash_algo(struct repository *repo) void initialize_repository(struct repository *repo) { - repo->objects = odb_new(repo); repo->remote_state = remote_state_new(); repo->parsed_objects = parsed_object_pool_new(repo); ALLOC_ARRAY(repo->index, 1); @@ -160,29 +159,19 @@ void repo_set_gitdir(struct repository *repo, * until after xstrdup(root). Then we can free it. */ char *old_gitdir = repo->gitdir; - char *objects_path = NULL; repo->gitdir = xstrdup(gitfile ? gitfile : root); free(old_gitdir); repo_set_commondir(repo, o->commondir); - expand_base_dir(&objects_path, o->object_dir, - repo->commondir, "objects"); - - if (!repo->objects->sources) { - repo->objects->sources = odb_source_new(repo->objects, - objects_path, true); - repo->objects->sources_tail = &repo->objects->sources->next; - free(objects_path); - } else { - free(repo->objects->sources->path); - repo->objects->sources->path = objects_path; - } - repo->objects->sources->disable_ref_updates = o->disable_ref_updates; + if (!repo->objects) + repo->objects = odb_new(repo, o->object_dir, o->alternate_db); + else if (!o->skip_initializing_odb) + BUG("cannot reinitialize an already-initialized object directory"); + + repo->disable_ref_updates = o->disable_ref_updates; - free(repo->objects->alternate_db); - repo->objects->alternate_db = xstrdup_or_null(o->alternate_db); expand_base_dir(&repo->graft_file, o->graft_file, repo->commondir, "info/grafts"); expand_base_dir(&repo->index_file, o->index_file, @@ -382,8 +371,8 @@ void repo_clear(struct repository *repo) FREE_AND_NULL(repo->worktree); FREE_AND_NULL(repo->submodule_prefix); - odb_clear(repo->objects); - FREE_AND_NULL(repo->objects); + odb_free(repo->objects); + repo->objects = NULL; parsed_object_pool_clear(repo->parsed_objects); FREE_AND_NULL(repo->parsed_objects); diff --git a/repository.h b/repository.h index 5808a5d610846a..6063c4b846d031 100644 --- a/repository.h +++ b/repository.h @@ -71,6 +71,13 @@ struct repository { */ struct ref_store *refs_private; + /* + * Disable ref updates. This is especially used in contexts where + * transactions may still be rolled back so that we don't start to + * reference objects that may vanish. + */ + bool disable_ref_updates; + /* * A strmap of ref_stores, stored by submodule name, accessible via * `repo_get_submodule_ref_store()`. @@ -187,7 +194,8 @@ struct set_gitdir_args { const char *graft_file; const char *index_file; const char *alternate_db; - int disable_ref_updates; + bool disable_ref_updates; + bool skip_initializing_odb; }; void repo_set_gitdir(struct repository *repo, const char *root, diff --git a/run-command.c b/run-command.c index ed9575bd6a8cbb..e3e02475ccec50 100644 --- a/run-command.c +++ b/run-command.c @@ -743,7 +743,7 @@ int start_command(struct child_process *cmd) fflush(NULL); if (cmd->close_object_store) - close_object_store(the_repository->objects); + odb_close(the_repository->objects); #ifndef GIT_WINDOWS_NATIVE { diff --git a/scalar.c b/scalar.c index f7543116272b77..2aeb191cc89b72 100644 --- a/scalar.c +++ b/scalar.c @@ -931,7 +931,7 @@ static int cmd_delete(int argc, const char **argv) if (dir_inside_of(cwd, enlistment.buf) >= 0) res = error(_("refusing to delete current working directory")); else { - close_object_store(the_repository->objects); + odb_close(the_repository->objects); res = delete_enlistment(&enlistment); } strbuf_release(&enlistment); diff --git a/setup.c b/setup.c index 7086741e6c2d1f..3a6a048620dd7d 100644 --- a/setup.c +++ b/setup.c @@ -22,7 +22,6 @@ #include "chdir-notify.h" #include "path.h" #include "quote.h" -#include "tmp-objdir.h" #include "trace.h" #include "trace2.h" #include "worktree.h" @@ -1002,6 +1001,83 @@ const char *read_gitfile_gently(const char *path, int *return_error_code) return error_code ? NULL : path; } +static void setup_git_env_internal(const char *git_dir, + bool skip_initializing_odb) +{ + char *git_replace_ref_base; + const char *shallow_file; + const char *replace_ref_base; + struct set_gitdir_args args = { NULL }; + struct strvec to_free = STRVEC_INIT; + + args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); + args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); + args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); + args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); + args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); + if (getenv(GIT_QUARANTINE_ENVIRONMENT)) + args.disable_ref_updates = true; + args.skip_initializing_odb = skip_initializing_odb; + + repo_set_gitdir(the_repository, git_dir, &args); + strvec_clear(&to_free); + + if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) + disable_replace_refs(); + replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); + git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base + : "refs/replace/"); + update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); + + shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); + if (shallow_file) + set_alternate_shallow_file(the_repository, shallow_file, 0); + + if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) + fetch_if_missing = 0; +} + +void setup_git_env(const char *git_dir) +{ + setup_git_env_internal(git_dir, false); +} + +static void set_git_dir_1(const char *path, bool skip_initializing_odb) +{ + xsetenv(GIT_DIR_ENVIRONMENT, path, 1); + setup_git_env_internal(path, skip_initializing_odb); +} + +static void update_relative_gitdir(const char *name UNUSED, + const char *old_cwd, + const char *new_cwd, + void *data UNUSED) +{ + char *path = reparent_relative_path(old_cwd, new_cwd, + repo_get_git_dir(the_repository)); + trace_printf_key(&trace_setup_key, + "setup: move $GIT_DIR to '%s'", + path); + set_git_dir_1(path, true); + free(path); +} + +static void set_git_dir(const char *path, int make_realpath) +{ + struct strbuf realpath = STRBUF_INIT; + + if (make_realpath) { + strbuf_realpath(&realpath, path, 1); + path = realpath.buf; + } + + set_git_dir_1(path, false); + if (!is_absolute_path(path)) + chdir_notify_register(NULL, update_relative_gitdir, NULL); + + strbuf_release(&realpath); +} + static const char *setup_explicit_git_dir(const char *gitdirenv, struct strbuf *cwd, struct repository_format *repo_fmt, @@ -1248,7 +1324,7 @@ static int safe_directory_cb(const char *key, const char *value, } else { char *allowed = NULL; - if (!git_config_pathname(&allowed, key, value)) { + if (!git_config_pathname(&allowed, key, value) && allowed) { char *normalized = NULL; /* @@ -1628,79 +1704,85 @@ enum discovery_result discover_git_directory_reason(struct strbuf *commondir, return result; } -void setup_git_env(const char *git_dir) -{ - char *git_replace_ref_base; - const char *shallow_file; - const char *replace_ref_base; - struct set_gitdir_args args = { NULL }; - struct strvec to_free = STRVEC_INIT; - - args.commondir = getenv_safe(&to_free, GIT_COMMON_DIR_ENVIRONMENT); - args.object_dir = getenv_safe(&to_free, DB_ENVIRONMENT); - args.graft_file = getenv_safe(&to_free, GRAFT_ENVIRONMENT); - args.index_file = getenv_safe(&to_free, INDEX_ENVIRONMENT); - args.alternate_db = getenv_safe(&to_free, ALTERNATE_DB_ENVIRONMENT); - if (getenv(GIT_QUARANTINE_ENVIRONMENT)) { - args.disable_ref_updates = 1; - } - - repo_set_gitdir(the_repository, git_dir, &args); - strvec_clear(&to_free); - - if (getenv(NO_REPLACE_OBJECTS_ENVIRONMENT)) - disable_replace_refs(); - replace_ref_base = getenv(GIT_REPLACE_REF_BASE_ENVIRONMENT); - git_replace_ref_base = xstrdup(replace_ref_base ? replace_ref_base - : "refs/replace/"); - update_ref_namespace(NAMESPACE_REPLACE, git_replace_ref_base); - - shallow_file = getenv(GIT_SHALLOW_FILE_ENVIRONMENT); - if (shallow_file) - set_alternate_shallow_file(the_repository, shallow_file, 0); - - if (git_env_bool(NO_LAZY_FETCH_ENVIRONMENT, 0)) - fetch_if_missing = 0; -} - -static void set_git_dir_1(const char *path) +const char *enter_repo(const char *path, unsigned flags) { - xsetenv(GIT_DIR_ENVIRONMENT, path, 1); - setup_git_env(path); -} + static struct strbuf validated_path = STRBUF_INIT; + static struct strbuf used_path = STRBUF_INIT; -static void update_relative_gitdir(const char *name UNUSED, - const char *old_cwd, - const char *new_cwd, - void *data UNUSED) -{ - char *path = reparent_relative_path(old_cwd, new_cwd, - repo_get_git_dir(the_repository)); - struct tmp_objdir *tmp_objdir = tmp_objdir_unapply_primary_odb(); + if (!path) + return NULL; - trace_printf_key(&trace_setup_key, - "setup: move $GIT_DIR to '%s'", - path); - set_git_dir_1(path); - if (tmp_objdir) - tmp_objdir_reapply_primary_odb(tmp_objdir, old_cwd, new_cwd); - free(path); -} + if (!(flags & ENTER_REPO_STRICT)) { + static const char *suffix[] = { + "/.git", "", ".git/.git", ".git", NULL, + }; + const char *gitfile; + int len = strlen(path); + int i; + while ((1 < len) && (path[len-1] == '/')) + len--; -void set_git_dir(const char *path, int make_realpath) -{ - struct strbuf realpath = STRBUF_INIT; + /* + * We can handle arbitrary-sized buffers, but this remains as a + * sanity check on untrusted input. + */ + if (PATH_MAX <= len) + return NULL; - if (make_realpath) { - strbuf_realpath(&realpath, path, 1); - path = realpath.buf; + strbuf_reset(&used_path); + strbuf_reset(&validated_path); + strbuf_add(&used_path, path, len); + strbuf_add(&validated_path, path, len); + + if (used_path.buf[0] == '~') { + char *newpath = interpolate_path(used_path.buf, 0); + if (!newpath) + return NULL; + strbuf_attach(&used_path, newpath, strlen(newpath), + strlen(newpath)); + } + for (i = 0; suffix[i]; i++) { + struct stat st; + size_t baselen = used_path.len; + strbuf_addstr(&used_path, suffix[i]); + if (!stat(used_path.buf, &st) && + (S_ISREG(st.st_mode) || + (S_ISDIR(st.st_mode) && is_git_directory(used_path.buf)))) { + strbuf_addstr(&validated_path, suffix[i]); + break; + } + strbuf_setlen(&used_path, baselen); + } + if (!suffix[i]) + return NULL; + gitfile = read_gitfile(used_path.buf); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, used_path.buf); + if (gitfile) { + strbuf_reset(&used_path); + strbuf_addstr(&used_path, gitfile); + } + if (chdir(used_path.buf)) + return NULL; + path = validated_path.buf; + } + else { + const char *gitfile = read_gitfile(path); + if (!(flags & ENTER_REPO_ANY_OWNER_OK)) + die_upon_dubious_ownership(gitfile, NULL, path); + if (gitfile) + path = gitfile; + if (chdir(path)) + return NULL; } - set_git_dir_1(path); - if (!is_absolute_path(path)) - chdir_notify_register(NULL, update_relative_gitdir, NULL); + if (is_git_directory(".")) { + set_git_dir(".", 0); + check_repository_format(NULL); + return path; + } - strbuf_release(&realpath); + return NULL; } static int git_work_tree_initialized; diff --git a/setup.h b/setup.h index 8522fa8575da71..d55dcc66086308 100644 --- a/setup.h +++ b/setup.h @@ -94,9 +94,46 @@ static inline int discover_git_directory(struct strbuf *commondir, return 0; } -void set_git_dir(const char *path, int make_realpath); void set_git_work_tree(const char *tree); +/* Flags that can be passed to `enter_repo()`. */ +enum { + /* + * Callers that require exact paths (as opposed to allowing known + * suffixes like ".git", ".git/.git" to be omitted) can set this bit. + */ + ENTER_REPO_STRICT = (1<<0), + + /* + * Callers that are willing to run without ownership check can set this + * bit. + */ + ENTER_REPO_ANY_OWNER_OK = (1<<1), +}; + +/* + * Discover and enter a repository. + * + * First, one directory to try is determined by the following algorithm. + * + * (0) If "strict" is given, the path is used as given and no DWIM is + * done. Otherwise: + * (1) "~/path" to mean path under the running user's home directory; + * (2) "~user/path" to mean path under named user's home directory; + * (3) "relative/path" to mean cwd relative directory; or + * (4) "/absolute/path" to mean absolute directory. + * + * Unless "strict" is given, we check "%s/.git", "%s", "%s.git/.git", "%s.git" + * in this order. We select the first one that is a valid git repository, and + * chdir() to it. If none match, or we fail to chdir, we return NULL. + * + * If all goes well, we return the directory we used to chdir() (but + * before ~user is expanded), avoiding getcwd() resolving symbolic + * links. User relative paths are also returned as they are given, + * except DWIM suffixing. + */ +const char *enter_repo(const char *path, unsigned flags); + const char *setup_git_directory_gently(int *); const char *setup_git_directory(void); char *prefix_path(const char *prefix, int len, const char *path); diff --git a/t/helper/test-repository.c b/t/helper/test-repository.c index 63c37de33d22f1..9ba94cdffa4c47 100644 --- a/t/helper/test-repository.c +++ b/t/helper/test-repository.c @@ -17,10 +17,6 @@ static void test_parse_commit_in_graph(const char *gitdir, const char *worktree, struct commit *c; struct commit_list *parent; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -47,10 +43,6 @@ static void test_get_commit_tree_in_graph(const char *gitdir, struct commit *c; struct tree *tree; - setup_git_env(gitdir); - - repo_clear(the_repository); - if (repo_init(&r, gitdir, worktree)) die("Couldn't init repo"); @@ -75,24 +67,20 @@ static void test_get_commit_tree_in_graph(const char *gitdir, int cmd__repository(int argc, const char **argv) { - int nongit_ok = 0; - - setup_git_directory_gently(&nongit_ok); - if (argc < 2) die("must have at least 2 arguments"); if (!strcmp(argv[1], "parse_commit_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_parse_commit_in_graph(argv[2], argv[3], &oid); } else if (!strcmp(argv[1], "get_commit_tree_in_graph")) { struct object_id oid; if (argc < 5) die("not enough arguments"); - if (parse_oid_hex(argv[4], &oid, &argv[4])) + if (parse_oid_hex_any(argv[4], &oid, &argv[4]) == GIT_HASH_UNKNOWN) die("cannot parse oid '%s'", argv[4]); test_get_commit_tree_in_graph(argv[2], argv[3], &oid); } else { diff --git a/t/meson.build b/t/meson.build index 7c994d4643efe2..d3d0be28224b9c 100644 --- a/t/meson.build +++ b/t/meson.build @@ -185,6 +185,7 @@ integration_tests = [ 't1308-config-set.sh', 't1309-early-config.sh', 't1310-config-default.sh', + 't1311-config-optional.sh', 't1350-config-hooks-path.sh', 't1400-update-ref.sh', 't1401-symbolic-ref.sh', diff --git a/t/t1311-config-optional.sh b/t/t1311-config-optional.sh new file mode 100755 index 00000000000000..fbbacfc67b368d --- /dev/null +++ b/t/t1311-config-optional.sh @@ -0,0 +1,38 @@ +#!/bin/sh +# +# Copyright (c) 2025 Google LLC +# + +test_description=':(optional) paths' + +. ./test-lib.sh + +test_expect_success 'var=:(optional)path-exists' ' + test_config a.path ":(optional)path-exists" && + >path-exists && + echo path-exists >expect && + + git config get --path a.path >actual && + test_cmp expect actual +' + +test_expect_success 'missing optional value is ignored' ' + test_config a.path ":(optional)no-such-path" && + # Using --show-scope ensures we skip writing not only the value + # but also any meta-information about the ignored key. + test_must_fail git config get --show-scope --path a.path >actual && + test_line_count = 0 actual +' + +test_expect_success 'missing optional value is ignored in multi-value config' ' + test_when_finished "git config unset --all a.path" && + git config set --append a.path ":(optional)path-exists" && + git config set --append a.path ":(optional)no-such-path" && + >path-exists && + echo path-exists >expect && + + git config --get --path a.path >actual && + test_cmp expect actual +' + +test_done diff --git a/t/t5302-pack-index.sh b/t/t5302-pack-index.sh index 413c99274c8f30..9697448cb27634 100755 --- a/t/t5302-pack-index.sh +++ b/t/t5302-pack-index.sh @@ -293,4 +293,20 @@ test_expect_success 'too-large packs report the breach' ' grep "maximum allowed size (20 bytes)" err ' +# git-index-pack(1) uses the default hash algorithm outside of the repository, +# and it has no way to tell it otherwise. So we can only run this test with the +# default hash algorithm, as it would otherwise fail to parse the tree. +test_expect_success DEFAULT_HASH_ALGORITHM 'index-pack --fsck-objects outside of a repo' ' + test_when_finished "rm -rf repo" && + git init repo && + ( + cd repo && + printf "100644 blob $(test_oid 001)\t.gitattributes\n" >tree && + git mktree --missing tree-oid && + git pack-objects err && + test_grep "cannot perform queued object checks outside of a repository" err + ) +' + test_done diff --git a/t/t9305-fast-import-signatures.sh b/t/t9305-fast-import-signatures.sh index c2b427165862d3..022dae02e48177 100755 --- a/t/t9305-fast-import-signatures.sh +++ b/t/t9305-fast-import-signatures.sh @@ -79,7 +79,7 @@ test_expect_success GPG 'setup a commit with dual OpenPGP signatures on its SHA- echo B >explicit-sha256/B && git -C explicit-sha256 add B && test_tick && - git -C explicit-sha256 commit -S -m "signed" B && + git -C explicit-sha256 commit -S -m "signed commit" B && SHA256_B=$(git -C explicit-sha256 rev-parse dual-signed) && # Create the corresponding SHA-1 commit @@ -103,4 +103,71 @@ test_expect_success GPG 'strip both OpenPGP signatures with --signed-commits=war test_line_count = 2 out ' +test_expect_success GPG 'import commit with no signature with --signed-commits=strip-if-invalid' ' + git fast-export main >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + test_must_be_empty log +' + +test_expect_success GPG 'keep valid OpenPGP signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + +test_expect_success GPG 'strip signature invalidated by message change with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim openpgp-signing >output && + + # Change the commit message, which invalidates the signature. + # The commit message length should not change though, otherwise the + # corresponding `data ` command would have to be changed too. + sed "s/OpenPGP signed commit/OpenPGP forged commit/" output >modified && + + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + + IMPORTED=$(git -C new rev-parse --verify refs/heads/openpgp-signing) && + test $OPENPGP_SIGNING != $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep ! -E "^gpgsig" actual && + test_grep "stripping invalid signature" log +' + +test_expect_success GPGSM 'keep valid X.509 signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + git fast-export --signed-commits=verbatim x509-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/x509-signing) && + test $X509_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + +test_expect_success GPGSSH 'keep valid SSH signature with --signed-commits=strip-if-invalid' ' + rm -rf new && + git init new && + + test_config -C new gpg.ssh.allowedSignersFile "${GPGSSH_ALLOWED_SIGNERS}" && + + git fast-export --signed-commits=verbatim ssh-signing >output && + git -C new fast-import --quiet --signed-commits=strip-if-invalid log 2>&1 && + IMPORTED=$(git -C new rev-parse --verify refs/heads/ssh-signing) && + test $SSH_SIGNING = $IMPORTED && + git -C new cat-file commit "$IMPORTED" >actual && + test_grep -E "^gpgsig(-sha256)? " actual && + test_must_be_empty log +' + test_done diff --git a/xdiff-interface.c b/xdiff-interface.c index 4971f722b3e5f4..1a35556380451a 100644 --- a/xdiff-interface.c +++ b/xdiff-interface.c @@ -300,7 +300,7 @@ void xdiff_clear_find_func(xdemitconf_t *xecfg) unsigned long xdiff_hash_string(const char *s, size_t len, long flags) { - return xdl_hash_record(&s, s + len, flags); + return xdl_hash_record((uint8_t const**)&s, (uint8_t const*)s + len, flags); } int xdiff_compare_lines(const char *l1, long s1, diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c index 6f3998ee54c01e..4376f943dba539 100644 --- a/xdiff/xdiffi.c +++ b/xdiff/xdiffi.c @@ -22,9 +22,9 @@ #include "xinclude.h" -static unsigned long get_hash(xdfile_t *xdf, long index) +static size_t get_hash(xdfile_t *xdf, long index) { - return xdf->recs[xdf->rindex[index]].ha; + return xdf->recs[xdf->reference_index[index]].minimal_perfect_hash; } #define XDL_MAX_COST_MIN 256 @@ -278,10 +278,10 @@ int xdl_recs_cmp(xdfile_t *xdf1, long off1, long lim1, */ if (off1 == lim1) { for (; off2 < lim2; off2++) - xdf2->changed[xdf2->rindex[off2]] = true; + xdf2->changed[xdf2->reference_index[off2]] = true; } else if (off2 == lim2) { for (; off1 < lim1; off1++) - xdf1->changed[xdf1->rindex[off1]] = true; + xdf1->changed[xdf1->reference_index[off1]] = true; } else { xdpsplit_t spl; spl.i1 = spl.i2 = 0; @@ -385,7 +385,7 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1, static int recs_match(xrecord_t *rec1, xrecord_t *rec2) { - return (rec1->ha == rec2->ha); + return rec1->minimal_perfect_hash == rec2->minimal_perfect_hash; } /* @@ -403,11 +403,10 @@ static int recs_match(xrecord_t *rec1, xrecord_t *rec2) */ static int get_indent(xrecord_t *rec) { - long i; int ret = 0; - for (i = 0; i < rec->size; i++) { - char c = rec->ptr[i]; + for (size_t i = 0; i < rec->size; i++) { + char c = (char) rec->ptr[i]; if (!XDL_ISSPACE(c)) return ret; @@ -484,7 +483,7 @@ static void measure_split(const xdfile_t *xdf, long split, { long i; - if (split >= xdf->nrec) { + if (split >= (long)xdf->nrec) { m->end_of_file = 1; m->indent = -1; } else { @@ -507,7 +506,7 @@ static void measure_split(const xdfile_t *xdf, long split, m->post_blank = 0; m->post_indent = -1; - for (i = split + 1; i < xdf->nrec; i++) { + for (i = split + 1; i < (long)xdf->nrec; i++) { m->post_indent = get_indent(&xdf->recs[i]); if (m->post_indent != -1) break; @@ -718,7 +717,7 @@ static void group_init(xdfile_t *xdf, struct xdlgroup *g) */ static inline int group_next(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end == xdf->nrec) + if (g->end == (long)xdf->nrec) return -1; g->start = g->end + 1; @@ -751,7 +750,7 @@ static inline int group_previous(xdfile_t *xdf, struct xdlgroup *g) */ static int group_slide_down(xdfile_t *xdf, struct xdlgroup *g) { - if (g->end < xdf->nrec && + if (g->end < (long)xdf->nrec && recs_match(&xdf->recs[g->start], &xdf->recs[g->end])) { xdf->changed[g->start++] = false; xdf->changed[g->end++] = true; @@ -993,11 +992,11 @@ static void xdl_mark_ignorable_lines(xdchange_t *xscr, xdfenv_t *xe, long flags) rec = &xe->xdf1.recs[xch->i1]; for (i = 0; i < xch->chg1 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); rec = &xe->xdf2.recs[xch->i2]; for (i = 0; i < xch->chg2 && ignore; i++) - ignore = xdl_blankline(rec[i].ptr, rec[i].size, flags); + ignore = xdl_blankline((const char *)rec[i].ptr, (long)rec[i].size, flags); xch->ignore = ignore; } @@ -1008,7 +1007,7 @@ static int record_matches_regex(xrecord_t *rec, xpparam_t const *xpp) { size_t i; for (i = 0; i < xpp->ignore_regex_nr; i++) - if (!regexec_buf(xpp->ignore_regex[i], rec->ptr, rec->size, 1, + if (!regexec_buf(xpp->ignore_regex[i], (const char *)rec->ptr, rec->size, 1, ®match, 0)) return 1; diff --git a/xdiff/xemit.c b/xdiff/xemit.c index b2f1f30cd36eef..04f7e9193b61f0 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -27,7 +27,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t * { xrecord_t *rec = &xdf->recs[ri]; - if (xdl_emit_diffrec(rec->ptr, rec->size, pre, strlen(pre), ecb) < 0) + if (xdl_emit_diffrec((char const *)rec->ptr, (long)rec->size, pre, strlen(pre), ecb) < 0) return -1; return 0; @@ -113,8 +113,8 @@ static long match_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri, xrecord_t *rec = &xdf->recs[ri]; if (!xecfg->find_func) - return def_ff(rec->ptr, rec->size, buf, sz); - return xecfg->find_func(rec->ptr, rec->size, buf, sz, xecfg->find_func_priv); + return def_ff((const char *)rec->ptr, (long)rec->size, buf, sz); + return xecfg->find_func((const char *)rec->ptr, (long)rec->size, buf, sz, xecfg->find_func_priv); } static int is_func_rec(xdfile_t *xdf, xdemitconf_t const *xecfg, long ri) @@ -137,7 +137,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, buf = func_line ? func_line->buf : dummy; size = func_line ? sizeof(func_line->buf) : sizeof(dummy); - for (l = start; l != limit && 0 <= l && l < xe->xdf1.nrec; l += step) { + for (l = start; l != limit && 0 <= l && l < (long)xe->xdf1.nrec; l += step) { long len = match_func_rec(&xe->xdf1, xecfg, l, buf, size); if (len >= 0) { if (func_line) @@ -151,7 +151,7 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg, static int is_empty_rec(xdfile_t *xdf, long ri) { xrecord_t *rec = &xdf->recs[ri]; - long i = 0; + size_t i = 0; for (; i < rec->size && XDL_ISSPACE(rec->ptr[i]); i++); @@ -179,14 +179,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, long fs1, i1 = xch->i1; /* Appended chunk? */ - if (i1 >= xe->xdf1.nrec) { + if (i1 >= (long)xe->xdf1.nrec) { long i2 = xch->i2; /* * We don't need additional context if * a whole function was added. */ - while (i2 < xe->xdf2.nrec) { + while (i2 < (long)xe->xdf2.nrec) { if (is_func_rec(&xe->xdf2, xecfg, i2)) goto post_context_calculation; i2++; @@ -196,7 +196,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, * Otherwise get more context from the * pre-image. */ - i1 = xe->xdf1.nrec - 1; + i1 = (long)xe->xdf1.nrec - 1; } fs1 = get_func_line(xe, xecfg, NULL, i1, -1); @@ -228,8 +228,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, post_context_calculation: lctx = xecfg->ctxlen; - lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1)); - lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2)); + lctx = XDL_MIN(lctx, (long)xe->xdf1.nrec - (xche->i1 + xche->chg1)); + lctx = XDL_MIN(lctx, (long)xe->xdf2.nrec - (xche->i2 + xche->chg2)); e1 = xche->i1 + xche->chg1 + lctx; e2 = xche->i2 + xche->chg2 + lctx; @@ -237,13 +237,13 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { long fe1 = get_func_line(xe, xecfg, NULL, xche->i1 + xche->chg1, - xe->xdf1.nrec); + (long)xe->xdf1.nrec); while (fe1 > 0 && is_empty_rec(&xe->xdf1, fe1 - 1)) fe1--; if (fe1 < 0) - fe1 = xe->xdf1.nrec; + fe1 = (long)xe->xdf1.nrec; if (fe1 > e1) { - e2 = XDL_MIN(e2 + (fe1 - e1), xe->xdf2.nrec); + e2 = XDL_MIN(e2 + (fe1 - e1), (long)xe->xdf2.nrec); e1 = fe1; } @@ -254,7 +254,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, */ if (xche->next) { long l = XDL_MIN(xche->next->i1, - xe->xdf1.nrec - 1); + (long)xe->xdf1.nrec - 1); if (l - xecfg->ctxlen <= e1 || get_func_line(xe, xecfg, NULL, l, e1) < 0) { xche = xche->next; diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c index 6dc450b1fe1dfc..5ae1282c27568c 100644 --- a/xdiff/xhistogram.c +++ b/xdiff/xhistogram.c @@ -90,7 +90,7 @@ struct region { static int cmp_recs(xrecord_t *r1, xrecord_t *r2) { - return r1->ha == r2->ha; + return r1->minimal_perfect_hash == r2->minimal_perfect_hash; } @@ -98,7 +98,7 @@ static int cmp_recs(xrecord_t *r1, xrecord_t *r2) (cmp_recs(REC(i->env, s1, l1), REC(i->env, s2, l2))) #define TABLE_HASH(index, side, line) \ - XDL_HASHLONG((REC(index->env, side, line))->ha, index->table_bits) + XDL_HASHLONG((REC(index->env, side, line))->minimal_perfect_hash, index->table_bits) static int scanA(struct histindex *index, int line1, int count1) { diff --git a/xdiff/xmerge.c b/xdiff/xmerge.c index fd600cbb5d58a2..29dad98c496b07 100644 --- a/xdiff/xmerge.c +++ b/xdiff/xmerge.c @@ -101,8 +101,8 @@ static int xdl_merge_cmp_lines(xdfenv_t *xe1, int i1, xdfenv_t *xe2, int i2, xrecord_t *rec2 = xe2->xdf2.recs + i2; for (i = 0; i < line_count; i++) { - int result = xdl_recmatch(rec1[i].ptr, rec1[i].size, - rec2[i].ptr, rec2[i].size, flags); + int result = xdl_recmatch((const char *)rec1[i].ptr, (long)rec1[i].size, + (const char *)rec2[i].ptr, (long)rec2[i].size, flags); if (!result) return -1; } @@ -119,11 +119,11 @@ static int xdl_recs_copy_0(int use_orig, xdfenv_t *xe, int i, int count, int nee if (count < 1) return 0; - for (i = 0; i < count; size += recs[i++].size) + for (i = 0; i < count; size += (int)recs[i++].size) if (dest) memcpy(dest + size, recs[i].ptr, recs[i].size); if (add_nl) { - i = recs[count - 1].size; + i = (int)recs[count - 1].size; if (i == 0 || recs[count - 1].ptr[i - 1] != '\n') { if (needs_cr) { if (dest) @@ -156,9 +156,9 @@ static int xdl_orig_copy(xdfenv_t *xe, int i, int count, int needs_cr, int add_n */ static int is_eol_crlf(xdfile_t *file, int i) { - long size; + size_t size; - if (i < file->nrec - 1) + if (i < (long)file->nrec - 1) /* All lines before the last *must* end in LF */ return (size = file->recs[i].size) > 1 && file->recs[i].ptr[size - 2] == '\r'; @@ -317,15 +317,15 @@ static int xdl_fill_merge_buffer(xdfenv_t *xe1, const char *name1, continue; i = m->i1 + m->chg1; } - size += xdl_recs_copy(xe1, i, xe1->xdf2.nrec - i, 0, 0, + size += xdl_recs_copy(xe1, i, (int)xe1->xdf2.nrec - i, 0, 0, dest ? dest + size : NULL); return size; } static int recmatch(xrecord_t *rec1, xrecord_t *rec2, unsigned long flags) { - return xdl_recmatch(rec1->ptr, rec1->size, - rec2->ptr, rec2->size, flags); + return xdl_recmatch((const char *)rec1->ptr, (long)rec1->size, + (const char *)rec2->ptr, (long)rec2->size, flags); } /* @@ -382,10 +382,10 @@ static int xdl_refine_conflicts(xdfenv_t *xe1, xdfenv_t *xe2, xdmerge_t *m, * we have a very simple mmfile structure. */ t1.ptr = (char *)xe1->xdf2.recs[m->i1].ptr; - t1.size = xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr + t1.size = (char *)xe1->xdf2.recs[m->i1 + m->chg1 - 1].ptr + xe1->xdf2.recs[m->i1 + m->chg1 - 1].size - t1.ptr; t2.ptr = (char *)xe2->xdf2.recs[m->i2].ptr; - t2.size = xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr + t2.size = (char *)xe2->xdf2.recs[m->i2 + m->chg2 - 1].ptr + xe2->xdf2.recs[m->i2 + m->chg2 - 1].size - t2.ptr; if (xdl_do_diff(&t1, &t2, xpp, &xe) < 0) return -1; @@ -440,8 +440,8 @@ static int line_contains_alnum(const char *ptr, long size) static int lines_contain_alnum(xdfenv_t *xe, int i, int chg) { for (; chg; chg--, i++) - if (line_contains_alnum(xe->xdf2.recs[i].ptr, - xe->xdf2.recs[i].size)) + if (line_contains_alnum((const char *)xe->xdf2.recs[i].ptr, + (long)xe->xdf2.recs[i].size)) return 1; return 0; } @@ -622,7 +622,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, changes = c; i0 = xscr1->i1; i1 = xscr1->i2; - i2 = xscr1->i1 + xe2->xdf2.nrec - xe2->xdf1.nrec; + i2 = xscr1->i1 + (long)xe2->xdf2.nrec - (long)xe2->xdf1.nrec; chg0 = xscr1->chg1; chg1 = xscr1->chg2; chg2 = xscr1->chg1; @@ -637,7 +637,7 @@ static int xdl_do_merge(xdfenv_t *xe1, xdchange_t *xscr1, if (!changes) changes = c; i0 = xscr2->i1; - i1 = xscr2->i1 + xe1->xdf2.nrec - xe1->xdf1.nrec; + i1 = xscr2->i1 + (long)xe1->xdf2.nrec - (long)xe1->xdf1.nrec; i2 = xscr2->i2; chg0 = xscr2->chg1; chg1 = xscr2->chg1; diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c index 669b653580efe6..a0b31eb5d8c1c0 100644 --- a/xdiff/xpatience.c +++ b/xdiff/xpatience.c @@ -48,7 +48,7 @@ struct hashmap { int nr, alloc; struct entry { - unsigned long hash; + size_t minimal_perfect_hash; /* * 0 = unused entry, 1 = first line, 2 = second, etc. * line2 is NON_UNIQUE if the line is not unique @@ -101,10 +101,10 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, * So we multiply ha by 2 in the hope that the hashing was * "unique enough". */ - int index = (int)((record->ha << 1) % map->alloc); + int index = (int)((record->minimal_perfect_hash << 1) % map->alloc); while (map->entries[index].line1) { - if (map->entries[index].hash != record->ha) { + if (map->entries[index].minimal_perfect_hash != record->minimal_perfect_hash) { if (++index >= map->alloc) index = 0; continue; @@ -120,8 +120,8 @@ static void insert_record(xpparam_t const *xpp, int line, struct hashmap *map, if (pass == 2) return; map->entries[index].line1 = line; - map->entries[index].hash = record->ha; - map->entries[index].anchor = is_anchor(xpp, map->env->xdf1.recs[line - 1].ptr); + map->entries[index].minimal_perfect_hash = record->minimal_perfect_hash; + map->entries[index].anchor = is_anchor(xpp, (const char *)map->env->xdf1.recs[line - 1].ptr); if (!map->first) map->first = map->entries + index; if (map->last) { @@ -248,7 +248,7 @@ static int match(struct hashmap *map, int line1, int line2) { xrecord_t *record1 = &map->env->xdf1.recs[line1 - 1]; xrecord_t *record2 = &map->env->xdf2.recs[line2 - 1]; - return record1->ha == record2->ha; + return record1->minimal_perfect_hash == record2->minimal_perfect_hash; } static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, @@ -370,5 +370,5 @@ static int patience_diff(xpparam_t const *xpp, xdfenv_t *env, int xdl_do_patience_diff(xpparam_t const *xpp, xdfenv_t *env) { - return patience_diff(xpp, env, 1, env->xdf1.nrec, 1, env->xdf2.nrec); + return patience_diff(xpp, env, 1, (int)env->xdf1.nrec, 1, (int)env->xdf2.nrec); } diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c index 192334f1b72e63..34c82e4f8e1626 100644 --- a/xdiff/xprepare.c +++ b/xdiff/xprepare.c @@ -93,14 +93,14 @@ static void xdl_free_classifier(xdlclassifier_t *cf) { static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t *rec) { - long hi; + size_t hi; xdlclass_t *rcrec; - hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); + hi = XDL_HASHLONG(rec->line_hash, cf->hbits); for (rcrec = cf->rchash[hi]; rcrec; rcrec = rcrec->next) - if (rcrec->rec.ha == rec->ha && - xdl_recmatch(rcrec->rec.ptr, rcrec->rec.size, - rec->ptr, rec->size, cf->flags)) + if (rcrec->rec.line_hash == rec->line_hash && + xdl_recmatch((const char *)rcrec->rec.ptr, (long)rcrec->rec.size, + (const char *)rec->ptr, (long)rec->size, cf->flags)) break; if (!rcrec) { @@ -120,7 +120,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t (pass == 1) ? rcrec->len1++ : rcrec->len2++; - rec->ha = (unsigned long) rcrec->idx; + rec->minimal_perfect_hash = (size_t)rcrec->idx; return 0; } @@ -128,7 +128,7 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t static void xdl_free_ctx(xdfile_t *xdf) { - xdl_free(xdf->rindex); + xdl_free(xdf->reference_index); xdl_free(xdf->changed - 1); xdl_free(xdf->recs); } @@ -137,11 +137,11 @@ static void xdl_free_ctx(xdfile_t *xdf) static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, xdlclassifier_t *cf, xdfile_t *xdf) { long bsize; - unsigned long hav; - char const *blk, *cur, *top, *prev; + uint64_t hav; + uint8_t const *blk, *cur, *top, *prev; xrecord_t *crec; - xdf->rindex = NULL; + xdf->reference_index = NULL; xdf->changed = NULL; xdf->recs = NULL; @@ -153,12 +153,12 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ for (top = blk + bsize; cur < top; ) { prev = cur; hav = xdl_hash_record(&cur, top, xpp->flags); - if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec)) + if (XDL_ALLOC_GROW(xdf->recs, (long)xdf->nrec + 1, narec)) goto abort; crec = &xdf->recs[xdf->nrec++]; crec->ptr = prev; - crec->size = (long) (cur - prev); - crec->ha = hav; + crec->size = cur - prev; + crec->line_hash = hav; if (xdl_classify_record(pass, cf, crec) < 0) goto abort; } @@ -169,7 +169,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_ if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) && (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) { - if (!XDL_ALLOC_ARRAY(xdf->rindex, xdf->nrec + 1)) + if (!XDL_ALLOC_ARRAY(xdf->reference_index, xdf->nrec + 1)) goto abort; } @@ -264,7 +264,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) { * might be potentially discarded if they appear in a run of discardable. */ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { - long i, nm, nreff, mlim; + long i, nm, mlim; xrecord_t *recs; xdlclass_t *rcrec; uint8_t *action1 = NULL, *action2 = NULL; @@ -287,18 +287,18 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd /* * Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE. */ - if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->ha]; + rcrec = cf->rcrecs[recs->minimal_perfect_hash]; nm = rcrec ? rcrec->len2 : 0; action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } - if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) + if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT) mlim = XDL_MAX_EQLIMIT; for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { - rcrec = cf->rcrecs[recs->ha]; + rcrec = cf->rcrecs[recs->minimal_perfect_hash]; nm = rcrec ? rcrec->len1 : 0; action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP; } @@ -307,29 +307,29 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd * Use temporary arrays to decide if changed[i] should remain * false, or become true. */ - for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; + xdf1->nreff = 0; + for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { if (action1[i] == KEEP || (action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) { - xdf1->rindex[nreff++] = i; + xdf1->reference_index[xdf1->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf1->changed[i] = true; /* i.e. discard */ } - xdf1->nreff = nreff; - for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; + xdf2->nreff = 0; + for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { if (action2[i] == KEEP || (action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) { - xdf2->rindex[nreff++] = i; + xdf2->reference_index[xdf2->nreff++] = i; /* changed[i] remains false, i.e. keep */ } else xdf2->changed[i] = true; /* i.e. discard */ } - xdf2->nreff = nreff; cleanup: xdl_free(action1); @@ -348,9 +348,9 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { recs1 = xdf1->recs; recs2 = xdf2->recs; - for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; + for (i = 0, lim = (long)XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim; i++, recs1++, recs2++) - if (recs1->ha != recs2->ha) + if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; xdf1->dstart = xdf2->dstart = i; @@ -358,11 +358,11 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { recs1 = xdf1->recs + xdf1->nrec - 1; recs2 = xdf2->recs + xdf2->nrec - 1; for (lim -= i, i = 0; i < lim; i++, recs1--, recs2--) - if (recs1->ha != recs2->ha) + if (recs1->minimal_perfect_hash != recs2->minimal_perfect_hash) break; - xdf1->dend = xdf1->nrec - i - 1; - xdf2->dend = xdf2->nrec - i - 1; + xdf1->dend = (long)xdf1->nrec - i - 1; + xdf2->dend = (long)xdf2->nrec - i - 1; return 0; } diff --git a/xdiff/xtypes.h b/xdiff/xtypes.h index f145abba3ea8a3..979586f20a6028 100644 --- a/xdiff/xtypes.h +++ b/xdiff/xtypes.h @@ -39,18 +39,19 @@ typedef struct s_chastore { } chastore_t; typedef struct s_xrecord { - char const *ptr; - long size; - unsigned long ha; + uint8_t const *ptr; + size_t size; + uint64_t line_hash; + size_t minimal_perfect_hash; } xrecord_t; typedef struct s_xdfile { xrecord_t *recs; - long nrec; - long dstart, dend; + size_t nrec; + ptrdiff_t dstart, dend; bool *changed; - long *rindex; - long nreff; + size_t *reference_index; + size_t nreff; } xdfile_t; typedef struct s_xdfenv { diff --git a/xdiff/xutils.c b/xdiff/xutils.c index 447e66c7198b08..77ee1ad9c86875 100644 --- a/xdiff/xutils.c +++ b/xdiff/xutils.c @@ -249,11 +249,11 @@ int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags) return 1; } -unsigned long xdl_hash_record_with_whitespace(char const **data, - char const *top, long flags) { - unsigned long ha = 5381; - char const *ptr = *data; - int cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; +uint64_t xdl_hash_record_with_whitespace(uint8_t const **data, + uint8_t const *top, uint64_t flags) { + uint64_t ha = 5381; + uint8_t const *ptr = *data; + bool cr_at_eol_only = (flags & XDF_WHITESPACE_FLAGS) == XDF_IGNORE_CR_AT_EOL; for (; ptr < top && *ptr != '\n'; ptr++) { if (cr_at_eol_only) { @@ -263,8 +263,8 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, continue; } else if (XDL_ISSPACE(*ptr)) { - const char *ptr2 = ptr; - int at_eol; + const uint8_t *ptr2 = ptr; + bool at_eol; while (ptr + 1 < top && XDL_ISSPACE(ptr[1]) && ptr[1] != '\n') ptr++; @@ -274,20 +274,20 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, else if (flags & XDF_IGNORE_WHITESPACE_CHANGE && !at_eol) { ha += (ha << 5); - ha ^= (unsigned long) ' '; + ha ^= (uint64_t) ' '; } else if (flags & XDF_IGNORE_WHITESPACE_AT_EOL && !at_eol) { while (ptr2 != ptr + 1) { ha += (ha << 5); - ha ^= (unsigned long) *ptr2; + ha ^= (uint64_t) *ptr2; ptr2++; } } continue; } ha += (ha << 5); - ha ^= (unsigned long) *ptr; + ha ^= (uint64_t) *ptr; } *data = ptr < top ? ptr + 1: ptr; @@ -304,9 +304,9 @@ unsigned long xdl_hash_record_with_whitespace(char const **data, #define REASSOC_FENCE(x, y) #endif -unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { - unsigned long ha = 5381, c0, c1; - char const *ptr = *data; +uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top) { + uint64_t ha = 5381, c0, c1; + uint8_t const *ptr = *data; #if 0 /* * The baseline form of the optimized loop below. This is the djb2 @@ -314,7 +314,7 @@ unsigned long xdl_hash_record_verbatim(char const **data, char const *top) { */ for (; ptr < top && *ptr != '\n'; ptr++) { ha += (ha << 5); - ha += (unsigned long) *ptr; + ha += (uint64_t) *ptr; } *data = ptr < top ? ptr + 1: ptr; #else @@ -465,10 +465,10 @@ int xdl_fall_back_diff(xdfenv_t *diff_env, xpparam_t const *xpp, xdfenv_t env; subfile1.ptr = (char *)diff_env->xdf1.recs[line1 - 1].ptr; - subfile1.size = diff_env->xdf1.recs[line1 + count1 - 2].ptr + + subfile1.size = (char *)diff_env->xdf1.recs[line1 + count1 - 2].ptr + diff_env->xdf1.recs[line1 + count1 - 2].size - subfile1.ptr; subfile2.ptr = (char *)diff_env->xdf2.recs[line2 - 1].ptr; - subfile2.size = diff_env->xdf2.recs[line2 + count2 - 2].ptr + + subfile2.size = (char *)diff_env->xdf2.recs[line2 + count2 - 2].ptr + diff_env->xdf2.recs[line2 + count2 - 2].size - subfile2.ptr; if (xdl_do_diff(&subfile1, &subfile2, xpp, &env) < 0) return -1; diff --git a/xdiff/xutils.h b/xdiff/xutils.h index 13f68310472a69..615b4a9d355433 100644 --- a/xdiff/xutils.h +++ b/xdiff/xutils.h @@ -34,9 +34,9 @@ void *xdl_cha_alloc(chastore_t *cha); long xdl_guess_lines(mmfile_t *mf, long sample); int xdl_blankline(const char *line, long size, long flags); int xdl_recmatch(const char *l1, long s1, const char *l2, long s2, long flags); -unsigned long xdl_hash_record_verbatim(char const **data, char const *top); -unsigned long xdl_hash_record_with_whitespace(char const **data, char const *top, long flags); -static inline unsigned long xdl_hash_record(char const **data, char const *top, long flags) +uint64_t xdl_hash_record_verbatim(uint8_t const **data, uint8_t const *top); +uint64_t xdl_hash_record_with_whitespace(uint8_t const **data, uint8_t const *top, uint64_t flags); +static inline uint64_t xdl_hash_record(uint8_t const **data, uint8_t const *top, uint64_t flags) { if (flags & XDF_WHITESPACE_FLAGS) return xdl_hash_record_with_whitespace(data, top, flags);