From bb434fa121c17164a32718cc93b31ad296dff2d4 Mon Sep 17 00:00:00 2001 From: Victor Moene Date: Fri, 19 Dec 2025 15:20:04 +0100 Subject: [PATCH 1/2] Implemented absolute path Git repository module Absolute modules are modules consisting of a local directory containing a Git repository given by absolute path Ticket: ENT-12974 Changelog: Title Signed-off-by: Victor Moene --- JSON.md | 5 +- cfbs/cfbs_config.py | 19 ++++++- cfbs/commands.py | 11 +++- cfbs/git.py | 24 +++++++++ cfbs/index.py | 49 ++++++++++++++++-- cfbs/internal_file_management.py | 42 ++++++++++++++- cfbs/module.py | 7 +++ cfbs/validate.py | 88 ++++++++++++++++++++++++++++---- 8 files changed, 227 insertions(+), 18 deletions(-) diff --git a/JSON.md b/JSON.md index 4333293e..c9c9840d 100644 --- a/JSON.md +++ b/JSON.md @@ -174,9 +174,10 @@ The modules inside `build`, `provides`, and `index` use these fields: For `provides` and `index` dictionaries, this name must be the key of each entry (not a field inside). For the `build` array, it must be inside each module object (with `name` as the key). Local modules (files and folders in same directory as `cfbs.json`), must start with `./`, and end with `/` if it's a directory. + Absolute modules (a directory given by absolute path containing a Git repository) must start with `/` and end with `/`. Module names should not be longer than 64 characters. - Module names (not including adfixes `./`, `/`, `.cf`, `.json` for local modules) should only contain lowercase ASCII alphanumeric characters possibly separated by dashes, and should start with a letter. - Local module names can contain underscores instead of dashes. + Module names (not including adfixes `./`, `/`, `.cf`, `.json` for local and absolute modules) should only contain lowercase ASCII alphanumeric characters possibly separated by dashes, and should start with a letter. + Local and absolute module names can contain underscores instead of dashes. - `description` (string): Human readable description of what this module does. - `tags` (array of strings): Mostly used for information / finding modules on [build.cfengine.com](https://build.cfengine.com). Some common examples include `supported`, `experimental`, `security`, `library`, `promise-type`. diff --git a/cfbs/cfbs_config.py b/cfbs/cfbs_config.py index bec6b3e1..8472e5d2 100644 --- a/cfbs/cfbs_config.py +++ b/cfbs/cfbs_config.py @@ -39,7 +39,12 @@ ) from cfbs.pretty import pretty, CFBS_DEFAULT_SORTING_RULES from cfbs.cfbs_json import CFBSJson -from cfbs.module import Module, is_module_added_manually, is_module_local +from cfbs.module import ( + Module, + is_module_added_manually, + is_module_local, + is_module_absolute, +) from cfbs.prompts import prompt_user, prompt_user_yesno from cfbs.validate import validate_single_module @@ -337,8 +342,12 @@ def _handle_local_module(self, module, use_default_build_steps=True): name.startswith("./") and name.endswith((".cf", "/")) and "local" in module["tags"] + ) and not ( + name.startswith("/") and name.endswith("/") and "absolute" in module["tags"] ): - log.debug("Module '%s' does not appear to be a local module" % name) + log.debug( + "Module '%s' do not appear to be a local or absolute module" % name + ) return if name.endswith(".cf"): @@ -486,6 +495,12 @@ def add_command( "URI scheme not supported. The supported URI schemes are: " + ", ".join(SUPPORTED_URI_SCHEMES) ) + for m in to_add: + if is_module_absolute(m): + if not os.path.exists(m): + raise CFBSUserError("Absolute path module doesn't exist") + if not os.path.isdir(m): + raise CFBSUserError("Absolute path module is not a dir") self._add_modules(to_add, added_by, checksum, explicit_build_steps) added = {m["name"] for m in self["build"]}.difference(before) diff --git a/cfbs/commands.py b/cfbs/commands.py index 5ed4ed09..de5ec285 100644 --- a/cfbs/commands.py +++ b/cfbs/commands.py @@ -104,6 +104,7 @@ def search_command(terms: List[str]): validate_single_module, ) from cfbs.internal_file_management import ( + absolute_module_copy, clone_url_repo, SUPPORTED_URI_SCHEMES, fetch_archive, @@ -116,11 +117,12 @@ def search_command(terms: List[str]): git_configure_and_initialize, is_git_repo, CFBSGitError, + head_commit_hash, ) from cfbs.git_magic import commit_after_command, git_commit_maybe_prompt from cfbs.prompts import prompt_user, prompt_user_yesno -from cfbs.module import Module, is_module_added_manually +from cfbs.module import Module, is_module_absolute, is_module_added_manually from cfbs.masterfiles.generate_release_information import generate_release_information _MODULES_URL = "https://archive.build.cfengine.com/modules" @@ -634,6 +636,9 @@ def update_command(to_update): continue new_module = provides[module_name] + elif is_module_absolute(old_module["name"]): + new_module = index.get_module_object(update.name) + new_module["commit"] = head_commit_hash(old_module["name"]) else: if "version" not in old_module: @@ -806,6 +811,10 @@ def _download_dependencies(config: CFBSConfig, redownload=False, ignore_versions local_module_copy(module, counter, max_length) counter += 1 continue + if name.startswith("/"): + absolute_module_copy(module, counter, max_length) + counter += 1 + continue if "commit" not in module: raise CFBSExitError("module %s must have a commit property" % name) commit = module["commit"] diff --git a/cfbs/git.py b/cfbs/git.py index d79b1205..36eaa63b 100644 --- a/cfbs/git.py +++ b/cfbs/git.py @@ -11,6 +11,7 @@ import os import itertools import tempfile +import shutil from subprocess import check_call, check_output, run, PIPE, DEVNULL, CalledProcessError from typing import Iterable, Union @@ -258,3 +259,26 @@ def treeish_exists(treeish, repo_path): result = run(command, cwd=repo_path, stdout=DEVNULL, stderr=DEVNULL, check=False) return result.returncode == 0 + + +def head_commit_hash(repo_path): + result = run( + ["git", "rev-parse", "HEAD"], + cwd=repo_path, + stdout=PIPE, + stderr=DEVNULL, + check=True, + ) + + return result.stdout.decode("utf-8").strip() + + +# Ensure reproducibility when copying git repositories +# 1. hard reset to specific commit +# 2. remove untracked files +# 3. remove .git directory +def git_clean_reset(repo_path, commit): + run(["git", "reset", "--hard", commit], cwd=repo_path, check=True, stdout=DEVNULL) + run(["git", "clean", "-fxd"], cwd=repo_path, check=True, stdout=DEVNULL) + git_dir = os.path.join(repo_path, ".git") + shutil.rmtree(git_dir, ignore_errors=True) diff --git a/cfbs/index.py b/cfbs/index.py index 4edda454..5c4a2960 100644 --- a/cfbs/index.py +++ b/cfbs/index.py @@ -3,9 +3,10 @@ from collections import OrderedDict from typing import List, Optional, Union -from cfbs.module import Module +from cfbs.git import head_commit_hash, is_git_repo +from cfbs.module import Module, is_module_absolute from cfbs.utils import CFBSNetworkError, get_or_read_json, CFBSExitError, get_json -from cfbs.internal_file_management import local_module_name +from cfbs.internal_file_management import absolute_module_name, local_module_name _DEFAULT_INDEX = ( "https://raw.githubusercontent.com/cfengine/build-index/master/cfbs.json" @@ -48,6 +49,30 @@ def _local_module_data_subdir( "description": "Local subdirectory added using cfbs command line", "tags": ["local"], "steps": build_steps, + # TODO: turn this into an argument, for when it's not "cfbs add" adding the module + "added_by": "cfbs add", + } + + +def _absolute_module_data(module_name: str, version: Optional[str]): + assert module_name.startswith("/") + assert module_name.endswith("/") + + if version is not None: + commit_hash = version + elif is_git_repo(module_name): + commit_hash = head_commit_hash(module_name) + else: + commit_hash = "" + + dst = os.path.join("services", "cfbs", module_name[1:]) + build_steps = ["directory ./ {}".format(dst)] + return { + "description": "Module added via absolute path to a Git repository directory", + "tags": ["absolute"], + "steps": build_steps, + "commit": commit_hash, + # TODO: turn this into an argument, for when it's not "cfbs add" adding the module "added_by": "cfbs add", } @@ -67,6 +92,14 @@ def _generate_local_module_object( return _local_module_data_json_file(module_name) +def _generate_absolute_module_object(module_name: str, version: Optional[str]): + assert module_name.startswith("/") + assert module_name.endswith("/") + assert os.path.isdir(module_name) + + return _absolute_module_data(module_name, version) + + class Index: """Class representing the cfbs.json containing the index of available modules""" @@ -171,7 +204,10 @@ def translate_alias(self, module: Module): module.name = data["alias"] else: if os.path.exists(module.name): - module.name = local_module_name(module.name) + if is_module_absolute(module.name): + module.name = absolute_module_name(module.name) + else: + module.name = local_module_name(module.name) def get_module_object( self, @@ -187,6 +223,13 @@ def get_module_object( if name.startswith("./"): object = _generate_local_module_object(name, explicit_build_steps) + elif is_module_absolute(name): + if not os.path.isdir(name): + pass + object = _generate_absolute_module_object(name, version) + # currently, the argument of cfbs-add is split by `@` in the `Module` constructor + # due to that, this hack is used to prevent creating the "version" field + module = Module(name).to_dict() else: object = self[name] if version: diff --git a/cfbs/internal_file_management.py b/cfbs/internal_file_management.py index b4a0a636..4ff230b3 100644 --- a/cfbs/internal_file_management.py +++ b/cfbs/internal_file_management.py @@ -28,12 +28,14 @@ CFBSExitError, ) +from cfbs.git import git_clean_reset + _SUPPORTED_TAR_TYPES = (".tar.gz", ".tgz") SUPPORTED_ARCHIVES = (".zip",) + _SUPPORTED_TAR_TYPES SUPPORTED_URI_SCHEMES = ("https://", "ssh://", "git://") -def local_module_name(module_path): +def local_module_name(module_path: str): assert os.path.exists(module_path) module = module_path @@ -64,6 +66,27 @@ def local_module_name(module_path): return module +def absolute_module_name(module_path: str): + assert os.path.exists(module_path) + module = module_path + assert module.startswith("/") + + for illegal in ["//", "..", " ", "\n", "\t", " "]: + if illegal in module: + raise CFBSExitError("Module path cannot contain %s" % repr(illegal)) + + if not module.endswith("/"): + module = module + "/" + while "/./" in module: + module = module.replace("/./", "/") + + assert os.path.exists(module) + if not os.path.isdir(module): + raise CFBSExitError("'%s' must be a directory" % module) + + return module + + def get_download_path(module) -> str: downloads = os.path.join(cfbs_dir(), "downloads") @@ -117,6 +140,23 @@ def local_module_copy(module, counter, max_length): ) +def absolute_module_copy(module, counter, max_length): + assert "commit" in module + name = module["name"] + pretty_name = _prettify_name(name) + target = "out/steps/%03d_%s_local/" % (counter, pretty_name) + module["_directory"] = target + module["_counter"] = counter + + cp(name, target) + git_clean_reset(target, module["commit"]) + + print( + "%03d %s @ %s (Copied)" + % (counter, pad_right(name, max_length), module["commit"][:7]) + ) + + def _get_path_from_url(url): if not url.startswith(SUPPORTED_URI_SCHEMES): if "://" in url: diff --git a/cfbs/module.py b/cfbs/module.py index a8c8029b..8ea9e901 100644 --- a/cfbs/module.py +++ b/cfbs/module.py @@ -15,6 +15,13 @@ def is_module_local(name: str): return name.startswith("./") +def is_module_absolute(name: str): + """A module might contain `"absolute"` in its `"tags"` but this is not required. + The source of truth for whether the module is absolute is whether it starts with `/`. + """ + return name.startswith("/") + + class Module: """Class representing a module in cfbs.json""" diff --git a/cfbs/validate.py b/cfbs/validate.py index 15ff5755..972dc5de 100644 --- a/cfbs/validate.py +++ b/cfbs/validate.py @@ -31,14 +31,17 @@ """ import logging as log +import os import re from collections import OrderedDict from typing import List, Tuple -from cfbs.module import is_module_local +from cfbs.git import is_git_repo, treeish_exists +from cfbs.module import is_module_absolute, is_module_local from cfbs.utils import ( is_a_commit_hash, strip_left, + strip_right, strip_right_any, CFBSExitError, CFBSValidationError, @@ -169,17 +172,57 @@ def _validate_top_level_keys(config): ) -def validate_module_name_content(name): - MAX_MODULE_NAME_LENGTH = 64 +def validate_absolute_module(name_path, module): + if not os.path.exists(name_path): + raise CFBSValidationError( + name_path, + "Absolute module's directory does not exist", + ) - if len(name) > MAX_MODULE_NAME_LENGTH: + if not os.path.isdir(name_path): raise CFBSValidationError( - name, - "Module name is too long (over " - + str(MAX_MODULE_NAME_LENGTH) - + " characters)", + name_path, + "Absolute module's path is not a directory", ) + if not is_git_repo(name_path): + raise CFBSValidationError(name_path, "Absolute module is not a Git repository") + + if not module["commit"]: + raise CFBSValidationError( + name_path, 'Absolute modules require the "commit" key' + ) + commit = module["commit"] + + # TODO: unless a branch / tag name is a valid "commit" field value, this needs to be checked differently + if not treeish_exists(commit, name_path): + raise CFBSValidationError( + name_path, + "Git repository of the absolute module does not contain the specified commit", + ) + + +def validate_module_name_content(name): + MAX_MODULE_NAME_LENGTH = 64 + + if is_module_absolute(name): + for component in name.split("/"): + if len(component) > MAX_MODULE_NAME_LENGTH: + raise CFBSValidationError( + name, + "Path component '%s' in module name is too long (over " % component + + str(MAX_MODULE_NAME_LENGTH) + + " characters)", + ) + else: + if len(name) > MAX_MODULE_NAME_LENGTH: + raise CFBSValidationError( + name, + "Module name is too long (over " + + str(MAX_MODULE_NAME_LENGTH) + + " characters)", + ) + # lowercase ASCII alphanumericals, starting with a letter, and possible singular dashes in the middle r = "[a-z][a-z0-9]*(-[a-z0-9]+)*" proper_name = name @@ -200,9 +243,33 @@ def validate_module_name_content(name): # only validate the local module's name, not the entire path to it proper_name = proper_name.split("/")[-1] - # allow underscores, only for local modules + # allow underscores, only for local and absolute modules + proper_name = proper_name.replace("_", "-") + + if is_module_absolute(name): + if not name.startswith("/"): + raise CFBSValidationError( + name, "Absolute module names should begin with `/`" + ) + + if not name.endswith("/"): + raise CFBSValidationError( + name, + "Absolute module names should end with `/`", + ) + proper_name = strip_right(proper_name, "/") + # TODO: more validation of the entire path instead of just the name (final component), since the module "name" is actually a path + proper_name = proper_name.split("/")[-1] + + # allow underscores, only for local and absolute modules proper_name = proper_name.replace("_", "-") + if len(proper_name) == 0: + raise CFBSValidationError( + name, + "Module name proper is empty", + ) + if not re.fullmatch(r, proper_name): raise CFBSValidationError( name, @@ -739,6 +806,9 @@ def validate_single_module(context, name, module, config, local_check=False): name, '"%s" field is required, but missing' % required_field ) + if is_module_absolute(name): + validate_absolute_module(name, module) + # Step 3 - Validate fields: if "name" in module: From 905532556fe9c2e9f739eb48f7d8bf7dce614fc0 Mon Sep 17 00:00:00 2001 From: Victor Moene Date: Mon, 22 Dec 2025 12:01:17 +0100 Subject: [PATCH 2/2] Added tests for cfbs add absolute path Signed-off-by: Victor Moene --- tests/shell/047_absolute_path_modules.sh | 46 ++++++++++++++++++++++++ tests/shell/all.sh | 1 + 2 files changed, 47 insertions(+) create mode 100644 tests/shell/047_absolute_path_modules.sh diff --git a/tests/shell/047_absolute_path_modules.sh b/tests/shell/047_absolute_path_modules.sh new file mode 100644 index 00000000..0152b91c --- /dev/null +++ b/tests/shell/047_absolute_path_modules.sh @@ -0,0 +1,46 @@ +set -e +set -x +cd tests/ +mkdir -p ./tmp +cd ./tmp/ +touch cfbs.json && rm cfbs.json +rm -rf .git + +cleanup() { + rm -rf /tmp/foo +} +trap cleanup EXIT QUIT TERM +mkdir -p /tmp/foo + +export GIT_AUTHOR_NAME="github_actions" +export GIT_AUTHOR_EMAIL="github_actions@example.com" + +# Add first commit +cp -r ../sample/foo/main.cf /tmp/foo/foo.cf +git init /tmp/foo +cd /tmp/foo +git add /tmp/foo/foo.cf +git -c user.name="$GIT_AUTHOR_NAME" -c user.email="$GIT_AUTHOR_EMAIL" commit -m "initial commit" +head_commit=$(git rev-parse HEAD) +cd - + +# run cfbs +cfbs --non-interactive init --masterfiles no +cfbs --non-interactive add /tmp/foo +cfbs build + +grep "$head_commit" cfbs.json + +# Add second commit +cp ../sample/bar/baz/main.cf /tmp/foo/baz.cf +cd /tmp/foo +git add /tmp/foo/baz.cf +git -c user.name="$GIT_AUTHOR_NAME" -c user.email="$GIT_AUTHOR_EMAIL" commit -m "second commit" +head_commit=$(git rev-parse HEAD) +cd - + +# run cfbs +cfbs --non-interactive update +cfbs build + +grep "$head_commit" cfbs.json diff --git a/tests/shell/all.sh b/tests/shell/all.sh index 1e3334bf..7ff7f595 100644 --- a/tests/shell/all.sh +++ b/tests/shell/all.sh @@ -50,5 +50,6 @@ bash tests/shell/043_replace_version.sh bash tests/shell/044_replace.sh bash tests/shell/045_update_from_url_branch_uptodate.sh bash tests/shell/046_update_from_url_branch.sh +bash tests/shell/047_absolute_path_modules.sh echo "All cfbs shell tests completed successfully!"