From 5ab707025deb28943c00432b34f847f8372cb679 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20B=C3=A4uerle?= Date: Thu, 22 Jan 2026 11:05:08 +0100 Subject: [PATCH 1/4] Add .bazelversion Add a .bazelversion file to pin bazel version to 8.3.1. Builds fail with version 9.0.0. --- .bazelversion | 1 + cr_checker/tests/.bazelversion | 1 + python_basics/integration_tests/.bazelversion | 2 +- starpls/integration_tests/.bazelversion | 1 + 4 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .bazelversion create mode 120000 cr_checker/tests/.bazelversion mode change 100644 => 120000 python_basics/integration_tests/.bazelversion create mode 120000 starpls/integration_tests/.bazelversion diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..56b6be4 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +8.3.1 diff --git a/cr_checker/tests/.bazelversion b/cr_checker/tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ b/cr_checker/tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file diff --git a/python_basics/integration_tests/.bazelversion b/python_basics/integration_tests/.bazelversion deleted file mode 100644 index 2bf50aa..0000000 --- a/python_basics/integration_tests/.bazelversion +++ /dev/null @@ -1 +0,0 @@ -8.3.0 diff --git a/python_basics/integration_tests/.bazelversion b/python_basics/integration_tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ b/python_basics/integration_tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file diff --git a/starpls/integration_tests/.bazelversion b/starpls/integration_tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ b/starpls/integration_tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file From 8e1190c7638284679baaa85fb5316dc5115791b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20B=C3=A4uerle?= Date: Mon, 19 Jan 2026 14:58:08 +0100 Subject: [PATCH 2/4] cr_checker: add exclusion list feature Allow specifying paths that are being excluded from the copyright check. This feature is supposed to be used for source code parts taken from other open source projects that require correct attribution. The exclusion file shall contain one path to a file per entry. The check shall fail if a file listed does not exist. --- cr_checker/cr_checker.bzl | 18 +++++-- cr_checker/tests/test_shebang_handling.py | 4 +- cr_checker/tool/cr_checker.py | 62 ++++++++++++++++++++++- 3 files changed, 76 insertions(+), 8 deletions(-) diff --git a/cr_checker/cr_checker.bzl b/cr_checker/cr_checker.bzl index 1829cc0..8278335 100644 --- a/cr_checker/cr_checker.bzl +++ b/cr_checker/cr_checker.bzl @@ -21,6 +21,7 @@ def copyright_checker( visibility, template, config, + exclusion = None, extensions = [], offset = 0, remove_offset = 0, @@ -40,6 +41,8 @@ def copyright_checker( Defaults to "//tools/cr_checker/resources:templates". config (str, optional): Path to the config resource used for project variables. Defaults to "//tools/cr_checker/resources:config". + exclusion (str, optional): Path to a text file listing files to be excluded from the copyright check. + File format: one path per line, relative to the repository root. extensions (list, optional): A list of file extensions to filter the source files. Defaults to an empty list, meaning all files are checked. offset (int, optional): The line offset for applying checks or modifications. @@ -65,12 +68,14 @@ def copyright_checker( "-t $(location {})".format(template), "-c $(location {})".format(config), ] - data = [] if len(extensions): args.append("-e {exts}".format( exts = " ".join([exts for exts in extensions]), )) + if exclusion: + args.append("--exclusion-file $(location {})".format(exclusion)) + if offset: args.append("--offset {}".format(offset)) @@ -89,6 +94,12 @@ def copyright_checker( if remove_offset: args.append("--remove_offset {}".format(remove_offset)) + data = srcs[:] + data.append(template) + data.append(config) + if exclusion: + data.append(exclusion) + py_binary( name = t_name, main = "cr_checker.py", @@ -96,10 +107,7 @@ def copyright_checker( "@score_tooling//cr_checker/tool:cr_checker_lib", ], args = args, - data = srcs + [ - template, - config, - ], + data = data, visibility = visibility, ) diff --git a/cr_checker/tests/test_shebang_handling.py b/cr_checker/tests/test_shebang_handling.py index 545ae49..9251519 100644 --- a/cr_checker/tests/test_shebang_handling.py +++ b/cr_checker/tests/test_shebang_handling.py @@ -103,7 +103,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): [script], {"py": header_template}, True, - config, + config=config, use_mmap=False, encoding="utf-8", offset=0, @@ -154,7 +154,7 @@ def test_process_files_fix_inserts_header_without_shebang(tmp_path): [script], {"py": header_template}, True, - config, + config=config, use_mmap=False, encoding="utf-8", offset=0, diff --git a/cr_checker/tool/cr_checker.py b/cr_checker/tool/cr_checker.py index a2b2d05..ef27eb9 100644 --- a/cr_checker/tool/cr_checker.py +++ b/cr_checker/tool/cr_checker.py @@ -186,6 +186,40 @@ def add_template_for_extensions(templates: dict, extensions: list, template: str return templates +def load_exclusion(path): + """ + Loads the list of files being excluded from the copyright check. + + Args: + path (str): Path to the exclusion file. + + Returns: + tuple(list, bool): a list of files that are excluded from the coypright check and a boolean indicating whether + all paths listed in the exclusion file exist and are files. + """ + + exclusion = [] + valid = True + with open(path, "r", encoding="utf-8") as file: + exclusion = file.read().splitlines() + + for item in exclusion: + path = Path(item) + if not path.exists(): + LOGGER.error("Excluded file %s does not exist.", item) + exclusion.remove(item) + valid = False + continue + if not path.is_file(): + exclusion.remove(item) + LOGGER.error("Excluded file %s is not a file.", item) + valid = False + continue + + LOGGER.debug(exclusion) + return exclusion, valid + + def configure_logging(log_file_path=None, verbose=False): """ Configures logging to write messages to the specified log file. @@ -490,6 +524,7 @@ def process_files( files, templates, fix, + exclusion=[], config=None, use_mmap=False, encoding="utf-8", @@ -504,6 +539,8 @@ def process_files( templates (dict): A dictionary where keys are file extensions (e.g., '.py', '.txt') and values are strings or patterns representing the required copyright text. + exclusion (list): A list of paths to files to be excluded from the copyright + check. config (Path): Path to the config JSON file where configuration variables are stored (e.g. years for copyright headers). use_mmap (bool): Flag for using mmap function for reading files @@ -528,6 +565,10 @@ def process_files( ) continue + if str(item) in exclusion: + logging.debug("Skipped due to exclusion: %s", item) + continue + if os.path.getsize(item) == 0: # No need to add copyright headers to empty files continue @@ -576,6 +617,13 @@ def parse_arguments(argv): help="Path to the template file", ) + parser.add_argument( + "--exclusion-file", + type=Path, + required=False, + help="Path to the file listing file paths excluded from the copyright check.", + ) + parser.add_argument( "-c", "--config-file", @@ -675,6 +723,15 @@ def main(argv=None): LOGGER.error("Failed to load copyright text: %s", err) return err.errno + exclusion = [] + exclusion_valid = True + if args.exclusion_file: + try: + exclusion, exclusion_valid = load_exclusion(args.exclusion_file) + except IOError as err: + LOGGER.error("Failed to load exclusion list: %s", err) + return err.errno + try: files = collect_inputs(args.inputs, args.extensions) except IOError as err: @@ -695,6 +752,7 @@ def main(argv=None): files, templates, args.fix, + exclusion, args.config_file, args.use_memory_map, args.encoding, @@ -712,6 +770,8 @@ def main(argv=None): total_no, COLORS["ENDC"], ) + if not exclusion_valid: + LOGGER.info("The exclusion file contains paths that do not exist.") if args.fix: total_not_fixed = total_no - total_fixes LOGGER.info( @@ -728,7 +788,7 @@ def main(argv=None): ) LOGGER.info("=" * 64) - return 0 if total_no == 0 else 1 + return 0 if (total_no == 0 and exclusion_valid) else 1 if __name__ == "__main__": From ba4c8430c026692015c04f2064995e45f6a70099 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20B=C3=A4uerle?= Date: Tue, 20 Jan 2026 17:26:35 +0100 Subject: [PATCH 3/4] cr_checker: rename test suite and add tests for core functionality Add test cases for the cr_checker core functionality. Fixes: #99 --- cr_checker/tests/BUILD | 4 +- ...shebang_handling.py => test_cr_checker.py} | 131 +++++++++++++++++- 2 files changed, 126 insertions(+), 9 deletions(-) rename cr_checker/tests/{test_shebang_handling.py => test_cr_checker.py} (62%) diff --git a/cr_checker/tests/BUILD b/cr_checker/tests/BUILD index 3da53e8..3bc90fc 100644 --- a/cr_checker/tests/BUILD +++ b/cr_checker/tests/BUILD @@ -14,9 +14,9 @@ load("@score_tooling//python_basics:defs.bzl", "score_py_pytest") score_py_pytest( - name = "shebang_unit_tests", + name = "unit_tests", srcs = [ - "test_shebang_handling.py", + "test_cr_checker.py", ], deps = [ "@score_tooling//cr_checker/tool:cr_checker_lib", diff --git a/cr_checker/tests/test_shebang_handling.py b/cr_checker/tests/test_cr_checker.py similarity index 62% rename from cr_checker/tests/test_shebang_handling.py rename to cr_checker/tests/test_cr_checker.py index 9251519..33f2120 100644 --- a/cr_checker/tests/test_shebang_handling.py +++ b/cr_checker/tests/test_cr_checker.py @@ -15,6 +15,7 @@ import importlib.util import json +import pytest from datetime import datetime from pathlib import Path @@ -32,15 +33,15 @@ def load_cr_checker_module(): # load the license template -def load_py_template() -> str: +def load_template(extension: str) -> str: cr_checker = load_cr_checker_module() template_file = Path(__file__).resolve().parents[1] / "resources" / "templates.ini" templates = cr_checker.load_templates(template_file) - return templates["py"] + return templates[extension] # write the config file here so that the year is always up to date with the year -# written in the mock "script.py" file +# written in the test file def write_config(path: Path, author: str) -> Path: config_path = path / "config.json" config_path.write_text(json.dumps({"author": author}), encoding="utf-8") @@ -61,11 +62,127 @@ def test_detect_shebang_offset_counts_trailing_newlines(tmp_path): assert offset == len("#!/usr/bin/env python3\n\n".encode("utf-8")) +@pytest.fixture( + params=[ + "cpp", + "c", + "h", + "hpp", + "py", + "sh", + "bzl", + "ini", + "yml", + "BUILD", + "bazel", + "rs", + "rst", + ] +) +def prepare_test_with_header(request: SubRequest, tmp_path: PosixPath) -> tuple: + extension = request.param + test_file = tmp_path / ("file." + extension) + header_template = load_template(extension) + current_year = datetime.now().year + header = header_template.format(year=current_year, author="Author") + test_file.write_text( + header + "some content\n", + encoding="utf-8", + ) + return test_file, extension, header_template + + +@pytest.fixture( + params=[ + "cpp", + "c", + "h", + "hpp", + "py", + "sh", + "bzl", + "ini", + "yml", + "BUILD", + "bazel", + "rs", + "rst", + ] +) +def prepare_test_no_header(request: SubRequest, tmp_path: PosixPath) -> tuple: + extension = request.param + test_file = tmp_path / ("file." + extension) + header_template = load_template(extension) + current_year = datetime.now().year + test_file.write_text( + "some content\n", + encoding="utf-8", + ) + return test_file, extension, header_template, tmp_path + + +def test_process_files_detects_header(prepare_test_with_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template = prepare_test_with_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 0 + + +def test_process_files_detects_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 1 + + +def test_process_files_inserts_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + author = "Author" + config = write_config(tmp_path, author) + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + True, + config=config, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 1 + assert results["fixed"] == 1 + expected_header = header_template.format(year=datetime.now().year, author="Author") + assert test_file.read_text(encoding="utf-8").startswith(expected_header) + + # test that process_files function validates a license header after the shebang line def test_process_files_accepts_header_after_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year header = header_template.format(year=current_year, author="Author") script.write_text( @@ -94,7 +211,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): "#!/usr/bin/env python3\nprint('hi')\n", encoding="utf-8", ) - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year author = "Author" config = write_config(tmp_path, author) @@ -122,7 +239,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): def test_process_files_accepts_header_without_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year header = header_template.format(year=current_year, author="Author") script.write_text(header + "print('hi')\n", encoding="utf-8") @@ -145,7 +262,7 @@ def test_process_files_fix_inserts_header_without_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" script.write_text("print('hi')\n", encoding="utf-8") - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year author = "Author" config = write_config(tmp_path, author) From 998a84f4c7573785195acfe578d18fd86322b9a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Florian=20B=C3=A4uerle?= Date: Tue, 20 Jan 2026 17:32:57 +0100 Subject: [PATCH 4/4] cr_checker: tests: add test case for file exclusion --- cr_checker/tests/test_cr_checker.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/cr_checker/tests/test_cr_checker.py b/cr_checker/tests/test_cr_checker.py index 33f2120..bd2b6e1 100644 --- a/cr_checker/tests/test_cr_checker.py +++ b/cr_checker/tests/test_cr_checker.py @@ -178,6 +178,24 @@ def test_process_files_inserts_missing_header(prepare_test_no_header): assert test_file.read_text(encoding="utf-8").startswith(expected_header) +def test_process_files_skips_exclusion_with_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + [str(test_file)], + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 0 + + # test that process_files function validates a license header after the shebang line def test_process_files_accepts_header_after_shebang(tmp_path): cr_checker = load_cr_checker_module()