diff --git a/.bazelversion b/.bazelversion new file mode 100644 index 0000000..56b6be4 --- /dev/null +++ b/.bazelversion @@ -0,0 +1 @@ +8.3.1 diff --git a/cr_checker/cr_checker.bzl b/cr_checker/cr_checker.bzl index 1829cc0..8278335 100644 --- a/cr_checker/cr_checker.bzl +++ b/cr_checker/cr_checker.bzl @@ -21,6 +21,7 @@ def copyright_checker( visibility, template, config, + exclusion = None, extensions = [], offset = 0, remove_offset = 0, @@ -40,6 +41,8 @@ def copyright_checker( Defaults to "//tools/cr_checker/resources:templates". config (str, optional): Path to the config resource used for project variables. Defaults to "//tools/cr_checker/resources:config". + exclusion (str, optional): Path to a text file listing files to be excluded from the copyright check. + File format: one path per line, relative to the repository root. extensions (list, optional): A list of file extensions to filter the source files. Defaults to an empty list, meaning all files are checked. offset (int, optional): The line offset for applying checks or modifications. 
@@ -65,12 +68,14 @@ def copyright_checker( "-t $(location {})".format(template), "-c $(location {})".format(config), ] - data = [] if len(extensions): args.append("-e {exts}".format( exts = " ".join([exts for exts in extensions]), )) + if exclusion: + args.append("--exclusion-file $(location {})".format(exclusion)) + if offset: args.append("--offset {}".format(offset)) @@ -89,6 +94,12 @@ def copyright_checker( if remove_offset: args.append("--remove_offset {}".format(remove_offset)) + data = srcs[:] + data.append(template) + data.append(config) + if exclusion: + data.append(exclusion) + py_binary( name = t_name, main = "cr_checker.py", @@ -96,10 +107,7 @@ def copyright_checker( "@score_tooling//cr_checker/tool:cr_checker_lib", ], args = args, - data = srcs + [ - template, - config, - ], + data = data, visibility = visibility, ) diff --git a/cr_checker/tests/.bazelversion b/cr_checker/tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ b/cr_checker/tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file diff --git a/cr_checker/tests/BUILD b/cr_checker/tests/BUILD index 3da53e8..3bc90fc 100644 --- a/cr_checker/tests/BUILD +++ b/cr_checker/tests/BUILD @@ -14,9 +14,9 @@ load("@score_tooling//python_basics:defs.bzl", "score_py_pytest") score_py_pytest( - name = "shebang_unit_tests", + name = "unit_tests", srcs = [ - "test_shebang_handling.py", + "test_cr_checker.py", ], deps = [ "@score_tooling//cr_checker/tool:cr_checker_lib", diff --git a/cr_checker/tests/test_shebang_handling.py b/cr_checker/tests/test_cr_checker.py similarity index 58% rename from cr_checker/tests/test_shebang_handling.py rename to cr_checker/tests/test_cr_checker.py index 545ae49..bd2b6e1 100644 --- a/cr_checker/tests/test_shebang_handling.py +++ b/cr_checker/tests/test_cr_checker.py @@ -15,6 +15,7 @@ import importlib.util import json +import pytest from datetime import datetime from pathlib import Path @@ -32,15 +33,15 @@ def 
load_cr_checker_module(): # load the license template -def load_py_template() -> str: +def load_template(extension: str) -> str: cr_checker = load_cr_checker_module() template_file = Path(__file__).resolve().parents[1] / "resources" / "templates.ini" templates = cr_checker.load_templates(template_file) - return templates["py"] + return templates[extension] # write the config file here so that the year is always up to date with the year -# written in the mock "script.py" file +# written in the test file def write_config(path: Path, author: str) -> Path: config_path = path / "config.json" config_path.write_text(json.dumps({"author": author}), encoding="utf-8") @@ -61,11 +62,145 @@ def test_detect_shebang_offset_counts_trailing_newlines(tmp_path): assert offset == len("#!/usr/bin/env python3\n\n".encode("utf-8")) +@pytest.fixture( + params=[ + "cpp", + "c", + "h", + "hpp", + "py", + "sh", + "bzl", + "ini", + "yml", + "BUILD", + "bazel", + "rs", + "rst", + ] +) +def prepare_test_with_header(request: SubRequest, tmp_path: PosixPath) -> tuple: + extension = request.param + test_file = tmp_path / ("file." + extension) + header_template = load_template(extension) + current_year = datetime.now().year + header = header_template.format(year=current_year, author="Author") + test_file.write_text( + header + "some content\n", + encoding="utf-8", + ) + return test_file, extension, header_template + + +@pytest.fixture( + params=[ + "cpp", + "c", + "h", + "hpp", + "py", + "sh", + "bzl", + "ini", + "yml", + "BUILD", + "bazel", + "rs", + "rst", + ] +) +def prepare_test_no_header(request: SubRequest, tmp_path: PosixPath) -> tuple: + extension = request.param + test_file = tmp_path / ("file." 
+ extension) + header_template = load_template(extension) + current_year = datetime.now().year + test_file.write_text( + "some content\n", + encoding="utf-8", + ) + return test_file, extension, header_template, tmp_path + + +def test_process_files_detects_header(prepare_test_with_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template = prepare_test_with_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 0 + + +def test_process_files_detects_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 1 + + +def test_process_files_inserts_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + author = "Author" + config = write_config(tmp_path, author) + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + True, + config=config, + use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 1 + assert results["fixed"] == 1 + expected_header = header_template.format(year=datetime.now().year, author="Author") + assert test_file.read_text(encoding="utf-8").startswith(expected_header) + + +def test_process_files_skips_exclusion_with_missing_header(prepare_test_no_header): + cr_checker = load_cr_checker_module() + test_file, extension, header_template, tmp_path = prepare_test_no_header + + results = cr_checker.process_files( + [test_file], + {extension: header_template}, + False, + [str(test_file)], + 
use_mmap=False, + encoding="utf-8", + offset=0, + remove_offset=0, + ) + + assert results["no_copyright"] == 0 + + # test that process_files function validates a license header after the shebang line def test_process_files_accepts_header_after_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year header = header_template.format(year=current_year, author="Author") script.write_text( @@ -94,7 +229,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): "#!/usr/bin/env python3\nprint('hi')\n", encoding="utf-8", ) - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year author = "Author" config = write_config(tmp_path, author) @@ -103,7 +238,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): [script], {"py": header_template}, True, - config, + config=config, use_mmap=False, encoding="utf-8", offset=0, @@ -122,7 +257,7 @@ def test_process_files_fix_inserts_header_after_shebang(tmp_path): def test_process_files_accepts_header_without_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year header = header_template.format(year=current_year, author="Author") script.write_text(header + "print('hi')\n", encoding="utf-8") @@ -145,7 +280,7 @@ def test_process_files_fix_inserts_header_without_shebang(tmp_path): cr_checker = load_cr_checker_module() script = tmp_path / "script.py" script.write_text("print('hi')\n", encoding="utf-8") - header_template = load_py_template() + header_template = load_template("py") current_year = datetime.now().year author = "Author" config = write_config(tmp_path, author) @@ -154,7 +289,7 @@ def test_process_files_fix_inserts_header_without_shebang(tmp_path): 
def load_exclusion(path):
    """
    Loads the list of files being excluded from the copyright check.

    The exclusion file is read as UTF-8 text with one path per line. Lines
    that are empty or contain only whitespace are ignored. Every remaining
    entry must point to an existing regular file; entries that do not are
    logged as errors and left out of the returned list.

    Args:
        path (str | Path): Path to the exclusion file.

    Returns:
        tuple(list, bool): a list of files that are excluded from the copyright
            check (in the order they appear in the exclusion file) and a boolean
            indicating whether all paths listed in the exclusion file exist and
            are files.

    Raises:
        IOError: If the exclusion file itself cannot be opened or read.
    """
    with open(path, "r", encoding="utf-8") as file:
        # Blank lines carry no path; Path("") would resolve to "." and be
        # reported as "not a file", so drop them up front.
        candidates = [line for line in file.read().splitlines() if line.strip()]

    # Build a new list instead of removing entries from the list being iterated:
    # removing during iteration skips the element that follows each removed one,
    # which let invalid entries slip through unvalidated.
    exclusion = []
    valid = True
    for item in candidates:
        candidate = Path(item)
        if not candidate.exists():
            LOGGER.error("Excluded file %s does not exist.", item)
            valid = False
        elif not candidate.is_file():
            LOGGER.error("Excluded file %s is not a file.", item)
            valid = False
        else:
            exclusion.append(item)

    LOGGER.debug(exclusion)
    return exclusion, valid
use_mmap (bool): Flag for using mmap function for reading files @@ -528,6 +565,10 @@ def process_files( ) continue + if str(item) in exclusion: + logging.debug("Skipped due to exclusion: %s", item) + continue + if os.path.getsize(item) == 0: # No need to add copyright headers to empty files continue @@ -576,6 +617,13 @@ def parse_arguments(argv): help="Path to the template file", ) + parser.add_argument( + "--exclusion-file", + type=Path, + required=False, + help="Path to the file listing file paths excluded from the copyright check.", + ) + parser.add_argument( "-c", "--config-file", @@ -675,6 +723,15 @@ def main(argv=None): LOGGER.error("Failed to load copyright text: %s", err) return err.errno + exclusion = [] + exclusion_valid = True + if args.exclusion_file: + try: + exclusion, exclusion_valid = load_exclusion(args.exclusion_file) + except IOError as err: + LOGGER.error("Failed to load exclusion list: %s", err) + return err.errno + try: files = collect_inputs(args.inputs, args.extensions) except IOError as err: @@ -695,6 +752,7 @@ def main(argv=None): files, templates, args.fix, + exclusion, args.config_file, args.use_memory_map, args.encoding, @@ -712,6 +770,8 @@ def main(argv=None): total_no, COLORS["ENDC"], ) + if not exclusion_valid: + LOGGER.info("The exclusion file contains paths that do not exist.") if args.fix: total_not_fixed = total_no - total_fixes LOGGER.info( @@ -728,7 +788,7 @@ def main(argv=None): ) LOGGER.info("=" * 64) - return 0 if total_no == 0 else 1 + return 0 if (total_no == 0 and exclusion_valid) else 1 if __name__ == "__main__": diff --git a/python_basics/integration_tests/.bazelversion b/python_basics/integration_tests/.bazelversion deleted file mode 100644 index 2bf50aa..0000000 --- a/python_basics/integration_tests/.bazelversion +++ /dev/null @@ -1 +0,0 @@ -8.3.0 diff --git a/python_basics/integration_tests/.bazelversion b/python_basics/integration_tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ 
b/python_basics/integration_tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file diff --git a/starpls/integration_tests/.bazelversion b/starpls/integration_tests/.bazelversion new file mode 120000 index 0000000..96cf949 --- /dev/null +++ b/starpls/integration_tests/.bazelversion @@ -0,0 +1 @@ +../../.bazelversion \ No newline at end of file