Add configurable handling (fail / ignore / include) for binary files and for
files with an unknown extension, and move CI to Python 3.9-3.12.

NOTE(review): the blob ids on the `index` lines are carried over from the
original patch and do not reflect the corrected post-image content.

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index a5f2bf8..7734cd4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -15,11 +15,11 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        python-version: ["3.8", "3.9", "3.10", "3.11"]
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
         os: ["ubuntu-latest", "windows-latest", "macos-latest"]
     runs-on: ${{ matrix.os }}
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v4
         with:
@@ -32,7 +32,7 @@ jobs:
           installer-parallel: true
       - name: Load cached venv
         id: cached-poetry-venv
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: .venv
           key: venv-${{ matrix.python-version }}-${{ runner.os }}-${{ hashFiles('poetry.lock') }}
@@ -54,7 +54,7 @@ jobs:
   coverage:
     strategy:
       matrix:
-        python-version: [ "3.8" ]
+        python-version: [ "3.12" ]
     runs-on: "ubuntu-latest"
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
@@ -70,7 +70,7 @@ jobs:
           installer-parallel: true
       - name: Load cached venv
         id: cached-poetry-venv
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: .venv
-          key: venv-${{ matrix.python-version }}-{{ runner.os }}-${{ hashFiles('poetry.lock') }}
+          key: venv-${{ matrix.python-version }}-${{ runner.os }}-${{ hashFiles('poetry.lock') }}
@@ -96,7 +96,7 @@ jobs:
   typecheck:
     strategy:
       matrix:
-        python-version: [ "3.8" ]
+        python-version: [ "3.12" ]
     runs-on: "ubuntu-latest"
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
diff --git a/codestripper/cli.py b/codestripper/cli.py
index 537b421..a9873c5 100644
--- a/codestripper/cli.py
+++ b/codestripper/cli.py
@@ -4,6 +4,7 @@
 
 from codestripper.code_stripper import strip_files
 from codestripper.utils import FileUtils, set_logger_level, get_working_directory
+from codestripper.utils.enums import UnexpectedInputOptions
 
 
 def add_commandline_arguments(parser: argparse.ArgumentParser) -> None:
@@ -26,6 +27,14 @@ def add_commandline_arguments(parser: argparse.ArgumentParser) -> None:
                         help="set the working directory for include/exclude", default=os.getcwd())
     parser.add_argument("-e", "--fail-on-error", action="store_false",
                         help="Fail if an error occurs during code stripping")
+    # argparse checks `choices` against the *converted* value, so the raw string has to be
+    # turned into an enum member first; without `type=` no command-line value could match.
+    parser.add_argument("-b", "--binary", type=UnexpectedInputOptions, choices=list(UnexpectedInputOptions),
+                        default=UnexpectedInputOptions.FAIL, metavar="{fail,ignore,include}",
+                        action="store", help="What to do if binary file is matched")
+    parser.add_argument("-u", "--unknown", type=UnexpectedInputOptions, choices=list(UnexpectedInputOptions),
+                        default=UnexpectedInputOptions.FAIL, metavar="{fail,ignore,include}",
+                        action="store", help="What to do if a file with unknown extension is matched")
 
 
 def main() -> None:
diff --git a/codestripper/code_stripper.py b/codestripper/code_stripper.py
index 7cf5a54..a48a6b4 100644
--- a/codestripper/code_stripper.py
+++ b/codestripper/code_stripper.py
@@ -10,12 +10,14 @@
 from codestripper.tokenizer import Tokenizer
 from codestripper.utils import get_working_directory
 from codestripper.utils.comments import comments_mapping, Comment
+from codestripper.utils.enums import UnexpectedInputOptions
 
 logger = logging.getLogger("codestripper")
 
 
 def strip_files(files: Iterable[str], working_directory: Union[str, None] = None, * ,comments: Optional[List[str]] = None,
-                output: Union[Path, str] = "out", dry_run: bool = False, fail_on_error: bool = False) -> List[str]:
+                output: Union[Path, str] = "out", dry_run: bool = False, fail_on_error: bool = False,
+                binary: UnexpectedInputOptions = UnexpectedInputOptions.FAIL, unknown_extension: UnexpectedInputOptions = UnexpectedInputOptions.FAIL) -> List[str]:
 
     if comments is not None:
         for comment in comments:
@@ -32,18 +34,45 @@ def strip_files(files: Iterable[str], working_directory: Union[str, None] = None
     stripped_files: List[str] = []
     has_errors: bool = False
     for file in files:
-        with open(os.path.join(cwd, file), 'r') as handle:
-            content = handle.read()
+        try:
+            with open(os.path.join(cwd, file), 'r') as handle:
+                content = handle.read()
+        except UnicodeDecodeError:
+            # A text-mode read that cannot be decoded is treated as a binary file.
+            if binary == UnexpectedInputOptions.FAIL:
+                # Bare raise re-raises the active exception with its traceback untouched.
+                raise
+            elif binary == UnexpectedInputOptions.IGNORE:
+                logger.info(f"Ignoring binary file: '{file}'")
+                continue
+            else:
+                # INCLUDE: copy the file verbatim into the output tree.
+                # NOTE(review): assumes dry_run must not write anything - confirm against the write path below.
+                if not dry_run:
+                    path = os.path.join(out, file)
+                    os.makedirs(os.path.dirname(path), exist_ok=True)
+                    shutil.copy2(os.path.join(cwd, file), path)
+                stripped_files.append(file)
+                continue
         if content is not None:
+            stripped = ""
             try:
                 _, file_extension = os.path.splitext(file)
                 file_extension = file_extension.lower()
                 if not file_extension in comments_mapping:
-                    logger.error(f"Unknown extension: '{file_extension}', "
+                    if unknown_extension == UnexpectedInputOptions.FAIL:
+                        logger.error(f"Unknown extension: '{file_extension}', "
                                  f"please specify which comment to use for this file extension.")
-                    continue
-                com = comments_mapping[file_extension]
-                stripped = CodeStripper(content, com).strip()
+                        has_errors = True
+                        break
+                    elif unknown_extension == UnexpectedInputOptions.IGNORE:
+                        logger.info(f"Unknown extension: '{file_extension}' ignored")
+                        continue
+                    else:
+                        # Keep the complete content
+                        stripped = content
+                else:
+                    com = comments_mapping[file_extension]
+                    stripped = CodeStripper(content, com).strip()
             except IgnoreFileError:
                 logger.info(f"File '{file}' is ignored, because of ignore tag")
                 continue
diff --git a/codestripper/utils/enums.py b/codestripper/utils/enums.py
new file mode 100644
index 0000000..f0a9a69
--- /dev/null
+++ b/codestripper/utils/enums.py
@@ -0,0 +1,10 @@
+import enum
+
+
+class UnexpectedInputOptions(enum.Enum):
+    """What to do with a matched file that is binary or has an unknown extension."""
+
+    # Values are plain strings: a trailing comma would silently make them one-element tuples.
+    FAIL = "fail"
+    IGNORE = "ignore"
+    INCLUDE = "include"
diff --git a/tests/test_codestripper.py b/tests/test_codestripper.py
index ea9a64d..323b4f9 100644
--- a/tests/test_codestripper.py
+++ b/tests/test_codestripper.py
@@ -1,6 +1,7 @@
 import logging
 import os.path
 import re
+import shutil
 from pathlib import Path
 
 import pytest
@@ -8,10 +9,33 @@
 
 from codestripper.code_stripper import strip_files
 from codestripper.utils import FileUtils
+from codestripper.utils.enums import UnexpectedInputOptions
 
 test_project_dir = os.path.join(Path(__file__).parent.absolute())
 
 
+def test_project_with_unknown_extension_fail(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    files = FileUtils(["**/*.java", "pom.xml", "**/*.test"], working_directory="testproject").get_matching_files()
+
+    with pytest.raises(Exception):
+        strip_files(files, "testproject", output="out", unknown_extension=UnexpectedInputOptions.FAIL, fail_on_error=True)
+
+
+def test_project_with_unknown_extension_ignore(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    files = FileUtils(["**/*.java", "pom.xml", "test.test"], working_directory="testproject").get_matching_files()
+    stripped = strip_files(files, "testproject", output="out", unknown_extension=UnexpectedInputOptions.IGNORE)
+    assert "test.test" not in stripped
+
+
+def test_project_with_unknown_extension_include(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    files = FileUtils(["**/*.java", "pom.xml", "test.test"], working_directory="testproject").get_matching_files()
+    stripped = strip_files(files, "testproject", output="out", unknown_extension=UnexpectedInputOptions.INCLUDE)
+    assert "test.test" in stripped
+
+
 def test_project(monkeypatch: pytest.MonkeyPatch, caplog: LogCaptureFixture):
     monkeypatch.chdir(test_project_dir)
     with caplog.at_level(logging.INFO, logger='codestripper'):
@@ -132,3 +156,30 @@ def test_non_fail_on_error(monkeypatch: pytest.MonkeyPatch, caplog: pytest.LogCa
         strip_files(files, "files", output="out", fail_on_error=False)
     errors = [rec.message for rec in caplog.records]
     assert len(errors) == 4
+
+
+def test_project_with_binary_fail(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    shutil.rmtree("out", ignore_errors=True)
+    files = FileUtils(["**/*.java", "pom.xml", "test.jpg"], working_directory="testproject").get_matching_files()
+
+    with pytest.raises(Exception):
+        strip_files(files, "testproject", output="out", binary=UnexpectedInputOptions.FAIL, fail_on_error=True)
+
+
+def test_project_with_binary_ignore(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    shutil.rmtree("out", ignore_errors=True)
+    files = FileUtils(["**/*.java", "pom.xml", "test.jpg"], working_directory="testproject").get_matching_files()
+
+    stripped = strip_files(files, "testproject", output="out", binary=UnexpectedInputOptions.IGNORE)
+    assert "test.jpg" not in stripped
+
+
+def test_project_with_binary_include(monkeypatch: pytest.MonkeyPatch):
+    monkeypatch.chdir(test_project_dir)
+    shutil.rmtree("out", ignore_errors=True)
+    files = FileUtils(["**/*.java", "pom.xml", "test.jpg"], working_directory="testproject").get_matching_files()
+
+    stripped = strip_files(files, "testproject", output="out", binary=UnexpectedInputOptions.INCLUDE)
+    assert "test.jpg" in stripped
diff --git a/tests/testproject/test.jpg b/tests/testproject/test.jpg
new file mode 100644
index 0000000..e58c685
Binary files /dev/null and b/tests/testproject/test.jpg differ