diff --git a/CHANGELOG.md b/CHANGELOG.md index b05308ab..e67d820c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Change Log +## Unreleased + +- Added graceful handling of configuration errors with a simple error message. +- Added option to specify that exceptions should be raised when using the `simvue.Run` class. +- Ensured that if Simvue itself aborts a process, this abort is recorded in log files. + ## [v2.3.0](https://github.com/simvue-io/client/releases/tag/v2.3.0) - 2025-12-11 - Refactored sender functionality introducing new `Sender` class. diff --git a/pyproject.toml b/pyproject.toml index 57b96e11..42e68320 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ markers = [ "local: tests of functionality which do not involve a server or writing to an offline cache file", "object_retrieval: tests relating to retrieval of objects from the server", "object_removal: tests relating to removal of objects from the server", + "cli: sender CLI tests" ] [tool.interrogate] diff --git a/simvue/config/user.py b/simvue/config/user.py index 11ea57a6..9637e814 100644 --- a/simvue/config/user.py +++ b/simvue/config/user.py @@ -16,6 +16,8 @@ import toml import semver +from simvue.exception import SimvueUserConfigError + try: from typing import Self except ImportError: @@ -225,10 +227,10 @@ def fetch( _run_mode = mode or _config_dict["run"].get("mode") or "online" if not _server_url and _run_mode != "offline": - raise RuntimeError("No server URL was specified") + raise SimvueUserConfigError("No server URL was specified") if not _server_token and _run_mode != "offline": - raise RuntimeError("No server token was specified") + raise SimvueUserConfigError("No server token was specified") _config_dict["server"]["token"] = _server_token _config_dict["server"]["url"] = _server_url diff --git a/simvue/exception.py b/simvue/exception.py index 3dc5e65e..b61149ed 100644 --- a/simvue/exception.py +++ b/simvue/exception.py @@ -7,7 +7,13 @@ """ -class ObjectNotFoundError(Exception): +class SimvueException(Exception): + """Base exception for all Simvue Python API errors.""" + + pass + + +class ObjectNotFoundError(SimvueException): """For failure retrieving Simvue object from server""" def __init__(self, obj_type: str, name: str, extra: str | None = None) -> None: @@ -18,7 +24,13 @@ def __init__(self, obj_type: str, name: str, extra: str | None = None) -> None: ) -class SimvueRunError(RuntimeError): +class SimvueRunError(SimvueException, RuntimeError): """A special sub-class of runtime error specifically for Simvue run errors""" pass + + +class SimvueUserConfigError(SimvueException): + """Raised when no local Simvue Configuration file has been found.""" + + pass diff --git a/simvue/executor.py b/simvue/executor.py index 3d25461e..11a22a94 100644 --- a/simvue/executor.py +++ b/simvue/executor.py @@ -24,6 +24,7 @@ import time import typing from simvue.api.objects.alert.user import UserAlert +from simvue.models import simvue_timestamp if typing.TYPE_CHECKING: import simvue @@ -475,6 +476,13 @@ def kill_process( kill_children_only : bool, optional if process_id is an integer, whether to kill only its children """ + # Ensure logs have record of aborted status + with pathlib.Path(f"{self._runner.name}_{process_id}.err").open( + "a" + ) as err_file: + err_file.writelines( + [f"{simvue_timestamp()} Process was aborted by Simvue executor."] + ) if isinstance(process_id, str): if not (process := self._processes.get(process_id)): logger.error( diff --git a/simvue/run.py b/simvue/run.py index 7e3e96fb..ccc1e62b 100644 --- a/simvue/run.py +++ b/simvue/run.py @@ -33,7 +33,7 @@ from simvue.api.objects.alert.fetch import Alert from simvue.api.objects.folder import Folder from simvue.api.objects.grids import GridMetrics -from simvue.exception import ObjectNotFoundError, SimvueRunError +from simvue.exception import ObjectNotFoundError, SimvueRunError, SimvueUserConfigError from simvue.utilities import prettify_pydantic @@ -122,6 +122,7 @@ def __init__( abort_callback: typing.Callable[[Self], None] | None = None, server_token: pydantic.SecretStr | None = None, server_url: str | None = None, + raise_exception: bool = False, debug: bool = False, ) -> None: """Initialise a new Simvue run @@ -141,6 +142,9 @@ def __init__( overwrite value for server token, default is None server_url : str, optional overwrite value for server URL, default is None + raise_exception : bool, optional + whether to raise an exception with traceback as opposed to + printing an error and exiting, default is False debug : bool, optional run in debug mode, default is False @@ -175,6 +179,11 @@ def __init__( self._grids: dict[str, str] = {} self._suppress_errors: bool = False self._queue_blocking: bool = False + self._raise_exception: bool = raise_exception + + # Capture exceptions raised within threads + self._thread_exception_message: str | None = None + self._status: ( typing.Literal[ "created", "running", "completed", "failed", "terminated", "lost" @@ -184,9 +193,23 @@ def __init__( self._data: dict[str, typing.Any] = {} self._step: int = 0 self._active: bool = False - self._user_config: SimvueConfiguration = SimvueConfiguration.fetch( - server_url=server_url, server_token=server_token, mode=mode - ) + + try: + self._user_config: SimvueConfiguration = SimvueConfiguration.fetch( + server_url=server_url, server_token=server_token, mode=mode + ) + except SimvueUserConfigError as e: + if self._raise_exception: + raise e + _help_str = ( + "A required Simvue configuration is missing, " + "please ensure you have created a valid configuration " + "file, or the environment variables 'SIMVUE_URL' and 'SIMVUE_TOKEN' " + "have been defined." + ) + click.secho(_help_str) + click.secho(f"[simvue] {e}", bold=True, fg="red") + sys.exit(1) logging.getLogger(self.__class__.__module__).setLevel( logging.DEBUG @@ -298,7 +321,6 @@ def processes(self) -> list[psutil.Process]: def _terminate_run( self, abort_callback: typing.Callable[[Self], None] | None, - force_exit: bool = True, ) -> None: """Close the current simvue Run and its subprocesses. @@ -309,8 +331,6 @@ def _terminate_run( ---------- abort_callback: Callable, optional the callback to execute on the termination else None - force_exit: bool, optional - whether to close Python itself, the default is True """ self._alert_raised_trigger.set() logger.debug("Received abort request from server") @@ -326,13 +346,16 @@ def _terminate_run( self._dispatcher.join() if self._active: self.set_status("terminated") + if self._raise_exception: + self._thread_exception_message = "Run was aborted." + return click.secho( "[simvue] Run was aborted.", fg="red" if self._term_color else None, bold=self._term_color, ) if self._abort_on_alert == "terminate": - os._exit(1) if force_exit else sys.exit(1) + os._exit(1) def _get_internal_metrics( self, @@ -445,7 +468,9 @@ def _heartbeat( # Check if the user has aborted the run with self._configuration_lock: if self._sv_obj and self._sv_obj.abort_trigger: - self._terminate_run(abort_callback=abort_callback) + self._terminate_run( + abort_callback=abort_callback, + ) if self._sv_obj: self._sv_obj.send_heartbeat() @@ -1862,14 +1887,21 @@ def _tidy_run(self) -> None: ) if _error_msg: _error_msg = f":\n{_error_msg}" + _error_msg = ( + f"Process executor terminated with non-zero exit status {_non_zero}" + ) + if self._raise_exception: + raise SimvueRunError(_error_msg) click.secho( - "[simvue] Process executor terminated with non-zero exit status " - f"{_non_zero}{_error_msg}", + f"[simvue] {_error_msg}", fg="red" if self._term_color else None, bold=self._term_color, ) sys.exit(_non_zero) + if self._thread_exception_message and self._raise_exception: + raise SimvueRunError(self._thread_exception_message) + @skip_if_failed("_aborted", "_suppress_errors", False) def close(self) -> bool: """Close the run diff --git a/tests/conftest.py b/tests/conftest.py index 5c94e50f..1cf207a7 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -133,15 +133,8 @@ def log_messages(caplog): @pytest.fixture -def prevent_script_exit(monkeypatch: monkeypatch.MonkeyPatch) -> None: - _orig_func = sv_run.Run._terminate_run - monkeypatch.setattr(sv_run.Run, "_terminate_run", lambda *args, **kwargs: _orig_func(*args, force_exit=False, **kwargs)) - - -@pytest.fixture -def create_test_run(request, prevent_script_exit) -> Generator[tuple[sv_run.Run, dict]]: - _ = prevent_script_exit - with sv_run.Run() as run: +def create_test_run(request) -> Generator[tuple[sv_run.Run, dict]]: + with sv_run.Run(raise_exception=True) as run: with tempfile.TemporaryDirectory() as tempd: _test_run_data = setup_test_run(run, temp_dir=pathlib.Path(tempd), create_objects=True, request=request) yield run, _test_run_data @@ -154,11 +147,10 @@ def create_test_run(request, prevent_script_exit) -> Generator[tuple[sv_run.Run, @pytest.fixture -def create_test_run_offline(request, monkeypatch: pytest.MonkeyPatch, prevent_script_exit) -> Generator[tuple[sv_run.Run, dict]]: - _ = prevent_script_exit +def create_test_run_offline(request, monkeypatch: pytest.MonkeyPatch) -> Generator[tuple[sv_run.Run, dict]]: with tempfile.TemporaryDirectory() as temp_d: monkeypatch.setenv("SIMVUE_OFFLINE_DIRECTORY", temp_d) - with sv_run.Run("offline") as run: + with sv_run.Run("offline", raise_exception=True) as run: _test_run_data = setup_test_run(run, temp_dir=pathlib.Path(temp_d), create_objects=True, request=request) yield run, _test_run_data with contextlib.suppress(ObjectNotFoundError): @@ -170,11 +162,8 @@ def create_test_run_offline(request, monkeypatch: pytest.MonkeyPatch, prevent_sc @pytest.fixture -def create_plain_run(request, prevent_script_exit, mocker) -> Generator[tuple[sv_run.Run, dict]]: - _ = prevent_script_exit - def testing_exit(status: int) -> None: - raise SystemExit(status) - with sv_run.Run() as run: +def create_plain_run(request, mocker) -> Generator[tuple[sv_run.Run, dict]]: + with sv_run.Run(raise_exception=True) as run: run.metric_spy = mocker.spy(run, "_get_internal_metrics") with tempfile.TemporaryDirectory() as tempd: yield run, setup_test_run(run, temp_dir=pathlib.Path(tempd), create_objects=False, request=request) @@ -182,20 +171,18 @@ def testing_exit(status: int) -> None: @pytest.fixture -def create_pending_run(request, prevent_script_exit) -> Generator[tuple[sv_run.Run, dict]]: - _ = prevent_script_exit - with sv_run.Run() as run: +def create_pending_run(request) -> Generator[tuple[sv_run.Run, dict]]: + with sv_run.Run(raise_exception=True) as run: with tempfile.TemporaryDirectory() as tempd: yield run, setup_test_run(run, temp_dir=pathlib.Path(tempd), create_objects=False, request=request, created_only=True) clear_out_files() @pytest.fixture -def create_plain_run_offline(request,prevent_script_exit,monkeypatch) -> Generator[tuple[sv_run.Run, dict]]: - _ = prevent_script_exit +def create_plain_run_offline(request, monkeypatch) -> Generator[tuple[sv_run.Run, dict]]: with tempfile.TemporaryDirectory() as temp_d: monkeypatch.setenv("SIMVUE_OFFLINE_DIRECTORY", temp_d) - with sv_run.Run("offline") as run: + with sv_run.Run("offline", raise_exception=True) as run: _temporary_directory = pathlib.Path(temp_d) yield run, setup_test_run(run, temp_dir=_temporary_directory, create_objects=False, request=request) clear_out_files() diff --git a/tests/functional/test_client.py b/tests/functional/test_client.py index feef0655..0ba467f4 100644 --- a/tests/functional/test_client.py +++ b/tests/functional/test_client.py @@ -10,7 +10,7 @@ import tempfile import simvue.client as svc -from simvue.exception import ObjectNotFoundError +from simvue.exception import ObjectNotFoundError, SimvueRunError import simvue.run as sv_run import simvue.api.objects as sv_api_obj from simvue.api.objects.alert.base import AlertBase @@ -535,16 +535,24 @@ def test_alert_deletion() -> None: @pytest.mark.client @pytest.mark.object_removal -def test_abort_run(speedy_heartbeat, create_plain_run: tuple[sv_run.Run, dict]) -> None: - run, run_data = create_plain_run +def test_abort_run(speedy_heartbeat) -> None: + run = sv_run.Run(raise_exception=True) + unique_id = f"{uuid.uuid4()}".split("-")[0] + run.init( + name="test_abort_run", + folder=f"/simvue_unit_testing/{unique_id}", + tags=["test_tag_deletion", platform.system()], + retention_period="1 min", + ) + run._heartbeat_interval = 1 _uuid = f"{uuid.uuid4()}".split("-")[0] run.update_tags([f"delete_me_{_uuid}"]) _client = svc.Client() _client.abort_run(run.id, reason="Test abort") _attempts: int = 0 - while run.status != "terminated" and _attempts < 10: - time.sleep(1) - _attempts += 1 - if _attempts >= 10: - raise AssertionError("Failed to terminate run.") + with pytest.raises(SimvueRunError): + run.close() + while run.status != "terminated" and _attempts < 10: + time.sleep(1) + _attempts += 1 diff --git a/tests/functional/test_config.py b/tests/functional/test_config.py index dcbb013d..c3127a22 100644 --- a/tests/functional/test_config.py +++ b/tests/functional/test_config.py @@ -6,6 +6,7 @@ import pytest_mock import tempfile from simvue.config.user import SimvueConfiguration +from simvue.exception import SimvueUserConfigError @pytest.mark.config @@ -100,7 +101,7 @@ def _mocked_find(file_names: list[str], *_, ppt_file=_ppt_file, conf_file=_confi import simvue.config.user if not use_file and not use_env and not use_args: - with pytest.raises(RuntimeError): + with pytest.raises(SimvueUserConfigError): simvue.config.user.SimvueConfiguration.fetch(mode="online") return elif use_args: diff --git a/tests/functional/test_run_class.py b/tests/functional/test_run_class.py index f1ba2d81..6ab2e42b 100644 --- a/tests/functional/test_run_class.py +++ b/tests/functional/test_run_class.py @@ -25,7 +25,6 @@ from simvue.sender import Sender import simvue.run as sv_run import simvue.client as sv_cl -import simvue.config.user as sv_cfg from simvue.api.objects import Run as RunObject @@ -1380,48 +1379,60 @@ def abort_callback(abort_run=trigger) -> None: @pytest.mark.run @pytest.mark.online -def test_abort_on_alert_python( - speedy_heartbeat, create_plain_run: tuple[sv_run.Run, dict], mocker: pytest_mock.MockerFixture -) -> None: - timeout: int = 20 - interval: int = 0 - run, _ = create_plain_run +def test_abort_on_alert_python() -> None: + run = sv_run.Run(raise_exception=True) + unique_id = f"{uuid.uuid4()}".split("-")[0] + run.init( + name="test_abort_on_alert_python", + folder=f"/simvue_unit_testing/{unique_id}", + tags=["test_abort_on_alert_python", platform.system()], + retention_period="1 min", + ) + run._heartbeat_interval = 1 client = sv_cl.Client() client.abort_run(run.id, reason="Test abort") attempts: int = 0 - while run._status == "terminated" and attemps < 5: - time.sleep(1) - attempts += 1 + with pytest.raises(SimvueRunError) as e: + run.close() + while run._status == "terminated" and attempts < 5: + time.sleep(1) + attempts += 1 + assert "Run was aborted." in f"{e}" - if attempts >= 5: - raise AssertionError("Failed to terminate run") @pytest.mark.run @pytest.mark.online def test_abort_on_alert_raise( - create_plain_run: tuple[sv_run.Run, dict] ) -> None: - - run, _ = create_plain_run + run = sv_run.Run(raise_exception=True) + unique_id = f"{uuid.uuid4()}".split("-")[0] + run.init( + name="test_abort_on_alert_raise", + folder=f"/simvue_unit_testing/{unique_id}", + tags=["test_abort_on_alert_raise", platform.system()], + retention_period="1 min", + ) run.config(system_metrics_interval=1) run._heartbeat_interval = 1 run._testing = True + alert_id = run.create_user_alert("abort_test", trigger_abort=True) run.add_process(identifier=f"forever_long_other_{os.environ.get('PYTEST_XDIST_WORKER', 0)}", executable="bash", c="sleep 10") run.log_alert(identifier=alert_id, state="critical") _alert = Alert(identifier=alert_id) assert _alert.get_status(run.id) == "critical" counter = 0 - while run._status != "terminated" and counter < 15: - time.sleep(1) - assert run._sv_obj.abort_trigger, "Abort trigger was not set" - counter += 1 - if counter >= 15: - run.kill_all_processes() - raise AssertionError("Run was not terminated") + + with pytest.raises(SimvueRunError) as e: + run.close() + while run._status != "terminated" and counter < 15: + time.sleep(1) + assert run._sv_obj.abort_trigger, "Abort trigger was not set" + counter += 1 + assert "Process executor terminated with non-zero exit status" in f"{e}" @pytest.mark.run diff --git a/tests/functional/test_run_execute_process.py b/tests/functional/test_run_execute_process.py index 66b8a4c9..e0f64e05 100644 --- a/tests/functional/test_run_execute_process.py +++ b/tests/functional/test_run_execute_process.py @@ -72,7 +72,10 @@ def test_abort_all_processes(create_plain_run: tuple[Run, dict]) -> None: _out_err = pathlib.Path.cwd().glob(f"*process_*_{os.environ.get('PYTEST_XDIST_WORKER', 0)}.err") for file in _out_err: with file.open() as in_f: - assert not in_f.readlines() + # Simvue Executor appends message informing user it aborted the process itself + _lines = in_f.readlines() + assert len(_lines) == 1 + assert "Process was aborted by Simvue executor." in _lines[0] # Now check the counter in the process was terminated # just beyond the sleep time