From 864a1b4e260ef3fa92954c8a232e8a4c5f21fa8b Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Fri, 12 Dec 2025 21:14:25 +0100 Subject: [PATCH 1/2] Add more `TypedDict` definitions This commit adds more `TypedDict` definitions for `File`, `Dirent`, and `Directory` objects. In addition, it adds the `exitCode` entry to the `CWLRuntimeParameterContext`, which was missing. --- cwl_utils/cwl_v1_0_expression_refactor.py | 82 +++++++------- cwl_utils/cwl_v1_1_expression_refactor.py | 82 +++++++------- cwl_utils/cwl_v1_2_expression_refactor.py | 70 ++++++------ cwl_utils/expression.py | 19 ++-- cwl_utils/file_formats.py | 6 +- cwl_utils/sandboxjs.py | 42 ++++++-- cwl_utils/types.py | 124 +++++++++++++++++++--- tests/test_format.py | 25 ++--- 8 files changed, 300 insertions(+), 150 deletions(-) diff --git a/cwl_utils/cwl_v1_0_expression_refactor.py b/cwl_utils/cwl_v1_0_expression_refactor.py index dc39488e..8af5929e 100755 --- a/cwl_utils/cwl_v1_0_expression_refactor.py +++ b/cwl_utils/cwl_v1_0_expression_refactor.py @@ -5,7 +5,7 @@ import copy import hashlib import uuid -from collections.abc import Mapping, MutableSequence, Sequence +from collections.abc import MutableSequence, Sequence from contextlib import suppress from typing import Any, Optional, cast @@ -18,10 +18,13 @@ from cwl_utils.errors import JavascriptException, WorkflowException from cwl_utils.expression import do_eval, interpolate from cwl_utils.types import ( + CWLDirectoryType, + CWLFileType, CWLObjectType, CWLOutputType, CWLParameterContext, CWLRuntimeParameterContext, + is_file_or_directory, ) @@ -547,31 +550,37 @@ def example_input(some_type: Any) -> Any: """Produce a fake input for the given type.""" # TODO: accept some sort of context object with local custom type definitions if some_type == "Directory": - return { - "class": "Directory", - "location": "https://www.example.com/example", - "basename": "example", - "listing": [ - { - "class": "File", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } - ], - } + return CWLDirectoryType( + **{ + "class": "Directory", + "location": "https://www.example.com/example", + "basename": "example", + "listing": [ + CWLFileType( + **{ + "class": "File", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) + ], + } + ) if some_type == "File": - return { - "class": "File", - "location": "https://www.example.com/example.txt", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } + return CWLFileType( + **{ + "class": "File", + "location": "https://www.example.com/example.txt", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) if some_type == "int": return 23 if some_type == "string": @@ -581,12 +590,14 @@ def example_input(some_type: Any) -> Any: return None -EMPTY_FILE: CWLOutputType = { - "class": "File", - "basename": "em.pty", - "nameroot": "em", - "nameext": "pty", -} +EMPTY_FILE = CWLFileType( + **{ + "class": "File", + "basename": "em.pty", + "nameroot": "em", + "nameext": "pty", + } +) TOPLEVEL_SF_EXPR_ERROR = ( "Input '{}'. Sorry, CWL Expressions as part of a secondaryFiles " @@ -793,14 +804,7 @@ def process_workflow_reqs_and_hints( resources={}, ) modified = True - if ( - isinstance(expr_result, Mapping) - and "class" in expr_result - and ( - expr_result["class"] - in ("File", "Directory") - ) - ): + if is_file_or_directory(expr_result): target = cwl.InputParameter( id=None, type_=expr_result["class"], diff --git a/cwl_utils/cwl_v1_1_expression_refactor.py b/cwl_utils/cwl_v1_1_expression_refactor.py index c0c6eb3f..12bff3d0 100755 --- a/cwl_utils/cwl_v1_1_expression_refactor.py +++ b/cwl_utils/cwl_v1_1_expression_refactor.py @@ -5,7 +5,7 @@ import copy import hashlib import uuid -from collections.abc import Mapping, MutableSequence, Sequence +from collections.abc import MutableSequence, Sequence from contextlib import suppress from typing import Any, Optional, cast @@ -18,10 +18,13 @@ from cwl_utils.errors import JavascriptException, WorkflowException from cwl_utils.expression import do_eval, interpolate from cwl_utils.types import ( + CWLDirectoryType, + CWLFileType, CWLObjectType, CWLOutputType, CWLParameterContext, CWLRuntimeParameterContext, + is_file_or_directory, ) @@ -545,31 +548,37 @@ def example_input(some_type: Any) -> Any: """Produce a fake input for the given type.""" # TODO: accept some sort of context object with local custom type definitions if some_type == "Directory": - return { - "class": "Directory", - "location": "https://www.example.com/example", - "basename": "example", - "listing": [ - { - "class": "File", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } - ], - } + return CWLDirectoryType( + **{ + "class": "Directory", + "location": "https://www.example.com/example", + "basename": "example", + "listing": [ + CWLFileType( + **{ + "class": "File", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) + ], + } + ) if some_type == "File": - return { - "class": "File", - "location": "https://www.example.com/example.txt", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } + return CWLFileType( + **{ + "class": "File", + "location": "https://www.example.com/example.txt", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) if some_type == "int": return 23 if some_type == "string": @@ -579,12 +588,14 @@ def example_input(some_type: Any) -> Any: return None -EMPTY_FILE: CWLOutputType = { - "class": "File", - "basename": "em.pty", - "nameroot": "em", - "nameext": "pty", -} +EMPTY_FILE = CWLFileType( + **{ + "class": "File", + "basename": "em.pty", + "nameroot": "em", + "nameext": "pty", + } +) TOPLEVEL_SF_EXPR_ERROR = ( "Input '{}'. Sorry, CWL Expressions as part of a secondaryFiles " @@ -795,14 +806,7 @@ def process_workflow_reqs_and_hints( resources={}, ) modified = True - if ( - isinstance(expr_result, Mapping) - and "class" in expr_result - and ( - expr_result["class"] - in ("File", "Directory") - ) - ): + if is_file_or_directory(expr_result): target = cwl.WorkflowInputParameter( id=None, type_=expr_result["class"], diff --git a/cwl_utils/cwl_v1_2_expression_refactor.py b/cwl_utils/cwl_v1_2_expression_refactor.py index 1ce15479..eb484a57 100755 --- a/cwl_utils/cwl_v1_2_expression_refactor.py +++ b/cwl_utils/cwl_v1_2_expression_refactor.py @@ -18,6 +18,8 @@ from cwl_utils.errors import JavascriptException, WorkflowException from cwl_utils.expression import do_eval, interpolate from cwl_utils.types import ( + CWLDirectoryType, + CWLFileType, CWLObjectType, CWLOutputType, CWLParameterContext, @@ -545,31 +547,37 @@ def example_input(some_type: Any) -> Any: """Produce a fake input for the given type.""" # TODO: accept some sort of context object with local custom type definitions if some_type == "Directory": - return { - "class": "Directory", - "location": "https://www.example.com/example", - "basename": "example", - "listing": [ - { - "class": "File", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } - ], - } + return CWLDirectoryType( + **{ + "class": "Directory", + "location": "https://www.example.com/example", + "basename": "example", + "listing": [ + CWLFileType( + **{ + "class": "File", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) + ], + } + ) if some_type == "File": - return { - "class": "File", - "location": "https://www.example.com/example.txt", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } + return CWLFileType( + **{ + "class": "File", + "location": "https://www.example.com/example.txt", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) if some_type == "int": return 23 if some_type == "string": @@ -579,12 +587,14 @@ def example_input(some_type: Any) -> Any: return None -EMPTY_FILE: CWLOutputType = { - "class": "File", - "basename": "em.pty", - "nameroot": "em", - "nameext": "pty", -} +EMPTY_FILE = CWLFileType( + **{ + "class": "File", + "basename": "em.pty", + "nameroot": "em", + "nameext": "pty", + } +) TOPLEVEL_SF_EXPR_ERROR = ( "Input '{}'. Sorry, CWL Expressions as part of a secondaryFiles " diff --git a/cwl_utils/expression.py b/cwl_utils/expression.py index 3a719399..90448c8a 100644 --- a/cwl_utils/expression.py +++ b/cwl_utils/expression.py @@ -6,14 +6,19 @@ import json from collections.abc import Awaitable, MutableMapping from enum import Enum -from typing import Any, Literal, Union, cast +from typing import Any, Union, cast from schema_salad.utils import json_dumps from cwl_utils.errors import JavascriptException, SubstitutionError, WorkflowException from cwl_utils.loghandler import _logger from cwl_utils.sandboxjs import JSEngine, default_timeout, get_js_engine, param_re -from cwl_utils.types import CWLObjectType, CWLOutputType, CWLParameterContext +from cwl_utils.types import ( + CWLObjectType, + CWLOutputType, + CWLParameterContext, + is_cwl_parameter_context_key, +) from cwl_utils.utils import bytes2str_in_dicts @@ -123,11 +128,7 @@ def evaluator( if first_symbol_end + 1 == len(ex) and first_symbol == "null": return None try: - if first_symbol in ("inputs", "self", "runtime"): - symbol = cast( - Literal["inputs"] | Literal["self"] | Literal["runtime"], - first_symbol, - ) + if is_cwl_parameter_context_key(first_symbol): if inspect.iscoroutinefunction(js_engine.regex_eval): return asyncio.get_event_loop().run_until_complete( cast( @@ -135,7 +136,7 @@ def evaluator( js_engine.regex_eval( first_symbol, ex[first_symbol_end:-1], - cast(CWLOutputType, obj[symbol]), + cast(CWLOutputType, obj[first_symbol]), **kwargs, ), ) @@ -146,7 +147,7 @@ def evaluator( js_engine.regex_eval( first_symbol, ex[first_symbol_end:-1], - cast(CWLOutputType, obj[symbol]), + cast(CWLOutputType, obj[first_symbol]), **kwargs, ), ) diff --git a/cwl_utils/file_formats.py b/cwl_utils/file_formats.py index 2eea6b21..985a9b08 100644 --- a/cwl_utils/file_formats.py +++ b/cwl_utils/file_formats.py @@ -6,11 +6,11 @@ """ -from rdflib import OWL, RDFS, Graph, URIRef +from rdflib import Graph, OWL, RDFS, URIRef from schema_salad.exceptions import ValidationException from schema_salad.utils import aslist, json_dumps -from cwl_utils.types import CWLObjectType +from cwl_utils.types import CWLFileType def formatSubclassOf( @@ -49,7 +49,7 @@ def formatSubclassOf( def check_format( - actual_file: CWLObjectType | list[CWLObjectType], + actual_file: CWLFileType | list[CWLFileType], input_formats: list[str] | str, ontology: Graph | None, ) -> None: diff --git a/cwl_utils/sandboxjs.py b/cwl_utils/sandboxjs.py index c0f0f644..92ffd9b7 100644 --- a/cwl_utils/sandboxjs.py +++ b/cwl_utils/sandboxjs.py @@ -22,7 +22,13 @@ from cwl_utils.errors import JavascriptException, WorkflowException from cwl_utils.loghandler import _logger -from cwl_utils.types import CWLOutputType +from cwl_utils.types import ( + CWLOutputType, + is_directory, + is_directory_key, + is_file, + is_file_key, +) from cwl_utils.utils import singularity_supports_userns default_timeout = 20 @@ -549,11 +555,35 @@ def regex_eval( if isinstance(current_value, Mapping): try: - return self.regex_eval( - parsed_string + remaining_string, - remaining_string[m.end(1) :], - cast(CWLOutputType, current_value[cast(str, key)]), - ) + if is_directory(current_value) and is_directory_key(key): + return self.regex_eval( + parsed_string + remaining_string, + remaining_string[m.end(1) :], + cast( + CWLOutputType, + current_value[key], + ), + ) + elif is_file(current_value) and is_file_key(key): + return self.regex_eval( + parsed_string + remaining_string, + remaining_string[m.end(1) :], + cast( + CWLOutputType, + current_value[key], + ), + ) + else: + return self.regex_eval( + parsed_string + remaining_string, + remaining_string[m.end(1) :], + cast( + CWLOutputType, + cast(MutableMapping[str, Any], current_value)[ + cast(str, key) + ], + ), + ) except KeyError as exc: raise WorkflowException( f"{parsed_string!r} doesn't have property {key!r}." diff --git a/cwl_utils/types.py b/cwl_utils/types.py index ed0fd157..1d605306 100644 --- a/cwl_utils/types.py +++ b/cwl_utils/types.py @@ -1,8 +1,15 @@ # SPDX-License-Identifier: Apache-2.0 # From https://github.com/rabix/sbpack/blob/b8404a0859ffcbe1edae6d8f934e51847b003320/sbpack/lib.py """Shared Python type definitions for commons JSON like CWL objects.""" -from collections.abc import MutableMapping, MutableSequence -from typing import TypeAlias, TypedDict +import sys +from collections.abc import Mapping, MutableMapping, MutableSequence +from typing import Any, Literal, TypeAlias, TypeGuard, TypedDict + +if sys.version_info >= (3, 11): + from typing import Required +else: + from typing_extensions import Required + built_in_types: list[str] = [ "null", @@ -21,22 +28,48 @@ ] -CWLOutputAtomType: TypeAlias = ( - None - | bool - | str - | int - | float - | MutableSequence["CWLOutputAtomType"] - | MutableMapping[str, "CWLOutputAtomType"] +CWLDirectoryType = TypedDict( + "CWLDirectoryType", + { + "class": Required[Literal["Directory"]], + "location": str, + "path": str, + "basename": str, + "listing": MutableSequence["CWLFileType | CWLDirectoryType"], + }, + total=False, +) + + +CWLFileType = TypedDict( + "CWLFileType", + { + "class": Required[Literal["File"]], + "location": str, + "path": str, + "basename": str, + "dirname": str, + "nameroot": str, + "nameext": str, + "checksum": str, + "size": int, + "secondaryFiles": MutableSequence["CWLFileType | CWLDirectoryType"], + "format": str, + "contents": str, + }, + total=False, ) + + CWLOutputType: TypeAlias = ( bool | str | int | float - | MutableSequence[CWLOutputAtomType] - | MutableMapping[str, CWLOutputAtomType] + | CWLFileType + | CWLDirectoryType + | MutableSequence["CWLOutputType | None"] + | MutableMapping[str, "CWLOutputType | None"] ) CWLObjectType: TypeAlias = MutableMapping[str, CWLOutputType | None] SinkType: TypeAlias = CWLOutputType | CWLObjectType @@ -49,9 +82,76 @@ class CWLRuntimeParameterContext(TypedDict, total=False): ram: float | str outdirSize: float | str tmpdirSize: float | str + exitCode: int class CWLParameterContext(TypedDict, total=False): inputs: CWLObjectType self: CWLOutputType | None runtime: CWLRuntimeParameterContext + + +class DirentType(TypedDict, total=False): + entry: Required[str] + entryname: str + writable: bool + + +def is_cwl_parameter_context_key( + key: Any, +) -> TypeGuard[Literal["inputs", "self", "runtime"]]: + return key in ("inputs", "self", "runtime") + + +def is_directory(value: Any) -> TypeGuard[CWLDirectoryType]: + return isinstance(value, Mapping) and value.get("class") == "Directory" + + +def is_directory_key( + key: Any, +) -> TypeGuard[Literal["class", "location", "path", "basename", "listing"]]: + return key in ("class", "location", "path", "basename", "listing") + + +def is_file(value: Any) -> TypeGuard[CWLFileType]: + return isinstance(value, Mapping) and value.get("class") == "File" + + +def is_file_key( + key: Any, +) -> TypeGuard[ + Literal[ + "class", + "location", + "path", + "basename", + "dirname", + "nameroot", + "nameext", + "checksum", + "size", + "secondaryFiles", + "format", + "contents", + ] +]: + return key in ( + "class", + "location", + "path", + "basename", + "dirname", + "nameroot", + "nameext", + "checksum", + "size", + "secondaryFiles", + "format", + "contents", + ) + + +def is_file_or_directory( + value: Any, +) -> TypeGuard[CWLFileType | CWLDirectoryType]: + return isinstance(value, Mapping) and value.get("class") in ("File", "Directory") diff --git a/tests/test_format.py b/tests/test_format.py index 96af3a5b..02fed14f 100644 --- a/tests/test_format.py +++ b/tests/test_format.py @@ -14,20 +14,21 @@ from cwl_utils.file_formats import check_format from cwl_utils.parser import load_document_by_uri -from cwl_utils.types import CWLObjectType - +from cwl_utils.types import CWLFileType from .util import get_path -def _create_file(format_: str | None = None) -> CWLObjectType: - obj: CWLObjectType = { - "class": "File", - "basename": "example.txt", - "size": 23, - "contents": "hoopla", - "nameroot": "example", - "nameext": "txt", - } +def _create_file(format_: str | None = None) -> CWLFileType: + obj = CWLFileType( + **{ + "class": "File", + "basename": "example.txt", + "size": 23, + "contents": "hoopla", + "nameroot": "example", + "nameext": "txt", + } + ) if format_: obj["format"] = format_ return obj @@ -120,7 +121,7 @@ def test_check_format_no_format() -> None: def test_check_format_missing_file() -> None: """Confirm that a missing file produces no error.""" check_format( - actual_file=[{}], + actual_file=[], input_formats="http://edamontology.org/format_2330", ontology=EDAM, ) From c21e5a110a9f43da59f7a4a8259af75992370088 Mon Sep 17 00:00:00 2001 From: GlassOfWhiskey Date: Tue, 16 Dec 2025 16:37:45 +0100 Subject: [PATCH 2/2] Using `TypeIs` instead of `TypeGuard` --- cwl_utils/types.py | 11 ++++++++--- requirements.txt | 1 + 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cwl_utils/types.py b/cwl_utils/types.py index 1d605306..b3096747 100644 --- a/cwl_utils/types.py +++ b/cwl_utils/types.py @@ -5,6 +5,11 @@ from collections.abc import Mapping, MutableMapping, MutableSequence from typing import Any, Literal, TypeAlias, TypeGuard, TypedDict +if sys.version_info >= (3, 13): + from typing import TypeIs +else: + from typing_extensions import TypeIs + if sys.version_info >= (3, 11): from typing import Required else: @@ -103,7 +108,7 @@ def is_cwl_parameter_context_key( return key in ("inputs", "self", "runtime") -def is_directory(value: Any) -> TypeGuard[CWLDirectoryType]: +def is_directory(value: Any) -> TypeIs[CWLDirectoryType]: return isinstance(value, Mapping) and value.get("class") == "Directory" @@ -113,7 +118,7 @@ def is_directory_key( return key in ("class", "location", "path", "basename", "listing") -def is_file(value: Any) -> TypeGuard[CWLFileType]: +def is_file(value: Any) -> TypeIs[CWLFileType]: return isinstance(value, Mapping) and value.get("class") == "File" @@ -153,5 +158,5 @@ def is_file_key( def is_file_or_directory( value: Any, -) -> TypeGuard[CWLFileType | CWLDirectoryType]: +) -> TypeIs[CWLFileType | CWLDirectoryType]: return isinstance(value, Mapping) and value.get("class") in ("File", "Directory") diff --git a/requirements.txt b/requirements.txt index 6ae186b8..4405f086 100644 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,4 @@ rdflib requests schema-salad >= 8.8.20250205075315,<9 ruamel.yaml >= 0.17.6, < 0.19 +typing_extensions >= 4.10.0