diff --git a/checkup.schema.json b/checkup.schema.json new file mode 100644 index 0000000..f418c89 --- /dev/null +++ b/checkup.schema.json @@ -0,0 +1,678 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://checkup.dev/schemas/checkup.yaml.json", + "title": "Checkup Configuration", + "description": "Configuration file for checkup", + "type": "object", + "properties": { + "tags": { + "type": "object", + "description": "Tags to identify the data product (e.g., product, team)", + "additionalProperties": { + "type": "string" + } + }, + "providers": { + "type": "array", + "description": "Data providers for context enrichment", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "airflow" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "bitbucket" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "conveyor" + }, + "project_name": { + "type": "string" + }, + "api_key": { + "type": "string" + }, + "environment_name": { + "type": "string" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt" + }, + "manifest_path": { + "type": "string", + "default": null + }, + "dbt_project_dir": { + "type": "string", + "default": null + }, + "profiles_dir": { + "type": "string", + "default": null + }, + "verbose": { + "type": "boolean", + "default": false + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "git" + }, + "repo_path": { + "type": "string", + "default": "." 
+ } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "github" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "gitlab" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + } + ] + } + }, + "metrics": { + "type": "array", + "description": "Metrics to calculate", + "items": { + "oneOf": [ + { + "type": "object", + "properties": { + "name": { + "const": "conveyor_is_dirty_deployment" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "conveyor_last_deployment_time" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "conveyor_last_run_status" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_column_test_coverage" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_column_tests" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_columns" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_columns_with_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_columns_without_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_data_tests" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + 
"type": "object", + "properties": { + "name": { + "const": "dbt_flagged_packages" + }, + "flagged_packages": { + "items": { + "type": "string" + }, + "title": "Flagged Packages", + "type": "array" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_models" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_models_not_adhering_to_naming_convention" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_models_with_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_models_without_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_output_columns_without_data_type" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_output_models" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_output_models_with_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_output_models_without_contracts" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_output_models_without_description" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_profile_host" + }, + "profile": { + "anyOf": [ + { + "type": "string" + }, + { + "type": 
"null" + } + ], + "default": null, + "title": "Profile" + }, + "target": { + "title": "Target", + "type": "string" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_supported_version" + }, + "min_version": { + "title": "Min Version", + "type": "string" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_tested_columns" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_tests" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_unit_tests" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "dbt_version" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "git_days_since_last_update" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "git_tracked_file_count" + }, + "pattern": { + "default": "*", + "title": "Pattern", + "type": "string" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "python_version" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "name": { + "const": "python_version_check" + }, + "min_version": { + "title": "Min Version", + "type": "string" + }, + "max_version": { + "title": "Max Version", + "type": "string" + } + }, + "required": [ + "name" + ], + "additionalProperties": false + } + ] + } + }, + "materializer": { + "oneOf": [ + { + "type": "object", + 
"properties": { + "type": { + "const": "console" + }, + "include_indirect": { + "default": false, + "title": "Include Indirect", + "type": "boolean" + }, + "group_tags": { + "default": [], + "items": { + "type": "string" + }, + "title": "Group Tags", + "type": "array" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "type": { + "const": "csv" + }, + "include_indirect": { + "default": false, + "title": "Include Indirect", + "type": "boolean" + }, + "output_path": { + "format": "path", + "title": "Output Path", + "type": "string" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "type": { + "const": "html" + }, + "include_indirect": { + "default": false, + "title": "Include Indirect", + "type": "boolean" + }, + "output_path": { + "format": "path", + "title": "Output Path", + "type": "string" + }, + "group_tag_1": { + "title": "Group Tag 1", + "type": "string" + }, + "group_tag_2": { + "title": "Group Tag 2", + "type": "string" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + }, + { + "type": "object", + "properties": { + "type": { + "const": "sqlalchemy" + }, + "include_indirect": { + "default": false, + "title": "Include Indirect", + "type": "boolean" + }, + "connection_url": { + "format": "password", + "title": "Connection Url", + "type": "string", + "writeOnly": true + }, + "table_name": { + "default": "metrics", + "title": "Table Name", + "type": "string" + }, + "table_schema": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Table Schema" + }, + "connect_args": { + "anyOf": [ + { + "additionalProperties": true, + "type": "object" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Connect Args" + }, + "expand_tags": { + "default": false, + "title": "Expand Tags", + "type": "boolean" + }, + "batch_size": { + "default": 1000, + 
"title": "Batch Size", + "type": "integer" + } + }, + "required": [ + "type" + ], + "additionalProperties": false + } + ] + } + }, + "additionalProperties": false +} diff --git a/checkup.yaml b/checkup.yaml new file mode 100644 index 0000000..85e4ca8 --- /dev/null +++ b/checkup.yaml @@ -0,0 +1,16 @@ +# yaml-language-server: $schema=checkup.schema.json + +tags: + project: checkup + +providers: +- name: git + +metrics: +- name: git_days_since_last_update +- name: git_tracked_file_count + pattern: src/checkup/* +- name: python_version +- name: python_version_check + min_version: '3.13' + max_version: '3.15' diff --git a/plugins/checkup-airflow/pyproject.toml b/plugins/checkup-airflow/pyproject.toml index 364d26b..cff6a8f 100644 --- a/plugins/checkup-airflow/pyproject.toml +++ b/plugins/checkup-airflow/pyproject.toml @@ -16,6 +16,9 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +airflow = "checkup_airflow:AirflowProvider" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-bitbucket/pyproject.toml b/plugins/checkup-bitbucket/pyproject.toml index 5087151..47e5c08 100644 --- a/plugins/checkup-bitbucket/pyproject.toml +++ b/plugins/checkup-bitbucket/pyproject.toml @@ -16,6 +16,9 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +bitbucket = "checkup_bitbucket:BitbucketProvider" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-conveyor/pyproject.toml b/plugins/checkup-conveyor/pyproject.toml index b734070..7293e6d 100644 --- a/plugins/checkup-conveyor/pyproject.toml +++ b/plugins/checkup-conveyor/pyproject.toml @@ -12,6 +12,14 @@ dependencies = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +conveyor = "checkup_conveyor:ConveyorProvider" + +[project.entry-points."checkup.metrics"] 
+conveyor_last_deployment_time = "checkup_conveyor.conveyor_metric:ConveyorLastDeploymentTime" +conveyor_is_dirty_deployment = "checkup_conveyor.conveyor_metric:ConveyorIsDirtyDeployment" +conveyor_last_run_status = "checkup_conveyor.conveyor_metric:ConveyorLastRunStatus" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-dbt/pyproject.toml b/plugins/checkup-dbt/pyproject.toml index aceda83..6656c70 100644 --- a/plugins/checkup-dbt/pyproject.toml +++ b/plugins/checkup-dbt/pyproject.toml @@ -18,6 +18,33 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +dbt = "checkup_dbt:DbtManifestProvider" + +[project.entry-points."checkup.metrics"] +dbt_models = "checkup_dbt:DbtModelsMetric" +dbt_columns = "checkup_dbt:DbtColumnsMetric" +dbt_tests = "checkup_dbt:DbtTestsMetric" +dbt_models_with_description = "checkup_dbt:DbtModelsWithDescriptionMetric" +dbt_models_without_description = "checkup_dbt:DbtModelsWithoutDescriptionMetric" +dbt_columns_with_description = "checkup_dbt:DbtColumnsWithDescriptionMetric" +dbt_columns_without_description = "checkup_dbt:DbtColumnsWithoutDescriptionMetric" +dbt_unit_tests = "checkup_dbt:DbtUnitTestsMetric" +dbt_data_tests = "checkup_dbt:DbtDataTestsMetric" +dbt_column_tests = "checkup_dbt:DbtColumnTestsMetric" +dbt_tested_columns = "checkup_dbt:DbtTestedColumnsMetric" +dbt_column_test_coverage = "checkup_dbt:DbtColumnTestCoverageMetric" +dbt_output_models = "checkup_dbt:DbtOutputModelsMetric" +dbt_output_models_with_description = "checkup_dbt:DbtOutputModelsWithDescriptionMetric" +dbt_output_models_without_description = "checkup_dbt:DbtOutputModelsWithoutDescriptionMetric" +dbt_output_models_without_contracts = "checkup_dbt:DbtOutputModelsWithoutContractsMetric" +dbt_output_columns_without_data_type = "checkup_dbt:DbtOutputColumnsWithoutDataTypeMetric" +dbt_flagged_packages = "checkup_dbt:DbtFlaggedPackagesMetric" +dbt_profile_host = 
"checkup_dbt:DbtProfileHostMetric" +dbt_models_not_adhering_to_naming_convention = "checkup_dbt:DbtModelsNotAdheringToNamingConventionMetric" +dbt_supported_version = "checkup_dbt:DbtSupportedVersionMetric" +dbt_version = "checkup_dbt:DbtVersionMetric" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-git/pyproject.toml b/plugins/checkup-git/pyproject.toml index dccc8d5..37e2bce 100644 --- a/plugins/checkup-git/pyproject.toml +++ b/plugins/checkup-git/pyproject.toml @@ -16,6 +16,13 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +git = "checkup_git:GitProvider" + +[project.entry-points."checkup.metrics"] +git_days_since_last_update = "checkup_git:GitDaysSinceLastUpdateMetric" +git_tracked_file_count = "checkup_git:GitTrackedFileCountMetric" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-github/pyproject.toml b/plugins/checkup-github/pyproject.toml index f7dd520..15bd95e 100644 --- a/plugins/checkup-github/pyproject.toml +++ b/plugins/checkup-github/pyproject.toml @@ -16,6 +16,9 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +github = "checkup_github:GitHubProvider" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-gitlab/pyproject.toml b/plugins/checkup-gitlab/pyproject.toml index bf7e948..1fe49f9 100644 --- a/plugins/checkup-gitlab/pyproject.toml +++ b/plugins/checkup-gitlab/pyproject.toml @@ -16,6 +16,9 @@ dev = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.providers"] +gitlab = "checkup_gitlab:GitLabProvider" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-python/pyproject.toml b/plugins/checkup-python/pyproject.toml index cb8faba..8c40c9d 100644 --- a/plugins/checkup-python/pyproject.toml +++ 
b/plugins/checkup-python/pyproject.toml @@ -11,6 +11,10 @@ dependencies = [ [tool.uv.sources] checkup = { workspace = true } +[project.entry-points."checkup.metrics"] +python_version = "checkup_python.metrics:PythonVersionMetric" +python_version_check = "checkup_python.metrics:PythonVersionCheckMetric" + [build-system] requires = ["hatchling"] build-backend = "hatchling.build" diff --git a/plugins/checkup-python/src/checkup_python/metrics/__init__.py b/plugins/checkup-python/src/checkup_python/metrics/__init__.py index 85b2011..9e86bc1 100644 --- a/plugins/checkup-python/src/checkup_python/metrics/__init__.py +++ b/plugins/checkup-python/src/checkup_python/metrics/__init__.py @@ -1,3 +1,4 @@ from checkup_python.metrics.version import PythonVersionMetric +from checkup_python.metrics.version_check import PythonVersionCheckMetric -__all__ = ["PythonVersionMetric"] +__all__ = ["PythonVersionMetric", "PythonVersionCheckMetric"] diff --git a/pyproject.toml b/pyproject.toml index 79b8ad0..6d9da26 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,13 +14,21 @@ dependencies = [ "jinja2>=3.1.6", "pydantic>=2.11.7", "pyyaml>=6.0", + "questionary>=2.0", "rich>=13.0", "sqlalchemy>=2.0", + "typer>=0.24", ] [project.scripts] checkup = "checkup:main" +[project.entry-points."checkup.materializers"] +console = "checkup.materializers:ConsoleMaterializer" +csv = "checkup.materializers:CSVMaterializer" +html = "checkup.materializers:HTMLMaterializer" +sqlalchemy = "checkup.materializers:SQLAlchemyMaterializer" + [build-system] requires = ["uv_build>=0.8.13,<0.9.0"] build-backend = "uv_build" diff --git a/src/checkup/__init__.py b/src/checkup/__init__.py index 842a796..0ca9cf5 100644 --- a/src/checkup/__init__.py +++ b/src/checkup/__init__.py @@ -1,4 +1,4 @@ -"""Checkup - Extensible metrics calculation framework.""" +"""Checkup - Computational governance framework for measuring data product health.""" from checkup.errors import ( DuplicateMetricNameError, @@ -49,6 +49,6 @@ def 
main() -> None: - """CLI entry point.""" - print("Checkup metrics framework") - print("Import CheckHub to get started") + from checkup.cli import app + + app() diff --git a/src/checkup/cli/__init__.py b/src/checkup/cli/__init__.py new file mode 100644 index 0000000..c23f005 --- /dev/null +++ b/src/checkup/cli/__init__.py @@ -0,0 +1,18 @@ +""" +Checkup CLI application. +""" + +import typer + +from checkup.cli.commands import config, init, run, schema + +app = typer.Typer( + name="checkup", + help="CheckUp - Computational governance framework for measuring data product health", + no_args_is_help=True, +) + +app.command()(run) +app.command()(init) +app.command()(config) +app.command()(schema) diff --git a/src/checkup/cli/commands/__init__.py b/src/checkup/cli/commands/__init__.py new file mode 100644 index 0000000..3e99bc9 --- /dev/null +++ b/src/checkup/cli/commands/__init__.py @@ -0,0 +1,15 @@ +""" +CLI commands. +""" + +from checkup.cli.commands.config import config +from checkup.cli.commands.init import init +from checkup.cli.commands.run import run +from checkup.cli.commands.schema import schema + +__all__ = [ + "config", + "init", + "run", + "schema", +] diff --git a/src/checkup/cli/commands/config.py b/src/checkup/cli/commands/config.py new file mode 100644 index 0000000..e27f25e --- /dev/null +++ b/src/checkup/cli/commands/config.py @@ -0,0 +1,23 @@ +""" +Config command. Modify an existing config file. +""" + +from pathlib import Path +from typing import Annotated + +import typer + +from checkup.cli.config_wizard import edit_config + + +def config( + config_path: Annotated[ + Path | None, + typer.Option("--config", "-c", help="Path to config file"), + ] = None, +) -> None: + """ + Modify the checkup.yaml config file. 
+ """ + + edit_config(config_path=config_path) diff --git a/src/checkup/cli/commands/init.py b/src/checkup/cli/commands/init.py new file mode 100644 index 0000000..a224cb1 --- /dev/null +++ b/src/checkup/cli/commands/init.py @@ -0,0 +1,23 @@ +""" +Init command. Create a new config file. +""" + +from pathlib import Path +from typing import Annotated + +import typer + +from checkup.cli.config_wizard import create_config + + +def init( + output: Annotated[ + Path | None, + typer.Option("--output", "-o", help="Output path for config file"), + ] = None, +) -> None: + """ + Create a checkup.yaml config file. + """ + + create_config(output_path=output) diff --git a/src/checkup/cli/commands/run.py b/src/checkup/cli/commands/run.py new file mode 100644 index 0000000..5fab572 --- /dev/null +++ b/src/checkup/cli/commands/run.py @@ -0,0 +1,58 @@ +""" +Run command. Run metrics and materialize results. +""" + +import logging +from pathlib import Path +from typing import Annotated + +import typer + +from checkup.cli.executor import execute_checkup +from checkup.cli.utils import apply_cli_overrides +from checkup.configuration import load_config + + +def run( + config: Annotated[ + Path | None, + typer.Option("--config", "-c", help="Path to config file"), + ] = None, + tag: Annotated[ + list[str] | None, + typer.Option("--tag", "-t", help="Set tags (key=value)"), + ] = None, + provider: Annotated[ + list[str] | None, + typer.Option("--provider", "-p", help="Set providers (name or name:key=value)"), + ] = None, + metric: Annotated[ + list[str] | None, + typer.Option("--metric", "-m", help="Set metrics (name or name:key=value)"), + ] = None, + materializer: Annotated[ + str | None, + typer.Option( + "--materializer", help="Set materializer (type or type:key=value)" + ), + ] = None, + dry_run: Annotated[ + bool, + typer.Option("--dry-run", help="Don't materialize, just print"), + ] = False, + verbose: Annotated[ + bool, + typer.Option("--verbose", "-v", help="Verbose output"), + ] = 
False, +) -> None: + """ + Run metrics and materialize results. + """ + + if verbose: + logging.basicConfig(level=logging.DEBUG) + + cfg = load_config(config_path=config) + cfg = apply_cli_overrides(cfg, tag, provider, metric) + + execute_checkup(cfg, materializer="console" if dry_run else materializer) diff --git a/src/checkup/cli/commands/schema.py b/src/checkup/cli/commands/schema.py new file mode 100644 index 0000000..572698b --- /dev/null +++ b/src/checkup/cli/commands/schema.py @@ -0,0 +1,29 @@ +""" +Schema command. Generate JSON schema for checkup.yaml. +""" + +from pathlib import Path +from typing import Annotated + +import typer +from rich.console import Console + +from checkup.configuration.io import SCHEMA_FILENAME +from checkup.configuration.schema import write_schema + +console = Console() + + +def schema( + output: Annotated[ + Path | None, + typer.Option("--output", "-o", help="Output path for schema file"), + ] = None, +) -> None: + """ + Generate JSON schema for checkup.yaml. + """ + + path = output or Path.cwd() / SCHEMA_FILENAME + write_schema(path) + console.print(f"[green]Schema written to {path}[/green]") diff --git a/src/checkup/cli/config_wizard/__init__.py b/src/checkup/cli/config_wizard/__init__.py new file mode 100644 index 0000000..a1048eb --- /dev/null +++ b/src/checkup/cli/config_wizard/__init__.py @@ -0,0 +1,8 @@ +""" +Interactive config generation and editing. +""" + +from .create import create_config +from .edit import edit_config + +__all__ = ["create_config", "edit_config"] diff --git a/src/checkup/cli/config_wizard/_common.py b/src/checkup/cli/config_wizard/_common.py new file mode 100644 index 0000000..cb4ec37 --- /dev/null +++ b/src/checkup/cli/config_wizard/_common.py @@ -0,0 +1,133 @@ +""" +Shared utilities for the config wizard. 
+""" + +from pathlib import Path +from typing import TYPE_CHECKING + +import yaml +from rich.console import Console + +from checkup.configuration.io import SCHEMA_FILENAME +from checkup.configuration.models import CheckupConfig +from checkup.configuration.schema import write_schema + +if TYPE_CHECKING: + import questionary + + from checkup.registry import PluginRegistry + +console = Console(markup=False, highlight=False) + + +def get_questionary() -> "questionary": + """ + Lazy import questionary to avoid slow startup. + """ + + import questionary + + return questionary + + +def select_multiple( + available: list[str], + selected: list[str], + item_name: str, +) -> list[str]: + """ + Select multiple items with fuzzy search. + """ + + if not available: + console.print( + f"[yellow]No {item_name} found. Install checkup plugins.[/yellow]", + markup=True, + ) + return [] + + selected_count = len([s for s in selected if s in available]) + console.print(f"Currently selected: {selected_count}/{len(available)} {item_name}") + + choices = [ + get_questionary().Choice(name, checked=name in selected) + for name in sorted(available) + ] + + result = ( + get_questionary() + .checkbox( + f"Select {item_name}:", + choices=choices, + use_search_filter=True, + use_jk_keys=False, + instruction="(↑↓ navigate, space toggle, type to filter, enter confirm)", + ) + .ask() + ) + + return result or [] + + +def select_materializer(current: str | None, registry: "PluginRegistry") -> str | None: + """ + Select materializer interactively. 
+ """ + + available = registry.list_materializer_names() + + if not available: + return "console" + + default = ( + current if current in available else (available[0] if available else "console") + ) + + return ( + get_questionary() + .select( + "Materializer type:", + choices=available, + default=default, + use_search_filter=True, + use_jk_keys=False, + ) + .ask() + ) + + +def write_config(path: Path, config: CheckupConfig) -> None: + """ + Write config to file with empty lines between sections. + Also generates the JSON schema file. + """ + + console.print(f"\n[bold]Writing config to {path}[/bold]", markup=True) + + data = config.model_dump(exclude_defaults=True) + + lines = [f"# yaml-language-server: $schema={SCHEMA_FILENAME}"] + + # Write sections separately with blank lines between them + for key in CheckupConfig.model_fields: + if key not in data or not data[key]: + continue + lines.append("") + lines.append( + yaml.dump( + {key: data[key]}, default_flow_style=False, sort_keys=False + ).rstrip() + ) + + with open(path, "w") as f: + f.write("\n".join(lines)) + f.write("\n") + + schema_path = path.parent / SCHEMA_FILENAME + write_schema(schema_path) + console.print(f"[green]Schema written to {schema_path}[/green]", markup=True) + + console.print("[green]Done![/green]", markup=True) + console.print( + "Run [bold]checkup run[/bold] to test your configuration.", markup=True + ) diff --git a/src/checkup/cli/config_wizard/create.py b/src/checkup/cli/config_wizard/create.py new file mode 100644 index 0000000..77171cb --- /dev/null +++ b/src/checkup/cli/config_wizard/create.py @@ -0,0 +1,134 @@ +""" +Interactive config creation. 
+""" + +from pathlib import Path +from typing import TYPE_CHECKING + +from checkup.configuration.io import CONFIG_FILENAME +from checkup.configuration.models import ( + CheckupConfig, + MaterializerConfig, + MetricConfig, + ProviderConfig, +) +from checkup.registry import get_registry + +from ._common import ( + console, + get_questionary, + select_materializer, + select_multiple, + write_config, +) + +if TYPE_CHECKING: + from checkup.registry import PluginRegistry + + +def create_config(output_path: Path | None = None) -> None: + """ + Interactively create a new config file. + """ + + path = output_path or Path.cwd() / CONFIG_FILENAME + + if not _confirm_overwrite(path): + return + + registry = get_registry() + config = _build_config(registry) + + if config is None: + console.print("\n[yellow]Cancelled.[/yellow]", markup=True) + return + + write_config(path, config) + + +def _confirm_overwrite(path: Path) -> bool: + if not path.exists(): + return True + + overwrite = ( + get_questionary().confirm(f"{path} exists. 
Overwrite?", default=False).ask() + ) + if overwrite is None: + console.print("\n[yellow]Cancelled.[/yellow]", markup=True) + return False + return overwrite + + +def _build_config(registry: "PluginRegistry") -> CheckupConfig | None: + tags = _prompt_tags() + if tags is None: + return None + + provider_names = _prompt_providers(registry) + if provider_names is None: + return None + + metric_names = _prompt_metrics(registry, provider_names) + if metric_names is None: + return None + + mat = _prompt_materializer(registry) + if mat is None: + return None + + return CheckupConfig( + tags=tags, + providers=[ProviderConfig(name=p) for p in provider_names], + metrics=[MetricConfig(name=m) for m in metric_names], + materializer=MaterializerConfig(type=mat) if mat != "console" else None, + ) + + +def _prompt_tags() -> dict[str, str] | None: + console.print("\n[bold]Tags[/bold]", markup=True) + console.print( + "Tags identify your data product (e.g., product=my-product, team=analytics)" + ) + return _collect_tags() + + +def _prompt_providers(registry: "PluginRegistry") -> list[str] | None: + console.print("\n[bold]Providers[/bold]", markup=True) + return select_multiple(registry.list_provider_names(), [], "providers") + + +def _prompt_metrics( + registry: "PluginRegistry", provider_names: list[str] +) -> list[str] | None: + console.print("\n[bold]Metrics[/bold]", markup=True) + with console.status("Loading metrics..."): + available_metrics = registry.list_compatible_metric_names(provider_names) + return select_multiple(available_metrics, [], "metrics") + + +def _prompt_materializer(registry: "PluginRegistry") -> str | None: + console.print("\n[bold]Materializer[/bold]", markup=True) + return select_materializer(None, registry) + + +def _collect_tags() -> dict[str, str] | None: + tags: dict[str, str] = {} + + while True: + tag: str = ( + get_questionary() + .text( + "Add tag (key=value, or empty to finish):", + ) + .ask() + ) + + if tag is None: + return None + if not tag: + 
break + if "=" in tag: + key, value = tag.split("=", 1) + tags[key.strip()] = value.strip() + + return tags diff --git a/src/checkup/cli/config_wizard/edit.py b/src/checkup/cli/config_wizard/edit.py new file mode 100644 index 0000000..56b6ac5 --- /dev/null +++ b/src/checkup/cli/config_wizard/edit.py @@ -0,0 +1,200 @@ +""" +Interactive config editing. +""" + +from pathlib import Path +from typing import TYPE_CHECKING + +from checkup.configuration import load_config +from checkup.configuration.io import CONFIG_FILENAME +from checkup.configuration.models import ( + CheckupConfig, + MaterializerConfig, + MetricConfig, + ProviderConfig, +) +from checkup.registry import get_registry + +from ._common import ( + console, + get_questionary, + select_materializer, + select_multiple, + write_config, +) + +if TYPE_CHECKING: + from checkup.registry import PluginRegistry + + +def edit_config(config_path: Path | None = None) -> None: + """ + Interactively edit an existing config file. + """ + + path = config_path or Path.cwd() / CONFIG_FILENAME + + if not path.exists(): + console.print(f"[red]Config file not found: {path}[/red]", markup=True) + console.print("Run [bold]checkup init[/bold] to create one.", markup=True) + return + + config = load_config(config_path=path) + registry = get_registry() + + console.print(f"[bold]Editing {path}[/bold]\n", markup=True) + _show_current_config(config) + + config_new = _build_config(config, registry) + + if config_new is None: + console.print("\n[yellow]Cancelled.[/yellow]", markup=True) + return + + write_config(path, config_new) + + +def _build_config( + config: CheckupConfig, registry: "PluginRegistry" +) -> CheckupConfig | None: + provider_configs = {p.name: p.config for p in config.providers} + metric_configs = {m.name: m.config for m in config.metrics} + + tags = _prompt_edit_tags(config) + if tags is None: + return None + + provider_names = _prompt_edit_providers(config, registry) + if provider_names is None: + return None + + 
metric_names = _prompt_edit_metrics(config, registry, provider_names) + if metric_names is None: + return None + + mat = _prompt_edit_materializer(config, registry) + if mat is None: + return None + + return CheckupConfig( + tags=tags, + providers=[ + ProviderConfig(name=p, config=provider_configs.get(p, {})) + for p in provider_names + ], + metrics=[ + MetricConfig(name=m, config=metric_configs.get(m, {})) for m in metric_names + ], + materializer=MaterializerConfig(type=mat) if mat else None, + ) + + +def _prompt_edit_tags(config: "CheckupConfig") -> dict | None: + edit = get_questionary().confirm("Edit tags?", default=False).ask() + if edit is None: + return None + elif edit: + return _edit_tags(dict(config.tags)) + return dict(config.tags) + + +def _prompt_edit_providers( + config: "CheckupConfig", + registry: "PluginRegistry", +) -> list[str] | None: + current_names = [p.name for p in config.providers] + + edit = get_questionary().confirm("Edit providers?", default=False).ask() + if edit is None: + return None + elif edit: + return select_multiple( + registry.list_provider_names(), + current_names, + "providers", + ) + return current_names + + +def _prompt_edit_metrics( + config: "CheckupConfig", + registry: "PluginRegistry", + provider_names: list[str], +) -> list[str] | None: + current_names = [m.name for m in config.metrics] + + with console.status("Loading metrics..."): + available = registry.list_compatible_metric_names(provider_names) + + edit = get_questionary().confirm("Edit metrics?", default=False).ask() + if edit is None: + return None + elif edit: + return select_multiple(available, current_names, "metrics") + return [m.name for m in config.metrics if m.name in available] + + +def _prompt_edit_materializer( + config: "CheckupConfig", + registry: "PluginRegistry", +) -> str | None: + edit = get_questionary().confirm("Edit materializer?", default=False).ask() + if edit is None: + return None + elif edit: + return select_materializer( + 
config.materializer.type if config.materializer else None, + registry, + ) + return config.materializer.type if config.materializer else "" + + +def _edit_tags(tags: dict) -> dict | None: + console.print(f"Current tags: {tags or '(none)'}") + + while True: + action: str | None = ( + get_questionary() + .select( + "Action:", + choices=["done", "add", "remove"], + ) + .ask() + ) + + if action is None: + return None + if action == "done": + break + elif action == "add": + tag: str | None = get_questionary().text("Tag (key=value):").ask() + if tag is None: + return None + if tag and "=" in tag: + key, value = tag.split("=", 1) + tags[key.strip()] = value.strip() + elif action == "remove" and tags: + key = ( + get_questionary() + .select( + "Key to remove:", + choices=list(tags.keys()), + ) + .ask() + ) + if key is None: + return None + if key: + tags.pop(key, None) + + return tags + + +def _show_current_config(config: "CheckupConfig") -> None: + console.print("[bold]Current configuration:[/bold]", markup=True) + console.print(f" Tags: {dict(config.tags) or '(none)'}") + console.print(f" Providers: {[p.name for p in config.providers] or '(none)'}") + console.print(f" Metrics: {[m.name for m in config.metrics] or '(none)'}") + mat = config.materializer.type if config.materializer else "(none)" + console.print(f" Materializer: {mat}") + console.print() diff --git a/src/checkup/cli/executor.py b/src/checkup/cli/executor.py new file mode 100644 index 0000000..c587c76 --- /dev/null +++ b/src/checkup/cli/executor.py @@ -0,0 +1,148 @@ +""" +Execute checkhub from CLI configuration. 
+"""
+
+import logging
+from typing import TYPE_CHECKING
+
+from rich.console import Console
+
+from checkup.configuration import CheckupConfig
+from checkup.hub import CheckHub
+from checkup.materializers import ConsoleMaterializer
+from checkup.providers.tags import TagProvider
+from checkup.registry import get_registry
+
+if TYPE_CHECKING:
+    from checkup.materializers import Materializer
+    from checkup.metric import Metric
+    from checkup.provider import Provider
+    from checkup.registry.discovery import PluginRegistry
+
+logger = logging.getLogger(__name__)
+console = Console()
+
+
+def execute_checkup(config: CheckupConfig, materializer: str | None = None) -> None:
+    """
+    Execute checkup with the given configuration.
+
+    Args:
+        config: Loaded checkup configuration
+        materializer: Override materializer type (e.g., "console")
+    """
+
+    registry = get_registry()
+
+    providers = _resolve_providers(config, registry)
+    if not providers:
+        console.print("[yellow]No providers configured[/yellow]")
+        return
+
+    metrics = _resolve_metrics(config, registry)
+    if not metrics:
+        console.print("[yellow]No metrics configured[/yellow]")
+        return
+
+    materializer = _resolve_materializer(config, registry, materializer)
+
+    console.print(f"[blue]Running {len(metrics)} metrics...[/blue]")
+
+    result = CheckHub().with_metrics(metrics).with_providers(providers).measure()
+
+    if result.errors:
+        for _, error in result.errors:
+            console.print(f"[red]Error: {error}[/red]")
+
+    result.materialize(materializer)
+
+
+def _resolve_providers(
+    config: CheckupConfig,
+    registry: "PluginRegistry",
+) -> list["Provider"]:
+    """
+    Resolve provider configs to provider instances. 
+ """ + + providers: list[Provider] = [] + + if config.tags: + providers.append(TagProvider(**config.tags)) + + for provider_config in config.providers: + provider_cls = registry.get_provider(provider_config.name) + if provider_cls is None: + console.print(f"[yellow]Unknown provider: {provider_config.name}[/yellow]") + continue + + try: + provider = provider_cls(**provider_config.config) + providers.append(provider) + except Exception as e: + console.print( + f"[red]Failed to instantiate provider {provider_config.name}: {e}[/red]" + ) + + return providers + + +def _resolve_metrics( + config: CheckupConfig, + registry: "PluginRegistry", +) -> list["Metric"]: + """ + Resolve metric configs to metric instances. + """ + + metrics: list[Metric] = [] + + for metric_config in config.metrics: + metric_cls = registry.get_metric(metric_config.name) + if metric_cls is None: + console.print(f"[yellow]Unknown metric: {metric_config.name}[/yellow]") + continue + + try: + metric = metric_cls(**metric_config.config) + metrics.append(metric) + except Exception as e: + console.print( + f"[red]Failed to instantiate metric {metric_config.name}: {e}[/red]" + ) + + return metrics + + +def _resolve_materializer( + config: CheckupConfig, + registry: "PluginRegistry", + override: str | None = None, +) -> "Materializer": + """ + Resolve materializer config to materializer instance. 
+ """ + + if override: + mat_type = override + mat_config = {} + elif config.materializer: + mat_type = config.materializer.type + mat_config = config.materializer.config + else: + mat_type = "console" + mat_config = {} + + materializer_cls = registry.get_materializer(mat_type) + + if materializer_cls is None: + console.print( + f"[yellow]Unknown materializer: {mat_type}, using console[/yellow]" + ) + return ConsoleMaterializer() + + try: + return materializer_cls(**mat_config) + except Exception as e: + console.print(f"[red]Failed to instantiate materializer {mat_type}: {e}[/red]") + return ConsoleMaterializer() diff --git a/src/checkup/cli/utils.py b/src/checkup/cli/utils.py new file mode 100644 index 0000000..32ed4bd --- /dev/null +++ b/src/checkup/cli/utils.py @@ -0,0 +1,75 @@ +""" +CLI utility functions. +""" + +import logging + +from checkup.configuration import CheckupConfig, MetricConfig, ProviderConfig + +logger = logging.getLogger(__name__) + + +def apply_cli_overrides( + cfg: CheckupConfig, + tags: list[str] | None, + providers: list[str] | None, + metrics: list[str] | None, +) -> CheckupConfig: + """ + Apply CLI flag overrides to config. 
+ + When CLI arguments are provided, they replace the config file values + """ + + if tags: + new_tags = {} + for t in tags: + if "=" in t: + key, value = t.split("=", 1) + new_tags[key] = value + else: + new_tags = dict(cfg.tags) + + if providers: + new_providers = [] + for p in providers: + name, config = parse_cli_item(p) + new_providers.append(ProviderConfig(name=name, config=config)) + else: + new_providers = list(cfg.providers) + + if metrics: + new_metrics = [] + for m in metrics: + name, config = parse_cli_item(m) + new_metrics.append(MetricConfig(name=name, config=config)) + else: + new_metrics = list(cfg.metrics) + + return CheckupConfig( + tags=new_tags, + providers=new_providers, + metrics=new_metrics, + materializer=cfg.materializer, + ) + + +def parse_cli_item(item: str) -> tuple[str, dict]: + """ + Parse CLI item like 'name' or 'name:key=value,key2=value2'. + """ + + if ":" not in item: + return item, {} + + name, config_str = item.split(":", 1) + config: dict[str, str] = {} + + for pair in config_str.split(","): + if "=" not in pair: + logger.warning("Ignoring malformed config pair %r in %r", pair, item) + continue + key, value = pair.split("=", 1) + config[key] = value + + return name, config diff --git a/src/checkup/configuration/__init__.py b/src/checkup/configuration/__init__.py new file mode 100644 index 0000000..84328d0 --- /dev/null +++ b/src/checkup/configuration/__init__.py @@ -0,0 +1,18 @@ +"""CheckUp config files.""" + +from checkup.configuration.io import find_config_files, load_config +from checkup.configuration.models import ( + CheckupConfig, + MaterializerConfig, + MetricConfig, + ProviderConfig, +) + +__all__ = [ + "CheckupConfig", + "MaterializerConfig", + "MetricConfig", + "ProviderConfig", + "find_config_files", + "load_config", +] diff --git a/src/checkup/configuration/env.py b/src/checkup/configuration/env.py new file mode 100644 index 0000000..bb9d63a --- /dev/null +++ b/src/checkup/configuration/env.py @@ -0,0 +1,149 @@ +""" 
+Environment variable handling for configuration. +""" + +import logging +import os +import re +from typing import Any + +logger = logging.getLogger(__name__) + + +def substitute_env_vars(value: Any) -> Any: + """ + Recursively substitute ${VAR} patterns with environment variables. + + Supports: + - ${VAR} - substitute with env var value + - ${VAR:-default} - substitute with default if VAR not set + + Args: + value: Value to process (string, dict, list, or other) + + Returns: + Value with environment variables substituted + """ + + if isinstance(value, str): + pattern = r"\$\{([^}:]+)(?::-([^}]*))?\}" + + def replace(match: re.Match) -> str: + var_name = match.group(1) + default = match.group(2) + env_value = os.environ.get(var_name) + if env_value is not None: + return env_value + if default is not None: + return default + logger.warning("Environment variable %s not found", var_name) + return match.group(0) + + return re.sub(pattern, replace, value) + + elif isinstance(value, dict): + return {k: substitute_env_vars(v) for k, v in value.items()} + + elif isinstance(value, list): + return [substitute_env_vars(item) for item in value] + + return value + + +def apply_naming_convention_overrides(config: dict[str, Any]) -> dict[str, Any]: + """ + Apply CHECKUP__* environment variable overrides. + + Environment variables like CHECKUP__MATERIALIZER__SQLALCHEMY__CONNECTION_URL + override corresponding config values (only if not already set). 
+
+    Args:
+        config: Configuration dict to apply overrides to
+
+    Returns:
+        Configuration with environment variable overrides applied
+    """
+
+    prefix = "CHECKUP__"
+
+    for key, value in os.environ.items():
+        if not key.startswith(prefix):
+            continue
+
+        parts = key[len(prefix) :].lower().split("__")
+        section = parts[0]
+
+        if section == "materializer":
+            if len(parts) >= 3:
+                _apply_materializer_override(config, parts, value, key)
+            else:
+                logger.warning(
+                    "Ignoring malformed env var %s (expected CHECKUP__MATERIALIZER__<TYPE>__<KEY>)",
+                    key,
+                )
+
+        elif section == "provider":
+            if len(parts) >= 3:
+                _apply_provider_override(config, parts, value, key)
+            else:
+                logger.warning(
+                    "Ignoring malformed env var %s (expected CHECKUP__PROVIDER__<NAME>__<KEY>)",
+                    key,
+                )
+
+        else:
+            logger.warning(
+                "Ignoring unknown env var %s (unknown section %r)", key, section
+            )
+
+    return config
+
+
+def _apply_materializer_override(
+    config: dict[str, Any],
+    parts: list[str],
+    value: str,
+    key: str,
+) -> None:
+    """
+    Apply a materializer config override from env var.
+    """
+
+    materializer_type = parts[1]
+    config_key = "_".join(parts[2:])
+
+    if "materializer" not in config:
+        return
+
+    mat_type = config.get("materializer", {}).get("type", "")
+    if mat_type and mat_type.lower() == materializer_type:
+        if config_key not in config["materializer"]:
+            config["materializer"][config_key] = value
+            logger.debug("Applied env override: %s", key)
+
+
+def _apply_provider_override(
+    config: dict[str, Any],
+    parts: list[str],
+    value: str,
+    key: str,
+) -> None:
+    """
+    Apply a provider config override from env var. 
+    """
+
+    provider_name = parts[1]
+    config_key = "_".join(parts[2:])
+
+    if "providers" not in config:
+        return
+
+    for provider in config["providers"]:
+        if isinstance(provider, dict):
+            # Entries use the {"name": ..., <option>: ...} shape (see
+            # parse_providers), so match on the "name" field.
+            name = provider.get("name")
+            if isinstance(name, str) and name.lower() == provider_name:
+                if config_key not in provider:
+                    provider[config_key] = value
+                    logger.debug("Applied env override: %s", key)
diff --git a/src/checkup/configuration/io.py b/src/checkup/configuration/io.py
new file mode 100644
index 0000000..1ae4823
--- /dev/null
+++ b/src/checkup/configuration/io.py
@@ -0,0 +1,213 @@
+"""
+Configuration file I/O and merging.
+"""
+
+import logging
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+from checkup.configuration.env import (
+    apply_naming_convention_overrides,
+    substitute_env_vars,
+)
+from checkup.configuration.models import (
+    CheckupConfig,
+    MaterializerConfig,
+    MetricConfig,
+    ProviderConfig,
+)
+
+logger = logging.getLogger(__name__)
+
+CONFIG_FILENAME = "checkup.yaml"
+SCHEMA_FILENAME = "checkup.schema.json"
+
+
+def load_yaml_file(path: Path) -> dict[str, Any]:
+    """
+    Load a single YAML file.
+    """
+
+    if not path.exists():
+        return {}
+
+    try:
+        with open(path) as f:
+            data = yaml.safe_load(f)
+            return data if data else {}
+    except yaml.YAMLError as e:
+        logger.error("Failed to parse YAML config %s: %s", path, e)
+        raise
+
+
+def find_config_files(start_dir: Path) -> list[Path]:
+    """
+    Find all checkup.yaml files from start_dir up to filesystem root. 
+
+    Returns:
+        List of paths, ordered from root to start_dir (for merging)
+    """
+
+    config_files = []
+    current = start_dir.resolve()
+
+    while True:
+        config_path = current / CONFIG_FILENAME
+        if config_path.exists():
+            config_files.append(config_path)
+
+        parent = current.parent
+        if parent == current:
+            break
+        current = parent
+
+    return list(reversed(config_files))
+
+
+def merge_configs(base: dict[str, Any], override: dict[str, Any]) -> dict[str, Any]:
+    """
+    Merge two configuration dicts (override wins).
+    """
+
+    result = base.copy()
+
+    for key, value in override.items():
+        if key == "tags" and key in result:
+            result[key] = {**result.get(key, {}), **value}
+        elif key == "providers":
+            result[key] = value
+        elif key == "metrics":
+            if key in result:
+                base_metrics = {
+                    (m if isinstance(m, str) else m.get("name", "")): m
+                    for m in result.get(key, [])
+                }
+                for metric in value:
+                    name = metric if isinstance(metric, str) else metric.get("name", "")
+                    base_metrics[name] = metric
+                result[key] = list(base_metrics.values())
+            else:
+                result[key] = value
+        else:
+            result[key] = value
+
+    return result
+
+
+def parse_providers(raw: list[Any] | None) -> list[ProviderConfig]:
+    """
+    Parse provider configuration from raw YAML.
+
+    Supports:
+    - name: git
+    - name: dbt
+      project_dir: ./dbt
+    """
+
+    if not raw:
+        return []
+
+    providers = []
+    for item in raw:
+        if isinstance(item, str):
+            providers.append(ProviderConfig(name=item))
+        elif isinstance(item, dict):
+            name = item.get("name")
+            if name:
+                config = {k: v for k, v in item.items() if k != "name"}
+                providers.append(ProviderConfig(name=name, config=config))
+    return providers
+
+
+def parse_metrics(raw: list[Any] | None) -> list[MetricConfig]:
+    """
+    Parse metric configuration from raw YAML. 
+ + Supports: + - name: git_days_since_last_update + - name: python_version + version: "3.12" + """ + + if not raw: + return [] + + metrics = [] + for item in raw: + if isinstance(item, str): + metrics.append(MetricConfig(name=item)) + elif isinstance(item, dict): + name = item.get("name") + if name: + config = {k: v for k, v in item.items() if k != "name"} + metrics.append(MetricConfig(name=name, config=config)) + return metrics + + +def parse_materializer(raw: dict[str, Any] | None) -> MaterializerConfig | None: + """ + Parse materializer configuration from raw YAML. + """ + + if not raw: + return None + + raw_copy = raw.copy() + mat_type = raw_copy.pop("type", None) + if not mat_type: + return None + + return MaterializerConfig(type=mat_type, config=raw_copy) + + +def load_config( + config_path: Path | None = None, + start_dir: Path | None = None, +) -> CheckupConfig: + """ + Load checkup configuration with hierarchy and env var substitution. + + Resolution order: + 1. Find all checkup.yaml files from start_dir up to root + 2. Merge configs (child overrides parent) + 3. Apply naming convention env vars (CHECKUP__*) + 4. 
Substitute ${VAR} references + + Args: + config_path: Explicit config file path (skips hierarchy search) + start_dir: Directory to start searching from (defaults to cwd) + + Returns: + Merged and resolved CheckupConfig + """ + + if config_path: + raw = load_yaml_file(config_path) + else: + start = start_dir or Path.cwd() + config_files = find_config_files(start) + + if not config_files: + logger.debug("No config files found") + return CheckupConfig.empty() + + raw = {} + for cf in config_files: + logger.debug("Loading config: %s", cf) + file_config = load_yaml_file(cf) + raw = merge_configs(raw, file_config) + + # Apply naming convention env vars first (lowest priority) + raw = apply_naming_convention_overrides(raw) + + # Then substitute ${VAR} references (highest priority) + raw = substitute_env_vars(raw) + + return CheckupConfig( + tags=raw.get("tags", {}), + providers=parse_providers(raw.get("providers")), + metrics=parse_metrics(raw.get("metrics")), + materializer=parse_materializer(raw.get("materializer")), + ) diff --git a/src/checkup/configuration/models.py b/src/checkup/configuration/models.py new file mode 100644 index 0000000..5a499ea --- /dev/null +++ b/src/checkup/configuration/models.py @@ -0,0 +1,41 @@ +""" +Configuration models. 
+""" + +from typing import Any + +from pydantic import BaseModel, Field + + +class ProviderConfig(BaseModel): + """Configuration for a single provider.""" + + name: str + config: dict[str, Any] = Field(default_factory=dict) + + +class MetricConfig(BaseModel): + """Configuration for a single metric.""" + + name: str + config: dict[str, Any] = Field(default_factory=dict) + + +class MaterializerConfig(BaseModel): + """Configuration for the materializer.""" + + type: str + config: dict[str, Any] = Field(default_factory=dict) + + +class CheckupConfig(BaseModel): + """Complete checkup configuration.""" + + tags: dict[str, Any] = Field(default_factory=dict) + providers: list[ProviderConfig] = Field(default_factory=list) + metrics: list[MetricConfig] = Field(default_factory=list) + materializer: MaterializerConfig | None = None + + @classmethod + def empty(cls) -> "CheckupConfig": + return cls() diff --git a/src/checkup/configuration/schema.py b/src/checkup/configuration/schema.py new file mode 100644 index 0000000..e9b2301 --- /dev/null +++ b/src/checkup/configuration/schema.py @@ -0,0 +1,211 @@ +""" +JSON Schema generation for checkup.yaml. +""" + +import inspect +import json +from pathlib import Path +from typing import Any, get_origin, get_type_hints + +from checkup.registry import get_registry + +SCHEMA_VERSION = "https://json-schema.org/draft/2020-12/schema" +SCHEMA_ID = "https://checkup.dev/schemas/checkup.yaml.json" + + +def _get_pydantic_schema(cls: type) -> dict | None: + """ + Get JSON schema from a Pydantic model. 
+ """ + + if not hasattr(cls, "model_json_schema"): + return None + + try: + schema = cls.model_json_schema() + + # Remove Pydantic metadata we don't need in the config schema + schema.pop("$defs", None) # Inline type definitions + schema.pop("title", None) # We use entry point names, not class names + + if not schema.get("properties"): + return None + + return schema + except Exception: + return None + + +def _python_type_to_json_schema_type(hint: type) -> str: + """ + Map a Python type hint to a JSON Schema type. + """ + + origin = get_origin(hint) + if origin is not None: + # For Union types, just use string as fallback + return "string" + + if hint is str: + return "string" + if hint is int: + return "integer" + if hint is float: + return "number" + if hint is bool: + return "boolean" + if hint is Path or (isinstance(hint, type) and issubclass(hint, Path)): + return "string" + + return "string" + + +def _get_provider_schema(cls: type) -> dict | None: + """ + Get JSON schema for a provider from its __init__ signature. 
+ """ + + try: + sig = inspect.signature(cls.__init__) + hints = get_type_hints(cls.__init__) + except Exception: + return None + + properties = {} + required = [] + + for name, param in sig.parameters.items(): + if name == "self": + continue + + prop: dict[str, Any] = {} + + if name in hints: + prop["type"] = _python_type_to_json_schema_type(hints[name]) + else: + prop["type"] = "string" + + if param.default is not inspect.Parameter.empty: + default = param.default + # Convert Path to string for JSON + if hasattr(default, "__fspath__"): + default = str(default) + prop["default"] = default + else: + required.append(name) + + properties[name] = prop + + if not properties: + return None + + schema: dict[str, Any] = { + "type": "object", + "properties": properties, + } + if required: + schema["required"] = required + + return schema + + +def _build_oneof_schema( + names: list[str], + schemas: dict[str, dict], + key_field: str = "name", +) -> dict[str, Any]: + """ + Build a oneOf schema for a list of named items. + """ + + variants = [] + for name in names: + variant: dict[str, Any] = { + "type": "object", + "properties": {key_field: {"const": name}}, + "required": [key_field], + "additionalProperties": False, + } + if name in schemas and "properties" in schemas[name]: + variant["properties"].update(schemas[name]["properties"]) + variants.append(variant) + + return {"oneOf": variants} if variants else {"type": "object"} + + +def _collect_schemas( + items: dict[str, type], + schema_fn: callable, +) -> tuple[list[str], dict[str, dict]]: + """ + Collect schemas for a dict of named classes. + """ + + names = sorted(items.keys()) + schemas = {} + for name, cls in items.items(): + schema = schema_fn(cls) + if schema: + schemas[name] = schema + return names, schemas + + +def generate_schema() -> dict: + """ + Generate JSON schema for checkup.yaml configuration. + + Dynamically includes available providers, metrics, and materializers + using Pydantic's schema generation. 
+ """ + + registry = get_registry() + + provider_names, provider_schemas = _collect_schemas( + registry.providers, _get_provider_schema + ) + metric_names, metric_schemas = _collect_schemas( + registry.metrics, _get_pydantic_schema + ) + materializer_names, materializer_schemas = _collect_schemas( + registry.materializers, _get_pydantic_schema + ) + + return { + "$schema": SCHEMA_VERSION, + "$id": SCHEMA_ID, + "title": "Checkup Configuration", + "description": "Configuration file for checkup", + "type": "object", + "properties": { + "tags": { + "type": "object", + "description": "Tags to identify the data product (e.g., product, team)", + "additionalProperties": {"type": "string"}, + }, + "providers": { + "type": "array", + "description": "Data providers for context enrichment", + "items": _build_oneof_schema(provider_names, provider_schemas), + }, + "metrics": { + "type": "array", + "description": "Metrics to calculate", + "items": _build_oneof_schema(metric_names, metric_schemas), + }, + "materializer": _build_oneof_schema( + materializer_names, materializer_schemas, key_field="type" + ), + }, + "additionalProperties": False, + } + + +def write_schema(output_path: Path) -> None: + """ + Write JSON schema to file. + """ + + schema = generate_schema() + with open(output_path, "w") as f: + json.dump(schema, f, indent=2) + f.write("\n") diff --git a/src/checkup/materializers/console.py b/src/checkup/materializers/console.py index 3001e58..1951a5b 100644 --- a/src/checkup/materializers/console.py +++ b/src/checkup/materializers/console.py @@ -3,51 +3,88 @@ from rich.console import Console from rich.table import Table -from checkup.materializers.base import Materializer, group_measurements_by_tags +from checkup.materializers.base import Materializer from checkup.metric import Measurement class ConsoleMaterializer(Materializer): - """Output metrics to console. + """ + Output measurements to console. + + Outputs a rich table with measurement details. 
+ Optionally groups measurements by tag values. - Outputs a rich table with metric details. + Args: + group_tags: List of tag names to group by. If empty, no grouping. + include_indirect: If True, include indirect measurements. """ - group_tag_1: str - group_tag_2: str + group_tags: list[str] = [] def materialize( self, measurements: list[Measurement], direct_metric_names: set[str] ) -> None: - """Print measurements to console as a rich table, grouped by tags.""" - filtered = self._filter_measurements(measurements, direct_metric_names) + """ + Print measurements to console as a rich table, optionally grouped by tags. + """ + filtered = self._filter_measurements(measurements, direct_metric_names) console = Console() - groups = group_measurements_by_tags( - filtered, self.group_tag_1, self.group_tag_2 - ) + if not self.group_tags: + self._print_table(console, filtered, title=None) + else: + groups = self._group_by_tags(filtered) + for tag_values, group_measurements in sorted(groups.items()): + title = " | ".join( + f"{tag}: {value}" + for tag, value in zip(self.group_tags, tag_values, strict=True) + ) + self._print_table(console, group_measurements, title=title) + console.print() - # Create a table for each group - for (tag1_value, tag2_value), group_measurements in sorted(groups.items()): - table = Table( - title=f"{self.group_tag_1}: {tag1_value} | {self.group_tag_2}: {tag2_value}" + def _print_table( + self, + console: Console, + measurements: list[Measurement], + title: str | None, + ) -> None: + """ + Print a single table of measurements. 
+ """ + + table = Table(title=title) + + table.add_column("Name", style="cyan", no_wrap=True) + table.add_column("Description", style="dim") + table.add_column("Value", justify="right", style="green") + table.add_column("Unit", style="yellow") + table.add_column("Diagnostics", style="red") + + for measurement in measurements: + table.add_row( + measurement.metric.name, + measurement.metric.description, + str(measurement.value) if measurement.value is not None else "", + measurement.metric.unit, + measurement.diagnostic, ) - table.add_column("Name", style="cyan", no_wrap=True) - table.add_column("Description", style="dim") - table.add_column("Value", justify="right", style="green") - table.add_column("Unit", style="yellow") - table.add_column("Diagnostics", style="red") - - for measurement in group_measurements: - table.add_row( - measurement.metric.name, - measurement.metric.description, - str(measurement.value) if measurement.value is not None else "", - measurement.metric.unit, - measurement.diagnostic, - ) + console.print(table) + + def _group_by_tags( + self, + measurements: list[Measurement], + default: str = "Unknown", + ) -> dict[tuple[str, ...], list[Measurement]]: + """ + Group measurements by tag values. + """ - console.print(table) - console.print() # Add spacing between tables + groups: dict[tuple[str, ...], list[Measurement]] = {} + for measurement in measurements: + key = tuple(measurement.tags.get(tag, default) for tag in self.group_tags) + if key not in groups: + groups[key] = [] + groups[key].append(measurement) + return groups diff --git a/src/checkup/registry/__init__.py b/src/checkup/registry/__init__.py new file mode 100644 index 0000000..7b3a1c4 --- /dev/null +++ b/src/checkup/registry/__init__.py @@ -0,0 +1,13 @@ +""" +Plugin registry for discovering providers, metrics, and materializers. 
+""" + +from checkup.registry.discovery import ( + PluginRegistry, + get_registry, +) + +__all__ = [ + "PluginRegistry", + "get_registry", +] diff --git a/src/checkup/registry/discovery.py b/src/checkup/registry/discovery.py new file mode 100644 index 0000000..9ac0627 --- /dev/null +++ b/src/checkup/registry/discovery.py @@ -0,0 +1,190 @@ +""" +Plugin discovery via Python entry points. +""" + +import logging +from importlib.metadata import entry_points +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from checkup.materializers import Materializer + from checkup.metric import Metric + from checkup.provider import Provider + +logger = logging.getLogger(__name__) + +# Entry point group names +PROVIDERS_GROUP = "checkup.providers" +METRICS_GROUP = "checkup.metrics" +MATERIALIZERS_GROUP = "checkup.materializers" + + +class PluginRegistry: + """ + Registry for discovering and loading checkup plugins. + + Plugins register providers, metrics, and materializers via entry points + in their pyproject.toml. + """ + + def __init__(self) -> None: + """ + Initialize the registry. + """ + + self._providers: dict[str, type[Provider]] | None = None + self._metrics: dict[str, type[Metric]] | None = None + self._materializers: dict[str, type[Materializer]] | None = None + + def _list_entry_point_names(self, group: str) -> list[str]: + """ + List entry point names without loading them. + """ + + eps = entry_points(group=group) + return [ep.name for ep in eps] + + def _load_entry_points(self, group: str) -> dict[str, type]: + """ + Load all entry points for a group. + """ + + result = {} + eps = entry_points(group=group) + + for ep in eps: + try: + cls = ep.load() + result[ep.name] = cls + logger.debug("Loaded %s: %s", group, ep.name) + except Exception as e: + logger.warning("Failed to load %s '%s': %s", group, ep.name, e) + + return result + + @property + def providers(self) -> dict[str, type["Provider"]]: + """ + Get all registered providers. 
+ """ + + if self._providers is None: + self._providers = self._load_entry_points(PROVIDERS_GROUP) + + return self._providers + + @property + def metrics(self) -> dict[str, type["Metric"]]: + """ + Get all registered metrics. + """ + + if self._metrics is None: + self._metrics = self._load_entry_points(METRICS_GROUP) + + return self._metrics + + @property + def materializers(self) -> dict[str, type["Materializer"]]: + """ + Get all registered materializers. + """ + + if self._materializers is None: + self._materializers = self._load_entry_points(MATERIALIZERS_GROUP) + + return self._materializers + + def get_provider(self, name: str) -> type["Provider"] | None: + """ + Get a provider class by name. + """ + + return self.providers.get(name) + + def get_metric(self, name: str) -> type["Metric"] | None: + """ + Get a metric class by name. + """ + + return self.metrics.get(name) + + def get_materializer(self, name: str) -> type["Materializer"] | None: + """ + Get a materializer class by name. + """ + + return self.materializers.get(name) + + def list_provider_names(self) -> list[str]: + """ + List available provider names without loading them. + """ + + return self._list_entry_point_names(PROVIDERS_GROUP) + + def list_metric_names(self) -> list[str]: + """ + List available metric names without loading them. + """ + + return self._list_entry_point_names(METRICS_GROUP) + + def list_materializer_names(self) -> list[str]: + """ + List available materializer names without loading them. + """ + + return self._list_entry_point_names(MATERIALIZERS_GROUP) + + def list_compatible_metric_names(self, provider_names: list[str]) -> list[str]: + """ + List metric names compatible with the given providers. + + A metric is compatible if all its required providers (from providers() method) + are in the selected provider list. Metrics with no required providers are + always compatible. 
+ """ + + provider_set = set(provider_names) + compatible = [] + + for name, metric_cls in self.metrics.items(): + required_providers = metric_cls.providers() + + if not required_providers: + # No provider requirements - always compatible + compatible.append(name) + continue + + # Check if all required providers are selected + required_names = {p.name for p in required_providers} + if required_names <= provider_set: + compatible.append(name) + + return compatible + + def clear_cache(self) -> None: + """ + Clear cached plugins (useful for testing). + """ + + self._providers = None + self._metrics = None + self._materializers = None + + +# Global registry instance +_registry: PluginRegistry | None = None + + +def get_registry() -> PluginRegistry: + """ + Get the global plugin registry. + """ + + global _registry + if _registry is None: + _registry = PluginRegistry() + + return _registry diff --git a/tests/test_cli_configuration.py b/tests/test_cli_configuration.py new file mode 100644 index 0000000..717a995 --- /dev/null +++ b/tests/test_cli_configuration.py @@ -0,0 +1,304 @@ +""" +Tests for CLI configuration loading and parsing. 
+""" + +import yaml + +from checkup.cli.utils import parse_cli_item +from checkup.configuration.env import ( + apply_naming_convention_overrides, + substitute_env_vars, +) +from checkup.configuration.io import ( + load_config, + merge_configs, + parse_materializer, + parse_metrics, + parse_providers, +) +from checkup.configuration.models import CheckupConfig + + +class TestParseProviders: + def test_string_shorthand_creates_provider_with_empty_config(self): + raw = ["git", "dbt"] + result = parse_providers(raw) + + assert len(result) == 2 + assert result[0].name == "git" + assert result[0].config == {} + assert result[1].name == "dbt" + + def test_dict_with_name_field_extracts_config(self): + raw = [ + {"name": "git", "repo_path": "/path/to/repo"}, + {"name": "dbt", "project_dir": "./dbt", "profiles_dir": "~/.dbt"}, + ] + result = parse_providers(raw) + + assert len(result) == 2 + assert result[0].name == "git" + assert result[0].config == {"repo_path": "/path/to/repo"} + assert result[1].name == "dbt" + assert result[1].config == {"project_dir": "./dbt", "profiles_dir": "~/.dbt"} + + def test_mixed_string_and_dict_formats(self): + raw = [ + "git", + {"name": "dbt", "project_dir": "./dbt"}, + ] + result = parse_providers(raw) + + assert len(result) == 2 + assert result[0].name == "git" + assert result[0].config == {} + assert result[1].name == "dbt" + assert result[1].config == {"project_dir": "./dbt"} + + def test_empty_list_returns_empty(self): + assert parse_providers([]) == [] + assert parse_providers(None) == [] + + +class TestParseMetrics: + def test_string_shorthand_creates_metric_with_empty_config(self): + raw = ["git_days_since_last_update", "python_version"] + result = parse_metrics(raw) + + assert len(result) == 2 + assert result[0].name == "git_days_since_last_update" + assert result[0].config == {} + + def test_dict_with_name_field_extracts_config(self): + raw = [ + { + "name": "python_version_check", + "min_version": "3.10", + "max_version": "3.13", + 
}, + ] + result = parse_metrics(raw) + + assert len(result) == 1 + assert result[0].name == "python_version_check" + assert result[0].config == {"min_version": "3.10", "max_version": "3.13"} + + +class TestParseMaterializer: + def test_extracts_type_and_remaining_fields_as_config(self): + raw = {"type": "console", "group_tag_1": "product", "group_tag_2": "team"} + result = parse_materializer(raw) + + assert result.type == "console" + assert result.config == {"group_tag_1": "product", "group_tag_2": "team"} + + def test_returns_none_when_type_missing(self): + assert parse_materializer({"group_tag_1": "product"}) is None + assert parse_materializer(None) is None + + +class TestMergeConfigs: + def test_child_tags_merged_with_parent(self): + parent = {"tags": {"team": "platform", "env": "prod"}} + child = {"tags": {"product": "my-product"}} + + result = merge_configs(parent, child) + + assert result["tags"] == { + "team": "platform", + "env": "prod", + "product": "my-product", + } + + def test_child_tag_overrides_parent(self): + parent = {"tags": {"env": "prod"}} + child = {"tags": {"env": "dev"}} + + result = merge_configs(parent, child) + + assert result["tags"]["env"] == "dev" + + def test_child_providers_replace_parent(self): + parent = {"providers": [{"name": "git"}]} + child = {"providers": [{"name": "dbt"}]} + + result = merge_configs(parent, child) + + assert result["providers"] == [{"name": "dbt"}] + + def test_child_materializer_replaces_parent(self): + parent = {"materializer": {"type": "csv"}} + child = {"materializer": {"type": "console"}} + + result = merge_configs(parent, child) + + assert result["materializer"]["type"] == "console" + + +class TestSubstituteEnvVars: + def test_substitutes_env_var_reference(self, monkeypatch): + monkeypatch.setenv("MY_VAR", "secret_value") + config = {"password": "${MY_VAR}"} + + result = substitute_env_vars(config) + + assert result["password"] == "secret_value" + + def test_uses_default_when_var_not_set(self): + config 
= {"timeout": "${MISSING_VAR:-30}"} + + result = substitute_env_vars(config) + + assert result["timeout"] == "30" + + def test_leaves_unset_var_without_default_unchanged(self): + """Unset vars without defaults are left as-is (with warning logged).""" + config = {"value": "${DEFINITELY_NOT_SET}"} + + result = substitute_env_vars(config) + + assert result["value"] == "${DEFINITELY_NOT_SET}" + + def test_substitutes_in_nested_structures(self, monkeypatch): + monkeypatch.setenv("DB_HOST", "localhost") + config = { + "providers": [ + {"name": "db", "host": "${DB_HOST}"}, + ] + } + + result = substitute_env_vars(config) + + assert result["providers"][0]["host"] == "localhost" + + +class TestNamingConventionOverrides: + def test_materializer_override_applied_when_type_matches(self, monkeypatch): + monkeypatch.setenv( + "CHECKUP__MATERIALIZER__SQLALCHEMY__CONNECTION_URL", + "postgresql://localhost", + ) + config = {"materializer": {"type": "sqlalchemy"}} + + result = apply_naming_convention_overrides(config) + + assert result["materializer"]["connection_url"] == "postgresql://localhost" + + def test_materializer_override_skipped_when_type_differs(self, monkeypatch): + monkeypatch.setenv( + "CHECKUP__MATERIALIZER__SQLALCHEMY__CONNECTION_URL", + "postgresql://localhost", + ) + config = {"materializer": {"type": "console"}} + + result = apply_naming_convention_overrides(config) + + assert "connection_url" not in result["materializer"] + + def test_explicit_config_wins_over_naming_convention(self, monkeypatch): + monkeypatch.setenv( + "CHECKUP__MATERIALIZER__SQLALCHEMY__CONNECTION_URL", "env-url" + ) + config = {"materializer": {"type": "sqlalchemy", "connection_url": "yaml-url"}} + + result = apply_naming_convention_overrides(config) + + assert result["materializer"]["connection_url"] == "yaml-url" + + def test_malformed_materializer_env_var_logs_warning(self, monkeypatch, caplog): + monkeypatch.setenv("CHECKUP__MATERIALIZER__SQLALCHEMY", "value") + config = {"materializer": 
{"type": "sqlalchemy"}} + + apply_naming_convention_overrides(config) + + assert "malformed" in caplog.text.lower() + assert "CHECKUP__MATERIALIZER__SQLALCHEMY" in caplog.text + + def test_malformed_provider_env_var_logs_warning(self, monkeypatch, caplog): + monkeypatch.setenv("CHECKUP__PROVIDER__GIT", "value") + config = {"providers": [{"name": "git"}]} + + apply_naming_convention_overrides(config) + + assert "malformed" in caplog.text.lower() + assert "CHECKUP__PROVIDER__GIT" in caplog.text + + +class TestLoadConfig: + def test_loads_yaml_file(self, tmp_path): + config_file = tmp_path / "checkup.yaml" + config_file.write_text( + yaml.dump( + { + "tags": {"product": "test"}, + "providers": [{"name": "git"}], + "metrics": [{"name": "dummy_metric"}], + } + ) + ) + + result = load_config(config_path=config_file) + + assert isinstance(result, CheckupConfig) + assert result.tags == {"product": "test"} + assert len(result.providers) == 1 + assert result.providers[0].name == "git" + + def test_returns_empty_config_when_no_file_found(self, tmp_path): + result = load_config(start_dir=tmp_path) + + assert result.tags == {} + assert result.providers == [] + assert result.metrics == [] + + def test_hierarchical_loading_merges_parent_and_child(self, tmp_path): + # Create parent directory with config + parent_dir = tmp_path / "parent" + parent_dir.mkdir() + (parent_dir / "checkup.yaml").write_text( + yaml.dump({"tags": {"team": "platform"}}) + ) + + # Create child directory with config + child_dir = parent_dir / "child" + child_dir.mkdir() + (child_dir / "checkup.yaml").write_text( + yaml.dump({"tags": {"product": "my-product"}}) + ) + + result = load_config(start_dir=child_dir) + + assert result.tags == {"team": "platform", "product": "my-product"} + + +class TestParseCliItem: + def test_name_only(self): + name, config = parse_cli_item("git") + + assert name == "git" + assert config == {} + + def test_empty_config(self): + name, config = parse_cli_item("git:") + + assert name 
== "git" + assert config == {} + + def test_name_with_config_pairs(self): + name, config = parse_cli_item("dbt:project_dir=./dbt,profiles_dir=~/.dbt") + + assert name == "dbt" + assert config == {"project_dir": "./dbt", "profiles_dir": "~/.dbt"} + + def test_value_containing_special_characters(self): + name, config = parse_cli_item("db:url=postgres://host:5432,user=name=admin") + + assert name == "db" + assert config == {"url": "postgres://host:5432", "user": "name=admin"} + + def test_malformed_pair_is_skipped(self, caplog): + name, config = parse_cli_item("dbt:project_dir=./dbt,malformed,other=value") + + assert name == "dbt" + assert config == {"project_dir": "./dbt", "other": "value"} + assert "malformed" in caplog.text diff --git a/tests/test_materializers.py b/tests/test_materializers.py index 48d4484..65fe3ac 100644 --- a/tests/test_materializers.py +++ b/tests/test_materializers.py @@ -24,7 +24,7 @@ def test_materializer_is_abstract(): def test_console_materializer(): - """Test console output materializer.""" + """Test console output materializer with two-level grouping.""" metric = DummyMetric(expected_value=42) measurement = metric.measure(value=42) @@ -32,7 +32,7 @@ def test_console_materializer(): captured_output = StringIO() sys.stdout = captured_output - materializer = ConsoleMaterializer(group_tag_1="domain", group_tag_2="project") + materializer = ConsoleMaterializer(group_tags=["domain", "project"]) materializer.materialize([measurement], {"dummy"}) # Reset stdout @@ -43,6 +43,66 @@ def test_console_materializer(): assert "42" in output +def test_console_materializer_no_grouping(): + """Test console materializer without grouping.""" + metric = DummyMetric(expected_value=42) + measurement = metric.measure(value=42) + + captured_output = StringIO() + sys.stdout = captured_output + + materializer = ConsoleMaterializer() # No group_tags + materializer.materialize([measurement], {"dummy"}) + + sys.stdout = sys.__stdout__ + + output = 
captured_output.getvalue() + assert "dummy" in output + assert "42" in output + + +def test_console_materializer_single_grouping(): + """Test console materializer with single-level grouping.""" + metric = DummyMetric(expected_value=42) + measurement = metric.measure(value=42, tags={"domain": "Analytics"}) + + captured_output = StringIO() + sys.stdout = captured_output + + materializer = ConsoleMaterializer(group_tags=["domain"]) + materializer.materialize([measurement], {"dummy"}) + + sys.stdout = sys.__stdout__ + + output = captured_output.getvalue() + assert "dummy" in output + assert "42" in output + assert "domain: Analytics" in output + + +def test_console_materializer_three_level_grouping(): + """Test console materializer with three-level grouping.""" + metric = DummyMetric(expected_value=42) + measurement = metric.measure( + value=42, tags={"domain": "Analytics", "project": "Core", "env": "prod"} + ) + + captured_output = StringIO() + sys.stdout = captured_output + + materializer = ConsoleMaterializer(group_tags=["domain", "project", "env"]) + materializer.materialize([measurement], {"dummy"}) + + sys.stdout = sys.__stdout__ + + output = captured_output.getvalue() + assert "dummy" in output + assert "42" in output + assert "domain: Analytics" in output + assert "project: Core" in output + assert "env: prod" in output + + def test_csv_materializer(tmp_path): """Test CSV file materializer.""" metric = DummyMetric(expected_value=42) @@ -101,7 +161,7 @@ def test_materializer_filters_indirect_by_default(): captured_output = StringIO() sys.stdout = captured_output - materializer = ConsoleMaterializer(group_tag_1="domain", group_tag_2="project") + materializer = ConsoleMaterializer(group_tags=["domain", "project"]) # Only "dummy" is direct, "indirect" is not materializer.materialize([direct_measurement, indirect_measurement], {"dummy"}) @@ -127,7 +187,7 @@ def test_materializer_includes_indirect_when_configured(): sys.stdout = captured_output materializer = 
ConsoleMaterializer( - include_indirect=True, group_tag_1="domain", group_tag_2="project" + include_indirect=True, group_tags=["domain", "project"] ) materializer.materialize([direct_measurement, indirect_measurement], {"dummy"}) diff --git a/tests/test_registry.py b/tests/test_registry.py new file mode 100644 index 0000000..e4a84ea --- /dev/null +++ b/tests/test_registry.py @@ -0,0 +1,154 @@ +""" +Tests for plugin registry and discovery. +""" + +from typing import ClassVar +from unittest.mock import MagicMock, patch + +from checkup.metric import Metric +from checkup.provider import Provider +from checkup.registry.discovery import PluginRegistry +from checkup.types import Context + + +class MockGitProvider(Provider): + name: ClassVar[str] = "git" + + def provide(self) -> dict: + return {} + + +class MockDbtProvider(Provider): + name: ClassVar[str] = "dbt" + + def provide(self) -> dict: + return {} + + +class MetricRequiringGit(Metric): + name: ClassVar[str] = "git_metric" + description: ClassVar[str] = "Requires git" + unit: ClassVar[str] = "count" + + @classmethod + def providers(cls) -> list[type[Provider]]: + return [MockGitProvider] + + def calculate(self, _context: Context, _metrics: dict) -> None: + self.value = 1 + + +class MetricRequiringDbt(Metric): + name: ClassVar[str] = "dbt_metric" + description: ClassVar[str] = "Requires dbt" + unit: ClassVar[str] = "count" + + @classmethod + def providers(cls) -> list[type[Provider]]: + return [MockDbtProvider] + + def calculate(self, _context: Context, _metrics: dict) -> None: + self.value = 1 + + +class MetricRequiringBoth(Metric): + name: ClassVar[str] = "both_metric" + description: ClassVar[str] = "Requires git and dbt" + unit: ClassVar[str] = "count" + + @classmethod + def providers(cls) -> list[type[Provider]]: + return [MockGitProvider, MockDbtProvider] + + def calculate(self, _context: Context, _metrics: dict) -> None: + self.value = 1 + + +class StandaloneMetric(Metric): + name: ClassVar[str] = "standalone" 
+ description: ClassVar[str] = "No providers required" + unit: ClassVar[str] = "count" + + def calculate(self, _context: Context, _metrics: dict) -> None: + self.value = 1 + + +class TestCompatibleMetricFiltering: + def test_metric_included_when_required_provider_selected(self): + registry = PluginRegistry() + registry._metrics = { + "git_metric": MetricRequiringGit, + "dbt_metric": MetricRequiringDbt, + } + + compatible = registry.list_compatible_metric_names(["git"]) + + assert "git_metric" in compatible + assert "dbt_metric" not in compatible + + def test_metric_excluded_when_required_provider_missing(self): + registry = PluginRegistry() + registry._metrics = { + "git_metric": MetricRequiringGit, + } + + compatible = registry.list_compatible_metric_names(["dbt"]) + + assert "git_metric" not in compatible + + def test_metric_requiring_multiple_providers_needs_all(self): + registry = PluginRegistry() + registry._metrics = { + "both_metric": MetricRequiringBoth, + } + + # Only git selected - not enough + compatible = registry.list_compatible_metric_names(["git"]) + assert "both_metric" not in compatible + + # Both selected - now compatible + compatible = registry.list_compatible_metric_names(["git", "dbt"]) + assert "both_metric" in compatible + + def test_standalone_metrics_always_compatible(self): + registry = PluginRegistry() + registry._metrics = { + "standalone": StandaloneMetric, + } + + # No providers selected + compatible = registry.list_compatible_metric_names([]) + assert "standalone" in compatible + + # Some providers selected + compatible = registry.list_compatible_metric_names(["git"]) + assert "standalone" in compatible + + +class TestRegistryListing: + def test_list_provider_names_without_loading(self): + """Listing names should not import any modules.""" + registry = PluginRegistry() + + with patch("checkup.registry.discovery.entry_points") as mock_eps: + mock_ep = MagicMock() + mock_ep.name = "git" + mock_eps.return_value = [mock_ep] + + names = 
registry.list_provider_names() + + assert names == ["git"] + mock_ep.load.assert_not_called() + + def test_list_metric_names_without_loading(self): + registry = PluginRegistry() + + with patch("checkup.registry.discovery.entry_points") as mock_eps: + mock_ep = MagicMock() + mock_ep.name = "git_days_since_last_update" + mock_eps.return_value = [mock_ep] + + names = registry.list_metric_names() + + assert names == ["git_days_since_last_update"] + mock_ep.load.assert_not_called() diff --git a/uv.lock b/uv.lock index e2bc4ae..caad284 100644 --- a/uv.lock +++ b/uv.lock @@ -33,6 +33,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/53/89b197cb472a3175d73384761a3413fd58e6b65a794c1102d148b8de87bd/agate-1.9.1-py2.py3-none-any.whl", hash = "sha256:1cf329510b3dde07c4ad1740b7587c9c679abc3dcd92bb1107eabc10c2e03c50", size = 95085, upload-time = "2023-12-21T20:05:21.954Z" }, ] +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + [[package]] name = "annotated-types" version = "0.7.0" @@ -148,8 +157,10 @@ dependencies = [ { name = "jinja2" }, { name = "pydantic" }, { name = "pyyaml" }, + { name = "questionary" }, { name = "rich" }, { name = "sqlalchemy" }, + { name = "typer" }, ] [package.dev-dependencies] @@ -170,8 +181,10 @@ requires-dist = [ { name = "jinja2", specifier = ">=3.1.6" }, { name = "pydantic", 
specifier = ">=2.11.7" }, { name = "pyyaml", specifier = ">=6.0" }, + { name = "questionary", specifier = ">=2.0" }, { name = "rich", specifier = ">=13.0" }, { name = "sqlalchemy", specifier = ">=2.0" }, + { name = "typer", specifier = ">=0.24" }, ] [package.metadata.requires-dev] @@ -1104,6 +1117,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/5e/9b994b5de36d6aa5caaf09a018d8fe4820db46e4da577c2fd7a1e176b56c/prek-0.3.1-py3-none-win_arm64.whl", hash = "sha256:cfa58365eb36753cff684dc3b00196c1163bb135fe72c6a1c6ebb1a179f5dbdf", size = 4021714, upload-time = "2026-01-31T13:25:34.993Z" }, ] +[[package]] +name = "prompt-toolkit" +version = "3.0.52" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "wcwidth" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/96/06e01a7b38dce6fe1db213e061a4602dd6032a8a97ef6c1a862537732421/prompt_toolkit-3.0.52.tar.gz", hash = "sha256:28cde192929c8e7321de85de1ddbe736f1375148b02f2e17edd840042b1be855", size = 434198, upload-time = "2025-08-27T15:24:02.057Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/03/0d3ce49e2505ae70cf43bc5bb3033955d2fc9f932163e84dc0779cc47f48/prompt_toolkit-3.0.52-py3-none-any.whl", hash = "sha256:9aac639a3bbd33284347de5ad8d68ecc044b91a762dc39b7c21095fcd6a19955", size = 391431, upload-time = "2025-08-27T15:23:59.498Z" }, +] + [[package]] name = "protobuf" version = "6.33.2" @@ -1357,6 +1382,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/04/11/432f32f8097b03e3cd5fe57e88efb685d964e2e5178a48ed61e841f7fdce/pyyaml_env_tag-1.1-py3-none-any.whl", hash = "sha256:17109e1a528561e32f026364712fee1264bc2ea6715120891174ed1b980d2e04", size = 4722, upload-time = "2025-05-13T15:23:59.629Z" }, ] +[[package]] +name = "questionary" +version = "2.1.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "prompt-toolkit" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/f6/45/eafb0bba0f9988f6a2520f9ca2df2c82ddfa8d67c95d6625452e97b204a5/questionary-2.1.1.tar.gz", hash = "sha256:3d7e980292bb0107abaa79c68dd3eee3c561b83a0f89ae482860b181c8bd412d", size = 25845, upload-time = "2025-08-28T19:00:20.851Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3c/26/1062c7ec1b053db9e499b4d2d5bc231743201b74051c973dadeac80a8f43/questionary-2.1.1-py3-none-any.whl", hash = "sha256:a51af13f345f1cdea62347589fbb6df3b290306ab8930713bfae4d475a7d4a59", size = 36753, upload-time = "2025-08-28T19:00:19.56Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -1506,6 +1543,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9e/6a/40fee331a52339926a92e17ae748827270b288a35ef4a15c9c8f2ec54715/ruff-0.14.14-py3-none-win_arm64.whl", hash = "sha256:56e6981a98b13a32236a72a8da421d7839221fa308b223b9283312312e5ac76c", size = 10920448, upload-time = "2026-01-22T22:30:15.417Z" }, ] +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + [[package]] name = "six" version = "1.17.0" @@ -1588,6 +1634,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/a5/c0b6468d3824fe3fde30dbb5e1f687b291608f9473681bbf7dabbf5a87d7/text_unidecode-1.3-py2.py3-none-any.whl", hash = "sha256:1311f10e8b895935241623731c2ba64f4c455287888b18189350b67134a822e8", 
size = 78154, upload-time = "2019-08-30T21:37:03.543Z" }, ] +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + [[package]] name = "typing-extensions" version = "4.15.0" @@ -1651,6 +1712,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067, upload-time = "2024-11-01T14:07:11.845Z" }, ] +[[package]] +name = "wcwidth" +version = "0.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/35/a2/8e3becb46433538a38726c948d3399905a4c7cabd0df578ede5dc51f0ec2/wcwidth-0.6.0.tar.gz", hash = "sha256:cdc4e4262d6ef9a1a57e018384cbeb1208d8abbc64176027e2c2455c81313159", size = 159684, upload-time = "2026-02-06T19:19:40.919Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/68/5a/199c59e0a824a3db2b89c5d2dade7ab5f9624dbf6448dc291b46d5ec94d3/wcwidth-0.6.0-py3-none-any.whl", hash = "sha256:1a3a1e510b553315f8e146c54764f4fb6264ffad731b3d78088cdb1478ffbdad", size = 94189, upload-time = "2026-02-06T19:19:39.646Z" }, +] + [[package]] name = "zipp" version = 
"3.23.0"