Skip to content
1 change: 1 addition & 0 deletions changelog.d/added/601.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add a unified version manifest system for semver-based dataset versioning across GCS and Hugging Face, with rollback support and a single registry file (`version_manifest.json`) on both backends. Expose `get_data_version()` and `get_data_manifest()` as public consumer APIs.
1 change: 1 addition & 0 deletions policyengine_us_data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .datasets import *
from .geography import ZIP_CODE_DATASET
from .utils.version_manifest import get_data_version, get_data_manifest
63 changes: 63 additions & 0 deletions policyengine_us_data/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Shared fixtures for version manifest tests."""

from unittest.mock import MagicMock

import pytest

from policyengine_us_data.utils.version_manifest import (
HFVersionInfo,
GCSVersionInfo,
VersionManifest,
VersionRegistry,
)


@pytest.fixture
def sample_generations() -> dict[str, int]:
    """Provide a sample mapping of dataset file paths to integer generation numbers.

    Returns:
        A dict with three entries (two top-level ``.h5`` files and one
        ``states/`` file), each mapped to a distinct integer.
    """
    file_paths = ("enhanced_cps_2024.h5", "cps_2024.h5", "states/AL.h5")
    generation_numbers = (
        1710203948123456,
        1710203948234567,
        1710203948345678,
    )
    # zip keeps the pairs in declaration order, so key order is unchanged.
    return dict(zip(file_paths, generation_numbers))


@pytest.fixture
def sample_hf_info() -> HFVersionInfo:
    """Provide an ``HFVersionInfo`` with a fixed sample repo name and commit."""
    hf_fields = {
        "repo": "policyengine/policyengine-us-data",
        "commit": "abc123def456",
    }
    return HFVersionInfo(**hf_fields)


@pytest.fixture
def sample_manifest(
    sample_generations: dict[str, int],
    sample_hf_info: HFVersionInfo,
) -> VersionManifest:
    """Assemble a ``VersionManifest`` for version ``1.72.3``.

    Args:
        sample_generations: Mapping passed through as the GCS ``generations``.
        sample_hf_info: Value stored in the manifest's ``hf`` field.

    Returns:
        A manifest with a fixed version string and creation timestamp.
    """
    gcs_info = GCSVersionInfo(
        bucket="policyengine-us-data",
        generations=sample_generations,
    )
    return VersionManifest(
        version="1.72.3",
        created_at="2026-03-10T14:30:00Z",
        hf=sample_hf_info,
        gcs=gcs_info,
    )


@pytest.fixture
def sample_registry(
    sample_manifest: VersionManifest,
) -> VersionRegistry:
    """Provide a ``VersionRegistry`` containing exactly one version entry.

    Args:
        sample_manifest: The only manifest placed in ``versions``.

    Returns:
        A registry whose ``current`` is ``"1.72.3"``.
    """
    entries = [sample_manifest]
    return VersionRegistry(current="1.72.3", versions=entries)


@pytest.fixture
def mock_bucket() -> MagicMock:
    """Provide a ``MagicMock`` bucket whose ``name`` is ``policyengine-us-data``."""
    fake_bucket = MagicMock()
    # ``name`` must be set after construction: MagicMock(name=...) would only
    # label the mock's repr instead of creating a ``name`` attribute.
    fake_bucket.configure_mock(name="policyengine-us-data")
    return fake_bucket
Empty file.
25 changes: 25 additions & 0 deletions policyengine_us_data/tests/fixtures/test_version_manifest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Helper functions for version manifest tests."""

import json
from unittest.mock import MagicMock

from policyengine_us_data.utils.version_manifest import (
VersionRegistry,
)


def make_mock_blob(generation: int) -> MagicMock:
    """Return a ``MagicMock`` blob whose ``generation`` attribute is preset.

    Args:
        generation: Value to expose as ``blob.generation``.
    """
    # Keyword arguments to MagicMock are applied as attributes on the new mock.
    return MagicMock(generation=generation)


def setup_bucket_with_registry(
    bucket: MagicMock,
    registry: VersionRegistry,
) -> None:
    """Make a mock bucket hand back the given registry as JSON text.

    After this call, ``bucket.blob(...)`` returns a mock blob whose
    ``download_as_text()`` yields ``json.dumps(registry.to_dict())``.

    Args:
        bucket: Mock bucket to configure in place.
        registry: Registry serialized via its ``to_dict()`` method.
    """
    payload = json.dumps(registry.to_dict())
    # Dotted keys let MagicMock configure the child mock's return value
    # at construction time.
    registry_blob = MagicMock(**{"download_as_text.return_value": payload})
    bucket.blob.return_value = registry_blob
Loading
Loading