#!/usr/bin/env python3
"""
Count leaf parameters in policyengine-us and policyengine-uk repos.

Clones repos with sparse checkout (parameters only, depth 1),
recursively counts all leaf parameters, and writes the results
to src/data/parameterStats.ts.

Usage:
    python scripts/count_params.py
"""

import os
import subprocess
import sys
import tempfile
from datetime import date

try:
    import yaml
except ImportError:  # reported with a clear message before any cloning starts
    yaml = None


# Keys that describe a parameter node instead of holding child parameters;
# they are never descended into while counting.
METADATA_KEYS = frozenset(
    {
        "description",
        "metadata",
        "label",
        "unit",
        "reference",
        "documentation",
        "breakdown",
        "period",
        "name",
        "uprating",
    }
)

# A mapping is counted as one dated value series when more than this share
# of its keys look like dates.
DATED_KEY_RATIO = 0.3

# Keys of the per-repository statistics dict returned by process_repo().
STAT_KEYS = ("yaml_files", "current_params", "historical_values")

# Repositories to scan and the directory inside each that holds parameters.
REPOS = {
    "us": {
        "url": "https://github.com/PolicyEngine/policyengine-us.git",
        "sparse_path": "policyengine_us/parameters",
    },
    "uk": {
        "url": "https://github.com/PolicyEngine/policyengine-uk.git",
        "sparse_path": "policyengine_uk/parameters",
    },
}


def is_date_key(k):
    """Return True if *k* looks like a date key of a value series.

    ``datetime.date`` instances are accepted as-is; strings are accepted
    when they start with a four-digit year followed by at least one
    ``-``-separated part (e.g. ``"2020-01-01"``). Anything else is not a
    date key.
    """
    if isinstance(k, date):
        return True
    if not isinstance(k, str):
        return False
    year, separator, _rest = k.partition("-")
    return bool(separator) and len(year) == 4 and year.isdigit()


def count_leaf_params(data, current_values, historical_values):
    """Recursively count leaf parameters in a parsed parameter tree.

    Args:
        data: Parsed YAML node. Anything that is not a dict is ignored.
        current_values: One-element list; element 0 is incremented once per
            leaf parameter found.
        historical_values: One-element list; element 0 is incremented by the
            number of dated values each leaf parameter holds.

    A node is a leaf when it has a ``values`` mapping, or when more than
    ``DATED_KEY_RATIO`` of its own keys look like dates. A node with a
    ``brackets`` list is not a leaf itself: every dict-valued field of every
    bracket is counted instead. Keys in ``METADATA_KEYS`` are skipped when
    descending into ordinary nodes.
    """
    if not isinstance(data, dict):
        return

    values = data.get("values")
    if isinstance(values, dict):
        historical_values[0] += len(values)
        current_values[0] += 1
        return

    brackets = data.get("brackets")
    if isinstance(brackets, list):
        for bracket in brackets:
            if isinstance(bracket, dict):
                # Non-dict fields are ignored by the recursive call itself.
                for child in bracket.values():
                    count_leaf_params(child, current_values, historical_values)
        return

    dated = sum(1 for key in data if is_date_key(key))
    if dated and dated > len(data) * DATED_KEY_RATIO:
        historical_values[0] += dated
        current_values[0] += 1
        return

    for key, child in data.items():
        if key not in METADATA_KEYS:
            count_leaf_params(child, current_values, historical_values)


def process_repo(base_dir):
    """Count parameter files and leaf parameters under *base_dir*.

    Walks the directory tree, parses every ``*.yaml`` file and feeds it to
    ``count_leaf_params``. Files that cannot be read or parsed still count
    towards ``yaml_files`` but contribute no parameters; each one is reported
    on stderr so a drop in the published numbers can be traced.

    Returns:
        dict with the integer keys ``yaml_files``, ``current_params`` and
        ``historical_values``.

    Raises:
        RuntimeError: if PyYAML is not installed.
    """
    if yaml is None:
        raise RuntimeError("PyYAML is required to parse parameter files")

    total_files = 0
    current = [0]
    historical = [0]
    for root, _dirs, files in os.walk(base_dir):
        for fname in files:
            if not fname.endswith(".yaml"):
                continue
            total_files += 1
            path = os.path.join(root, fname)
            try:
                # Binary mode lets PyYAML detect the encoding itself instead
                # of depending on the platform's default locale.
                with open(path, "rb") as f:
                    data = yaml.safe_load(f)
            except (OSError, ValueError, yaml.YAMLError) as exc:
                print(f"Warning: skipping {path}: {exc}", file=sys.stderr)
                continue
            count_leaf_params(data, current, historical)
    return {
        "yaml_files": total_files,
        "current_params": current[0],
        "historical_values": historical[0],
    }


def _run_git(args, cwd=None):
    """Run ``git`` with *args*, echoing git's stderr if the command fails."""
    try:
        subprocess.run(
            ["git", *args],
            cwd=cwd,
            check=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as exc:
        # Output is captured to keep successful runs quiet, so surface it
        # here; otherwise the traceback only shows the exit status.
        if exc.stderr:
            print(
                exc.stderr.decode(errors="replace").strip(),
                file=sys.stderr,
            )
        raise


def clone_sparse(repo_url, target_dir, sparse_path):
    """Clone a repo with sparse checkout, only fetching the parameters directory.

    Args:
        repo_url: URL of the repository to clone.
        target_dir: Directory to clone into.
        sparse_path: Path inside the repository to check out.

    Raises:
        subprocess.CalledProcessError: if either git command fails.
    """
    _run_git(
        [
            "clone",
            "--depth",
            "1",
            "--filter=blob:none",
            "--sparse",
            repo_url,
            target_dir,
        ]
    )
    _run_git(["sparse-checkout", "set", sparse_path], cwd=target_dir)


def combine_stats(results):
    """Sum each statistic across all per-country result dicts."""
    return {
        key: sum(stats[key] for stats in results.values())
        for key in STAT_KEYS
    }


def render_parameter_stats_ts(results, combined, generated_at):
    """Render the TypeScript module holding the parameter statistics.

    Args:
        results: Mapping of country code to its ``process_repo`` result, in
            the order the entries should appear.
        combined: Totals across all countries, same keys as each result.
        generated_at: ISO date string stamped into the file.

    Returns:
        The full file content, ending with a newline.
    """

    def entry(stats):
        return (
            f"{{ yamlFiles: {stats['yaml_files']}, "
            f"currentParams: {stats['current_params']}, "
            f"historicalValues: {stats['historical_values']} }}"
        )

    lines = [
        "// Auto-generated by scripts/count_params.py",
        f"// Last updated: {generated_at}",
        "export const parameterStats = {",
        f"  generatedAt: '{generated_at}',",
    ]
    lines.extend(
        f"  {country}: {entry(stats)}," for country, stats in results.items()
    )
    lines.append(f"  combined: {entry(combined)},")
    lines.append("};")
    return "\n".join(lines) + "\n"


def main():
    """Clone both repos, count their parameters and write parameterStats.ts."""
    if yaml is None:
        # Fail before spending time on the clones.
        sys.exit("PyYAML is required: pip install pyyaml")

    script_dir = os.path.dirname(os.path.abspath(__file__))
    project_root = os.path.dirname(script_dir)
    output_path = os.path.join(project_root, "src", "data", "parameterStats.ts")

    results = {}

    with tempfile.TemporaryDirectory() as tmpdir:
        for country, config in REPOS.items():
            print(f"Cloning {country} repo...")
            repo_dir = os.path.join(tmpdir, f"policyengine-{country}")
            clone_sparse(config["url"], repo_dir, config["sparse_path"])

            params_dir = os.path.join(repo_dir, config["sparse_path"])
            print(f"Counting parameters in {params_dir}...")
            results[country] = process_repo(params_dir)
            print(f"  {country}: {results[country]}")

    combined = combine_stats(results)
    today = date.today().isoformat()
    ts_content = render_parameter_stats_ts(results, combined, today)

    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(ts_content)

    print(f"\nWrote {output_path}")
    print(f"Combined: {combined}")


if __name__ == "__main__":
    main()
{count}
+
{display}
{label}
); @@ -399,12 +401,19 @@ export default function RulesOverview() { return (
- {/* Summary stats */} + {/* Parameter stats */} +
+ + + +
+ + {/* Program stats */}
- - - - + + + +
{/* View mode toggle + search */} diff --git a/src/data/parameterStats.ts b/src/data/parameterStats.ts new file mode 100644 index 0000000..424e50b --- /dev/null +++ b/src/data/parameterStats.ts @@ -0,0 +1,8 @@ +// Auto-generated by scripts/count_params.py +// Last updated: 2026-02-10 +export const parameterStats = { + generatedAt: '2026-02-10', + us: { yamlFiles: 3544, currentParams: 29928, historicalValues: 49457 }, + uk: { yamlFiles: 448, currentParams: 835, historicalValues: 2760 }, + combined: { yamlFiles: 3992, currentParams: 30763, historicalValues: 52217 }, +};