From 2e0c6c0330fa1f4b1fd9212ab7fad486727b8ebf Mon Sep 17 00:00:00 2001 From: Aaron Montgomery Date: Sat, 21 Mar 2026 23:32:32 -1000 Subject: [PATCH] docs: prepare pii history rewrite runbook --- CHANGELOG.md | 1 + TODO.md | 4 +- .../operations/PII_HISTORY_REWRITE_RUNBOOK.md | 72 ++++++++++++-- docs/security/PII_AUDIT_LOG.md | 12 ++- scripts/cleanup_pii_history.sh | 2 + scripts/pii_history_rewrite_preflight.sh | 99 +++++++++++++++++++ 6 files changed, 174 insertions(+), 16 deletions(-) create mode 100755 scripts/pii_history_rewrite_preflight.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index 02cfefecb..75ba08747 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,6 +31,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added +- Added a non-destructive `scripts/pii_history_rewrite_preflight.sh` readiness check and tightened the live PII history rewrite runbook so the remaining human-only `git filter-repo` purge can be executed from a clean clone with explicit backup, rollback, and collaborator re-clone steps. - Added current mocked browser coverage for the annual-planning review surface, so Playwright now verifies effective program policy, recurring calendar anchors, institutional events, plan provenance, repair preview, and plan history together instead of the older policy-anchor-only copy. - Added admin rollover endpoints for `ProgramPolicySnapshot` and `ProgramCalendarAnchor`, plus annual-planning hub views for the effective program-policy snapshot and effective recurring calendar anchors for a selected academic year. - Added a recent proving-pass report panel to the annual-planning hub so schedulers can review baseline/shock/repair drill outcomes for the selected academic year without leaving the coordinator surface. 
diff --git a/TODO.md b/TODO.md index 4bb329fdc..6e52fc21c 100644 --- a/TODO.md +++ b/TODO.md @@ -10,7 +10,7 @@ ## P0 — Critical / Blocking -- [ ] **PII in git history** — Resident names in deleted files still in repo history. Requires `git filter-repo` + force push. All collaborators must re-clone after. **Human-only.** See `docs/security/PII_AUDIT_LOG.md`. +- [ ] **PII in git history** — Resident names in deleted files still in repo history. Requires `git filter-repo` + force push. All collaborators must re-clone after. **Human-only execution.** Readiness artifacts now live at `docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md`, `scripts/pii_history_rewrite_preflight.sh`, and `docs/security/PII_AUDIT_LOG.md`. - [ ] **Track A: PGY Graduation Rollover (July 1 deadline)** — `Person.pgy_level` is not academic-year scoped. Updating it on July 1 corrupts historical ACGME queries. Migration exists (`20260224_person_ay.py`), Alembic heads merged (PR #1196), `_sync_academic_year_call_counts()` implemented (PR #1199), and the local dev DB is already at Alembic head (`20260321_program_policy_snapshots`). **Remaining:** 1. Migrate the remaining `Person.pgy_level` consumers to `PersonAcademicYear` per-AY reads @@ -164,7 +164,7 @@ - [ ] SM deterministic preload decision (Option A vs B) — `docs/architecture/SM_DETERMINISTIC_PRELOAD.md` - [ ] MEDCOM ruling on ACGME call duty interpretation -- [ ] PII history purge coordination +- [ ] PII history purge coordination — runbook and preflight are ready; still requires a human-run maintenance window and force-push event ## OPSEC Debt (Cannot Fix) diff --git a/docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md b/docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md index 3d5bdee15..03a507edd 100644 --- a/docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md +++ b/docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md @@ -10,62 +10,100 @@ Rewriting git history will change commit SHAs for all rewritten commits. 
This requires a force-push and **every collaborator must re-clone or hard reset**. Do not proceed without explicit coordination and approval. +## Readiness Artifacts + +- `scripts/pii_history_rewrite_preflight.sh` + - Non-destructive readiness check for local prerequisites, clean working tree, backup naming, and manual next steps. +- `scripts/cleanup_pii_history.sh` + - Intentional no-op helper with example `git filter-repo` patterns. Use it only as a template after preflight passes. +- `docs/security/PII_AUDIT_LOG.md` + - Incident history and scan commands to re-run before and after the rewrite. + ## Prerequisites - `git` installed - `git filter-repo` installed - - Install: `pipx install git-filter-repo` or `pip install git-filter-repo` + - Install: `brew install git-filter-repo`, `pipx install git-filter-repo`, or `pip install git-filter-repo` - Administrative access to the remote repository - Maintenance window coordinated with all collaborators +- Fresh clone prepared for the actual rewrite + +## Quick Start + +Run this first, before scheduling the force-push window: + +```bash +./scripts/pii_history_rewrite_preflight.sh +``` + +The preflight script does not mutate git history. It only verifies readiness and +prints the backup branch / bundle commands you should run during the maintenance +window. ## Checklist (Do Not Skip) - Identify PII patterns (filenames and/or content strings). - Notify all collaborators of the rewrite window and required re-clone. - Create a backup branch on the remote. +- Create a local bundle backup before running `git filter-repo`. - Ensure no one is pushing during the window. -- Run the script locally (DO NOT run in CI). +- Run the rewrite from a fresh clone (DO NOT run in CI). - Validate the repo state and scans. - Force-push rewritten history. - Confirm all collaborators re-clone. 
## Commands (Copy/Paste) -### 1) Create a backup branch (local and remote) +### 1) Create backup artifacts ```bash git fetch origin git checkout main git pull --ff-only origin main +git bundle create aapm-pre-pii-rewrite-$(date +%Y%m%dT%H%M%S).bundle --all git branch backup/pre_pii_rewrite_$(date +%Y%m%d) git push origin backup/pre_pii_rewrite_$(date +%Y%m%d) ``` -### 2) Run the helper script (local only) +### 2) Prepare the rewrite inputs (local only) -Edit and run: +Run the non-destructive preflight, then edit the helper examples into the exact +`git filter-repo` command you intend to use: ```bash +./scripts/pii_history_rewrite_preflight.sh ./scripts/cleanup_pii_history.sh ``` -### 3) Verify rewrite locally +### 3) Rewrite from a fresh clone + +```bash +git filter-repo --replace-text /tmp/pii-replacements.txt --force +git remote add origin <REMOTE_URL> +``` + +`git filter-repo` removes remotes by design. Re-add `origin` before any push. + +### 4) Verify rewrite locally ```bash git log --oneline -n 5 git status +./scripts/pii-scan.sh ``` -If you have scanning tools (e.g., `gitleaks`), re-run them here. +If you have scanning tools (for example `gitleaks`), re-run them here. Also +re-run any focused `grep` or `git log -S` checks documented in +`docs/security/PII_AUDIT_LOG.md`. -### 4) Force-push rewritten history +### 5) Force-push rewritten history ```bash git push --force --all origin git push --force --tags origin ``` -### 5) Collaborator re-clone steps (send to team) +### 6) Collaborator re-clone steps (send to team) ```bash rm -rf <REPO_DIR> @@ -85,10 +123,22 @@ git clean -fdx ## Notes - The backup branch preserves the pre-rewrite history for recovery. +- The bundle preserves an offline copy of all refs in case the remote or local + clone is damaged during the rewrite. - If the repo is protected by branch rules, temporarily relax them for the force-push, then restore immediately after. - Consider updating any references to old SHAs in tickets or docs. 
+## Suggested Freeze Message + +Send this before the maintenance window: + +```text +PII history rewrite scheduled for <DATE/TIME>. During the window, do not push to the repo. +All collaborators must re-clone after the force-push completes. A backup branch and bundle +will be created before the rewrite starts. +``` + ## Rollback Plan If the rewrite fails or data integrity is compromised: @@ -99,4 +149,6 @@ git checkout backup/pre_pii_rewrite_YYYYMMDD git push --force origin backup/pre_pii_rewrite_YYYYMMDD:main ``` -Coordinate rollback with the same level of caution and re-clone steps. +If the remote or local clone is badly damaged, restore from the bundle created +in Step 1, then coordinate rollback with the same level of caution and re-clone +steps. diff --git a/docs/security/PII_AUDIT_LOG.md b/docs/security/PII_AUDIT_LOG.md index b0138f148..5e706e79a 100644 --- a/docs/security/PII_AUDIT_LOG.md +++ b/docs/security/PII_AUDIT_LOG.md @@ -197,15 +197,18 @@ When performing a new audit, add an entry below with: - `20260114_half_day_tables.py` - `20260114_sm_constraints.py` -Requires BFG history rewrite to fully resolve. Plan documented in `docs/planning/PII_HISTORY_PURGE_PLAN.md`. +Requires a coordinated `git filter-repo` history rewrite to fully resolve. Use +`docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md` and +`scripts/pii_history_rewrite_preflight.sh`. 
### Remediation Created - `scripts/pii-scan.sh` — pre-commit hook with KNOWN_NAMES list (29 surnames) - `scripts/sanitize_pii.py` — reversible data sanitization tool (stderr warnings added) -- `scripts/cleanup_pii_history.sh` — git history cleanup helper +- `scripts/cleanup_pii_history.sh` — `git filter-repo` helper template - `.github/workflows/pii-scan.yml` — weekly + PR-triggered automated scans - `docs/security/PII_SANITIZATION_SOP.md` — full sanitization workflow -- `docs/planning/PII_HISTORY_PURGE_PLAN.md` — BFG execution plan for history cleanup +- `docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md` — live history-rewrite runbook +- `scripts/pii_history_rewrite_preflight.sh` — non-destructive rewrite readiness check --- @@ -254,6 +257,7 @@ Requires BFG history rewrite to fully resolve. Plan documented in `docs/planning 3. **`#` is not PII**: Regex-based tools need careful pattern tuning 4. **Bundle restores are fast** but lose branches, reflog, stash, and gitignored files 5. **DB is independent**: Bundle restore doesn't affect PostgreSQL; may need migration stamp fix +6. **Use the live preflight + runbook pair**: `scripts/pii_history_rewrite_preflight.sh` and `docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md` --- @@ -263,7 +267,7 @@ If PII is discovered in the repository: 1. **Document** the finding in this log 2. **Remove** immediately (see DATA_SECURITY_POLICY.md) -3. **Scrub history** if in prior commits (use BFG Repo Cleaner) +3. **Scrub history** if in prior commits (use the `git filter-repo` workflow in `docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md`) 4. **Notify** project leads 5. 
**Determine scope** using this audit log to identify the exposure window diff --git a/scripts/cleanup_pii_history.sh b/scripts/cleanup_pii_history.sh index 1e08e289f..b4dc5846c 100644 --- a/scripts/cleanup_pii_history.sh +++ b/scripts/cleanup_pii_history.sh @@ -4,6 +4,8 @@ set -euo pipefail echo "PII history rewrite helper (DO NOT RUN WITHOUT COORDINATION)" echo "This script will rewrite git history using git filter-repo." echo "Ensure backup branch exists and collaborators are notified." +echo "Run ./scripts/pii_history_rewrite_preflight.sh first." +echo "Primary reference: docs/operations/PII_HISTORY_REWRITE_RUNBOOK.md" echo # Example 1: Remove a sensitive file by path (edit as needed) diff --git a/scripts/pii_history_rewrite_preflight.sh b/scripts/pii_history_rewrite_preflight.sh new file mode 100755 index 000000000..217c268b7 --- /dev/null +++ b/scripts/pii_history_rewrite_preflight.sh @@ -0,0 +1,99 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +cd "$REPO_ROOT" + +timestamp="$(date +%Y%m%dT%H%M%S)" +backup_branch="backup/pre_pii_rewrite_${timestamp}" +backup_bundle="aapm-pre-pii-rewrite-${timestamp}.bundle" + +say() { + printf '%s\n' "$*" +} + +warn() { + printf 'WARNING: %s\n' "$*" >&2 +} + +require_cmd() { + if ! command -v "$1" >/dev/null 2>&1; then + warn "missing required command: $1" + if [[ "$1" == "git-filter-repo" ]]; then + warn "install with: brew install git-filter-repo" + warn "or: pipx install git-filter-repo" + fi + return 1 + fi +} + +status=0 + +say "PII history rewrite preflight" +say "This script is non-destructive. It does not rewrite history or push anything." 
+say + +require_cmd git || status=1 +require_cmd git-filter-repo || status=1 + +origin_url="$(git remote get-url origin 2>/dev/null || true)" +if [[ -z "$origin_url" ]]; then + warn "origin remote is missing" + status=1 +fi + +working_tree="$(git status --porcelain)" +if [[ -n "$working_tree" ]]; then + warn "working tree is not clean; commit or stash changes before a history rewrite" + status=1 +fi + +current_branch="$(git rev-parse --abbrev-ref HEAD)" +head_sha="$(git rev-parse --short HEAD)" +origin_main_sha="$(git rev-parse --short origin/main 2>/dev/null || echo "unavailable")" + +say "Current branch: ${current_branch}" +say "HEAD: ${head_sha}" +say "origin/main: ${origin_main_sha}" +say "origin: ${origin_url:-missing}" +say + +say "Suggested backup artifacts:" +say " backup branch: ${backup_branch}" +say " bundle file: ${backup_bundle}" +say + +cat <