diff --git a/scripts/lib/skill-generator.js b/scripts/lib/skill-generator.js index de48214..e15eb9a 100644 --- a/scripts/lib/skill-generator.js +++ b/scripts/lib/skill-generator.js @@ -485,6 +485,13 @@ async function generateSkill({ } } + // Copy a sibling checks.json from the skill source dir to the skill root, if present. + // The audit runner reads `.claude/skills/{skillId}/checks.json` to enroll discoverable specialists' checks via audit_add_checks. + const sourceChecksFile = path.join(configDir, 'skills', ...skill._group.split('/'), 'checks.json'); + if (fs.existsSync(sourceChecksFile)) { + fs.copyFileSync(sourceChecksFile, path.join(skillDir, 'checks.json')); + } + // Copy local markdown references from a source references/ directory, if present. // Group config injects a shared `preamble`; per-file `next_step` frontmatter drives continuation links. const sourceReferencesDir = path.join(configDir, 'skills', ...skill._group.split('/'), 'references'); diff --git a/scripts/lib/tests/skill-config-loader.test.js b/scripts/lib/tests/skill-config-loader.test.js index 42ce05d..dc69917 100644 --- a/scripts/lib/tests/skill-config-loader.test.js +++ b/scripts/lib/tests/skill-config-loader.test.js @@ -120,6 +120,50 @@ describe('loadSkillsConfig', () => { expect(Object.keys(config)).toEqual(['has-config']); }); + it('exposes audit subagents as separate skills alongside the runner', () => { + createFixture({ + skills: { + audit: { + 'config.yaml': yaml.dump({ + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'PostHog audit' }], + }), + subagents: { + identification: { + 'config.yaml': yaml.dump({ + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit — identification' }], + }), + }, + 'event-capture': { + 'config.yaml': yaml.dump({ + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit — event capture' }], + }), + }, + 'web-analytics': { + 'config.yaml': 
yaml.dump({ + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit — web analytics' }], + }), + }, + }, + }, + }, + }, tmpDir); + const config = loadSkillsConfig(tmpDir); + expect(Object.keys(config).sort()).toEqual([ + 'audit', + 'audit/subagents/event-capture', + 'audit/subagents/identification', + 'audit/subagents/web-analytics', + ]); + }); + it('handles flat and nested siblings', () => { createFixture({ skills: { diff --git a/scripts/lib/tests/skill-generator-references-folder.test.js b/scripts/lib/tests/skill-generator-references-folder.test.js index f7dca6b..854d994 100644 --- a/scripts/lib/tests/skill-generator-references-folder.test.js +++ b/scripts/lib/tests/skill-generator-references-folder.test.js @@ -70,4 +70,82 @@ describe('generateSkill local references', () => { expect(readFileSync(generatedRef, 'utf8')).toBe('# Product analytics best practices\n\nDetails'); expect(readFileSync(generatedSkill, 'utf8')).toContain('references/product-analytics.md'); }); + + it('copies a sibling checks.json into the generated skill root', async () => { + const checksContent = JSON.stringify([ + { id: 'sample-check', area: 'Sample', label: 'Sample check' }, + ], null, 2); + createFixture({ + skills: { + 'audit-subagent': { + 'description.md': '# {display_name}', + 'checks.json': checksContent, + }, + }, + }, tmpDir); + + const config = { + 'audit-subagent': { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit subagent' }], + }, + }; + + const skill = expandSkillGroups(config, tmpDir)[0]; + const outputDir = join(tmpDir, 'out'); + + await generateSkill({ + skill, + version: 'test', + repoRoot: tmpDir, + configDir: tmpDir, + outputDir, + skipPatterns: { global: [], examples: {} }, + commandmentsConfig: { commandments: {} }, + skillTemplate: skill._template, + sharedDocs: skill._sharedDocs || [], + workflows: [], + }); + + const generatedChecks = join(outputDir, 'audit-subagent', 
'checks.json'); + expect(existsSync(generatedChecks)).toBe(true); + expect(readFileSync(generatedChecks, 'utf8')).toBe(checksContent); + }); + + it('omits checks.json when source skill has none', async () => { + createFixture({ + skills: { + integration: { + 'description.md': '# {display_name}', + }, + }, + }, tmpDir); + + const config = { + integration: { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Integration' }], + }, + }; + + const skill = expandSkillGroups(config, tmpDir)[0]; + const outputDir = join(tmpDir, 'out'); + + await generateSkill({ + skill, + version: 'test', + repoRoot: tmpDir, + configDir: tmpDir, + outputDir, + skipPatterns: { global: [], examples: {} }, + commandmentsConfig: { commandments: {} }, + skillTemplate: skill._template, + sharedDocs: skill._sharedDocs || [], + workflows: [], + }); + + expect(existsSync(join(outputDir, 'integration', 'checks.json'))).toBe(false); + }); }); diff --git a/scripts/lib/tests/skill-group-expander.test.js b/scripts/lib/tests/skill-group-expander.test.js index cb0a406..4d3c721 100644 --- a/scripts/lib/tests/skill-group-expander.test.js +++ b/scripts/lib/tests/skill-group-expander.test.js @@ -224,6 +224,57 @@ describe('expandSkillGroups', () => { expect(skills[0]._examplePaths).toEqual(['basics/django']); }); + it('produces audit + audit-subagents-* ids for the runner+specialists layout', () => { + createFixture({ + skills: { + audit: { + 'description.md': '# {display_name}', + subagents: { + identification: { 'description.md': '# {display_name}' }, + 'event-capture': { 'description.md': '# {display_name}' }, + 'web-analytics': { 'description.md': '# {display_name}' }, + }, + }, + }, + }, tmpDir); + const config = { + audit: { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'PostHog audit' }], + }, + 'audit/subagents/identification': { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', 
display_name: 'Audit — identification' }], + }, + 'audit/subagents/event-capture': { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit — event capture' }], + }, + 'audit/subagents/web-analytics': { + type: 'docs-only', + template: 'description.md', + variants: [{ id: 'all', display_name: 'Audit — web analytics' }], + }, + }; + const skills = expandSkillGroups(config, tmpDir); + expect(skills.map(s => s.id).sort()).toEqual([ + 'audit', + 'audit-subagents-event-capture', + 'audit-subagents-identification', + 'audit-subagents-web-analytics', + ]); + const runner = skills.find(s => s.id === 'audit'); + expect(runner._category).toBe('audit'); + expect(runner._topic).toBeNull(); + const ident = skills.find(s => s.id === 'audit-subagents-identification'); + expect(ident._category).toBe('audit'); + expect(ident._topic).toBe('subagents/identification'); + expect(ident._group).toBe('audit/subagents/identification'); + }); + it('defaults _examplePaths to empty array when not specified', () => { createFixture({ skills: { diff --git a/transformation-config/skills/audit/config.yaml b/transformation-config/skills/audit/config.yaml index 0dd7276..0715a77 100644 --- a/transformation-config/skills/audit/config.yaml +++ b/transformation-config/skills/audit/config.yaml @@ -1,6 +1,6 @@ type: docs-only template: description.md -description: Audit an existing PostHog integration for correctness and best practices +description: Audit an existing PostHog integration for correctness and best practices, dispatching specialist subagents in parallel tags: [best-practices] references: preamble: "**Read ONLY this file.** Do not read any other reference file until this one tells you to." 
diff --git a/transformation-config/skills/audit/description.md b/transformation-config/skills/audit/description.md index 278ab37..094cb8e 100644 --- a/transformation-config/skills/audit/description.md +++ b/transformation-config/skills/audit/description.md @@ -1,21 +1,57 @@ # PostHog Audit -This skill audits an existing PostHog integration for **data integrity** in event capture and identification. **Read-only** — the only file you create is the final audit report. +This skill audits an existing PostHog integration for **data integrity** in installation, identification, event capture, and (when applicable) deeper areas like web analytics, feature flags, experiments, LLM analytics, and error tracking. **Read-only** — the only file you create is the final audit report. -Perform the checks described in the referenced skills and only the events referenced in the skills. +Perform the checks described in the referenced step files and only the events referenced there. ## Workflow -The audit runs as a 5-step chain: Installation (SDK + version) → init correctness → identification → event capture → report. Each step file ends with a pointer to the next. Follow them in the order they are written. You must resolve them in order before any source-tree exploration. +The audit runs as a 6-step chain: -The audit ledger is already seeded with the 10 pending checks. Use `mcp__wizard-tools__audit_resolve_checks` to patch each one as you finish it. +1. SDK + version (`references/1-version.md`) +2. Init correctness (`references/2-init.md`) +3. Identification — dispatches `audit-subagents-identification`, waits for it to resolve all 4 identification checks (`references/3-identification.md`) +4. Event capture — dispatches `audit-subagents-event-capture`, waits for it to resolve all 3 event-capture checks (`references/4-event-capture.md`) +5. 
Dispatch agent + discoverable specialists — the dispatch agent picks which discoverable specialists to run, then the runner enrolls their checks via `audit_add_checks` and fans them out (`references/5-discoverable-dispatch.md`) +6. Read ledger + write report (`references/6-report.md`) + +Each step file ends with a pointer to the next. Follow them in the order they are written. Resolve each step's checks before moving to the next. Identification (Step 3) must be fully resolved before Event Capture (Step 4) is dispatched, and both must be fully resolved before the dispatch agent runs. + +The audit ledger is seeded by the wizard with 10 pending checks (3 install/init + 4 identification + 3 event-capture). Step 5 may add more checks via `audit_add_checks` for discoverable specialists. Use `mcp__wizard-tools__audit_resolve_checks` to patch each check as it's evaluated. Specialists you dispatch own their own checks — they call `audit_resolve_checks` themselves; you do not patch on their behalf. **Start by reading the path relative to this file at `references/1-version.md`.** Do not Glob, ls, or find the skill directory. Do not preload future steps. Do not re-read a step file once you've moved past it. Do not re-read SKILL.md. -`ToolSearch` is only for loading a tool by exact name when the SDK has it deferred (e.g. `select:Grep`). Do **not** use it to browse for other tools — every tool the audit needs (`Glob`, `Grep`, `Read`, `Write`, `Bash`, and the named `mcp__wizard-tools__audit_*` tools) is already named in this skill. +`ToolSearch` is only for loading a tool by exact name when the SDK has it deferred (e.g. `select:Grep`). Do **not** use it to browse for other tools — every tool the audit needs (`Glob`, `Grep`, `Read`, `Write`, `Bash`, `Task`, and the named `mcp__wizard-tools__*` tools) is already named in this skill or its step files. **Do not call `TodoWrite`.** The audit doesn't track its own task list — progress comes from the audit ledger plus `[STATUS]` lines. 
+## Specialist registries + +### Always-on (Steps 3 + 4, pre-seeded by wizard) + +| Specialist | Step | Skill ID | +|---|---|---| +| identification | 3 | `audit-subagents-identification` | +| event-capture | 4 | `audit-subagents-event-capture` | + +### Discoverable (Step 5, gated by the dispatch agent) + +| Area | Skill ID | +|---|---| +| Web Analytics | `audit-subagents-web-analytics` | +| Feature Flags | `audit-subagents-feature-flags` | +| Experiments | `audit-subagents-experiments` | +| LLM Analytics | `audit-subagents-llm-analytics` | +| Error Tracking | `audit-subagents-error-tracking` | + +The dispatch agent itself: `audit-subagents-dispatch`. + +### Selection overrides + +The wizard's `--only` and `--skip` flags inject a "Specialist selection (override defaults):" block into this prompt: +- `--skip` removes specific basic specialists from Step 3 or Step 4 (their pre-seeded checks must still be patched — mirror them as `{ status: "warning", details: "skipped: suppressed by --skip" }` in that step). +- `--only=identification,event-capture` (or any selection block that excludes the dispatch agent) **suppresses Step 5 entirely** — no dispatch agent spawn, no second wave. + ## Live activity — `[STATUS]` The "Working on …" banner reads from `[STATUS]` lines you emit in plain text. Whenever you start a new sub-step, write a line like: @@ -30,14 +66,15 @@ The wizard intercepts these and updates the spinner. Use them freely — they ar The ledger lives at `.posthog-audit-checks.json` and is rendered live in the "Audit plan" tab. It is owned by MCP tools — **never `Write` this file directly**: -- `mcp__wizard-tools__audit_resolve_checks({ updates })` — patch one or more checks by `id`. Each `update` is `{ id, status, file?, details? }`. Batch updates from the same step into a single call. +- `mcp__wizard-tools__audit_resolve_checks({ updates })` — patch one or more checks by `id`. Each `update` is `{ id, status, file?, details? }`. 
Batch updates from the same step into a single call where possible. +- `mcp__wizard-tools__audit_add_checks()` — Step 5 only, to enroll discoverable specialists' checks before dispatching them. -All audit ledger calls are atomic and serialize internally — **concurrent calls from parallel subagents cannot lose updates**, so feel free to fan out runtime checks across `Task` subagents when a step says so. +All audit ledger calls are atomic and serialize internally — **concurrent calls from parallel subagents cannot lose updates**, so feel free to fan out runtime checks across `Task` subagents. ### Check entry shape - `id` — stable kebab-case slug. Reuse the existing seeded ids exactly when calling `audit_resolve_checks`. -- `area` — short group name. The current core workflow uses `Installation`, `Identification`, and `Event Capture`. +- `area` — short group name. The current core workflow uses `Installation`, `Identification`, `Event Capture`, plus whatever areas Step 5 adds (`Web Analytics`, `Feature Flags`, etc.). - `label` — short human name. - `status` — `pending` | `pass` | `error` | `warning` | `suggestion`. - `file` — optional `path:line` for findings tied to a location. @@ -54,13 +91,14 @@ After the report is written (Step 5), delete `.posthog-audit-checks.json`. ## Key principles - **Read-only**: Do not edit project source files. The only file you create is the audit report. -- **Evidence-based**: Reference specific `file:line` for every non-pass finding. +- **Evidence-based**: Reference specific `file:line` for every non-pass finding (or hosts/evidence for query-driven specialists). - **Actionable**: Every finding states what to fix and how. +- **One report**: Specialists do not write reports of their own. They write to the ledger; the runner reads the ledger and writes the single report in Step 6. ## Abort statuses Report abort states with `[ABORT]` prefixed messages. The wizard catches these and terminates the run — do not halt yourself. 
-- No PostHog SDK found +- `[ABORT] No PostHog SDK found` ## Framework guidelines diff --git a/transformation-config/skills/audit/references/3-identification.md b/transformation-config/skills/audit/references/3-identification.md index 001a004..8c9c1b9 100644 --- a/transformation-config/skills/audit/references/3-identification.md +++ b/transformation-config/skills/audit/references/3-identification.md @@ -4,14 +4,16 @@ next_step: 4-event-capture.md # Step 3 — Identification -This step resolves four identification checks **in parallel**, one subagent per check: +This step dispatches the **identification specialist** as a single `Task` and waits for it to fully resolve all four identification checks before continuing. + +The specialist owns its own internal fan-out (4 nested rule subagents in parallel). Each rule subagent emits one `audit_resolve_checks` call for its single check id: - `identify-stable-distinct-id` - `identify-not-late` - `cross-runtime-distinct-id` - `identify-reset-on-logout` -Each subagent owns its own grep, reads, evaluates its single rule, and emits one `audit_resolve_checks` call with one update. The ledger's mutex serializes concurrent writes — there's no race. +The ledger's mutex serializes concurrent writes — there's no race. ## Status @@ -21,96 +23,19 @@ Emit before dispatching: [STATUS] Auditing identification ``` -## Action — dispatch four subagents in one message - -Make **four `Task` tool calls in a single message** so they run concurrently. Wait for all four to return, then continue to `4-event-capture.md`. Do not run any other tools between dispatch and the next step. - -The bundled `identify-users.md` reference holds PostHog's authoritative guidance on `distinct_id`, `identify()` ordering, and cross-runtime identity. It's typically at `.claude/skills/audit/references/identify-users.md`; if that path doesn't exist, discover it with `Glob` `**/skills/audit/references/identify-users.md`. Each subagent reads it once before judging. 
- -### Task A — `identify-stable-distinct-id` - -`description`: `Audit identify-stable-distinct-id` - -`prompt`: -``` -You are an audit subagent. Resolve exactly one rule and return: identify-stable-distinct-id. - -Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). +## Action — dispatch the identification specialist -Run **one** Grep: `posthog\.identify\(`. Read each file that contains a hit, once. Inspect the first argument passed to identify(). +Make **one `Task` tool call** to `audit-subagents-identification`: -Rule: -- distinct_id must be a stable identifier (auth user id, account id), not a session UUID, ephemeral cookie, or device-only id. -- pass: sources from authenticated user (session.user.id, auth.uid(), etc.) -- error: sources from a session, request, or device id that resets -- warning: source unclear — flag for human review - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-stable-distinct-id`, including `file` (path:line) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. -``` - -### Task B — `identify-not-late` - -`description`: `Audit identify-not-late` +`description`: `Audit identification` `prompt`: ``` -You are an audit subagent. Resolve exactly one rule and return: identify-not-late. - -Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). - -Run **two** Greps in parallel: -- `posthog\.identify\(` — where identity is established -- `posthog\.capture\(|getFeatureFlag\(|isFeatureEnabled\(` — where captures and flag evals happen - -Read each file that contains a hit, once. 
Compare the timing/ordering of identify() against the surrounding capture / flag-eval calls. - -Rule: -- identify() must be called before any posthog.capture for that user, and before any feature-flag eval depending on user identity. -- pass: identify runs at session start / right after login. Captures and flag evals come after. -- warning: identify runs lazily (e.g. settings-page mount), so early captures and flag evals are anonymous. - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-not-late`, including `file` (path:line of the identify call) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +Follow the skill at `.claude/skills/audit-subagents-identification/SKILL.md`. Run all of its checks against the user's project. Resolve each check via `mcp__wizard-tools__audit_resolve_checks` as you finish it. Do not write files. Return a one-line summary when done. ``` -### Task C — `cross-runtime-distinct-id` +Wait for it to return before continuing to Step 4. Do not run any other tools between dispatch and the next step. -`description`: `Audit cross-runtime-distinct-id` +## Selection overrides -`prompt`: -``` -You are an audit subagent. Resolve exactly one rule and return: cross-runtime-distinct-id. - -Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). - -Run **one** Grep: `posthog\.init\(|new PostHog\(|posthog\.Posthog\(|Posthog\(` — locate every PostHog initialization across runtimes. Read each file that contains a hit, once. Determine whether both client and server runtimes initialize PostHog, and if so, how distinct_id flows between them. - -Rule: -- If both client and server runtimes call PostHog, the same distinct_id must be used on both sides for the same user. 
-- pass: server-side captures source the client's distinct_id (cookie, session token, or explicit hand-off). -- error: server-side captures use a different identifier scheme. -- Skip (`pass` with details: "single runtime"): only one runtime initializes PostHog. - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `cross-runtime-distinct-id`, including `file` (path:line of the most relevant init or capture site) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. -``` - -### Task D — `identify-reset-on-logout` - -`description`: `Audit identify-reset-on-logout` - -`prompt`: -``` -You are an audit subagent. Resolve exactly one rule and return: identify-reset-on-logout. - -Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). - -Locate logout, sign-out, and account-switching flows by issuing whatever `Grep` and `Read` calls are needed in parallel. Determine whether those flows clear PostHog state with `posthog.reset()`. - -Rule: -- Logout or account-switching flows should call `posthog.reset()`. Without a reset, when user B logs in on the same device after user A, PostHog's anonymous ID is shared and the next `identify()` can merge both accounts into one person. -- pass: every detected logout/account-switch flow calls `posthog.reset()`. -- error: a logout/account-switch flow is missing `posthog.reset()`. -- Skip (`pass` with details: "no logout/account-switch flow found"): no detectable logout/account-switch flow exists. -- note: `posthog.reset(true)` is valid when a completely clean device ID reset is required. - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-reset-on-logout`, including `file` (path:line of the most relevant logout or reset site) and `details` (one-line explanation). 
Return when the call completes. Do not write the audit report. -``` +If the wizard's prompt contains a "Specialist selection (override defaults):" block listing identification as suppressed, do **not** dispatch the specialist. Instead, emit one `mcp__wizard-tools__audit_resolve_checks` call patching all four identification check ids with `{ status: "warning", details: "skipped: suppressed by --skip" }`, then continue to Step 4. diff --git a/transformation-config/skills/audit/references/4-event-capture.md b/transformation-config/skills/audit/references/4-event-capture.md index 213da81..2e9a757 100644 --- a/transformation-config/skills/audit/references/4-event-capture.md +++ b/transformation-config/skills/audit/references/4-event-capture.md @@ -1,16 +1,18 @@ --- -next_step: 5-report.md +next_step: 5-discoverable-dispatch.md --- # Step 4 — Event capture -This step resolves three event-capture checks **in parallel**, one subagent per check: +This step dispatches the **event-capture specialist** as a single `Task` and waits for it to fully resolve all three event-capture checks before continuing. + +The specialist owns its own internal fan-out (3 nested rule subagents in parallel). Each rule subagent emits one `audit_resolve_checks` call for its single check id: - `capture-event-names-static` - `capture-uses-proxy` - `capture-growth-events` -Each subagent owns its own grep, reads, evaluates its single rule, and emits one `audit_resolve_checks` call with one update. The ledger's mutex serializes concurrent writes. +The ledger's mutex serializes concurrent writes — there's no race. ## Status @@ -20,74 +22,19 @@ Emit before dispatching: [STATUS] Auditing event capture ``` -## Action — dispatch three subagents in one message - -Make **three `Task` tool calls in a single message** so they run concurrently. Wait for all three to return, then continue to `5-report.md`. Do not run any other tools between dispatch and the next step. 
+## Action — dispatch the event-capture specialist -The bundled `best-practices.md` reference holds PostHog's authoritative guidance on event-name shape, reverse-proxy setup, and growth-event coverage. It's typically at `.claude/skills/audit/references/best-practices.md`; if that path doesn't exist, discover it with `Glob` `**/skills/audit/references/best-practices.md`. Each subagent reads it once before judging. +Make **one `Task` tool call** to `audit-subagents-event-capture`: -### Task A — `capture-event-names-static` - -`description`: `Audit capture-event-names-static` +`description`: `Audit event capture` `prompt`: ``` -You are an audit subagent. Resolve exactly one rule and return: capture-event-names-static. - -Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). - -Run **one** Grep: `posthog\.capture\(`. Read each file that contains a hit, once. Inspect the first argument of every capture() call. - -Rule: -- Event names in posthog.capture("name", …) must be static strings, not template literals or dynamic variables. -- pass: all capture calls use string literals. -- error: any call uses a template literal or variable as the event name. - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-event-names-static`, including `file` (path:line of the first violation if any, otherwise of a representative capture call) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +Follow the skill at `.claude/skills/audit-subagents-event-capture/SKILL.md`. Run all of its checks against the user's project. Resolve each check via `mcp__wizard-tools__audit_resolve_checks` as you finish it. Do not write files. Return a one-line summary when done. 
``` -### Task B — `capture-uses-proxy` - -`description`: `Audit capture-uses-proxy` - -`prompt`: -``` -You are an audit subagent. Resolve exactly one rule and return: capture-uses-proxy. - -Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). - -Run **one** Grep: `api_host`. Read each file that contains a hit, once. Determine the configured ingest host the SDK posts to, and whether any browser runtime initializes PostHog at all. +Wait for it to return before continuing to Step 5. Do not run any other tools between dispatch and the next step. -Rule: -- A reverse proxy fronts PostHog's ingest endpoint via `api_host`, so events keep flowing when ad/tracking blockers would otherwise drop them. Without one, a meaningful share of browser captures never reach PostHog. -- pass: `api_host` resolves to a first-party domain on the project's own infra (e.g. `e.example.com`, `posthog.example.com`, `/ingest`-style same-origin path, or a known proxy SaaS like `app.example.com/relay-...`). -- warning: `api_host` is the default PostHog host (`https://us.i.posthog.com`, `https://eu.i.posthog.com`, `https://app.posthog.com`, or omitted entirely so the SDK default applies). -- Skip (`pass` with details: "server-only SDK"): only server-side runtimes init PostHog — a proxy isn't needed when no browser sends captures. +## Selection overrides -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-uses-proxy`, including `file` (path:line of the init that sets api_host) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. -``` - -### Task C — `capture-growth-events` - -`description`: `Audit capture-growth-events` - -`prompt`: -``` -You are an audit subagent. Resolve exactly one rule and return: capture-growth-events. 
- -Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). - -Run **two** Greps in parallel: -- `posthog\.capture\(` — explicit capture calls -- `signup|signin|register|checkout|purchase|subscribe|onboard` — likely growth-funnel surfaces - -Read each file that contains a hit, once. Cross-reference: do the growth-funnel surfaces actually emit explicit capture calls? - -Rule: -- Signup, activation/first-key-action, and purchase/subscription should be tracked explicitly. Autocapture isn't enough for funnels. -- pass: at least signup + one activation + (purchase or subscribe) are captured explicitly. -- warning: one or more growth events missing — list which. -- Skip (`pass` with details: "no auth/billing paths detected"): no detectable signup/billing surfaces. - -Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-growth-events`, including `file` (path:line of the most relevant capture or growth-surface site) and `details` (one-line explanation, listing missing growth events when applicable). Return when the call completes. Do not write the audit report. -``` +If the wizard's prompt contains a "Specialist selection (override defaults):" block listing event-capture as suppressed, do **not** dispatch the specialist. Instead, emit one `mcp__wizard-tools__audit_resolve_checks` call patching all three event-capture check ids with `{ status: "warning", details: "skipped: suppressed by --skip" }`, then continue to Step 5. 
diff --git a/transformation-config/skills/audit/references/5-discoverable-dispatch.md b/transformation-config/skills/audit/references/5-discoverable-dispatch.md new file mode 100644 index 0000000..16f224d --- /dev/null +++ b/transformation-config/skills/audit/references/5-discoverable-dispatch.md @@ -0,0 +1,51 @@ +--- +next_step: 6-report.md +--- + +# Step 5 — Dispatch agent + discoverable specialists + +This step **always runs** unless the wizard injected a "Specialist selection (override defaults):" block into the runner's prompt that explicitly excludes the dispatch agent. The default behavior — including for plain `posthog-wizard audit` invocations with no flags — is to execute Action 1 + Action 2 below. + +The dispatch agent (`audit-subagents-dispatch`) decides which discoverable specialists are appropriate for this project. The runner then enrolls each picked specialist's checks into the ledger via `audit_add_checks` and fans them out as Tasks. Each dispatched specialist owns its checks: it greps, evaluates, and patches the ledger via `audit_resolve_checks`. + +## Status + +Emit before dispatching: + +``` +[STATUS] Asking dispatch agent which specialists to run +``` + +## Action 1 — dispatch the dispatch agent + +Fire one `Task` call to `audit-subagents-dispatch`. + +`description`: `Audit dispatch — pick discoverable specialists` + +`prompt`: + +``` +Follow the skill at `.claude/skills/audit-subagents-dispatch/SKILL.md`. +Decide which discoverable audit specialists should run against this project. +End your turn with a single fenced ```json``` block per the output shape documented in your skill (`{ specialist: "dispatch", schemaVersion, auditedAt, dispatch[], skipped[] }`). +You are the only specialist that returns JSON in chat — your output is a control signal the runner parses to choose what to dispatch next. Do not call `audit_resolve_checks` or `audit_add_checks`. +``` + +Wait for it to return. 
Locate the **last** fenced ```json``` block in the result and parse it. If parsing fails, treat `dispatch[]` as empty and continue. + +## Action 2 — fan out the second wave + +For each entry in the dispatch agent's `dispatch[]`: + +1. `mcp__wizard-tools__install_skill({ skillId })` +2. `Read .claude/skills/<skillId>/checks.json` — an array of `{ id, area, label }` objects. +3. `mcp__wizard-tools__audit_add_checks(<array from step 2>)` — enrolls them in the ledger as `pending` so the wizard UI surfaces them mid-run. +4. Fire one `Task` call dispatching the specialist. Use the standard dispatch prompt (substitute `<skillId>`): + + > Follow the skill at `.claude/skills/<skillId>/SKILL.md`. Run all of its checks against the user's project. Resolve each check via `mcp__wizard-tools__audit_resolve_checks` as you finish it. Do not write files. Return a one-line summary when done. + +Issue all four sub-steps as one batched message per specialist where the harness allows (`install_skill`, `Read`, `audit_add_checks`, `Task` together). Across specialists, the second-wave Tasks themselves can run concurrently — fire them all in one message after the per-specialist setup completes. + +If `dispatch[]` is empty, skip the second-wave fan-out (no specialists to dispatch). The dispatch agent's own work in Action 1 is still required — it must run. + +Wait for every spawned second-wave Task to return before continuing to Step 6. The ledger's mutex serializes concurrent writes — there is no race when multiple specialists patch the ledger at the same time.
diff --git a/transformation-config/skills/audit/references/5-report.md b/transformation-config/skills/audit/references/6-report.md similarity index 78% rename from transformation-config/skills/audit/references/5-report.md rename to transformation-config/skills/audit/references/6-report.md index 129f651..05b80fa 100644 --- a/transformation-config/skills/audit/references/5-report.md +++ b/transformation-config/skills/audit/references/6-report.md @@ -2,9 +2,9 @@ next_step: null --- -# Step 5 — Generate the audit report +# Step 6 — Generate the audit report -The audit report is rendered **directly from `.posthog-audit-checks.json`** — that file is the source of truth. Every check the wizard seeded ends up in the report, even passes; nothing is invented. +The audit report is rendered **directly from `.posthog-audit-checks.json`** — that file is the source of truth. Every check the wizard seeded plus every check the runner added via `audit_add_checks` in Step 5 ends up in the report, even passes; nothing is invented. ## Status @@ -18,6 +18,8 @@ Emit: `Read` the ledger once, then transform every entry into the report below. Use `area`, `label`, `status`, `file`, and `details` from each entry verbatim where the report calls for them. +If any ledger entry is still `pending` at this point (a specialist Task crashed or never resolved its check), patch it via `mcp__wizard-tools__audit_resolve_checks` to `{ status: "warning", details: "specialist did not complete" }` before reading the ledger again — every reportable entry must have a terminal status. + `Write` `posthog-audit-report.md` at the project root with the structure shown below. After the report is written, delete `.posthog-audit-checks.json`. The report has four sections in this order: @@ -27,7 +29,7 @@ The report has four sections in this order: 3. **Full audit** — every check the wizard ran, grouped by `area`, including passes. 4. 
**About this audit** — a short closing block explaining what the audit covered and how to interpret the report. -For the Full audit section, group rows dynamically by each distinct `area` value in the ledger, preserving first-seen area order from the JSON. Today the core audit produces three areas — **Installation**, **Identification**, **Event Capture** — but the report must not hard-code that list; render whatever areas appear. +For the Full audit section, group rows dynamically by each distinct `area` value in the ledger, preserving first-seen area order from the JSON. The core audit produces three areas — **Installation**, **Identification**, **Event Capture** — plus whichever areas the second-wave specialists contributed (`Web Analytics`, `Feature Flags`, …). The report must not hard-code that list; render whatever areas appear. For each area, write a one-paragraph framing immediately under the area heading, then the table. Use the canonical copy below verbatim when the area name matches; otherwise write a one-sentence summary derived from the area's check labels. @@ -83,7 +85,7 @@ If there are no actions, write `_Nothing to fix._`. ## About this audit -The PostHog wizard runs a five-stage chain: SDK installation → init correctness → identification → event capture → this report. Each stage resolves one or more checks against the project's source tree, recording every result — pass or otherwise — in the ledger this report was generated from. +The PostHog wizard runs a stepped chain: SDK installation → init correctness → identification → event capture → dispatch agent + discoverable specialists (web analytics, feature flags, experiments, LLM analytics, error tracking — each gated by the dispatch agent based on actual usage in the project) → this report. Each stage resolves one or more checks against the project's source tree, recording every result — pass or otherwise — in the ledger this report was generated from. 
- `error` items break correctness now (events lost, identity broken). Fix first. - `warning` items work today but cause subtle data-quality bugs. Fix when convenient. diff --git a/transformation-config/skills/audit/subagents/dispatch/config.yaml b/transformation-config/skills/audit/subagents/dispatch/config.yaml new file mode 100644 index 0000000..9ae642b --- /dev/null +++ b/transformation-config/skills/audit/subagents/dispatch/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit dispatch subagent — picks which discoverable specialists to run based on wizard-supplied audit areas + actual project usage signals (read-only, returns JSON dispatch list) +tags: [audit-subagent] +shared_docs: [] +variants: + - id: all + display_name: Audit — dispatch + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/dispatch/description.md b/transformation-config/skills/audit/subagents/dispatch/description.md new file mode 100644 index 0000000..37a8b1f --- /dev/null +++ b/transformation-config/skills/audit/subagents/dispatch/description.md @@ -0,0 +1,91 @@ +# Audit specialist — Dispatch + +Read-only meta-specialist. Decide which **discoverable** audit specialists should run for this project. You combine two inputs: (a) the wizard's authoritative list of audit areas (fetched via MCP), and (b) per-candidate usage signals (from grep). You output a JSON list of skill IDs the runner should dispatch in the second wave. + +You do not run any audit checks yourself. You do not dispatch specialists yourself — the runner does that. You do not write files. You do not call `mcp__wizard-tools__audit_resolve_checks` or `mcp__wizard-tools__audit_add_checks` — the runner handles ledger writes. + +## Tools you must use + +- `mcp__wizard-tools__audit_get_areas` — **required**. Fetches the wizard's list of areas to audit. Returns `{ areas, allowed, constrained }`. +- `Glob`, `Grep`, `Read` — for usage signal detection. 
Time-box yourself: a single matching hit per candidate is enough to flip it on. Do not enumerate every callsite. + +## Candidate registry + +| Area | Skill ID | Single grep / glob signal | +|---|---|---| +| Web Analytics | `audit-subagents-web-analytics` | `posthog-js` in a manifest, or `posthog\.init\(` in browser source. | +| Feature Flags | `audit-subagents-feature-flags` | `isFeatureEnabled\|getFeatureFlag\|useFeatureFlagEnabled\|getFeatureFlagPayload\|bootstrap\.featureFlags` | +| Experiments | `audit-subagents-experiments` | `\$feature_flag_called` in source, or experiment-suffixed flag names. | +| LLM Analytics | `audit-subagents-llm-analytics` | `@posthog/ai` import, or `\$ai_generation\|\$ai_trace`. | +| Error Tracking | `audit-subagents-error-tracking` | `posthog\.captureException\|\$exception` | + +## Workflow + +### Action 1 — fetch the wizard's areas (MANDATORY first step) + +Call `mcp__wizard-tools__audit_get_areas` exactly once. This is the **first** thing you do, before any grep, before any read. The tool takes no arguments. + +Capture the response. It will be a JSON object shaped: + +```json +{ "areas": ["Web Analytics", "LLM Analytics"], "allowed": ["Installation","Identification","Event Capture","Web Analytics","Feature Flags","Experiments","LLM Analytics","Error Tracking"], "constrained": true } +``` + +- `constrained: true` + non-empty `areas` → wizard is filtering this run to those areas only. +- `constrained: false` (or `areas` empty) → no wizard constraint, decide purely on usage signals. + +If the call errors or the tool isn't available, treat it as "no constraint" (`constrained: false`) and proceed. + +Emit `[STATUS] Discovering specialists — fetched wizard areas` after this call returns. + +### Action 2 — detect usage signals + +For each candidate in the registry, run **one** targeted `Grep` (or `Glob` for manifest files) using the signal pattern in the table above. Issue all five greps in parallel. 
Each candidate is either signal-present or signal-absent — that's all you need. + +Emit `[STATUS] Discovering specialists — scanned for usage signals` after the greps return. + +### Action 3 — combine inputs and decide + +The user's directive: **only dispatch what is actually used in the project**. The wizard's `areas` list (when constrained) is an additional filter on top. + +For each of the 5 candidates: + +| `constrained` | In `areas`? | Signal present? | Decision | +|---|---|---|---| +| `true` | yes | yes | **dispatch** | +| `true` | yes | no | `skipped` — `"requested by wizard but no usage signal in project"` | +| `true` | no | (any) | `skipped` — `"area not requested by wizard"` | +| `false` | (n/a) | yes | **dispatch** | +| `false` | (n/a) | no | `skipped` — `"no usage signal in project"` | + +Every candidate from the registry must appear in either `dispatch` or `skipped` — the runner needs a complete decision record. + +### Action 4 — emit the JSON output + +End your turn with one fenced ```json``` block matching this shape, and nothing after it: + +```json +{ + "specialist": "dispatch", + "schemaVersion": 1, + "auditedAt": "<ISO 8601 timestamp>", + "dispatch": [ + { + "skillId": "audit-subagents-web-analytics", + "reason": "posthog-js found in package.json:14 (in wizard area list)" + } + ], + "skipped": [ + { + "skillId": "audit-subagents-llm-analytics", + "reason": "no usage signal in project" + } + ] +} +``` + +Do not emit prose after the JSON block.
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/error-tracking/checks.json b/transformation-config/skills/audit/subagents/error-tracking/checks.json new file mode 100644 index 0000000..4d2b839 --- /dev/null +++ b/transformation-config/skills/audit/subagents/error-tracking/checks.json @@ -0,0 +1,4 @@ +[ + { "id": "err-capture-installed", "area": "Error Tracking", "label": "captureException wired into the global handler" }, + { "id": "err-source-maps-uploaded", "area": "Error Tracking", "label": "Source maps uploaded to PostHog" } +] diff --git a/transformation-config/skills/audit/subagents/error-tracking/config.yaml b/transformation-config/skills/audit/subagents/error-tracking/config.yaml new file mode 100644 index 0000000..59afc5c --- /dev/null +++ b/transformation-config/skills/audit/subagents/error-tracking/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit specialist — error tracking (placeholder, resolves every check as a warning) +tags: [audit-subagent] +shared_docs: [] +variants: + - id: all + display_name: Audit — error tracking + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/error-tracking/description.md b/transformation-config/skills/audit/subagents/error-tracking/description.md new file mode 100644 index 0000000..214992e --- /dev/null +++ b/transformation-config/skills/audit/subagents/error-tracking/description.md @@ -0,0 +1,35 @@ +# Audit specialist — Error tracking + +> **Status:** placeholder. The check logic is not yet implemented. Resolve every owned check via `audit_resolve_checks` as `{ status: "warning", details: "specialist not yet implemented" }`.
+ +You own these ledger checks (enrolled by the runner via `audit_add_checks` before this Task is dispatched): + +- `err-capture-installed` +- `err-source-maps-uploaded` + +## When to dispatch me + +Source contains `posthog.captureException`, error boundaries that report to PostHog, or `$exception` event captures. + +## Workflow + +1. Read this SKILL.md. +2. (TODO: implement the checks.) For now, emit one batched `mcp__wizard-tools__audit_resolve_checks` call with the placeholder updates under "Output". +3. Return a one-line summary (e.g. `Error tracking audit complete: 2 checks resolved (placeholder)`). Do not emit prose after that. + +## Output + +``` +mcp__wizard-tools__audit_resolve_checks({ + updates: [ + { "id": "err-capture-installed", "status": "warning", "details": "specialist not yet implemented" }, + { "id": "err-source-maps-uploaded", "status": "warning", "details": "specialist not yet implemented" } + ] +}) +``` + +Severity ladder when the checks are implemented: `pass | suggestion | warning | error`. Never use `pending`. 
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/event-capture/config.yaml b/transformation-config/skills/audit/subagents/event-capture/config.yaml new file mode 100644 index 0000000..cbff4f7 --- /dev/null +++ b/transformation-config/skills/audit/subagents/event-capture/config.yaml @@ -0,0 +1,11 @@ +type: docs-only +template: description.md +description: Audit specialist — PostHog event capture call sites (read-only, returns JSON findings) +tags: [audit-subagent] +shared_docs: + - https://posthog.com/docs/product-analytics/best-practices.md +variants: + - id: all + display_name: Audit — event capture + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/event-capture/description.md b/transformation-config/skills/audit/subagents/event-capture/description.md new file mode 100644 index 0000000..15a96c8 --- /dev/null +++ b/transformation-config/skills/audit/subagents/event-capture/description.md @@ -0,0 +1,97 @@ +# Audit specialist — Event capture + +This specialist resolves three event-capture checks **in parallel**, one nested subagent per check: + +- `capture-event-names-static` +- `capture-uses-proxy` +- `capture-growth-events` + +Each nested subagent owns its own grep, reads, evaluates its single rule, and emits one `audit_resolve_checks` call with one update. The ledger's mutex serializes concurrent writes. + +## Reference files + +{references} + +## Status + +Emit before dispatching: + +``` +[STATUS] Auditing event capture +``` + +## Action — dispatch three subagents in one message + +Make **three `Task` tool calls in a single message** so they run concurrently. Wait for all three to return. Do not run any other tools between dispatch and returning a one-line summary. + +The bundled `best-practices.md` reference holds PostHog's authoritative guidance on event-name shape, reverse-proxy setup, and growth-event coverage. 
It's typically at `.claude/skills/audit/references/best-practices.md`; if that path doesn't exist, discover it with `Glob` `**/skills/audit/references/best-practices.md`. Each nested subagent reads it once before judging. + +### Task A — `capture-event-names-static` + +`description`: `Audit capture-event-names-static` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: capture-event-names-static. + +Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). + +Run **one** Grep: `posthog\.capture\(`. Read each file that contains a hit, once. Inspect the first argument of every capture() call. + +Rule: +- Event names in posthog.capture("name", …) must be static strings, not template literals or dynamic variables. +- pass: all capture calls use string literals. +- error: any call uses a template literal or variable as the event name. + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-event-names-static`, including `file` (path:line of the first violation if any, otherwise of a representative capture call) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +``` + +### Task B — `capture-uses-proxy` + +`description`: `Audit capture-uses-proxy` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: capture-uses-proxy. + +Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). + +Run **one** Grep: `api_host`. Read each file that contains a hit, once. Determine the configured ingest host the SDK posts to, and whether any browser runtime initializes PostHog at all. 
+ +Rule: +- A reverse proxy fronts PostHog's ingest endpoint via `api_host`, so events keep flowing when ad/tracking blockers would otherwise drop them. Without one, a meaningful share of browser captures never reach PostHog. +- pass: `api_host` resolves to a first-party domain on the project's own infra (e.g. `e.example.com`, `posthog.example.com`, `/ingest`-style same-origin path, or a known proxy SaaS like `app.example.com/relay-...`). +- warning: `api_host` is the default PostHog host (`https://us.i.posthog.com`, `https://eu.i.posthog.com`, `https://app.posthog.com`, or omitted entirely so the SDK default applies). +- Skip (`pass` with details: "server-only SDK"): only server-side runtimes init PostHog — a proxy isn't needed when no browser sends captures. + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-uses-proxy`, including `file` (path:line of the init that sets api_host) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +``` + +### Task C — `capture-growth-events` + +`description`: `Audit capture-growth-events` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: capture-growth-events. + +Read this skill's bundled `best-practices.md` reference once (typically `.claude/skills/audit/references/best-practices.md`; otherwise discover with `Glob` `**/skills/audit/references/best-practices.md`). + +Run **two** Greps in parallel: +- `posthog\.capture\(` — explicit capture calls +- `signup|signin|register|checkout|purchase|subscribe|onboard` — likely growth-funnel surfaces + +Read each file that contains a hit, once. Cross-reference: do the growth-funnel surfaces actually emit explicit capture calls? + +Rule: +- Signup, activation/first-key-action, and purchase/subscription should be tracked explicitly. Autocapture isn't enough for funnels. +- pass: at least signup + one activation + (purchase or subscribe) are captured explicitly. 
+- warning: one or more growth events missing — list which. +- Skip (`pass` with details: "no auth/billing paths detected"): no detectable signup/billing surfaces. + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `capture-growth-events`, including `file` (path:line of the most relevant capture or growth-surface site) and `details` (one-line explanation, listing missing growth events when applicable). Return when the call completes. Do not write the audit report. +``` + +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/experiments/checks.json b/transformation-config/skills/audit/subagents/experiments/checks.json new file mode 100644 index 0000000..626b814 --- /dev/null +++ b/transformation-config/skills/audit/subagents/experiments/checks.json @@ -0,0 +1,4 @@ +[ + { "id": "exp-exposure-captured", "area": "Experiments", "label": "Exposure event captured per experiment view" }, + { "id": "exp-variant-stable", "area": "Experiments", "label": "Variant assignment is stable across sessions" } +] diff --git a/transformation-config/skills/audit/subagents/experiments/config.yaml b/transformation-config/skills/audit/subagents/experiments/config.yaml new file mode 100644 index 0000000..8f206f1 --- /dev/null +++ b/transformation-config/skills/audit/subagents/experiments/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit specialist — experiments (placeholder, resolves every check as a warning) +tags: [audit-subagent] +shared_docs: [] +variants: + - id: all + display_name: Audit — experiments + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/experiments/description.md b/transformation-config/skills/audit/subagents/experiments/description.md new file mode 100644 index 0000000..9f212fc --- /dev/null +++ b/transformation-config/skills/audit/subagents/experiments/description.md @@ -0,0 +1,35 @@ +# Audit specialist — Experiments + +>
**Status:** placeholder. The check logic is not yet implemented. Resolve every owned check via `audit_resolve_checks` as `{ status: "warning", details: "specialist not yet implemented" }`. + +You own these ledger checks (enrolled by the runner via `audit_add_checks` before this Task is dispatched): + +- `exp-exposure-captured` +- `exp-variant-stable` + +## When to dispatch me + +Source contains experiment-named feature flags (suffixed `-test`, `-experiment`, `-ab`, or similar), `$feature_flag_called` event captures, or PostHog experiment SDK calls. + +## Workflow + +1. Read this SKILL.md. +2. (TODO: implement the checks.) For now, emit one batched `mcp__wizard-tools__audit_resolve_checks` call with the placeholder updates under "Output". +3. Return a one-line summary (e.g. `Experiments audit complete: 2 checks resolved (placeholder)`). Do not emit prose after that. + +## Output + +``` +mcp__wizard-tools__audit_resolve_checks({ + updates: [ + { "id": "exp-exposure-captured", "status": "warning", "details": "specialist not yet implemented" }, + { "id": "exp-variant-stable", "status": "warning", "details": "specialist not yet implemented" } + ] +}) +``` + +Severity ladder when the checks are implemented: `pass | suggestion | warning | error`. Never use `pending`. 
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/feature-flags/checks.json b/transformation-config/skills/audit/subagents/feature-flags/checks.json new file mode 100644 index 0000000..f763b50 --- /dev/null +++ b/transformation-config/skills/audit/subagents/feature-flags/checks.json @@ -0,0 +1,4 @@ +[ + { "id": "ff-eval-after-init", "area": "Feature Flags", "label": "Flag evaluation runs after PostHog init" }, + { "id": "ff-bootstrap-configured", "area": "Feature Flags", "label": "Bootstrap configured for SSR / SPA" } +] diff --git a/transformation-config/skills/audit/subagents/feature-flags/config.yaml b/transformation-config/skills/audit/subagents/feature-flags/config.yaml new file mode 100644 index 0000000..462f9c5 --- /dev/null +++ b/transformation-config/skills/audit/subagents/feature-flags/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit specialist — feature flags (placeholder, resolves every check as a warning) +tags: [audit-subagent] +shared_docs: [] +variants: + - id: all + display_name: Audit — feature flags + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/feature-flags/description.md b/transformation-config/skills/audit/subagents/feature-flags/description.md new file mode 100644 index 0000000..cea5297 --- /dev/null +++ b/transformation-config/skills/audit/subagents/feature-flags/description.md @@ -0,0 +1,35 @@ +# Audit specialist — Feature flags + +> **Status:** placeholder. The check logic is not yet implemented. Resolve every owned check via `audit_resolve_checks` as `{ status: "warning", details: "specialist not yet implemented" }`.
+ +You own these ledger checks (enrolled by the runner via `audit_add_checks` before this Task is dispatched): + +- `ff-eval-after-init` +- `ff-bootstrap-configured` + +## When to dispatch me + +Source contains any of: `isFeatureEnabled`, `getFeatureFlag`, `useFeatureFlagEnabled`, `getFeatureFlagPayload`, `bootstrap.featureFlags`. + +## Workflow + +1. Read this SKILL.md. +2. (TODO: implement the checks.) For now, emit one batched `mcp__wizard-tools__audit_resolve_checks` call with the placeholder updates under "Output". +3. Return a one-line summary (e.g. `Feature flags audit complete: 2 checks resolved (placeholder)`). Do not emit prose after that. + +## Output + +``` +mcp__wizard-tools__audit_resolve_checks({ + updates: [ + { "id": "ff-eval-after-init", "status": "warning", "details": "specialist not yet implemented" }, + { "id": "ff-bootstrap-configured", "status": "warning", "details": "specialist not yet implemented" } + ] +}) +``` + +Severity ladder when the checks are implemented: `pass | suggestion | warning | error`. Never use `pending`. 
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/identification/config.yaml b/transformation-config/skills/audit/subagents/identification/config.yaml new file mode 100644 index 0000000..e2473bd --- /dev/null +++ b/transformation-config/skills/audit/subagents/identification/config.yaml @@ -0,0 +1,11 @@ +type: docs-only +template: description.md +description: Audit specialist — PostHog identification call sites (read-only, returns JSON findings) +tags: [audit-subagent] +shared_docs: + - https://posthog.com/docs/getting-started/identify-users.md +variants: + - id: all + display_name: Audit — identification + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/identification/description.md b/transformation-config/skills/audit/subagents/identification/description.md new file mode 100644 index 0000000..456d090 --- /dev/null +++ b/transformation-config/skills/audit/subagents/identification/description.md @@ -0,0 +1,120 @@ +# Audit specialist — Identification + +This specialist resolves four identification checks **in parallel**, one nested subagent per check: + +- `identify-stable-distinct-id` +- `identify-not-late` +- `cross-runtime-distinct-id` +- `identify-reset-on-logout` + +Each nested subagent owns its own grep, reads, evaluates its single rule, and emits one `audit_resolve_checks` call with one update. The ledger's mutex serializes concurrent writes — there's no race. + +## Reference files + +{references} + +## Status + +Emit before dispatching: + +``` +[STATUS] Auditing identification +``` + +## Action — dispatch four subagents in one message + +Make **four `Task` tool calls in a single message** so they run concurrently. Wait for all four to return. Do not run any other tools between dispatch and returning a one-line summary. + +The bundled `identify-users.md` reference holds PostHog's authoritative guidance on `distinct_id`, `identify()` ordering, and cross-runtime identity. 
It's typically at `.claude/skills/audit/references/identify-users.md`; if that path doesn't exist, discover it with `Glob` `**/skills/audit/references/identify-users.md`. Each nested subagent reads it once before judging. + +### Task A — `identify-stable-distinct-id` + +`description`: `Audit identify-stable-distinct-id` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: identify-stable-distinct-id. + +Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). + +Run **one** Grep: `posthog\.identify\(`. Read each file that contains a hit, once. Inspect the first argument passed to identify(). + +Rule: +- distinct_id must be a stable identifier (auth user id, account id), not a session UUID, ephemeral cookie, or device-only id. +- pass: sources from authenticated user (session.user.id, auth.uid(), etc.) +- error: sources from a session, request, or device id that resets +- warning: source unclear — flag for human review + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-stable-distinct-id`, including `file` (path:line) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +``` + +### Task B — `identify-not-late` + +`description`: `Audit identify-not-late` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: identify-not-late. + +Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). + +Run **two** Greps in parallel: +- `posthog\.identify\(` — where identity is established +- `posthog\.capture\(|getFeatureFlag\(|isFeatureEnabled\(` — where captures and flag evals happen + +Read each file that contains a hit, once. 
Compare the timing/ordering of identify() against the surrounding capture / flag-eval calls. + +Rule: +- identify() must be called before any posthog.capture for that user, and before any feature-flag eval depending on user identity. +- pass: identify runs at session start / right after login. Captures and flag evals come after. +- warning: identify runs lazily (e.g. settings-page mount), so early captures and flag evals are anonymous. + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-not-late`, including `file` (path:line of the identify call) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +``` + +### Task C — `cross-runtime-distinct-id` + +`description`: `Audit cross-runtime-distinct-id` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: cross-runtime-distinct-id. + +Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). + +Run **one** Grep: `posthog\.init\(|new PostHog\(|posthog\.Posthog\(|Posthog\(` — locate every PostHog initialization across runtimes. Read each file that contains a hit, once. Determine whether both client and server runtimes initialize PostHog, and if so, how distinct_id flows between them. + +Rule: +- If both client and server runtimes call PostHog, the same distinct_id must be used on both sides for the same user. +- pass: server-side captures source the client's distinct_id (cookie, session token, or explicit hand-off). +- error: server-side captures use a different identifier scheme. +- Skip (`pass` with details: "single runtime"): only one runtime initializes PostHog. 
+ +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `cross-runtime-distinct-id`, including `file` (path:line of the most relevant init or capture site) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. +``` + +### Task D — `identify-reset-on-logout` + +`description`: `Audit identify-reset-on-logout` + +`prompt`: +``` +You are an audit subagent. Resolve exactly one rule and return: identify-reset-on-logout. + +Read this skill's bundled `identify-users.md` reference once (typically `.claude/skills/audit/references/identify-users.md`; otherwise discover with `Glob` `**/skills/audit/references/identify-users.md`). + +Locate logout, sign-out, and account-switching flows by issuing whatever `Grep` and `Read` calls are needed in parallel. Determine whether those flows clear PostHog state with `posthog.reset()`. + +Rule: +- Logout or account-switching flows should call `posthog.reset()`. Without a reset, when user B logs in on the same device after user A, PostHog's anonymous ID is shared and the next `identify()` can merge both accounts into one person. +- pass: every detected logout/account-switch flow calls `posthog.reset()`. +- error: a logout/account-switch flow is missing `posthog.reset()`. +- Skip (`pass` with details: "no logout/account-switch flow found"): no detectable logout/account-switch flow exists. +- note: `posthog.reset(true)` is valid when a completely clean device ID reset is required. + +Emit one `mcp__wizard-tools__audit_resolve_checks` call with a single update for id `identify-reset-on-logout`, including `file` (path:line of the most relevant logout or reset site) and `details` (one-line explanation). Return when the call completes. Do not write the audit report. 
+``` + +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/llm-analytics/checks.json b/transformation-config/skills/audit/subagents/llm-analytics/checks.json new file mode 100644 index 0000000..6ff0050 --- /dev/null +++ b/transformation-config/skills/audit/subagents/llm-analytics/checks.json @@ -0,0 +1,4 @@ +[ + { "id": "ai-trace-captured", "area": "LLM Analytics", "label": "AI generations captured with $ai_generation" }, + { "id": "ai-cost-tracked", "area": "LLM Analytics", "label": "Token + cost properties present on AI events" } +] diff --git a/transformation-config/skills/audit/subagents/llm-analytics/config.yaml b/transformation-config/skills/audit/subagents/llm-analytics/config.yaml new file mode 100644 index 0000000..47f602e --- /dev/null +++ b/transformation-config/skills/audit/subagents/llm-analytics/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit specialist — LLM analytics (placeholder, returns empty findings) +tags: [audit-subagent] +shared_docs: [] +variants: + - id: all + display_name: Audit — LLM analytics + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/llm-analytics/description.md b/transformation-config/skills/audit/subagents/llm-analytics/description.md new file mode 100644 index 0000000..05c7bc2 --- /dev/null +++ b/transformation-config/skills/audit/subagents/llm-analytics/description.md @@ -0,0 +1,35 @@ +# Audit specialist — LLM analytics + +> **Status:** placeholder. The check logic is not yet implemented. Resolve every owned check via `audit_resolve_checks` as `{ status: "warning", details: "specialist not yet implemented" }`. 
+ +You own these ledger checks (enrolled by the runner via `audit_add_checks` before this Task is dispatched): + +- `ai-trace-captured` +- `ai-cost-tracked` + +## When to dispatch me + +Source contains an `@posthog/ai` import, OpenAI / Anthropic SDK calls alongside PostHog imports, or `$ai_generation` / `$ai_trace` event names. + +## Workflow + +1. Read this SKILL.md. +2. (TODO: implement the checks.) For now, emit one batched `mcp__wizard-tools__audit_resolve_checks` call with the placeholder updates under "Output". +3. Return a one-line summary (e.g. `LLM analytics audit complete: 2 checks resolved (placeholder)`). Do not emit prose after that. + +## Output + +``` +mcp__wizard-tools__audit_resolve_checks({ + updates: [ + { "id": "ai-trace-captured", "status": "warning", "details": "specialist not yet implemented" }, + { "id": "ai-cost-tracked", "status": "warning", "details": "specialist not yet implemented" } + ] +}) +``` + +Severity ladder when the checks are implemented: `pass | suggestion | warning | error`. Never use `pending`. 
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/web-analytics/checks.json b/transformation-config/skills/audit/subagents/web-analytics/checks.json new file mode 100644 index 0000000..12f0411 --- /dev/null +++ b/transformation-config/skills/audit/subagents/web-analytics/checks.json @@ -0,0 +1,7 @@ +[ + { "id": "partial_reverse_proxy", "area": "Web Analytics", "label": "Partial reverse proxy" }, + { "id": "dark_authorized_urls", "area": "Web Analytics", "label": "Dark authorized URLs" }, + { "id": "pageleave_coverage", "area": "Web Analytics", "label": "Pageleave coverage" }, + { "id": "web_vitals_coverage", "area": "Web Analytics", "label": "Web vitals coverage" }, + { "id": "duplicate_canonical_urls", "area": "Web Analytics", "label": "Duplicate canonical URLs across hosts" } +] diff --git a/transformation-config/skills/audit/subagents/web-analytics/config.yaml b/transformation-config/skills/audit/subagents/web-analytics/config.yaml new file mode 100644 index 0000000..f1484fd --- /dev/null +++ b/transformation-config/skills/audit/subagents/web-analytics/config.yaml @@ -0,0 +1,10 @@ +type: docs-only +template: description.md +description: Audit specialist — PostHog web analytics misconfigurations via HogQL (read-only, returns JSON findings) +tags: [audit-subagent, web-analytics] +shared_docs: [] +variants: + - id: all + display_name: Audit — web analytics + tags: [] + docs_urls: [] diff --git a/transformation-config/skills/audit/subagents/web-analytics/description.md b/transformation-config/skills/audit/subagents/web-analytics/description.md new file mode 100644 index 0000000..d3ea57e --- /dev/null +++ b/transformation-config/skills/audit/subagents/web-analytics/description.md @@ -0,0 +1,117 @@ +# Audit specialist — Web analytics + +Read-only specialist that audits an existing PostHog project for common web-analytics misconfigurations using HogQL queries. 
+ +You own these five ledger checks end-to-end (enrolled in the ledger by the runner via `audit_add_checks` before this Task is dispatched): + +- `partial_reverse_proxy` +- `dark_authorized_urls` +- `pageleave_coverage` +- `web_vitals_coverage` +- `duplicate_canonical_urls` + +Run all five checks. Resolve each one via `mcp__wizard-tools__audit_resolve_checks` once its result is known (updates may be batched into a single call). Do not write files. Do not call `audit_add_checks`. + +## When to dispatch me + +A browser/web PostHog SDK is present (`posthog-js`, `posthog-react-native` browser builds, `@posthog/nextjs-config`, etc.) **AND** `mcp__posthog__query-run` is available in the toolset. + +## Reference files + +{references} + +## Guiding tenets + +1. **Read-only.** Never modify the user's source code or PostHog project settings. +2. **Quote real data.** Every non-pass finding's `details` must cite at least one concrete value (host name, event count, ratio). If a query returns nothing actionable, resolve the check as `pass` with `details` describing what was checked — don't speculate. +3. **No fabricated severities.** Use the severity rules in `references/checks.md` exactly, mapped to the unified ladder (`pass | suggestion | warning | error`). +4. **Skip checks gracefully.** If a check's prerequisite query fails mid-audit (missing permission, transient error, etc.), resolve that check via `audit_resolve_checks` as `{ status: "warning", details: "skipped: <reason>" }` and continue to the next check. Do **not** abort unless a hard precondition fails (see Abort statuses). +5. **Bounded query window.** See `references/checks.md` for the 7-day default and 30-day expansion rule. The window is decided once at pre-flight and reused for every check. + +## Available MCP tools + +- `mcp__posthog__query-run` (HogQL) — primary tool. Use for all event queries. +- `mcp__posthog__project-get` — fetch project settings, including `app_urls` (the user-configured authorized URLs).
+- `mcp__posthog__feature-flag-get-definition` — only if a check needs flag context. +- `mcp__posthog__docs-search` — to fetch the latest doc URL for a remediation link. + +## Pre-flight + +Before running any checks, verify the project has web analytics events and decide the analysis window in a single query: + +```sql +SELECT + countIf(timestamp > now() - INTERVAL 7 DAY) AS p7, + count() AS p30 +FROM events +WHERE event = '$pageview' + AND timestamp > now() - INTERVAL 30 DAY +``` + +- If `p30 = 0`, emit `[ABORT] No web analytics events`. The wizard middleware catches `[ABORT]` and terminates the run cleanly — stop working after emitting it, but do not try to terminate the run yourself. +- If `p7 >= 100`, use a 7-day window for every check. Otherwise use 30 days. +- If the query returns a permission error, emit `[ABORT] Insufficient permissions`. + +Do not re-query to decide the window per check — the decision is made once here and reused. + +## How to run the audit + +Run ALL checks in `references/checks.md` without pausing for user confirmation. + +The checks share no state — issue their `query-run` calls in parallel when your tool harness supports concurrent calls. Note that Checks 3 and 4 share a single combined query (see `checks.md`); run that query once and apply both pass/fail rules to the result. + +For each check: + +1. Read the check's HogQL query and pass/fail rule. +2. Run the query verbatim via `query-run`. Do not modify the query (other than swapping `INTERVAL 7 DAY` for `INTERVAL 30 DAY` if pre-flight selected the 30-day window). +3. Apply the rule. Build `{ status, details }` from the result, packing the affected hosts into `details`. +4. Report `[STATUS] Running check N: <label>` before each check. +5. Call `mcp__wizard-tools__audit_resolve_checks` with the update for that check's id. Batch updates where possible — one batched call at the end is preferred over five serial calls. + +Each check is independent and required. Do not skip a check based on intermediate findings. Do not invent new checks beyond the ones listed.
+ +## Output + +Each check resolves into one `audit_resolve_checks` update with shape: + +``` +{ "id": "<checkId>", "status": "pass|suggestion|warning|error", "details": "<one-line evidence>" } +``` + +Severity ladder: `pass | suggestion | warning | error`. The legacy `critical | warning | info` ladder maps to `error | warning | suggestion`. Never emit `pending`, `critical`, or `info`. + +Web-analytics checks are query-driven, so `file` is rarely meaningful — omit it. Pack hosts, event counts, and ratios into `details` as a single line (≤ 200 chars). + +After all five checks resolve, return a one-line summary (e.g. `Web analytics audit complete: 5 checks resolved`). Do not emit prose after that. + +## Constraints + +- Do NOT modify any source files. +- Do NOT write to PostHog (no creating dashboards, insights, actions, etc.). +- Do NOT run queries against more than 30 days of data. +- Do NOT include personally identifiable information in resolutions (no email addresses, no user IDs, no session IDs, no IP addresses — host names, paths, and counts only). +- Do NOT fabricate or estimate values. Only report what the queries return. + +## Status + +Report progress with `[STATUS]` prefixed messages: + +- Verifying web analytics events +- Running check 1: Partial reverse proxy +- Running check 2: Dark authorized URLs +- Running check 3: Pageleave coverage per host +- Running check 4: Web Vitals coverage per host +- Running check 5: Duplicate canonical URLs across hosts + +## Abort statuses + +Report abort states with `[ABORT]` prefixed messages — wording must match exactly so the wizard renders the right error UI: + +- `[ABORT] No web analytics events` — pre-flight finds no `$pageview` events in the last 30 days. +- `[ABORT] Insufficient permissions` — `query-run` returns a permissions error on the pre-flight query. + +Stop all further work after emitting `[ABORT]`.
+ +## Framework guidelines + +{commandments} diff --git a/transformation-config/skills/audit/subagents/web-analytics/references/checks.md b/transformation-config/skills/audit/subagents/web-analytics/references/checks.md new file mode 100644 index 0000000..71a551a --- /dev/null +++ b/transformation-config/skills/audit/subagents/web-analytics/references/checks.md @@ -0,0 +1,183 @@ +# Web analytics checks + +Each check is independent. Run them in order; a failure in one does not block the others. Severity values are **fixed** — do not adjust them. + +Time window: every query below uses `INTERVAL 7 DAY`. The pre-flight in `description.md` decides once whether to use 7 days or 30 days for the entire run; if it picked 30, swap `INTERVAL 7 DAY` for `INTERVAL 30 DAY` everywhere in this file before running. + +All severities below are **already mapped to the unified ladder** (`pass | suggestion | warning | error`). The legacy doctor used `critical | warning | info`; that ladder is gone — emit only the unified values. + +--- + +## Check 1 — Partial reverse proxy + +**checkId:** `partial_reverse_proxy` · **area:** `Web Analytics` · **label:** `Partial reverse proxy` + +**What it detects:** Some hosts in the project route via a reverse proxy (so events survive ad blockers); others go directly to PostHog (where ad blockers can drop events). This is the support-case pattern: `example.com` proxied, `go.example.com` not. 
+ +**Query:** + +```sql +SELECT + properties.$host AS host, + countIf( + coalesce(properties.$lib_custom_api_host, '') = '' + OR coalesce(properties.$lib_custom_api_host, '') LIKE '%i.posthog.com%' + OR coalesce(properties.$lib_custom_api_host, '') LIKE '%posthog.com%' + ) AS direct, + countIf( + coalesce(properties.$lib_custom_api_host, '') != '' + AND coalesce(properties.$lib_custom_api_host, '') NOT LIKE '%posthog.com%' + ) AS proxied, + count() AS total +FROM events +WHERE event = '$pageview' + AND timestamp > now() - INTERVAL 7 DAY + AND properties.$host IS NOT NULL + AND properties.$host != '' +GROUP BY host +HAVING total >= 100 +ORDER BY total DESC +LIMIT 50 +``` + +> **HogQL NULL gotcha:** in HogQL, `NULL != ''` evaluates to TRUE (not NULL as in standard SQL), so an unwrapped `properties.$lib_custom_api_host != ''` will silently match every row where the property is missing. Always wrap the property in `coalesce(..., '')` before comparing or using `LIKE`/`NOT LIKE`. + +**Pass/fail rule:** +- **`warning`** if any host has `proxied >= 1` AND any other host has `proxied = 0 AND direct >= 1`, AND the two hosts share a common registrable parent domain (see "Parent-domain heuristic" below). +- **`suggestion`** if `proxied` and `direct` hosts exist but parent domains differ — cross-environment mixing (e.g. dev vs prod) is plausible and shouldn't trigger a warning. +- Otherwise: `pass`. + +**Parent-domain heuristic:** strip leading `www.`, then take the rightmost two dot-separated labels (so `go.example.com` and `app.example.com` both yield `example.com`). For these compound public suffixes, take the rightmost three labels instead: `co.uk`, `com.au`, `co.jp`, `co.nz`, `com.br`, `github.io`, `vercel.app`, `netlify.app`, `pages.dev`. This is a heuristic, not a public-suffix list — when in doubt, downgrade to `suggestion`. + +**Evidence to include:** the proxied host(s), the direct host(s), each with their event counts. 
+ +**Remediation URL:** `https://posthog.com/docs/advanced/proxy` + +--- + +## Check 2 — Dark authorized URLs + +**checkId:** `dark_authorized_urls` · **area:** `Web Analytics` · **label:** `Dark authorized URLs` + +**What it detects:** The user configured authorized URLs in PostHog (via project settings), but one or more of those URLs has zero events in the analysis window. This often means an ad blocker is silently eating events for that domain — or the SDK was never installed there. + +**Step A — fetch authorized URLs:** call `mcp__posthog__project-get` and read `app_urls` from the response. + +**Step B — query event volume per known host:** + +```sql +SELECT + properties.$host AS host, + count() AS pageviews +FROM events +WHERE event = '$pageview' + AND timestamp > now() - INTERVAL 7 DAY +GROUP BY host +ORDER BY pageviews DESC +LIMIT 200 +``` + +**Step C — compare:** for each entry in `app_urls`, parse out the hostname and check it against the query results in memory (do not issue one query per URL). + +Define `total_project_pageviews` = sum of `pageviews` across every row returned by Step B (all hosts, not just authorized ones). + +**Pass/fail rule:** +- **`warning`** for each authorized URL whose hostname is absent from Step B's results entirely. +- **`warning`** for each authorized URL whose `pageviews / total_project_pageviews < 0.01`, when at least one *other* authorized URL has `pageviews / total_project_pageviews >= 0.10`. (The peer threshold avoids firing on projects where every authorized URL is low-volume.) +- Skip this check (resolve it as `pass` with details: "skipped: no authorized URLs configured") if `app_urls` is empty. + +**Evidence:** the dark host name, total project pageviews for context, and any peer authorized hosts with healthy volume. + +**Remediation URL:** `https://posthog.com/docs/web-analytics/faq` + +--- + +## Checks 3 & 4 — Pageleave and Web Vitals coverage per host + +These two checks share a single combined query — run it once and apply both pass/fail rules to the result.
+ +**Check 3 detects:** A host emits `$pageview` but few or no `$pageleave` events. Pageleave drives bounce rate and session duration — without it, web analytics dashboards under-report engagement for that domain. + +**Check 4 detects:** A host has pageviews but no `$web_vitals` events, meaning LCP/CLS/INP performance metrics aren't captured. + +**Combined query:** + +```sql +SELECT + properties.$host AS host, + countIf(event = '$pageview') AS pageviews, + countIf(event = '$pageleave') AS pageleaves, + countIf(event = '$web_vitals') AS web_vitals, + round(countIf(event = '$pageleave') / nullif(countIf(event = '$pageview'), 0), 3) AS pageleave_ratio +FROM events +WHERE event IN ('$pageview', '$pageleave', '$web_vitals') + AND timestamp > now() - INTERVAL 7 DAY + AND properties.$host IS NOT NULL + AND properties.$host != '' +GROUP BY host +HAVING pageviews >= 100 +ORDER BY pageviews DESC +LIMIT 50 +``` + +**Check 3 — `pageleave_coverage` (area: `Web Analytics`, label: `Pageleave coverage`)** +- **`suggestion`** for any host with `pageleaves = 0` (and `pageviews >= 100`, which the `HAVING` already enforces). +- **`warning`** for any host with `pageleaves > 0 AND pageleave_ratio < 0.5`. Mutually exclusive with the suggestion rule above — never emit both for the same host. +- **Evidence:** host, pageview count, pageleave count, ratio. +- **Remediation URL:** `https://posthog.com/docs/libraries/js#config` — set `capture_pageleave: true`. + +**Check 4 — `web_vitals_coverage` (area: `Web Analytics`, label: `Web vitals coverage`)** +- **`suggestion`** (not warning — many setups intentionally skip vitals) for any host with `web_vitals = 0`. +- **Evidence:** host, pageview count. +- **Remediation URL:** `https://posthog.com/docs/web-analytics/web-vitals` + +Emit Check 3 and Check 4 as separate findings (each with its own `checkId`) even though they came from one query. 
+ +--- + +## Check 5 — Duplicate canonical URLs across hosts + +**checkId:** `duplicate_canonical_urls` · **area:** `Web Analytics` · **label:** `Duplicate canonical URLs across hosts` + +**What it detects:** The same path (e.g. `/pricing`) is tracked under multiple `$host` values. This often means you have a marketing site and an app subdomain that both render the same content but get separate analytics — a sign that one is leaking or that proxy/canonical config is inconsistent. + +**Query:** + +```sql +SELECT + properties.$pathname AS path, + groupUniqArray(properties.$host) AS hosts, + count() AS pageviews +FROM events +WHERE event = '$pageview' + AND timestamp > now() - INTERVAL 7 DAY + AND properties.$host IS NOT NULL + AND properties.$host != '' + AND properties.$pathname IS NOT NULL + AND properties.$pathname != '' +GROUP BY path +HAVING length(hosts) >= 2 AND pageviews >= 100 +ORDER BY pageviews DESC +LIMIT 25 +``` + +**Pass/fail rule:** +- **`suggestion`** if any path appears under ≥ 2 hosts with combined `pageviews >= 100`. + +**Evidence:** the path, the list of hosts, combined pageview count. + +**Remediation URL:** `https://posthog.com/docs/web-analytics/faq` — review `$current_url` / `$host` capture, confirm reverse proxy and canonicalization across all domains. + +--- + +## Stable `checkId` values + +These are the enum values shared with the runner / wizard. Use them exactly: + +- `partial_reverse_proxy` — Check 1 +- `dark_authorized_urls` — Check 2 +- `pageleave_coverage` — Check 3 +- `web_vitals_coverage` — Check 4 +- `duplicate_canonical_urls` — Check 5 + +If you add a new check, pick a snake_case ID and update this list.