diff --git a/.github/workflows/claude-code-review.yml b/.github/workflows/claude-code-review.yml index d3821e9..e8341fb 100644 --- a/.github/workflows/claude-code-review.yml +++ b/.github/workflows/claude-code-review.yml @@ -13,10 +13,10 @@ on: jobs: claude-review: - if: github.event.pull_request.author_association == 'OWNER' + if: github.event.pull_request.author_association == 'OWNER' && !github.event.pull_request.draft runs-on: ubuntu-latest permissions: - contents: read + contents: write pull-requests: write issues: read id-token: write @@ -57,7 +57,7 @@ jobs: plugins: 'code-review@claude-code-plugins' prompt: '/code-review:code-review --comment' track_progress: true - claude_args: '--allowedTools "Read,Write,Edit,Bash(git:*),mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Bash(gh pr diff:*),Bash(gh pr checkout:*),Bash(git log:*),Bash(bun run test:*),Bash(bun run lint:*),Bash(bun run build:*),Bash(bun test:*),Bash(npx tsc:*),Bash(bun run tsc:*),Bash(gh pr checks:*),Bash(npx biome check:*),Bash(git fetch:*),Bash(gh issue list:*),Bash(gh issue view:*)"' + claude_args: '--allowedTools "Read,Write,Edit,Bash(git:*),mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Bash(gh pr diff:*),Bash(gh pr checkout:*),Bash(git log:*),Bash(bun run test:*),Bash(bun run lint:*),Bash(bun run build:*),Bash(bun test:*),Bash(npx tsc:*),Bash(bun run tsc:*),Bash(bun run typecheck*),Bash(gh pr checks:*),Bash(npx biome check:*),Bash(git fetch:*),Bash(gh issue list:*),Bash(gh issue view:*)"' # See https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md # or https://code.claude.com/docs/en/cli-reference for available options diff --git a/.github/workflows/claude.yml b/.github/workflows/claude.yml index 7e11de9..7d69053 100644 --- a/.github/workflows/claude.yml +++ b/.github/workflows/claude.yml @@ -72,5 +72,5 @@ jobs: # Optional: Add claude_args to customize behavior and configuration # See 
https://github.com/anthropics/claude-code-action/blob/main/docs/usage.md # or https://code.claude.com/docs/en/cli-reference for available options - claude_args: '--allowedTools "Read,Write,Edit,Bash(git:*),mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Bash(gh pr diff:*),Bash(gh pr checkout:*),Bash(git log:*),Bash(bun run test:*),Bash(bun run lint:*),Bash(bun run build:*),Bash(bun test:*),Bash(npx tsc:*),Bash(bun run tsc:*),Bash(gh pr checks:*),Bash(npx biome check:*),Bash(git fetch:*),Bash(gh issue list:*),Bash(gh issue view:*)"' + claude_args: '--allowedTools "Read,Write,Edit,Bash(git:*),mcp__github_inline_comment__create_inline_comment,Bash(gh pr comment:*),Bash(gh pr view:*),Bash(gh pr diff:*),Bash(gh pr checkout:*),Bash(git log:*),Bash(bun run test:*),Bash(bun run lint:*),Bash(bun run build:*),Bash(bun test:*),Bash(npx tsc:*),Bash(bun run tsc:*),Bash(bun run typecheck*),Bash(gh pr checks:*),Bash(npx biome check:*),Bash(git fetch:*),Bash(gh issue list:*),Bash(gh issue view:*)"' diff --git a/.gitignore b/.gitignore index 6f9bc1c..c32763f 100644 --- a/.gitignore +++ b/.gitignore @@ -37,6 +37,9 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json config.json .claude/settings.local.json .claude/worktrees/ +.claude-pr/ CLAUDE.local.md +lefthook-local.yml +.gitleaks-priv.toml hook-test/fixtures/.state/ diff --git a/README.md b/README.md index 052d674..87b84e1 100644 --- a/README.md +++ b/README.md @@ -46,54 +46,7 @@ See `config.example.json` for the full structure. The config maps space names to **Including spaces from other configs:** Use `includeSpacesFrom` to import space definitions from other config files. This is useful for aggregating spaces from multiple projects into a central config, reducing the need to specify `--config` on CLI commands. Duplicate space names are not allowed. -**Plugins:** Use `plugins` to load parse plugins that read spaces from non-markdown sources. 
The built-in markdown plugin is always available without any declaration. Plugins are tried in order; the first to return a result wins. The `plugins` field is a map of plugin name to plugin config, and can be declared at the top level (applies to all spaces) or per-space (overrides the top level): - -```json -{ - "spaces": [ - { - "name": "ProductX", - "path": "/path/to/space", - "plugins": { - "markdown": { "fieldMap": { "record_type": "type" } } - } - } - ], - "plugins": { - "ost-tools-confluence": { "baseUrl": "https://example.atlassian.net" } - } -} -``` - -All plugin names must start with `ost-tools-` (the prefix is optional in config and normalised on load). The special name `markdown` refers to the built-in markdown plugin. External plugins are resolved in order: config-adjacent (`{configDir}/plugins/{name}`), then npm. Each plugin must export a `configSchema` JSON Schema; config is validated against it on load. Fields annotated `format: 'path'` in a plugin's `configSchema` are resolved relative to the config file directory. - -**Markdown plugin config** fields (set under `plugins.markdown` per space): -- `templateDir` — directory containing template files (used by `template-sync`) -- `templatePrefix` — filename prefix for templates (default blank) -- `fieldMap` — maps file/frontmatter field names to canonical schema field names (e.g. `{ "record_type": "type" }`) - -**Filter views:** Named filter expressions can be defined per space under `views`. Each view has an `expression` field using the filter expression syntax: - -```json -{ - "spaces": [ - { - "name": "my-space", - "path": "/path/to/space", - "views": { - "active-solutions": { - "expression": "WHERE resolvedType='solution' and status='active'" - }, - "solutions-under-active-opportunity": { - "expression": "WHERE resolvedType='solution' and $exists(ancestors[resolvedType='opportunity' and status='active'])" - } - } - } - ] -} -``` - -Use a view name with `ost-tools show --filter `. 
+**Plugins and markdown plugin config:** See `ost-tools docs config` for the full reference including `fieldMap`, `typeInference`, `templateDir`, filter views, and plugin loading rules. ### Spaces diff --git a/docs/concepts.md b/docs/concepts.md index cd9eee0..c19923b 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -56,9 +56,19 @@ Parsing behaviour for a space directory: - Files declaring a `space node` type via frontmatter are included as nodes. - Such files may also contain `embedded nodes` in their body, which are extracted and included. - Files declaring a `tooling type` (e.g. `space_on_a_page`, `dashboard`) are excluded from the node set. -- Files without frontmatter, or without a `type` field, are excluded from the node set. +- Files without frontmatter, or without a `type` field, are excluded from the node set (unless **type inference** is configured — see below). - Non-markdown files are not scanned. +#### Type inference + +When `typeInference` is configured on the markdown plugin, files without an explicit `type` field in frontmatter can have their type inferred from their folder path. Explicit `type:` in frontmatter always takes precedence. + +Two modes are available: + +- **`folder-name`** (default) — the leaf directory name is matched case-insensitively against the schema's known type names and alias keys. For example, a file at `concept/page.md` is inferred as type `concept`; a file at `study/page.md` is inferred as `source` if `study` is an alias for `source` in the schema. A folder name that is neither a type name nor an alias key results in no inference. + +- **`folderMap`** — an explicit map from folder path (relative to space root) to a type name or alias. Replaces auto-matching entirely; only folders listed in the map are inferred. Longest-prefix matching is used when folder paths overlap. An unresolvable value (not a known type or alias) is a hard error at parse time. 
+ ### Space on a page **Space on a page** is a single-file backing format for a `space`. An entire planning tree is represented in one markdown document, using heading hierarchy, bullet point annotations, and `anchor` syntax. No separate per-node files are used. This format is most useful for the early development stages of a space, keeping information together in one file with less "boilerplate". diff --git a/docs/config.md b/docs/config.md new file mode 100644 index 0000000..4639294 --- /dev/null +++ b/docs/config.md @@ -0,0 +1,140 @@ +# ost-tools Configuration Reference + +## Config file location + +ost-tools looks for its config file in this order: + +1. `$OST_TOOLS_CONFIG` — explicit path override +2. `~/.config/ost-tools/config.json` (or `$XDG_CONFIG_HOME/ost-tools/config.json`) +3. `./config.json` in the current working directory + +See `config.example.json` for the full structure. Paths in config files are resolved relative to the config file. + +## Spaces + +A space is a named directory or single file registered in the config. Example: + +```json +{ + "spaces": [ + { + "name": "ProductX", + "path": "/path/to/space", + "schema": "general.json" + } + ] +} +``` + +**`includeSpacesFrom`** — import space definitions from other config files. Useful for aggregating spaces from multiple projects into a central config. Duplicate space names are not allowed. + +## Plugins + +Use `plugins` to load parse plugins that read spaces from non-markdown sources. The built-in markdown plugin is always available without any declaration. Plugins are tried in order; the first to return a result wins. 
The `plugins` field is a map of plugin name to plugin config, and can be declared at the top level (applies to all spaces) or per-space (overrides the top level): + +```json +{ + "spaces": [ + { + "name": "ProductX", + "path": "/path/to/space", + "plugins": { + "markdown": { "fieldMap": { "record_type": "type" } } + } + } + ], + "plugins": { + "ost-tools-confluence": { "baseUrl": "https://example.atlassian.net" } + } +} +``` + +All plugin names must start with `ost-tools-` (the prefix is optional in config and normalised on load). The special name `markdown` refers to the built-in markdown plugin. External plugins are resolved in order: config-adjacent (`{configDir}/plugins/{name}`), then npm. Each plugin must export a `configSchema` JSON Schema; config is validated against it on load. Fields annotated `format: 'path'` in a plugin's `configSchema` are resolved relative to the config file directory. + +## Markdown plugin config + +Set under `plugins.markdown` per space. + +### `fieldMap` + +Maps file/frontmatter field names to canonical schema field names: + +```json +{ "fieldMap": { "record_type": "type" } } +``` + +### `templateDir` and `templatePrefix` + +- `templateDir` — directory containing template files (used by `template-sync` and excluded when parsing) +- `templatePrefix` — filename prefix for templates (default blank) + +### `typeInference` + +Automatically assigns a node type based on folder structure when no `type` field is present in frontmatter. Explicit `type:` always takes precedence. + +**`mode`** — controls the matching strategy: +- `"folder-name"` (default) — matches the leaf directory name case-insensitively against schema type names and alias keys +- `"off"` — disables inference entirely + +```json +{ + "plugins": { + "markdown": { + "typeInference": { "mode": "folder-name" } + } + } +} +``` + +**`folderMap`** — explicit map from folder path (relative to space root) to type name or alias. 
When set, replaces auto-matching entirely; only folders listed in the map are inferred. + +```json +{ + "plugins": { + "markdown": { + "typeInference": { + "folderMap": { + "Research": "source", + "Personal": "note", + "topics/concepts": "concept" + } + } + } + } +} +``` + +Longest-prefix matching is used when keys overlap (e.g. `a/b` and `a/b/c` both present). Trailing slashes in keys are normalised. Values may be type aliases (resolved to canonical type). An unresolvable value throws a hard error at parse time. + +## Filter views + +Named filter expressions can be defined per space under `views`. Each view has an `expression` field: + +```json +{ + "spaces": [ + { + "name": "my-space", + "path": "/path/to/space", + "views": { + "active-solutions": { + "expression": "WHERE resolvedType='solution' and status='active'" + }, + "solutions-under-active-opportunity": { + "expression": "WHERE resolvedType='solution' and $exists(ancestors[resolvedType='opportunity' and status='active'])" + } + } + } + ] +} +``` + +Use a view name with `ost-tools show --filter <view-name>`. + +See `ost-tools docs concepts` for full filter expression syntax. + +## Security notice + +**⚠️ Only use schemas and configuration files from trusted sources.** + +The tool executes JSONata expressions defined in schema files for rule validation. A maliciously crafted schema could make JSONata access JavaScript's prototype chain and execute arbitrary code. Only use schemas you've created or reviewed personally. 
diff --git a/package.json b/package.json index bb89be8..5e7389a 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,11 @@ }, "files": [ "dist/", - "schemas/" + "schemas/", + "docs/concepts.md", + "docs/config.md", + "docs/schemas.md", + "docs/rules.md" ], "exports": { ".": "./dist/index.js", diff --git a/plugin/skills/ost-tools/SKILL.md b/plugin/skills/ost-tools/SKILL.md index d7b661f..14f963c 100644 --- a/plugin/skills/ost-tools/SKILL.md +++ b/plugin/skills/ost-tools/SKILL.md @@ -34,7 +34,9 @@ Before working with a space, use these to understand what's configured: bunx ost-tools spaces --config # per-space: path, schema, fieldMap, templates, miro bunx ost-tools schemas show --space --config # entity types, properties, rules, enums + registry bunx ost-tools schemas show # inspect a bundled partial (e.g. _ost_tools_base.json) -bunx ost-tools readme # full documentation if needed +bunx ost-tools docs # full README +bunx ost-tools docs config # plugin config reference (fieldMap, typeInference, etc.) +bunx ost-tools docs concepts # terminology reference ``` `spaces` is the starting point — it shows each space as a block with its schema name, `fieldMap` @@ -108,31 +110,15 @@ what the rule actually sees in the `current` object, then adjust the rule in the ost-tools supports **plugins** for extending capabilities. Currently, parse plugins allow reading spaces from sources other than markdown (which is a built-in plugin). -Declare plugins in config as a of plugin name → config object: - -```json -{ - "spaces": [ - { - "name": "PDFSpace", - "path": "https://...", - "plugins": { - "ost-tools-pdf": { "baseUrl": "https://example.pdfstore.net" } - } - } - ] -} -``` - -All plugin names must start with `ost-tools-` (the prefix is optional in config and normalised on load). External plugins are resolved in order: config-adjacent (`{configDir}/plugins/{name}`), then npm. 
+For full plugin and markdown plugin config reference (fieldMap, typeInference, templateDir, filter views), run: -**Markdown plugin config** (under `plugins.markdown` in a space entry): -- `templateDir` — directory for template files used by `template-sync`, and to exclude templates when parsing and validating -- `templatePrefix` — filename prefix for templates (default blank) -- `fieldMap` — maps file field names to canonical schema field names (e.g. `{ "record_type": "type" }`) +```bash +bunx ost-tools docs config +``` ## References -- **`references/schema-authoring.md`** — schema file structure, `$metadata`, `fieldMap`, JSONata rules +- **`references/schema-authoring.md`** — schema file structure, `$metadata`, JSONata rules (run `ost-tools docs schema` for schema dialect reference) - **`references/schema-design.md`** — process for designing a schema from existing content -- **`references/commands.md`** — detailed CLI usage and examples + +For CLI and config reference, use `ost-tools docs <topic>` (topics: `concepts`, `config`, `schema`, `rules`). diff --git a/src/commands/readme.ts b/src/commands/docs.ts similarity index 77% rename from src/commands/readme.ts rename to src/commands/docs.ts index e88048a..a4a85a5 100644 --- a/src/commands/readme.ts +++ b/src/commands/docs.ts @@ -1,8 +1,28 @@ import { readFileSync } from 'node:fs'; import { join } from 'node:path'; -export function readme(): void { - const content = readFileSync(join(import.meta.dir, '..', '..', 'README.md'), 'utf-8'); +const TOPICS: Record = { + concepts: 'concepts.md', + config: 'config.md', + schema: 'schemas.md', + rules: 'rules.md', +}; + +export function docs(topic?: string): void { + let filePath: string; + if (!topic) { + filePath = join(import.meta.dir, '..', '..', 'README.md'); + } else { + const file = TOPICS[topic]; + if (!file) { + const available = Object.keys(TOPICS).join(', '); + console.error(`Unknown topic "${topic}". 
Available: ${available}`); + process.exit(1); + } + filePath = join(import.meta.dir, '..', '..', 'docs', file); + } + + const content = readFileSync(filePath, 'utf-8'); const cols = process.stdout.columns ?? 80; const rendered = Bun.markdown.render(content, { heading: (children, { level }) => { diff --git a/src/commands/schemas.ts b/src/commands/schemas.ts index 0d7c9fa..9ddd469 100644 --- a/src/commands/schemas.ts +++ b/src/commands/schemas.ts @@ -2,7 +2,7 @@ import { existsSync, readdirSync, readFileSync } from 'node:fs'; import { dirname, join } from 'node:path'; import type { AnySchemaObject } from 'ajv'; import { loadConfig, resolveSchema } from '../config'; -import { bundledSchemasDir, loadSchema, readRawSchema } from '../schema/schema'; +import { bundledSchemasDir, type EntityInfo, extractEntityInfo, loadSchema, readRawSchema } from '../schema/schema'; import { mergeVariantProperties } from '../schema/schema-refs'; import type { SchemaMetadata, SchemaWithMetadata } from '../types'; @@ -28,12 +28,6 @@ interface EntityVariant { required: string[]; } -interface EntityInfo { - type: string; - properties: string[]; - required: string[]; -} - function extractEntities( oneOf: unknown[], schemaRefRegistry: Map, @@ -56,38 +50,6 @@ function extractEntities( }); } -/** - * Extract entity information for ERD generation. - * Returns a flat list of all entity types with their properties. 
- */ -export function extractEntityInfo( - oneOf: unknown[], - schemaRefRegistry: Map, - schema: SchemaWithMetadata, -): EntityInfo[] { - const result: EntityInfo[] = []; - for (const entry of oneOf) { - const { properties, required } = mergeVariantProperties(entry as AnySchemaObject, schema, schemaRefRegistry); - const typeDef = properties.type as AnySchemaObject | undefined; - if (typeDef?.const) { - result.push({ - type: String(typeDef.const), - properties: Object.keys(properties).filter((k) => k !== 'type'), - required: required.filter((r) => r !== 'type'), - }); - } else if (Array.isArray(typeDef?.enum)) { - for (const t of typeDef.enum as unknown[]) { - result.push({ - type: String(t), - properties: Object.keys(properties).filter((k) => k !== 'type'), - required: required.filter((r) => r !== 'type'), - }); - } - } - } - return result; -} - function showEntities( oneOf: unknown[], schemaRefRegistry: Map, @@ -315,7 +277,7 @@ export function showSchema( // Handle --mermaid-erd: generate ERD and exit if (options.mermaidErd) { - const entityInfo = Array.isArray(schema.oneOf) ? 
extractEntityInfo(schema.oneOf, schemaRefRegistry, schema) : []; + const entityInfo = extractEntityInfo(schema, schemaRefRegistry); const mermaid = generateMermaidErd(schema.metadata, entityInfo); process.stdout.write(mermaid); return; diff --git a/src/commands/validate.ts b/src/commands/validate.ts index dad916c..5570998 100644 --- a/src/commands/validate.ts +++ b/src/commands/validate.ts @@ -3,12 +3,11 @@ import type { ErrorObject } from 'ajv'; import chokidar from 'chokidar'; import { getConfigSourceFiles } from '../config'; import { readSpace } from '../read/read-space'; -import { bundledSchemasDir } from '../schema/schema'; +import { bundledSchemasDir, extractEntityInfo } from '../schema/schema'; import { validateGraph } from '../schema/validate-graph'; import { validateRules } from '../schema/validate-rules'; import { buildSpaceGraph } from '../space-graph'; import type { GraphViolation, RuleViolation, SchemaWithMetadata, SpaceContext } from '../types'; -import { extractEntityInfo } from './schemas'; export interface FormattedError { message: string; @@ -58,7 +57,7 @@ export function formatErrors( hasOneOfContext = Array.isArray(schema.oneOf); if (hasOneOfContext) { - const entities = extractEntityInfo(schema.oneOf as unknown[], schemaRefRegistry, schema); + const entities = extractEntityInfo(schema, schemaRefRegistry); const validTypes = entities.map((e) => e.type).sort(); if (validTypes.length > 0) { diff --git a/src/index.ts b/src/index.ts index 820c966..29fc202 100755 --- a/src/index.ts +++ b/src/index.ts @@ -2,9 +2,9 @@ import { createRequire } from 'node:module'; import { Command } from 'commander'; import { diagram } from './commands/diagram'; +import { docs } from './commands/docs'; import { dump } from './commands/dump'; import { listPlugins } from './commands/plugins'; -import { readme } from './commands/readme'; import { render, renderList } from './commands/render'; import { listSchemas, showSchema } from './commands/schemas'; import { show } from 
'./commands/show'; @@ -155,9 +155,9 @@ schemasCmd program.addCommand(schemasCmd); program - .command('readme') - .description('Show full README documentation') - .action(() => readme()); + .command('docs [topic]') + .description('Show documentation (no arg: README; topics: concepts, config, schema, rules)') + .action((topic?: string) => docs(topic)); const renderCmd = new Command('render').description('Render a space in a given format'); renderCmd diff --git a/src/plugins/markdown/index.ts b/src/plugins/markdown/index.ts index 6002c97..555d10a 100644 --- a/src/plugins/markdown/index.ts +++ b/src/plugins/markdown/index.ts @@ -6,10 +6,16 @@ import { renderBullets } from './render-bullets'; import { renderMermaid } from './render-mermaid'; import { templateSync } from './template-sync'; +export type TypeInferenceConfig = { + mode?: 'folder-name' | 'off'; + folderMap?: Record; +}; + export type MarkdownPluginConfig = { templateDir?: string; fieldMap?: Record; templatePrefix?: string; + typeInference?: TypeInferenceConfig; }; export const MARKDOWN_CONFIG_SCHEMA = { @@ -18,6 +24,14 @@ export const MARKDOWN_CONFIG_SCHEMA = { templateDir: { type: 'string', format: 'path' }, // format is hint to config loader to resolve relative directories fieldMap: { type: 'object', additionalProperties: { type: 'string' } }, templatePrefix: { type: 'string' }, + typeInference: { + type: 'object', + properties: { + mode: { type: 'string', enum: ['folder-name', 'off'] }, + folderMap: { type: 'object', additionalProperties: { type: 'string' } }, + }, + additionalProperties: false, + }, }, additionalProperties: false, }; diff --git a/src/plugins/markdown/read-space.ts b/src/plugins/markdown/read-space.ts index a5a2a97..0b0ac7d 100644 --- a/src/plugins/markdown/read-space.ts +++ b/src/plugins/markdown/read-space.ts @@ -3,10 +3,11 @@ import { basename, join, resolve } from 'node:path'; import { Glob } from 'bun'; import matter from 'gray-matter'; import type { BaseNode } from 
'../../plugin-api'; +import { extractSchemaTypeNames } from '../../schema/schema'; import type { ParseResult, PluginContext } from '../util'; import type { MarkdownPluginConfig } from '.'; import { extractEmbeddedNodes, ON_A_PAGE_TYPES } from './parse-embedded'; -import { applyFieldMap, coerceDates } from './util'; +import { applyFieldMap, coerceDates, inferTypeFromPath } from './util'; type ReadSpaceDirectoryOptions = { includeOnAPageFiles?: boolean; @@ -63,6 +64,10 @@ export async function readSpaceDirectory( const templateDir = mdCfg.templateDir; const absoluteTemplateDir = templateDir ? resolve(templateDir) : undefined; + const typeInferenceCfg = mdCfg.typeInference; + const knownTypes = + typeInferenceCfg?.mode !== 'off' ? extractSchemaTypeNames(context.schema, context.schemaRefRegistry) : undefined; + const files = await Array.fromAsync(new Glob('**/*.md').scan({ cwd: directory, followSymlinks: true })); const nodes: BaseNode[] = []; const skipped: string[] = []; @@ -85,6 +90,10 @@ export async function readSpaceDirectory( const data = coerceDates(applyFieldMap(parsed.data, fieldMap)); + if (!data.type && typeInferenceCfg && knownTypes) { + data.type = inferTypeFromPath(file, typeInferenceCfg, knownTypes, context.schema.metadata.typeAliases); + } + if (!data.type) { nonSpace.push(file); continue; diff --git a/src/plugins/markdown/util.ts b/src/plugins/markdown/util.ts index 1bc7645..d31dc7d 100644 --- a/src/plugins/markdown/util.ts +++ b/src/plugins/markdown/util.ts @@ -1,3 +1,56 @@ +import { posix } from 'node:path'; +import type { TypeInferenceConfig } from '.'; + +export function inferTypeFromPath( + filePath: string, + config: TypeInferenceConfig, + knownTypes: Set, + typeAliases: Record | undefined, +): string | undefined { + if (config.mode === 'off') return undefined; + + const normalized = filePath.replace(/\\/g, '/'); + const dir = posix.dirname(normalized); + if (dir === '.') return undefined; + + if (config.folderMap) { + const normalizedMap = 
Object.fromEntries( + Object.entries(config.folderMap).map(([k, v]) => [k.replace(/\\/g, '/').replace(/\/+$/, ''), v]), + ); + + let bestKey: string | undefined; + for (const key of Object.keys(normalizedMap)) { + if (dir === key || dir.startsWith(`${key}/`)) { + if (!bestKey || key.length > bestKey.length) bestKey = key; + } + } + + if (!bestKey) return undefined; + + const value = normalizedMap[bestKey]!; + if (typeAliases?.[value] !== undefined) return typeAliases[value]; + if (knownTypes.has(value)) return value; + + throw new Error( + `typeInference.folderMap: "${value}" does not resolve to a known type or alias (from key "${bestKey}")`, + ); + } + + const leafDir = posix.basename(dir).toLowerCase(); + + for (const type of knownTypes) { + if (type.toLowerCase() === leafDir) return type; + } + + if (typeAliases) { + for (const [alias, canonical] of Object.entries(typeAliases)) { + if (alias.toLowerCase() === leafDir) return canonical; + } + } + + return undefined; +} + /** * Coerce Date objects in frontmatter/YAML data to ISO date strings (YYYY-MM-DD). * gray-matter and js-yaml parse unquoted ISO dates (e.g. 
`date: 2026-03-31`) as diff --git a/src/schema/schema.ts b/src/schema/schema.ts index 3707463..33e1be0 100644 --- a/src/schema/schema.ts +++ b/src/schema/schema.ts @@ -14,7 +14,7 @@ import { type Rule, type RuleEntry, } from './metadata-contract'; -import { isObject, resolveJsonPointer } from './schema-refs'; +import { isObject, mergeVariantProperties, resolveJsonPointer } from './schema-refs'; const packageDir = dirname(fileURLToPath(import.meta.url)); export const bundledSchemasDir = join(packageDir, '..', '..', 'schemas'); @@ -431,6 +431,47 @@ function extractMetadata(schema: AnySchemaObject, schemaRefRegistry: Map, +): EntityInfo[] { + if (!Array.isArray(schema.oneOf)) return []; + const result: EntityInfo[] = []; + for (const entry of schema.oneOf as AnySchemaObject[]) { + const { properties, required } = mergeVariantProperties(entry, schema, schemaRefRegistry); + const typeDef = properties.type as AnySchemaObject | undefined; + if (typeDef?.const !== undefined) { + result.push({ + type: String(typeDef.const), + properties: Object.keys(properties).filter((k) => k !== 'type'), + required: required.filter((r) => r !== 'type'), + }); + } else if (Array.isArray(typeDef?.enum)) { + for (const t of typeDef.enum as unknown[]) { + result.push({ + type: String(t), + properties: Object.keys(properties).filter((k) => k !== 'type'), + required: required.filter((r) => r !== 'type'), + }); + } + } + } + return result; +} + +export function extractSchemaTypeNames( + schema: SchemaWithMetadata, + schemaRefRegistry: Map, +): Set { + return new Set(extractEntityInfo(schema, schemaRefRegistry).map((e) => e.type)); +} + export function loadMetadata(schemaPath: string): SchemaMetadata { return extractMetadata(readRawSchema(schemaPath), buildFullRegistry(schemaPath)); } diff --git a/tests/fixtures/type-inference/Personal/personal-page.md b/tests/fixtures/type-inference/Personal/personal-page.md new file mode 100644 index 0000000..04b63bb --- /dev/null +++ 
b/tests/fixtures/type-inference/Personal/personal-page.md @@ -0,0 +1,3 @@ +--- +title: Personal Note +--- diff --git a/tests/fixtures/type-inference/Research/research-page.md b/tests/fixtures/type-inference/Research/research-page.md new file mode 100644 index 0000000..b3bcf4f --- /dev/null +++ b/tests/fixtures/type-inference/Research/research-page.md @@ -0,0 +1,4 @@ +--- +title: Research Page +url: https://example.com/research +--- diff --git a/tests/fixtures/type-inference/a/b/c/deep.md b/tests/fixtures/type-inference/a/b/c/deep.md new file mode 100644 index 0000000..9404f52 --- /dev/null +++ b/tests/fixtures/type-inference/a/b/c/deep.md @@ -0,0 +1,3 @@ +--- +title: Deep Page +--- diff --git a/tests/fixtures/type-inference/a/b/shallow.md b/tests/fixtures/type-inference/a/b/shallow.md new file mode 100644 index 0000000..868893c --- /dev/null +++ b/tests/fixtures/type-inference/a/b/shallow.md @@ -0,0 +1,3 @@ +--- +title: Shallow Page +--- diff --git a/tests/fixtures/type-inference/concept/concept-page.md b/tests/fixtures/type-inference/concept/concept-page.md new file mode 100644 index 0000000..f6fba88 --- /dev/null +++ b/tests/fixtures/type-inference/concept/concept-page.md @@ -0,0 +1,3 @@ +--- +title: Affordance +--- diff --git a/tests/fixtures/type-inference/note/explicit-type.md b/tests/fixtures/type-inference/note/explicit-type.md new file mode 100644 index 0000000..d9d93eb --- /dev/null +++ b/tests/fixtures/type-inference/note/explicit-type.md @@ -0,0 +1,4 @@ +--- +type: synthesis +title: Explicit Synthesis +--- diff --git a/tests/fixtures/type-inference/root-page.md b/tests/fixtures/type-inference/root-page.md new file mode 100644 index 0000000..6bf2e50 --- /dev/null +++ b/tests/fixtures/type-inference/root-page.md @@ -0,0 +1,3 @@ +--- +title: Root Page +--- diff --git a/tests/fixtures/type-inference/sources/sources-page.md b/tests/fixtures/type-inference/sources/sources-page.md new file mode 100644 index 0000000..d640c9f --- /dev/null +++ 
b/tests/fixtures/type-inference/sources/sources-page.md @@ -0,0 +1,3 @@ +--- +title: Sources Page +--- diff --git a/tests/fixtures/type-inference/study/case-insensitive.md b/tests/fixtures/type-inference/study/case-insensitive.md new file mode 100644 index 0000000..28b558e --- /dev/null +++ b/tests/fixtures/type-inference/study/case-insensitive.md @@ -0,0 +1,4 @@ +--- +title: Case Insensitive Study +url: https://example.com/study2 +--- diff --git a/tests/fixtures/type-inference/study/study-page.md b/tests/fixtures/type-inference/study/study-page.md new file mode 100644 index 0000000..f4f8d62 --- /dev/null +++ b/tests/fixtures/type-inference/study/study-page.md @@ -0,0 +1,4 @@ +--- +title: A Study Page +url: https://example.com/study +--- diff --git a/tests/fixtures/type-inference/topics/concepts/nested-concept.md b/tests/fixtures/type-inference/topics/concepts/nested-concept.md new file mode 100644 index 0000000..0e6bc7c --- /dev/null +++ b/tests/fixtures/type-inference/topics/concepts/nested-concept.md @@ -0,0 +1,3 @@ +--- +title: Nested Concept +--- diff --git a/tests/helpers/context.ts b/tests/helpers/context.ts index f4bab75..273749a 100644 --- a/tests/helpers/context.ts +++ b/tests/helpers/context.ts @@ -28,6 +28,10 @@ export function makeSpaceContext( } /** Build a PluginContext for testing plugin functions directly. 
*/ -export function makePluginContext(path: string, schemaPath?: string): PluginContext { - return { ...makeSpaceContext(path, schemaPath), pluginConfig: {} }; +export function makePluginContext( + path: string, + schemaPath?: string, + pluginConfig: Record<string, unknown> = {}, +): PluginContext { + return { ...makeSpaceContext(path, schemaPath), pluginConfig }; } diff --git a/tests/plugins/markdown/read-space-directory-type-inference.test.ts b/tests/plugins/markdown/read-space-directory-type-inference.test.ts new file mode 100644 index 0000000..a5bdcba --- /dev/null +++ b/tests/plugins/markdown/read-space-directory-type-inference.test.ts @@ -0,0 +1,229 @@ +import { describe, expect, it } from 'bun:test'; +import { join } from 'node:path'; +import { readSpaceDirectory } from '../../../src/plugins/markdown/read-space'; +import { inferTypeFromPath } from '../../../src/plugins/markdown/util'; +import { bundledSchemasDir } from '../../../src/schema/schema'; +import { makePluginContext } from '../../helpers/context'; + +const KNOWLEDGE_WIKI_SCHEMA = join(bundledSchemasDir, 'knowledge_wiki.json'); +const FIXTURE_DIR = join(import.meta.dir, '../../fixtures/type-inference'); + +// knowledge_wiki types and aliases +const KNOWN_TYPES = new Set(['source', 'concept', 'synthesis', 'note', 'index']); +const TYPE_ALIASES: Record<string, string> = { + source_summary: 'source', + study: 'source', + article: 'source', + paper: 'source', + research: 'source', +}; + +describe('inferTypeFromPath', () => { + describe('mode: off', () => { + it('returns undefined regardless of path', () => { + expect(inferTypeFromPath('concept/page.md', { mode: 'off' }, KNOWN_TYPES, TYPE_ALIASES)).toBeUndefined(); + }); + }); + + describe('folder-name mode (default)', () => { + it('matches canonical type name', () => { + expect(inferTypeFromPath('concept/page.md', {}, KNOWN_TYPES, undefined)).toBe('concept'); + }); + + it('matches canonical type name case-insensitively', () => { + 
expect(inferTypeFromPath('Concept/page.md', {}, 
KNOWN_TYPES, undefined)).toBe('concept'); + expect(inferTypeFromPath('NOTE/page.md', {}, KNOWN_TYPES, undefined)).toBe('note'); + }); + + it('matches alias key and returns canonical type', () => { + expect(inferTypeFromPath('study/page.md', {}, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + }); + + it('matches alias key case-insensitively', () => { + expect(inferTypeFromPath('Study/page.md', {}, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + expect(inferTypeFromPath('STUDY/page.md', {}, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + }); + + it('does not match plural folder name without a matching alias', () => { + expect(inferTypeFromPath('sources/page.md', {}, KNOWN_TYPES, TYPE_ALIASES)).toBeUndefined(); + }); + + it('returns undefined for files at space root', () => { + expect(inferTypeFromPath('page.md', {}, KNOWN_TYPES, TYPE_ALIASES)).toBeUndefined(); + }); + + it('uses leaf directory only, not parent dirs', () => { + expect(inferTypeFromPath('archives/concept/page.md', {}, KNOWN_TYPES, undefined)).toBe('concept'); + expect(inferTypeFromPath('archives/unknown/page.md', {}, KNOWN_TYPES, undefined)).toBeUndefined(); + }); + }); + + describe('folderMap mode', () => { + it('infers type from mapped folder', () => { + const cfg = { folderMap: { Research: 'source' } }; + expect(inferTypeFromPath('Research/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + }); + + it('returns undefined for unmapped folder', () => { + const cfg = { folderMap: { Research: 'source' } }; + expect(inferTypeFromPath('Personal/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBeUndefined(); + }); + + it('matches nested path exactly', () => { + const cfg = { folderMap: { 'topics/concepts': 'concept' } }; + expect(inferTypeFromPath('topics/concepts/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBe('concept'); + }); + + it('longest-prefix wins when keys overlap', () => { + const cfg = { folderMap: { 'a/b': 'note', 'a/b/c': 'concept' } }; + expect(inferTypeFromPath('a/b/page.md', cfg, 
KNOWN_TYPES, TYPE_ALIASES)).toBe('note'); + expect(inferTypeFromPath('a/b/c/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBe('concept'); + }); + + it('normalises trailing slash in key', () => { + const cfg = { folderMap: { 'Research/': 'source' } }; + expect(inferTypeFromPath('Research/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + }); + + it('resolves folderMap value that is an alias', () => { + const cfg = { folderMap: { Research: 'study' } }; + expect(inferTypeFromPath('Research/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toBe('source'); + }); + + it('throws hard error for unresolvable folderMap value', () => { + const cfg = { folderMap: { Research: 'unknown-type' } }; + expect(() => inferTypeFromPath('Research/page.md', cfg, KNOWN_TYPES, TYPE_ALIASES)).toThrow(/unknown-type/); + }); + }); +}); + +describe('readSpaceDirectory with type inference', () => { + describe('folder-name mode', () => { + it('infers type from leaf directory matching canonical type', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + const node = result.nodes.find((n) => n.label === 'concept/concept-page.md'); + expect(node).toBeDefined(); + expect(node?.type).toBe('concept'); + }); + + it('infers type via alias (study → source)', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + const node = result.nodes.find((n) => n.label === 'study/study-page.md'); + expect(node).toBeDefined(); + expect(node?.type).toBe('source'); + }); + + it('infers via alias case-insensitively (Study → source)', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + const node = result.nodes.find((n) => 
n.label.toLowerCase() === 'study/case-insensitive.md'); + expect(node).toBeDefined(); + expect(node?.type).toBe('source'); + }); + + it('does not infer for plural folder with no alias (sources/)', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.parseIgnored).toContain('sources/sources-page.md'); + }); + + it('explicit type in frontmatter overrides inferred type', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + const node = result.nodes.find((n) => n.label === 'note/explicit-type.md'); + expect(node).toBeDefined(); + expect(node?.type).toBe('synthesis'); + }); + + it('does not infer for file at space root', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'folder-name' }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.parseIgnored).toContain('root-page.md'); + }); + }); + + describe('mode: off', () => { + it('does not infer type when mode is off', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { mode: 'off' }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'concept/concept-page.md')).toBeUndefined(); + expect(result.parseIgnored).toContain('concept/concept-page.md'); + }); + }); + + describe('no typeInference config', () => { + it('does not infer type when typeInference is not configured', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'concept/concept-page.md')).toBeUndefined(); + expect(result.parseIgnored).toContain('concept/concept-page.md'); + }); + }); + + 
describe('folderMap mode', () => { + it('infers type from mapped folder', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { Research: 'source', Personal: 'note' } }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'Research/research-page.md')?.type).toBe('source'); + expect(result.nodes.find((n) => n.label === 'Personal/personal-page.md')?.type).toBe('note'); + }); + + it('does not infer for unmapped folder', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { Research: 'source' } }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.parseIgnored).toContain('Personal/personal-page.md'); + }); + + it('infers from nested path key', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { 'topics/concepts': 'concept' } }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'topics/concepts/nested-concept.md')?.type).toBe('concept'); + }); + + it('longest-prefix wins', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { 'a/b': 'note', 'a/b/c': 'concept' } }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'a/b/shallow.md')?.type).toBe('note'); + expect(result.nodes.find((n) => n.label === 'a/b/c/deep.md')?.type).toBe('concept'); + }); + + it('folderMap value can be an alias', async () => { + const ctx = makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { Research: 'study' } }, + }); + const result = await readSpaceDirectory(ctx); + expect(result.nodes.find((n) => n.label === 'Research/research-page.md')?.type).toBe('source'); + }); + + it('throws for unresolvable folderMap value', async () => { + const ctx = 
makePluginContext(FIXTURE_DIR, KNOWLEDGE_WIKI_SCHEMA, { + typeInference: { folderMap: { Research: 'unknown-type' } }, + }); + await expect(readSpaceDirectory(ctx)).rejects.toThrow(/unknown-type/); + }); + }); +});