From cca0c2584187aafb7e6448b72b9b73aab65f6b67 Mon Sep 17 00:00:00 2001 From: Roger Barnes Date: Tue, 21 Apr 2026 10:55:04 +1000 Subject: [PATCH 1/2] Extract content links from markdown nodes (closes #74) Adds a ContentLink type and extracts wikilinks, markdown links, images, and bare URLs from node body and frontmatter fields. Links are resolved to indicate whether they are space-internal, space-external, or external URLs, and exposed on SpaceNode as contentLinks. Graph edge fields are excluded from frontmatter link extraction to avoid duplication. --- docs/concepts.md | 23 ++ plugin/.claude-plugin/plugin.json | 7 +- schemas/_knowledge_wiki.json | 2 +- src/api.ts | 4 + src/commands/schemas.ts | 5 +- src/commands/validate-file.ts | 23 +- src/commands/validate.ts | 15 + src/plugins/markdown/extract-content-links.ts | 198 ++++++++++ src/plugins/markdown/parse-embedded.ts | 9 + src/plugins/markdown/read-space.ts | 4 + src/read/resolve-graph-edges.ts | 48 ++- src/schema/schema.ts | 9 +- src/types.ts | 54 +++ src/validate.ts | 35 ++ tests/filter/augment-nodes.test.ts | 1 + tests/filter/expand-include.test.ts | 1 + tests/filter/filter-nodes.test.ts | 1 + .../general/valid-ost/links_opportunity.md | 19 + .../read-space-directory-general.test.ts | 4 +- tests/read/content-links.test.ts | 347 ++++++++++++++++++ tests/render/render-bullets.test.ts | 1 + tests/render/render-mermaid.test.ts | 1 + tests/schema/evaluate-rule.test.ts | 6 + tests/schema/validate-graph.test.ts | 1 + tests/schema/validate-rules.test.ts | 23 ++ tests/space-graph.test.ts | 1 + tests/validate/general.test.ts | 4 +- 27 files changed, 825 insertions(+), 21 deletions(-) create mode 100644 src/plugins/markdown/extract-content-links.ts create mode 100644 tests/fixtures/general/valid-ost/links_opportunity.md create mode 100644 tests/read/content-links.test.ts diff --git a/docs/concepts.md b/docs/concepts.md index 2603af6..9336237 100644 --- a/docs/concepts.md +++ b/docs/concepts.md @@ -254,6 +254,29 @@ Each 
entry is a `ResolvedParentRef` object: The `source` label lets downstream consumers distinguish edge types without re-inspecting the schema. Validation routes `hierarchy` edges to structural checks (parent-type rules, skip-level detection) and `relationship` edges to field reference checks (type-match, missing-target). Tree rendering and rule evaluation use the full set. +### Content links + +**Content links** (`resolvedLinks` on `SpaceNode`) are all links extracted from a node's content at parse time — both from frontmatter non-edge fields and from the body text. They are distinct from graph edges: they are never used to infer structure, but are available to consumers for rendering, auditing, or navigation. + +Each entry is a `ResolvedContentLink` object: + +| Field | Type | Description | +|---|---|---| +| `text` | `string` | Display text of the link | +| `target` | `string` | Raw link target (URL or wikilink path) | +| `action` | `'link' \| 'embed'` | Whether the link navigates or transcludes | +| `anchor` | `string?` | Fragment component (heading or block ref) if present | +| `location` | `'node' \| 'internal' \| 'external' \| 'system' \| 'protocol'` | Resolved location classification | + +**Location classification:** +- `node` — wikilink resolved to a known space node +- `internal` — wikilink or relative path to an in-source target that is not a space node +- `external` — `http://` or `https://` URL +- `system` — `file://` URL +- `protocol` — any other scheme (e.g. `obsidian://`) + +**Sources:** Both wikilink syntax (`[[Target]]`, `![[Image]]`) and markdown link syntax (`[text](url)`, `![alt](url)`) are extracted. Bare URLs in frontmatter string fields are also captured. Graph edge fields (hierarchy `parent`, relationship fields) are excluded — those are graph edges, not content links. + ### Anchor An **anchor** is a block anchor (e.g. `^goal1`) appended to a heading in a `typed page`, using Obsidian block anchor syntax. 
Anchors serve two purposes: diff --git a/plugin/.claude-plugin/plugin.json b/plugin/.claude-plugin/plugin.json index f729ddb..111d167 100644 --- a/plugin/.claude-plugin/plugin.json +++ b/plugin/.claude-plugin/plugin.json @@ -5,10 +5,5 @@ "homepage": "https://github.com/mindsocket/structured-context", "repository": "https://github.com/mindsocket/structured-context", "license": "MIT", - "keywords": [ - "structured-context", - "validation", - "markdown", - "schema" - ] + "keywords": ["structured-context", "validation", "markdown", "schema"] } diff --git a/schemas/_knowledge_wiki.json b/schemas/_knowledge_wiki.json index 6503a00..e827033 100644 --- a/schemas/_knowledge_wiki.json +++ b/schemas/_knowledge_wiki.json @@ -207,7 +207,7 @@ }, "note": { "type": "object", - "description": "Personal notes, journal entries, appointments, experiential records. Your own data about your journey — not sourced from external material.", + "description": "Personal notes, journal entries, talk notes, or rough thoughts not drawn from an external URL.", "allOf": [{ "$ref": "sctx://_sctx_base#/$defs/baseNodeProps" }], "properties": { "type": { "const": "note" }, diff --git a/src/api.ts b/src/api.ts index 64833bb..fbaf64e 100644 --- a/src/api.ts +++ b/src/api.ts @@ -9,6 +9,7 @@ export type { AnySchemaObject, SchemaObject, ValidateFunction } from 'ajv'; export type { Config, SpaceConfig } from './config'; +export { loadConfig, setConfigPath } from './config'; export type { ParseHook, ParseResult, @@ -21,15 +22,18 @@ export type { TemplateSyncOptions, } from './plugins/util'; export type { SharedEmbeddingFields } from './schema/metadata-contract'; +export { bundledSchemasDir, loadSchema, setBundledSchemasDir } from './schema/schema'; export type { SpaceGraph } from './space-graph'; export type { BaseNode, + ContentLink, EdgeDefinition, FileNotInSpaceResult, FileValidationResult, HierarchyLevel, ParseIssue, Relationship, + ResolvedContentLink, SchemaMetadata, SchemaWithMetadata, SpaceContext, 
diff --git a/src/commands/schemas.ts b/src/commands/schemas.ts index 9ddd469..b678a6b 100644 --- a/src/commands/schemas.ts +++ b/src/commands/schemas.ts @@ -3,7 +3,7 @@ import { dirname, join } from 'node:path'; import type { AnySchemaObject } from 'ajv'; import { loadConfig, resolveSchema } from '../config'; import { bundledSchemasDir, type EntityInfo, extractEntityInfo, loadSchema, readRawSchema } from '../schema/schema'; -import { mergeVariantProperties } from '../schema/schema-refs'; +import { mergeVariantProperties, resolveRef } from '../schema/schema-refs'; import type { SchemaMetadata, SchemaWithMetadata } from '../types'; function isBundledPath(schemaPath: string): boolean { @@ -35,6 +35,7 @@ function extractEntities( ): EntityVariant[] { return oneOf.map((entry) => { const entryObj = entry as AnySchemaObject; + const resolvedObj = resolveRef(entryObj, schema, schemaRefRegistry) ?? entryObj; const { properties, required } = mergeVariantProperties(entryObj, schema, schemaRefRegistry); const typeDef = properties.type as AnySchemaObject | undefined; let types: string[] = []; @@ -43,7 +44,7 @@ function extractEntities( return { types, - description: typeof entryObj.description === 'string' ? entryObj.description : undefined, + description: typeof resolvedObj.description === 'string' ? resolvedObj.description : undefined, properties: Object.keys(properties).filter((k) => k !== 'type'), required: required.filter((r) => r !== 'type'), }; diff --git a/src/commands/validate-file.ts b/src/commands/validate-file.ts index d607989..c207430 100644 --- a/src/commands/validate-file.ts +++ b/src/commands/validate-file.ts @@ -26,22 +26,35 @@ export async function validateFileCommand(filePath: string, options: { json?: bo printHumanReadable(result); } - return Object.keys(result.errors).length > 0 ? 1 : 0; + return result.errorCount > 0 ? 
1 : 0; } function printHumanReadable(result: FileValidationResult): void { const reset = '\x1b[0m'; const green = '\x1b[32m'; const red = '\x1b[31m'; + const yellow = '\x1b[33m'; - if (result.errorCount === 0) { + if (result.errorCount === 0 && result.warningCount === 0) { console.log(`${green}✓${reset} ${result.label} (space: ${result.space})`); return; } - console.log(`\n${red}✗${reset} ${result.label} (space: ${result.space}) — ${result.errorCount} error(s)\n`); - for (const { kind, message } of Object.values(result.errors)) { - console.log(` [${kind}] ${message}`); + if (result.errorCount > 0) { + console.log(`\n${red}✗${reset} ${result.label} (space: ${result.space}) — ${result.errorCount} error(s)\n`); + for (const { kind, message } of Object.values(result.errors)) { + console.log(` [${kind}] ${message}`); + } + } else { + console.log(`\n${green}✓${reset} ${result.label} (space: ${result.space})`); + } + + if (result.warningCount > 0) { + console.log(`\n ${yellow}${result.warningCount} warning(s):${reset}`); + for (const { kind, message } of Object.values(result.warnings)) { + console.log(` ${yellow}[${kind}]${reset} ${message}`); + } } + console.log(''); } diff --git a/src/commands/validate.ts b/src/commands/validate.ts index f0bd894..cf0543d 100644 --- a/src/commands/validate.ts +++ b/src/commands/validate.ts @@ -60,6 +60,7 @@ export async function validate(context: SpaceContext, options: { json?: boolean errors: errorsByFile, orphanCount: result.orphans.length, parseIssues: result.parseIssues, + unresolvedContentLinks: result.unresolvedContentLinks, }, null, 2, @@ -100,6 +101,7 @@ export async function validate(context: SpaceContext, options: { json?: boolean console.log(fmt(' Rule violations', result.ruleViolations.length, true)); console.log(fmt(' Hierarchy violations', result.hierarchyViolations.length, true)); console.log(fmt(' Orphans (hierarchy nodes - no parent)', result.orphans.length, true, true)); + console.log(fmt(' Unresolved content links', 
result.unresolvedContentLinks.length, true, true)); const parseIssueErrorCount = result.parseIssues.filter((i) => i.severity === 'error').length; console.log(fmt(' Excluded during parsing', result.parseIssues.length, true, parseIssueErrorCount === 0)); @@ -108,6 +110,19 @@ export async function validate(context: SpaceContext, options: { json?: boolean for (const node of result.orphans) console.log(` ${node.label}`); } + if (result.unresolvedContentLinks.length > 0) { + console.log(`\nUnresolved content links (may be outside-space or broken):`); + const byFile = new Map(); + for (const { file, target } of result.unresolvedContentLinks) { + if (!byFile.has(file)) byFile.set(file, []); + byFile.get(file)!.push(target); + } + for (const [file, targets] of byFile) { + console.log(` ${file}:`); + for (const target of targets) console.log(` ${target}`); + } + } + if (result.parseIssues.length > 0) { console.log(`\nExcluded during parsing:`); for (const issue of result.parseIssues) { diff --git a/src/plugins/markdown/extract-content-links.ts b/src/plugins/markdown/extract-content-links.ts new file mode 100644 index 0000000..6a45d05 --- /dev/null +++ b/src/plugins/markdown/extract-content-links.ts @@ -0,0 +1,198 @@ +import type { Image, Link, Node, Parent, Root, Text } from 'mdast'; +import { toString as mdastToString } from 'mdast-util-to-string'; +import remarkGfm from 'remark-gfm'; +import remarkParse from 'remark-parse'; +import { unified } from 'unified'; +import type { ContentLink, SchemaMetadata } from '../../types'; + +/** + * Parse an Obsidian wikilink inner string into its components. 
+ * Handles: [[target]], [[target#anchor]], [[target|alias]], [[target#anchor|alias]] + */ +function parseWikilinkInner(inner: string): { target: string; anchor?: string; displayText?: string } { + // Alias: [[target|alias]] or [[target#anchor|alias]] + const pipeIdx = inner.indexOf('|'); + let core = inner; + let displayText: string | undefined; + if (pipeIdx >= 0) { + core = inner.slice(0, pipeIdx); + const alias = inner.slice(pipeIdx + 1).trim(); + if (alias) displayText = alias; + } + + // Anchor: [[target#anchor]] or [[target#^block]] + const hashIdx = core.indexOf('#'); + if (hashIdx >= 0) { + const target = core.slice(0, hashIdx).trim(); + const anchor = core.slice(hashIdx + 1).trim() || undefined; + return { target, anchor, displayText }; + } + + return { target: core.trim(), displayText }; +} + +/** + * Extract wikilinks (and Obsidian embed wikilinks) from a plain text string. + * Matches [[target]], ![[target]], [[target#anchor]], [[target|alias]], etc. + */ +function extractWikilinksFromText(text: string): ContentLink[] { + const links: ContentLink[] = []; + + for (const match of text.matchAll(/(!?)\[\[([^\]]+)\]\]/g)) { + const isEmbed = match[1] === '!'; + const inner = match[2]!; + const { target, anchor, displayText } = parseWikilinkInner(inner); + if (!target) continue; + + links.push({ + text: displayText ?? target, + target, + action: isEmbed ? 'embed' : 'link', + ...(anchor !== undefined ? { anchor } : {}), + linkSyntax: 'wikilink', + }); + } + + return links; +} + +/** + * Extract bare URLs (http/https) from a plain text string. + * Skips URLs that are already inside a markdown link `[text](url)` to avoid duplication. 
+ */ +function extractBareUrlsFromText(text: string): ContentLink[] { + const links: ContentLink[] = []; + // Negative lookbehind: skip URLs immediately preceded by `](` (already a markdown link target) + for (const match of text.matchAll(/(?]+/g)) { + links.push({ text: match[0], target: match[0], action: 'link', linkSyntax: 'markdown' }); + } + return links; +} + +/** + * Extract standard markdown links and images from a plain text string. + * Used for scanning frontmatter string values. + */ +function extractMarkdownLinksFromText(text: string): ContentLink[] { + const links: ContentLink[] = []; + + for (const match of text.matchAll(/(!?)\[([^\]]*)\]\(([^)]+)\)/g)) { + const isEmbed = match[1] === '!'; + const linkText = match[2]!; + const url = match[3]!.trim(); + if (!url) continue; + + links.push({ + text: linkText, + target: url, + action: isEmbed ? 'embed' : 'link', + linkSyntax: 'markdown', + }); + } + + return links; +} + +/** + * Extract all links (wikilinks, markdown links, and bare URLs) from a plain text string. + * Used for scanning frontmatter string field values. + */ +export function extractLinksFromText(text: string): ContentLink[] { + return [...extractWikilinksFromText(text), ...extractMarkdownLinksFromText(text), ...extractBareUrlsFromText(text)]; +} + +/** + * Recursively walk an mdast subtree and collect all links. + * Handles standard markdown link/image nodes and scans text nodes for wikilinks. + */ +export function extractLinksFromAstNode(node: Node): ContentLink[] { + const links: ContentLink[] = []; + + function walk(n: Node): void { + switch (n.type) { + case 'link': { + const linkNode = n as Link; + links.push({ + text: mdastToString(linkNode), + target: linkNode.url, + action: 'link', + linkSyntax: 'markdown', + }); + // Don't recurse into children — they are display text, not link targets + break; + } + case 'image': { + const imgNode = n as Image; + links.push({ + text: imgNode.alt ?? 
'',
+          target: imgNode.url,
+          action: 'embed',
+          linkSyntax: 'markdown',
+        });
+        break;
+      }
+      case 'text': {
+        // Remark does not parse wikilinks natively; scan text nodes for [[...]] patterns
+        links.push(...extractWikilinksFromText((n as Text).value));
+        break;
+      }
+      default: {
+        if ('children' in n) {
+          for (const child of (n as Parent).children) {
+            walk(child);
+          }
+        }
+        break;
+      }
+    }
+  }
+
+  walk(node);
+  return links;
+}
+
+/**
+ * Extract all links from a raw markdown body string.
+ * Parses the full document and collects links from every node in the tree.
+ */
+export function extractLinksFromBody(body: string): ContentLink[] {
+  const tree = unified().use(remarkParse).use(remarkGfm).parse(body) as Root;
+  return extractLinksFromAstNode(tree);
+}
+
+/**
+ * Build the set of field names that serve as graph edges (hierarchy + relationship fields).
+ * Used to exclude edge fields from frontmatter link extraction.
+ */
+export function getEdgeFieldNames(metadata: SchemaMetadata): Set<string> {
+  const fields = new Set<string>();
+  for (const level of metadata.hierarchy?.levels ?? []) {
+    fields.add(level.field);
+    if (level.selfRefField) fields.add(level.selfRefField);
+  }
+  for (const rel of metadata.relationships ?? []) {
+    fields.add(rel.field);
+  }
+  return fields;
+}
+
+/**
+ * Extract links from frontmatter data fields, excluding known graph edge fields.
+ * Scans string values and string array elements for both wikilinks and markdown links.
+ */ +export function extractLinksFromFrontmatter(data: Record, edgeFields: Set): ContentLink[] { + const links: ContentLink[] = []; + for (const [key, value] of Object.entries(data)) { + if (edgeFields.has(key)) continue; + if (typeof value === 'string') { + links.push(...extractLinksFromText(value)); + } else if (Array.isArray(value)) { + for (const item of value) { + if (typeof item === 'string') { + links.push(...extractLinksFromText(item)); + } + } + } + } + return links; +} diff --git a/src/plugins/markdown/parse-embedded.ts b/src/plugins/markdown/parse-embedded.ts index a8d72ce..28cb95c 100644 --- a/src/plugins/markdown/parse-embedded.ts +++ b/src/plugins/markdown/parse-embedded.ts @@ -12,6 +12,7 @@ import type { SchemaMetadata, SharedEmbeddingFields, } from '../../api'; +import { extractLinksFromAstNode } from './extract-content-links'; import { applyFieldMap, coerceDates } from './util'; /** Type values that identify a space_on_a_page container (not themselves space nodes). */ @@ -266,6 +267,7 @@ function processListItem( schemaData, linkTargets, type, + contentLinks: [], }; nodes.push(newNode); @@ -347,6 +349,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio schemaData: { type: 'space_on_a_page' }, linkTargets: [], type: 'space_on_a_page', + contentLinks: [], }; const tree = unified().use(remarkParse).use(remarkGfm).parse(body) as Root; @@ -618,6 +621,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio schemaData, linkTargets, type, + contentLinks: [], }; // Push to stack BEFORE resolving semantic parent — stack[-2] is the correct parent. 
@@ -788,6 +792,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio schemaData, linkTargets, type: rowTypeStr, + contentLinks: [], }; nodes.push(rowNode); @@ -827,6 +832,8 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio Object.assign(activeNode.schemaData, allFields); if (remainingText) appendContent(activeNode, remainingText); + activeNode.contentLinks ??= []; + activeNode.contentLinks.push(...extractLinksFromAstNode(child)); } else if (child.type === 'code' && (child as Code).lang?.trim() === 'yaml') { const code = child as Code; const parsed = yamlLoad(code.value); @@ -839,6 +846,8 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio } } else { appendContent(activeNode, mdastToString(child)); + activeNode.contentLinks ??= []; + activeNode.contentLinks.push(...extractLinksFromAstNode(child)); } } } diff --git a/src/plugins/markdown/read-space.ts b/src/plugins/markdown/read-space.ts index 2d18ec2..82985d2 100644 --- a/src/plugins/markdown/read-space.ts +++ b/src/plugins/markdown/read-space.ts @@ -7,6 +7,7 @@ import { extractSchemaTypeNames } from '../../schema/schema'; import type { ParseIssue } from '../../types'; import type { ParseResult, PluginContext } from '../util'; import type { MarkdownPluginConfig } from '.'; +import { extractLinksFromBody, extractLinksFromFrontmatter, getEdgeFieldNames } from './extract-content-links'; import { extractEmbeddedNodes, ON_A_PAGE_TYPES } from './parse-embedded'; import { applyFieldMap, coerceDates, inferTypeFromPath } from './util'; @@ -75,6 +76,8 @@ export async function readSpaceDirectory( const knownTypes = typeInferenceCfg?.mode !== 'off' ? 
extractSchemaTypeNames(context.schema, context.schemaRefRegistry) : undefined; + const edgeFields = getEdgeFieldNames(metadata); + const files = await fg('**/*.md', { cwd: directory, followSymbolicLinks: true }); const nodes: BaseNode[] = []; const parseIssues: ParseIssue[] = []; @@ -136,6 +139,7 @@ export async function readSpaceDirectory( schemaData: { title, ...data }, linkTargets: [title, fileBase], type: pageType, + contentLinks: [...extractLinksFromFrontmatter(data, edgeFields), ...extractLinksFromBody(parsed.content)], }); if (!ON_A_PAGE_TYPES.includes(pageType)) { diff --git a/src/read/resolve-graph-edges.ts b/src/read/resolve-graph-edges.ts index b37396a..4f5169e 100644 --- a/src/read/resolve-graph-edges.ts +++ b/src/read/resolve-graph-edges.ts @@ -1,5 +1,14 @@ import { resolveNodeType } from '../schema/schema'; -import type { BaseNode, EdgeDefinition, ResolvedParentRef, SchemaMetadata, SpaceNode, UnresolvedRef } from '../types'; +import type { + BaseNode, + ContentLink, + EdgeDefinition, + ResolvedContentLink, + ResolvedParentRef, + SchemaMetadata, + SpaceNode, + UnresolvedRef, +} from '../types'; import { buildTargetIndex, wikilinkToTarget } from './wikilink-utils'; /** @@ -136,6 +145,37 @@ function resolveEdge( } } +/** + * Classify a ContentLink into a ResolvedContentLink by determining its location. + * + * Wikilinks are resolved against the target index: if found, location is 'node', + * otherwise 'internal' (in the source system but not a space node). + * Markdown links are classified by URL scheme: external (http/https), system (file://), + * protocol (other scheme), or internal (relative path / no scheme). + */ +function classifyContentLink(link: ContentLink, targetIndex: Map): ResolvedContentLink { + const { linkSyntax, target, text, action, anchor } = link; + const base = { text, target, action, ...(anchor !== undefined ? 
{ anchor } : {}) }; + + if (linkSyntax === 'wikilink') { + // Try the full target#anchor key first, then the bare target — either means it's a space node. + const lookupKey = anchor !== undefined ? `${target}#${anchor}` : target; + const inIndex = targetIndex.has(lookupKey) || (anchor !== undefined && targetIndex.has(target)); + return { ...base, location: inIndex ? 'node' : 'internal' }; + } + + // Markdown link: classify by URL scheme. + const colonIdx = target.indexOf('://'); + if (colonIdx > 0) { + const scheme = target.slice(0, colonIdx).toLowerCase(); + const location = scheme === 'https' || scheme === 'http' ? 'external' : scheme === 'file' ? 'system' : 'protocol'; + return { ...base, location }; + } + + // Relative path or fragment — in the source system but not a resolvable space node. + return { ...base, location: 'internal' }; +} + /** * Enrich parsed nodes into SpaceNodes by applying type alias resolution and resolving * parent links using the hierarchy levels and relationships from schema metadata. @@ -155,6 +195,7 @@ export function resolveGraphEdges( ...n, resolvedType: resolveNodeType(n.type, typeAliases), resolvedParents: [], + resolvedLinks: [], })); const targetIndex = buildTargetIndex(spaceNodes); @@ -232,6 +273,11 @@ export function resolveGraphEdges( resolveEdge(nodesByType, targetIndex, edge, 'relationship', rel.type === rel.parent, unresolvedRefs, typeAliases); } + // 3. Resolve content links — classify each raw ContentLink into a ResolvedContentLink. + for (const node of spaceNodes) { + node.resolvedLinks = (node.contentLinks ?? []).map((link) => classifyContentLink(link, targetIndex)); + } + // Deduplicate by (label, field, ref) — the same broken link may be encountered across // multiple resolveEdge calls (e.g. selfRef + regular hierarchy share the same field). 
const seen = new Set(); diff --git a/src/schema/schema.ts b/src/schema/schema.ts index e486f9e..e3adc06 100644 --- a/src/schema/schema.ts +++ b/src/schema/schema.ts @@ -16,8 +16,13 @@ import { } from './metadata-contract'; import { isObject, mergeVariantProperties, resolveJsonPointer } from './schema-refs'; -const packageDir = dirname(fileURLToPath(import.meta.url)); -export const bundledSchemasDir = join(packageDir, '..', '..', 'schemas'); +const packageDir = import.meta.url ? dirname(fileURLToPath(import.meta.url)) : ''; +export let bundledSchemasDir = packageDir ? join(packageDir, '..', '..', 'schemas') : ''; + +/** Override the bundled schemas directory (e.g. when running in a bundled context). */ +export function setBundledSchemasDir(dir: string): void { + bundledSchemasDir = dir; +} const validateMetadataContract = new Ajv().compile(METADATA_SCHEMA); diff --git a/src/types.ts b/src/types.ts index 9f86551..82fc58e 100644 --- a/src/types.ts +++ b/src/types.ts @@ -67,6 +67,47 @@ export type UnresolvedRef = { message: string; }; +/** + * A link extracted from node content at parse time, before location classification. + * Populated by plugins that support content link extraction (e.g. the markdown plugin). + */ +export type ContentLink = { + /** Display text of the link (text content for markdown links, inner text or target for wikilinks). */ + text: string; + /** Raw link target: URL for markdown links, wikilink path (without [[ ]]) for wikilinks. */ + target: string; + /** Whether the link navigates to a resource or transcludes/embeds it. */ + action: 'link' | 'embed'; + /** Anchor fragment (heading or block ref, e.g. 'heading-title' or '^blockid') if present. */ + anchor?: string; + /** Link syntax — used during resolution to determine how to classify the target. */ + linkSyntax: 'wikilink' | 'markdown'; +}; + +/** + * A fully classified link from node content, after location resolution. + * Present on SpaceNode after graph resolution. 
+ */ +export type ResolvedContentLink = { + /** Display text of the link. */ + text: string; + /** Link target (URL or wikilink target without [[ ]]). */ + target: string; + /** Whether the link navigates or embeds. */ + action: 'link' | 'embed'; + /** Anchor fragment if present. */ + anchor?: string; + /** + * Resolved location classification: + * - 'node' — wikilink resolved to a known space node + * - 'internal' — wikilink or relative path, in the source system but not a space node + * - 'external' — http/https URL + * - 'system' — file:// URL + * - 'protocol' — other scheme (e.g. obsidian://) + */ + location: 'node' | 'internal' | 'external' | 'system' | 'protocol'; +}; + /** * A node as produced by a parse plugin — raw type from content, no graph resolution applied. * Core enriches this into a SpaceNode after parsing. @@ -82,6 +123,11 @@ export type BaseNode = { linkTargets: string[]; /** Raw type string from content, as written by the user. */ type: string; + /** + * Links extracted from content body and non-edge string fields at parse time. + * Populated by plugins that support content link extraction; absent if not populated. + */ + contentLinks?: ContentLink[]; }; /** @@ -97,6 +143,11 @@ export type SpaceNode = BaseNode & { * Always present, empty if no parents resolved. */ resolvedParents: ResolvedParentRef[]; + /** + * Links from content body and non-edge fields, with location classified after graph resolution. + * Always present, empty if the plugin did not populate rawContentLinks. + */ + resolvedLinks: ResolvedContentLink[]; }; export type ParseIssue = { @@ -184,6 +235,9 @@ export interface FileValidationResult { /** Errors keyed by composite id (e.g. `schema:/status:enum:active`, `rule:my-rule-id`). */ errors: Record; errorCount: number; + /** Warnings keyed by composite id. Do not affect exit code or errorCount. 
 */
+  warnings: Record<string, { kind: string; message: string }>;
+  warningCount: number;
   inSpace: true;
 }
 
diff --git a/src/validate.ts b/src/validate.ts
index 05bfbfb..0c359a1 100644
--- a/src/validate.ts
+++ b/src/validate.ts
@@ -34,6 +34,8 @@ export interface ValidationResult {
   hierarchyViolations: GraphViolation[];
   orphans: SpaceNode[];
   parseIssues: ParseIssue[];
+  /** Content links that could not be resolved to a space node. Warning only — not counted in errors. */
+  unresolvedContentLinks: Array<{ file: string; target: string }>;
 }
 
 /**
@@ -165,6 +167,7 @@ export async function validateSpace(context: SpaceContext): Promise();
+  for (const node of nodes) {
+    for (const link of node.resolvedLinks) {
+      if (link.location === 'internal') {
+        const key = `${node.label}::${link.target}`;
+        if (!seen.has(key)) {
+          seen.add(key);
+          result.unresolvedContentLinks.push({ file: node.label, target: link.target });
+        }
+      }
+    }
+  }
+
   return result;
 }
 
@@ -358,12 +375,30 @@ export async function validateFile(filePath: string, config: Config): Promise = {};
+  const targetNode = nodes.find((n) => n.label === label);
+  if (targetNode) {
+    const seen = new Set<string>();
+    for (const link of targetNode.resolvedLinks) {
+      if (link.location === 'internal' && !seen.has(link.target)) {
+        seen.add(link.target);
+        warnings[`content-link:${link.target}`] = {
+          kind: 'content-link',
+          message: `Unresolved content link: ${link.target}`,
+        };
+      }
+    }
+  }
+
   return {
     file: isAbsolute(filePath) ?
filePath : resolve(process.cwd(), filePath), label, space: space.name, errors, errorCount: Object.keys(errors).length, + warnings, + warningCount: Object.keys(warnings).length, inSpace: true, } satisfies FileValidationResult; } diff --git a/tests/filter/augment-nodes.test.ts b/tests/filter/augment-nodes.test.ts index 6b34a0e..ad5ae9a 100644 --- a/tests/filter/augment-nodes.test.ts +++ b/tests/filter/augment-nodes.test.ts @@ -13,6 +13,7 @@ function makeNode(title: string, type: string, parentRefs: ReturnType = type, resolvedType: type, resolvedParents: [], + resolvedLinks: [], }; } diff --git a/tests/filter/filter-nodes.test.ts b/tests/filter/filter-nodes.test.ts index a5778cd..5f120fa 100644 --- a/tests/filter/filter-nodes.test.ts +++ b/tests/filter/filter-nodes.test.ts @@ -15,6 +15,7 @@ function makeNode(title: string, type: string, extra: Record = type, resolvedType: type, resolvedParents: [], + resolvedLinks: [], }; } diff --git a/tests/fixtures/general/valid-ost/links_opportunity.md b/tests/fixtures/general/valid-ost/links_opportunity.md new file mode 100644 index 0000000..cdcc45f --- /dev/null +++ b/tests/fixtures/general/valid-ost/links_opportunity.md @@ -0,0 +1,19 @@ +--- +type: opportunity +status: active +parent: "[[Personal Vision]]" +summary: An opportunity with various links in content and frontmatter +source: https://example.com/research +--- + +See [[Personal Mission]] for context and also [[missing-page]] which doesn't exist. + +Check out this [external resource](https://www.example.com/tool) and this +[another link](https://docs.example.org/guide). + +Some content with an ![[embedded-image.png]] embed and an +obsidian link: [obsidian vault](obsidian://open?vault=MyVault). + +Also a file link: [local file](file:///path/to/notes.txt). + +A relative path link: [sibling note](./other-note.md). 
diff --git a/tests/plugins/markdown/read-space-directory-general.test.ts b/tests/plugins/markdown/read-space-directory-general.test.ts index 78f4b2c..47f377d 100644 --- a/tests/plugins/markdown/read-space-directory-general.test.ts +++ b/tests/plugins/markdown/read-space-directory-general.test.ts @@ -20,8 +20,8 @@ describe('readSpaceDirectory', () => { result = await readSpaceDirectory(makePluginContext(VALID_DIR)); }); - it('returns 12 OST nodes (5 original + vision_page + 2 embedded + solution_page + anchor_vision + 2 embedded)', () => { - expect(result.nodes).toHaveLength(12); + it('returns 13 OST nodes (5 original + vision_page + 2 embedded + solution_page + anchor_vision + 2 embedded + links_opportunity)', () => { + expect(result.nodes).toHaveLength(13); }); it('injects title from filename for file-based nodes', () => { diff --git a/tests/read/content-links.test.ts b/tests/read/content-links.test.ts new file mode 100644 index 0000000..945ec9c --- /dev/null +++ b/tests/read/content-links.test.ts @@ -0,0 +1,347 @@ +import { beforeAll, describe, expect, it } from 'bun:test'; +import { join } from 'node:path'; +import { readSpaceDirectory } from '../../src/plugins/markdown/read-space'; +import { resolveGraphEdges } from '../../src/read/resolve-graph-edges'; +import { bundledSchemasDir, loadMetadata } from '../../src/schema/schema'; +import type { ContentLink, SpaceNode } from '../../src/types'; +import { makePluginContext } from '../helpers/context'; + +const VALID_DIR = join(import.meta.dir, '../fixtures/general/valid-ost'); +const metadata = loadMetadata(join(bundledSchemasDir, 'strategy_general.json')); + +// --------------------------------------------------------------------------- +// Unit tests: extract-content-links utilities +// --------------------------------------------------------------------------- + +import type { Root } from 'mdast'; +import remarkGfm from 'remark-gfm'; +import remarkParse from 'remark-parse'; +import { unified } from 'unified'; 
+import { + extractLinksFromAstNode, + extractLinksFromFrontmatter, + extractLinksFromText, + getEdgeFieldNames, +} from '../../src/plugins/markdown/extract-content-links'; + +function parseBody(markdown: string): Root { + return unified().use(remarkParse).use(remarkGfm).parse(markdown) as Root; +} + +describe('extractLinksFromText', () => { + it('extracts wikilinks', () => { + const links = extractLinksFromText('See [[Personal Vision]] for more.'); + expect(links).toHaveLength(1); + expect(links[0]).toMatchObject({ + text: 'Personal Vision', + target: 'Personal Vision', + action: 'link', + linkSyntax: 'wikilink', + }); + }); + + it('extracts wikilinks with anchors', () => { + const links = extractLinksFromText('Go to [[vision_page#^embmission]].'); + expect(links).toHaveLength(1); + expect(links[0]).toMatchObject({ target: 'vision_page', anchor: '^embmission', linkSyntax: 'wikilink' }); + }); + + it('extracts wikilinks with aliases', () => { + const links = extractLinksFromText('See [[Personal Vision|our vision]].'); + expect(links[0]).toMatchObject({ text: 'our vision', target: 'Personal Vision' }); + }); + + it('extracts embed wikilinks', () => { + const links = extractLinksFromText('![[image.png]]'); + expect(links[0]).toMatchObject({ action: 'embed', target: 'image.png', linkSyntax: 'wikilink' }); + }); + + it('extracts markdown links', () => { + const links = extractLinksFromText('[click here](https://example.com)'); + expect(links[0]).toMatchObject({ text: 'click here', target: 'https://example.com', linkSyntax: 'markdown' }); + }); + + it('extracts markdown image embeds', () => { + const links = extractLinksFromText('![alt text](https://example.com/img.png)'); + expect(links[0]).toMatchObject({ action: 'embed', text: 'alt text', target: 'https://example.com/img.png' }); + }); + + it('extracts multiple mixed links from one string', () => { + const links = extractLinksFromText('See [[A]] and [B](https://b.com) and [[C]].'); + expect(links).toHaveLength(3); + 
expect(links.map((l) => l.target)).toEqual(['A', 'C', 'https://b.com']); + }); +}); + +describe('extractLinksFromAstNode', () => { + it('extracts a markdown link node', () => { + const tree = parseBody('[visit](https://example.com)'); + const para = tree.children[0]!; + const links = extractLinksFromAstNode(para); + expect(links).toHaveLength(1); + expect(links[0]).toMatchObject({ + text: 'visit', + target: 'https://example.com', + action: 'link', + linkSyntax: 'markdown', + }); + }); + + it('extracts a markdown image node', () => { + const tree = parseBody('![alt](https://example.com/pic.jpg)'); + const links = extractLinksFromAstNode(tree.children[0]!); + expect(links[0]).toMatchObject({ action: 'embed', text: 'alt', target: 'https://example.com/pic.jpg' }); + }); + + it('extracts wikilinks from text nodes within a paragraph', () => { + const tree = parseBody('See [[My Goal]] for context.'); + const links = extractLinksFromAstNode(tree.children[0]!); + expect(links).toHaveLength(1); + expect(links[0]).toMatchObject({ target: 'My Goal', linkSyntax: 'wikilink' }); + }); + + it('extracts both markdown links and wikilinks from a paragraph', () => { + const tree = parseBody('See [[My Goal]] and [external](https://example.com).'); + const links = extractLinksFromAstNode(tree.children[0]!); + expect(links).toHaveLength(2); + const wikilink = links.find((l) => l.linkSyntax === 'wikilink'); + const mdLink = links.find((l) => l.linkSyntax === 'markdown'); + expect(wikilink?.target).toBe('My Goal'); + expect(mdLink?.target).toBe('https://example.com'); + }); +}); + +describe('extractLinksFromFrontmatter', () => { + const edgeFields = new Set(['parent', 'parents']); + + it('extracts links from string fields', () => { + const data = { title: 'My Node', source: 'See [[Reference]] and https://example.com' }; + const links = extractLinksFromFrontmatter(data, edgeFields); + expect(links.some((l) => l.target === 'Reference')).toBe(true); + }); + + it('skips edge fields', () => { + 
const data = { title: 'My Node', parent: '[[Parent Node]]', source: '[[Reference]]' }; + const links = extractLinksFromFrontmatter(data, edgeFields); + expect(links.some((l) => l.target === 'Parent Node')).toBe(false); + expect(links.some((l) => l.target === 'Reference')).toBe(true); + }); + + it('extracts links from string array fields', () => { + const data = { tags: ['[[Tag A]]', '[[Tag B]]'] }; + const links = extractLinksFromFrontmatter(data, edgeFields); + expect(links.map((l) => l.target)).toEqual(expect.arrayContaining(['Tag A', 'Tag B'])); + }); +}); + +describe('getEdgeFieldNames', () => { + it('collects hierarchy and relationship field names', () => { + const fields = getEdgeFieldNames(metadata); + expect(fields.has('parent')).toBe(true); + }); +}); + +// --------------------------------------------------------------------------- +// Integration tests: contentLinks on parsed nodes, resolvedLinks on SpaceNodes +// --------------------------------------------------------------------------- + +describe('contentLinks and resolvedLinks integration', () => { + let nodes: SpaceNode[]; + + beforeAll(async () => { + const result = await readSpaceDirectory(makePluginContext(VALID_DIR)); + ({ nodes } = resolveGraphEdges(result.nodes, metadata)); + }); + + it('every SpaceNode has a resolvedLinks array', () => { + for (const node of nodes) { + expect(Array.isArray(node.resolvedLinks)).toBe(true); + } + }); + + describe('links_opportunity node', () => { + let node: SpaceNode; + + beforeAll(() => { + node = nodes.find((n) => n.label === 'links_opportunity.md')!; + expect(node).toBeDefined(); + }); + + it('classifies the frontmatter source URL as external', () => { + const link = node.resolvedLinks.find((l) => l.target === 'https://example.com/research'); + expect(link).toBeDefined(); + expect(link?.location).toBe('external'); + expect(link?.action).toBe('link'); + }); + + it('classifies a content wikilink to a known node as internal node', () => { + const link = 
node.resolvedLinks.find((l) => l.target === 'Personal Mission'); + expect(link).toBeDefined(); + expect(link?.location).toBe('node'); + }); + + it('classifies a wikilink to an unknown target as internal', () => { + const link = node.resolvedLinks.find((l) => l.target === 'missing-page'); + expect(link).toBeDefined(); + expect(link?.location).toBe('internal'); + }); + + it('classifies a plain https link as external', () => { + const link = node.resolvedLinks.find((l) => l.target === 'https://www.example.com/tool'); + expect(link).toBeDefined(); + expect(link?.location).toBe('external'); + expect(link?.text).toBe('external resource'); + }); + + it('classifies an obsidian:// link as protocol', () => { + const link = node.resolvedLinks.find((l) => l.target.startsWith('obsidian://')); + expect(link).toBeDefined(); + expect(link?.location).toBe('protocol'); + }); + + it('classifies a file:// link as system', () => { + const link = node.resolvedLinks.find((l) => l.target.startsWith('file://')); + expect(link).toBeDefined(); + expect(link?.location).toBe('system'); + }); + + it('classifies a relative path link as internal', () => { + const link = node.resolvedLinks.find((l) => l.target === './other-note.md'); + expect(link).toBeDefined(); + expect(link?.location).toBe('internal'); + }); + + it('classifies an embed wikilink with action embed', () => { + const link = node.resolvedLinks.find((l) => l.target === 'embedded-image.png'); + expect(link).toBeDefined(); + expect(link?.action).toBe('embed'); + expect(link?.location).toBe('internal'); + }); + + it('does not include the parent edge field wikilink', () => { + // parent: "[[Personal Vision]]" is a graph edge — should not appear in resolvedLinks + const parentLinks = node.resolvedLinks.filter((l) => l.target === 'Personal Vision'); + expect(parentLinks).toHaveLength(0); + }); + }); + + it('nodes without body links have empty resolvedLinks (or only frontmatter links)', () => { + const vision = nodes.find((n) => n.label === 
'Personal Vision.md')!; + expect(vision).toBeDefined(); + // Personal Vision has no links in body content — may have frontmatter links if any fields contain them + // The summary field is plain text, so resolvedLinks should be empty + expect(vision.resolvedLinks).toHaveLength(0); + }); +}); + +// --------------------------------------------------------------------------- +// Unit tests: classifyContentLink via resolveGraphEdges +// --------------------------------------------------------------------------- + +import { resolveGraphEdges as resolveEdges } from '../../src/read/resolve-graph-edges'; + +describe('link location classification', () => { + const levels = [ + { type: 'goal', field: 'parent', fieldOn: 'child' as const, multiple: false, selfRef: false }, + { type: 'solution', field: 'parent', fieldOn: 'child' as const, multiple: false, selfRef: false }, + ]; + const metadata = { hierarchy: { levels } }; + + function makeBaseNode(title: string, type: string, links: ContentLink[] = []) { + return { + label: `${title}.md`, + title, + type, + schemaData: { title, type }, + linkTargets: [title], + contentLinks: links, + }; + } + + it('classifies wikilink to known node as node', () => { + const goal = makeBaseNode('My Goal', 'goal'); + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'My Goal', target: 'My Goal', action: 'link', linkSyntax: 'wikilink' }, + ]); + const { nodes } = resolveEdges([goal, solution], metadata); + const sol = nodes.find((n) => n.title === 'My Solution')!; + expect(sol.resolvedLinks[0]?.location).toBe('node'); + }); + + it('classifies wikilink to unknown target as internal', () => { + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'Unknown', target: 'Unknown', action: 'link', linkSyntax: 'wikilink' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('internal'); + }); + + it('classifies https URL as external', () => { + const solution 
= makeBaseNode('My Solution', 'solution', [ + { text: 'Link', target: 'https://example.com', action: 'link', linkSyntax: 'markdown' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('external'); + }); + + it('classifies http URL as external', () => { + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'Link', target: 'http://example.com', action: 'link', linkSyntax: 'markdown' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('external'); + }); + + it('classifies file:// URL as system', () => { + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'File', target: 'file:///path/to/file.txt', action: 'link', linkSyntax: 'markdown' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('system'); + }); + + it('classifies obsidian:// URL as protocol', () => { + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'Vault', target: 'obsidian://open?vault=x', action: 'link', linkSyntax: 'markdown' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('protocol'); + }); + + it('classifies relative path as internal', () => { + const solution = makeBaseNode('My Solution', 'solution', [ + { text: 'Note', target: './other.md', action: 'link', linkSyntax: 'markdown' }, + ]); + const { nodes } = resolveEdges([solution], metadata); + expect(nodes[0]!.resolvedLinks[0]?.location).toBe('internal'); + }); + + it('classifies wikilink with anchor to known node as node', () => { + const goal = { + label: 'vision.md', + title: 'Vision', + type: 'goal', + schemaData: { title: 'Vision', type: 'goal' }, + linkTargets: ['Vision', 'vision#^section1'], + contentLinks: [], + }; + const solution = makeBaseNode('My Solution', 'solution', [ + { + text: 'Vision', + target: 'vision', + anchor: 
'^section1', + action: 'link' as const, + linkSyntax: 'wikilink' as const, + }, + ]); + const { nodes } = resolveEdges([goal, solution], metadata); + const sol = nodes.find((n) => n.title === 'My Solution')!; + expect(sol.resolvedLinks[0]?.location).toBe('node'); + }); + + it('nodes without contentLinks get empty resolvedLinks', () => { + const goal = makeBaseNode('My Goal', 'goal'); + const { nodes } = resolveEdges([goal], metadata); + expect(nodes[0]!.resolvedLinks).toEqual([]); + }); +}); diff --git a/tests/render/render-bullets.test.ts b/tests/render/render-bullets.test.ts index 6d32fe9..f5b3516 100644 --- a/tests/render/render-bullets.test.ts +++ b/tests/render/render-bullets.test.ts @@ -15,6 +15,7 @@ function makeNode(title: string, type: string): SpaceNode { type, resolvedType: type, resolvedParents: [], + resolvedLinks: [], }; } diff --git a/tests/render/render-mermaid.test.ts b/tests/render/render-mermaid.test.ts index 22c7a47..1851f0f 100644 --- a/tests/render/render-mermaid.test.ts +++ b/tests/render/render-mermaid.test.ts @@ -21,6 +21,7 @@ function makeNode(title: string, type: string, status = 'active'): SpaceNode { type, resolvedType: type, resolvedParents: [], + resolvedLinks: [], }; } diff --git a/tests/schema/evaluate-rule.test.ts b/tests/schema/evaluate-rule.test.ts index 71a5101..20e8124 100644 --- a/tests/schema/evaluate-rule.test.ts +++ b/tests/schema/evaluate-rule.test.ts @@ -18,6 +18,7 @@ describe('evaluate-rule', () => { linkTargets: ['Test Node'], resolvedParents: [makeParentRef('Parent Opportunity')], resolvedType: 'solution', + resolvedLinks: [], }; const mockParent: SpaceNode = { @@ -32,6 +33,7 @@ describe('evaluate-rule', () => { linkTargets: ['Parent Opportunity'], resolvedParents: [], resolvedType: 'opportunity', + resolvedLinks: [], }; const mockNodeIndex = new Map([ @@ -96,6 +98,7 @@ describe('evaluate-rule', () => { linkTargets: ['Orphan Node'], resolvedParents: [], resolvedType: 'goal', // outcome is an alias for goal + 
resolvedLinks: [], }; const context = buildEvalContext(nodeWithoutParent, allNodes, mockNodeIndex); const result = await evaluateExpression('$exists(parent) = false', context); @@ -132,6 +135,7 @@ describe('evaluate-rule', () => { linkTargets: ['Child'], resolvedParents: [makeParentRef('Parent')], resolvedType: 'solution', + resolvedLinks: [], }; const parentNode: SpaceNode = { @@ -142,6 +146,7 @@ describe('evaluate-rule', () => { linkTargets: ['Parent'], resolvedParents: [], resolvedType: 'opportunity', + resolvedLinks: [], }; const mockNodes = [childNode, parentNode]; @@ -182,6 +187,7 @@ describe('evaluate-rule', () => { linkTargets: ['Orphan'], resolvedParents: [], resolvedType: 'goal', // outcome is an alias for goal + resolvedLinks: [], }; const context = buildEvalContext(orphanNode, mockNodes, nodeIndex); expect(context.parent).toBeUndefined(); diff --git a/tests/schema/validate-graph.test.ts b/tests/schema/validate-graph.test.ts index e398f83..b2e56e2 100644 --- a/tests/schema/validate-graph.test.ts +++ b/tests/schema/validate-graph.test.ts @@ -87,6 +87,7 @@ describe('validate-graph', () => { linkTargets: [title], resolvedParents: parentTitle ? 
[makeParentRef(parentTitle)] : [], resolvedType: resolveNodeType(type, typeAliases), + resolvedLinks: [], }); describe('hierarchy with selfRef', () => { diff --git a/tests/schema/validate-rules.test.ts b/tests/schema/validate-rules.test.ts index 745cd1c..dd00b53 100644 --- a/tests/schema/validate-rules.test.ts +++ b/tests/schema/validate-rules.test.ts @@ -13,6 +13,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome', type: 'outcome', status: 'active', metric: 'Increase X' }, linkTargets: ['Outcome'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'goal', // outcome is an alias for goal }, { @@ -28,6 +29,7 @@ describe('validate-rules', () => { }, linkTargets: ['Opportunity'], resolvedParents: [makeParentRef('Outcome')], + resolvedLinks: [], resolvedType: 'opportunity', }, { @@ -37,6 +39,7 @@ describe('validate-rules', () => { schemaData: { title: 'Solution', type: 'solution', status: 'exploring', parent: '[[Opportunity]]' }, linkTargets: ['Solution'], resolvedParents: [makeParentRef('Opportunity')], + resolvedLinks: [], resolvedType: 'solution', }, { @@ -46,6 +49,7 @@ describe('validate-rules', () => { schemaData: { title: 'Bad Solution', type: 'solution', status: 'exploring', parent: '[[Solution]]' }, linkTargets: ['Bad Solution'], resolvedParents: [makeParentRef('Solution')], + resolvedLinks: [], resolvedType: 'solution', }, { @@ -61,6 +65,7 @@ describe('validate-rules', () => { }, linkTargets: ['Experiment'], resolvedParents: [makeParentRef('Solution')], + resolvedLinks: [], resolvedType: 'experiment', }, { @@ -76,6 +81,7 @@ describe('validate-rules', () => { }, linkTargets: ['Bad Experiment'], resolvedParents: [makeParentRef('Opportunity')], + resolvedLinks: [], resolvedType: 'experiment', }, ]; @@ -140,6 +146,7 @@ describe('validate-rules', () => { schemaData: { title: 'Bad Outcome', type: 'outcome', status: 'active', parent: '[[Vision]]' }, linkTargets: ['Bad Outcome'], resolvedParents: [makeParentRef('Vision')], + resolvedLinks: [], 
resolvedType: 'outcome', }; const visionNode: SpaceNode = { @@ -149,6 +156,7 @@ describe('validate-rules', () => { schemaData: { title: 'Vision', type: 'vision', status: 'active' }, linkTargets: ['Vision'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'vision', }; // Pass both nodes so the parent can be found in the index @@ -183,6 +191,7 @@ describe('validate-rules', () => { }, linkTargets: ['Opportunity'], resolvedParents: [makeParentRef('Outcome')], + resolvedLinks: [], resolvedType: 'opportunity', }; @@ -198,6 +207,7 @@ describe('validate-rules', () => { }, linkTargets: [`Solution ${i}`], resolvedParents: [makeParentRef('Opportunity')], + resolvedLinks: [], resolvedType: 'solution', })); @@ -220,6 +230,7 @@ describe('validate-rules', () => { }, linkTargets: ['Opportunity'], resolvedParents: [makeParentRef('Outcome')], + resolvedLinks: [], resolvedType: 'opportunity', }; @@ -230,6 +241,7 @@ describe('validate-rules', () => { schemaData: { title: 'Solution', type: 'solution', status: 'exploring', parent: '[[Opportunity]]' }, linkTargets: ['Solution'], resolvedParents: [makeParentRef('Opportunity')], + resolvedLinks: [], resolvedType: 'solution', }; @@ -273,6 +285,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome 1', type: 'outcome', status: 'active', metric: 'X' }, linkTargets: ['Outcome 1'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }, { @@ -282,6 +295,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome 2', type: 'outcome', status: 'active', metric: 'Y' }, linkTargets: ['Outcome 2'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }, { @@ -291,6 +305,7 @@ describe('validate-rules', () => { schemaData: { title: 'Unrelated', type: 'solution', status: 'exploring' }, linkTargets: ['Unrelated'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'solution', }, ]; @@ -309,6 +324,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome', type: 'outcome', 
status: 'inactive', metric: 'X' }, linkTargets: ['Outcome'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }; const childNode: SpaceNode = { @@ -324,6 +340,7 @@ describe('validate-rules', () => { }, linkTargets: ['Opportunity'], resolvedParents: [makeParentRef('Outcome')], + resolvedLinks: [], resolvedType: 'opportunity', }; const violations = await validateRules([parentNode, childNode], [workflowRules[1]!]); @@ -341,6 +358,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome', type: 'outcome', status: 'active', metric: 'X' }, linkTargets: ['Outcome'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }; const childNode: SpaceNode = { @@ -356,6 +374,7 @@ describe('validate-rules', () => { }, linkTargets: ['Opportunity'], resolvedParents: [makeParentRef('Outcome')], + resolvedLinks: [], resolvedType: 'opportunity', }; const violations = await validateRules([parentNode, childNode], [workflowRules[1]!]); @@ -390,6 +409,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome 1', type: 'outcome', status: 'active', metric: 'X' }, linkTargets: ['Outcome 1'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }, { @@ -399,6 +419,7 @@ describe('validate-rules', () => { schemaData: { title: 'Outcome 2', type: 'outcome', status: 'active', metric: 'Y' }, linkTargets: ['Outcome 2'], resolvedParents: [], + resolvedLinks: [], resolvedType: 'outcome', }, { @@ -408,6 +429,7 @@ describe('validate-rules', () => { schemaData: { title: 'Solution', type: 'solution', status: 'exploring', parent: '[[Opportunity]]' }, linkTargets: ['Solution'], resolvedParents: [makeParentRef('Opportunity')], + resolvedLinks: [], resolvedType: 'solution', }, { @@ -417,6 +439,7 @@ describe('validate-rules', () => { schemaData: { title: 'Bad Solution', type: 'solution', status: 'exploring', parent: '[[Solution]]' }, linkTargets: ['Bad Solution'], resolvedParents: [makeParentRef('Solution')], + resolvedLinks: [], 
resolvedType: 'solution', }, ]; diff --git a/tests/space-graph.test.ts b/tests/space-graph.test.ts index e6d1864..81bb7c4 100644 --- a/tests/space-graph.test.ts +++ b/tests/space-graph.test.ts @@ -14,6 +14,7 @@ function makeNode(title: string, type: string): SpaceNode { linkTargets: [title], resolvedType: type, resolvedParents: [], + resolvedLinks: [], }; } diff --git a/tests/validate/general.test.ts b/tests/validate/general.test.ts index 7ff9641..1ba8b44 100644 --- a/tests/validate/general.test.ts +++ b/tests/validate/general.test.ts @@ -26,8 +26,8 @@ describe('Schema validation', () => { ({ nodes, unresolvedRefs } = resolveGraphEdges(result.nodes, metadata)); }); - it('all 12 nodes pass schema validation', () => { - expect(nodes).toHaveLength(12); + it('all 13 nodes pass schema validation', () => { + expect(nodes).toHaveLength(13); for (const node of nodes) { expect(validateNode(node.schemaData)).toBe(true); } From 4303cebb4d8e732e1ad4ea5cf7445bde50d54405 Mon Sep 17 00:00:00 2001 From: Roger Barnes Date: Tue, 21 Apr 2026 17:07:17 +1000 Subject: [PATCH 2/2] Address PR #98 review: type composition, consistent parsing, rule access - Extract ContentLinkBase to compose ContentLink and ResolvedContentLink from shared fields rather than duplicating them - Use mdast (extractLinksFromBody) for frontmatter string values instead of separate regex helpers, keeping link extraction consistent across body and frontmatter - Expose resolvedLinks on flattened nodes so JSONata rules can query content links - Remove extractLinksFromText and its regex helper functions --- src/plugins/markdown/extract-content-links.ts | 52 ++----------------- src/schema/evaluate-rule.ts | 1 + src/types.ts | 22 ++++---- tests/read/content-links.test.ts | 46 ---------------- 4 files changed, 15 insertions(+), 106 deletions(-) diff --git a/src/plugins/markdown/extract-content-links.ts b/src/plugins/markdown/extract-content-links.ts index 6a45d05..38212be 100644 --- 
a/src/plugins/markdown/extract-content-links.ts +++ b/src/plugins/markdown/extract-content-links.ts @@ -56,51 +56,6 @@ function extractWikilinksFromText(text: string): ContentLink[] { return links; } -/** - * Extract bare URLs (http/https) from a plain text string. - * Skips URLs that are already inside a markdown link `[text](url)` to avoid duplication. - */ -function extractBareUrlsFromText(text: string): ContentLink[] { - const links: ContentLink[] = []; - // Negative lookbehind: skip URLs immediately preceded by `](` (already a markdown link target) - for (const match of text.matchAll(/(?<!\]\()https?:\/\/[^\s<>]+/g)) { - links.push({ text: match[0], target: match[0], action: 'link', linkSyntax: 'markdown' }); - } - return links; -} - -/** - * Extract standard markdown links and images from a plain text string. - * Used for scanning frontmatter string values. - */ -function extractMarkdownLinksFromText(text: string): ContentLink[] { - const links: ContentLink[] = []; - - for (const match of text.matchAll(/(!?)\[([^\]]*)\]\(([^)]+)\)/g)) { - const isEmbed = match[1] === '!'; - const linkText = match[2]!; - const url = match[3]!.trim(); - if (!url) continue; - - links.push({ - text: linkText, - target: url, - action: isEmbed ? 'embed' : 'link', - linkSyntax: 'markdown', - }); - } - - return links; -} - -/** - * Extract all links (wikilinks, markdown links, and bare URLs) from a plain text string. - * Used for scanning frontmatter string field values. - */ -export function extractLinksFromText(text: string): ContentLink[] { - return [...extractWikilinksFromText(text), ...extractMarkdownLinksFromText(text), ...extractBareUrlsFromText(text)]; -} - /** * Recursively walk an mdast subtree and collect all links. * Handles standard markdown link/image nodes and scans text nodes for wikilinks. @@ -178,18 +133,19 @@ export function getEdgeFieldNames(metadata: SchemaMetadata): Set<string> { /** * Extract links from frontmatter data fields, excluding known graph edge fields.
- * Scans string values and string array elements for both wikilinks and markdown links. + * Parses each string value as markdown (via mdast) to keep link extraction consistent + * with body content parsing — handles wikilinks, markdown links, images, and bare URLs. */ export function extractLinksFromFrontmatter(data: Record<string, unknown>, edgeFields: Set<string>): ContentLink[] { const links: ContentLink[] = []; for (const [key, value] of Object.entries(data)) { if (edgeFields.has(key)) continue; if (typeof value === 'string') { - links.push(...extractLinksFromText(value)); + links.push(...extractLinksFromBody(value)); } else if (Array.isArray(value)) { for (const item of value) { if (typeof item === 'string') { - links.push(...extractLinksFromText(item)); + links.push(...extractLinksFromBody(item)); } } } diff --git a/src/schema/evaluate-rule.ts b/src/schema/evaluate-rule.ts index 01b3991..dc2dc49 100644 --- a/src/schema/evaluate-rule.ts +++ b/src/schema/evaluate-rule.ts @@ -70,6 +70,7 @@ function flattenNode(node: SpaceNode): Record<string, unknown> { resolvedType: node.resolvedType, resolvedParentTitle: node.resolvedParents[0]?.title, // first parent or undefined, provided for convenience resolvedParentTitles: node.resolvedParents.map((r) => r.title), // full array of parent titles + resolvedLinks: node.resolvedLinks, }; } diff --git a/src/types.ts b/src/types.ts index 82fc58e..3ab45b1 100644 --- a/src/types.ts +++ b/src/types.ts @@ -68,10 +68,9 @@ export type UnresolvedRef = { }; /** - * A link extracted from node content at parse time, before location classification. - * Populated by plugins that support content link extraction (e.g. the markdown plugin). + * Shared fields for all content link variants. */ -export type ContentLink = { +type ContentLinkBase = { /** Display text of the link (text content for markdown links, inner text or target for wikilinks). */ text: string; /** Raw link target: URL for markdown links, wikilink path (without [[ ]]) for wikilinks.
*/ @@ -80,6 +79,13 @@ export type ContentLink = { action: 'link' | 'embed'; /** Anchor fragment (heading or block ref, e.g. 'heading-title' or '^blockid') if present. */ anchor?: string; +}; + +/** + * A link extracted from node content at parse time, before location classification. + * Populated by plugins that support content link extraction (e.g. the markdown plugin). + */ +export type ContentLink = ContentLinkBase & { /** Link syntax — used during resolution to determine how to classify the target. */ linkSyntax: 'wikilink' | 'markdown'; }; @@ -88,15 +94,7 @@ export type ContentLink = { * A fully classified link from node content, after location resolution. * Present on SpaceNode after graph resolution. */ -export type ResolvedContentLink = { - /** Display text of the link. */ - text: string; - /** Link target (URL or wikilink target without [[ ]]). */ - target: string; - /** Whether the link navigates or embeds. */ - action: 'link' | 'embed'; - /** Anchor fragment if present. */ - anchor?: string; +export type ResolvedContentLink = ContentLinkBase & { /** * Resolved location classification: * - 'node' — wikilink resolved to a known space node diff --git a/tests/read/content-links.test.ts b/tests/read/content-links.test.ts index 945ec9c..b18fb75 100644 --- a/tests/read/content-links.test.ts +++ b/tests/read/content-links.test.ts @@ -20,7 +20,6 @@ import { unified } from 'unified'; import { extractLinksFromAstNode, extractLinksFromFrontmatter, - extractLinksFromText, getEdgeFieldNames, } from '../../src/plugins/markdown/extract-content-links'; @@ -28,51 +27,6 @@ function parseBody(markdown: string): Root { return unified().use(remarkParse).use(remarkGfm).parse(markdown) as Root; } -describe('extractLinksFromText', () => { - it('extracts wikilinks', () => { - const links = extractLinksFromText('See [[Personal Vision]] for more.'); - expect(links).toHaveLength(1); - expect(links[0]).toMatchObject({ - text: 'Personal Vision', - target: 'Personal Vision', - action: 
'link', - linkSyntax: 'wikilink', - }); - }); - - it('extracts wikilinks with anchors', () => { - const links = extractLinksFromText('Go to [[vision_page#^embmission]].'); - expect(links).toHaveLength(1); - expect(links[0]).toMatchObject({ target: 'vision_page', anchor: '^embmission', linkSyntax: 'wikilink' }); - }); - - it('extracts wikilinks with aliases', () => { - const links = extractLinksFromText('See [[Personal Vision|our vision]].'); - expect(links[0]).toMatchObject({ text: 'our vision', target: 'Personal Vision' }); - }); - - it('extracts embed wikilinks', () => { - const links = extractLinksFromText('![[image.png]]'); - expect(links[0]).toMatchObject({ action: 'embed', target: 'image.png', linkSyntax: 'wikilink' }); - }); - - it('extracts markdown links', () => { - const links = extractLinksFromText('[click here](https://example.com)'); - expect(links[0]).toMatchObject({ text: 'click here', target: 'https://example.com', linkSyntax: 'markdown' }); - }); - - it('extracts markdown image embeds', () => { - const links = extractLinksFromText('![alt text](https://example.com/img.png)'); - expect(links[0]).toMatchObject({ action: 'embed', text: 'alt text', target: 'https://example.com/img.png' }); - }); - - it('extracts multiple mixed links from one string', () => { - const links = extractLinksFromText('See [[A]] and [B](https://b.com) and [[C]].'); - expect(links).toHaveLength(3); - expect(links.map((l) => l.target)).toEqual(['A', 'C', 'https://b.com']); - }); -}); - describe('extractLinksFromAstNode', () => { it('extracts a markdown link node', () => { const tree = parseBody('[visit](https://example.com)');