Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions docs/concepts.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,29 @@ Each entry is a `ResolvedParentRef` object:

The `source` label lets downstream consumers distinguish edge types without re-inspecting the schema. Validation routes `hierarchy` edges to structural checks (parent-type rules, skip-level detection) and `relationship` edges to field reference checks (type-match, missing-target). Tree rendering and rule evaluation use the full set.

### Content links

**Content links** (`resolvedLinks` on `SpaceNode`) are all links extracted from a node's content at parse time — both from frontmatter non-edge fields and from the body text. They are distinct from graph edges: they are never used to infer structure, but are available to consumers for rendering, auditing, or navigation.

Each entry is a `ResolvedContentLink` object:

| Field | Type | Description |
|---|---|---|
| `text` | `string` | Display text of the link |
| `target` | `string` | Raw link target (URL or wikilink path) |
| `action` | `'link' \| 'embed'` | Whether the link navigates or transcludes |
| `anchor` | `string?` | Fragment component (heading or block ref) if present |
| `location` | `'node' \| 'internal' \| 'external' \| 'system' \| 'protocol'` | Resolved location classification |

**Location classification:**
- `node` — wikilink resolved to a known space node
- `internal` — wikilink or relative path that resolves to a file within the space's source tree but is not a space node
- `external` — `http://` or `https://` URL
- `system` — `file://` URL
- `protocol` — any other scheme (e.g. `obsidian://`)

**Sources:** Both wikilink syntax (`[[Target]]`, `![[Image]]`) and markdown link syntax (`[text](url)`, `![alt](url)`) are extracted. Bare URLs in frontmatter string fields are also captured. Graph edge fields (hierarchy `parent`, relationship fields) are excluded — those are graph edges, not content links.

### Anchor

An **anchor** is a block anchor (e.g. `^goal1`) appended to a heading in a `typed page`, using Obsidian block anchor syntax. Anchors serve two purposes:
Expand Down
7 changes: 1 addition & 6 deletions plugin/.claude-plugin/plugin.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,5 @@
"homepage": "https://github.com/mindsocket/structured-context",
"repository": "https://github.com/mindsocket/structured-context",
"license": "MIT",
"keywords": [
"structured-context",
"validation",
"markdown",
"schema"
]
"keywords": ["structured-context", "validation", "markdown", "schema"]
}
2 changes: 1 addition & 1 deletion schemas/_knowledge_wiki.json
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@
},
"note": {
"type": "object",
"description": "Personal notes, journal entries, appointments, experiential records. Your own data about your journey — not sourced from external material.",
"description": "Personal notes, journal entries, talk notes, or rough thoughts not drawn from an external URL.",
"allOf": [{ "$ref": "sctx://_sctx_base#/$defs/baseNodeProps" }],
"properties": {
"type": { "const": "note" },
Expand Down
4 changes: 4 additions & 0 deletions src/api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

export type { AnySchemaObject, SchemaObject, ValidateFunction } from 'ajv';
export type { Config, SpaceConfig } from './config';
export { loadConfig, setConfigPath } from './config';
export type {
ParseHook,
ParseResult,
Expand All @@ -21,15 +22,18 @@ export type {
TemplateSyncOptions,
} from './plugins/util';
export type { SharedEmbeddingFields } from './schema/metadata-contract';
export { bundledSchemasDir, loadSchema, setBundledSchemasDir } from './schema/schema';
export type { SpaceGraph } from './space-graph';
export type {
BaseNode,
ContentLink,
EdgeDefinition,
FileNotInSpaceResult,
FileValidationResult,
HierarchyLevel,
ParseIssue,
Relationship,
ResolvedContentLink,
SchemaMetadata,
SchemaWithMetadata,
SpaceContext,
Expand Down
5 changes: 3 additions & 2 deletions src/commands/schemas.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { dirname, join } from 'node:path';
import type { AnySchemaObject } from 'ajv';
import { loadConfig, resolveSchema } from '../config';
import { bundledSchemasDir, type EntityInfo, extractEntityInfo, loadSchema, readRawSchema } from '../schema/schema';
import { mergeVariantProperties } from '../schema/schema-refs';
import { mergeVariantProperties, resolveRef } from '../schema/schema-refs';
import type { SchemaMetadata, SchemaWithMetadata } from '../types';

function isBundledPath(schemaPath: string): boolean {
Expand Down Expand Up @@ -35,6 +35,7 @@ function extractEntities(
): EntityVariant[] {
return oneOf.map((entry) => {
const entryObj = entry as AnySchemaObject;
const resolvedObj = resolveRef(entryObj, schema, schemaRefRegistry) ?? entryObj;
const { properties, required } = mergeVariantProperties(entryObj, schema, schemaRefRegistry);
const typeDef = properties.type as AnySchemaObject | undefined;
let types: string[] = [];
Expand All @@ -43,7 +44,7 @@ function extractEntities(

return {
types,
description: typeof entryObj.description === 'string' ? entryObj.description : undefined,
description: typeof resolvedObj.description === 'string' ? resolvedObj.description : undefined,
properties: Object.keys(properties).filter((k) => k !== 'type'),
required: required.filter((r) => r !== 'type'),
};
Expand Down
23 changes: 18 additions & 5 deletions src/commands/validate-file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,22 +26,35 @@ export async function validateFileCommand(filePath: string, options: { json?: bo
printHumanReadable(result);
}

return Object.keys(result.errors).length > 0 ? 1 : 0;
return result.errorCount > 0 ? 1 : 0;
}

function printHumanReadable(result: FileValidationResult): void {
const reset = '\x1b[0m';
const green = '\x1b[32m';
const red = '\x1b[31m';
const yellow = '\x1b[33m';

if (result.errorCount === 0) {
if (result.errorCount === 0 && result.warningCount === 0) {
console.log(`${green}✓${reset} ${result.label} (space: ${result.space})`);
return;
}

console.log(`\n${red}✗${reset} ${result.label} (space: ${result.space}) — ${result.errorCount} error(s)\n`);
for (const { kind, message } of Object.values(result.errors)) {
console.log(` [${kind}] ${message}`);
if (result.errorCount > 0) {
console.log(`\n${red}✗${reset} ${result.label} (space: ${result.space}) — ${result.errorCount} error(s)\n`);
for (const { kind, message } of Object.values(result.errors)) {
console.log(` [${kind}] ${message}`);
}
} else {
console.log(`\n${green}✓${reset} ${result.label} (space: ${result.space})`);
}

if (result.warningCount > 0) {
console.log(`\n ${yellow}${result.warningCount} warning(s):${reset}`);
for (const { kind, message } of Object.values(result.warnings)) {
console.log(` ${yellow}[${kind}]${reset} ${message}`);
}
}

console.log('');
}
15 changes: 15 additions & 0 deletions src/commands/validate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export async function validate(context: SpaceContext, options: { json?: boolean
errors: errorsByFile,
orphanCount: result.orphans.length,
parseIssues: result.parseIssues,
unresolvedContentLinks: result.unresolvedContentLinks,
},
null,
2,
Expand Down Expand Up @@ -100,6 +101,7 @@ export async function validate(context: SpaceContext, options: { json?: boolean
console.log(fmt(' Rule violations', result.ruleViolations.length, true));
console.log(fmt(' Hierarchy violations', result.hierarchyViolations.length, true));
console.log(fmt(' Orphans (hierarchy nodes - no parent)', result.orphans.length, true, true));
console.log(fmt(' Unresolved content links', result.unresolvedContentLinks.length, true, true));
const parseIssueErrorCount = result.parseIssues.filter((i) => i.severity === 'error').length;
console.log(fmt(' Excluded during parsing', result.parseIssues.length, true, parseIssueErrorCount === 0));

Expand All @@ -108,6 +110,19 @@ export async function validate(context: SpaceContext, options: { json?: boolean
for (const node of result.orphans) console.log(` ${node.label}`);
}

if (result.unresolvedContentLinks.length > 0) {
console.log(`\nUnresolved content links (may be outside-space or broken):`);
const byFile = new Map<string, string[]>();
for (const { file, target } of result.unresolvedContentLinks) {
if (!byFile.has(file)) byFile.set(file, []);
byFile.get(file)!.push(target);
}
for (const [file, targets] of byFile) {
console.log(` ${file}:`);
for (const target of targets) console.log(` ${target}`);
}
}

if (result.parseIssues.length > 0) {
console.log(`\nExcluded during parsing:`);
for (const issue of result.parseIssues) {
Expand Down
154 changes: 154 additions & 0 deletions src/plugins/markdown/extract-content-links.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
import type { Image, Link, Node, Parent, Root, Text } from 'mdast';
import { toString as mdastToString } from 'mdast-util-to-string';
import remarkGfm from 'remark-gfm';
import remarkParse from 'remark-parse';
import { unified } from 'unified';
import type { ContentLink, SchemaMetadata } from '../../types';

/**
 * Split the inner text of an Obsidian wikilink into its target, optional
 * anchor, and optional display alias.
 * Handles: [[target]], [[target#anchor]], [[target|alias]], [[target#anchor|alias]]
 */
function parseWikilinkInner(inner: string): { target: string; anchor?: string; displayText?: string } {
  // Everything after the first '|' is the display alias; the part before it is the link core.
  const pipeIdx = inner.indexOf('|');
  const hasAlias = pipeIdx >= 0;
  const core = hasAlias ? inner.slice(0, pipeIdx) : inner;
  const aliasRaw = hasAlias ? inner.slice(pipeIdx + 1).trim() : '';
  const displayText = aliasRaw !== '' ? aliasRaw : undefined;

  // The first '#' inside the core separates the target from a heading or ^block anchor.
  const hashIdx = core.indexOf('#');
  if (hashIdx < 0) {
    return { target: core.trim(), displayText };
  }
  return {
    target: core.slice(0, hashIdx).trim(),
    anchor: core.slice(hashIdx + 1).trim() || undefined,
    displayText,
  };
}

/**
 * Scan a plain-text string for Obsidian wikilinks and embed wikilinks.
 * Recognises [[target]], ![[target]], [[target#anchor]], [[target|alias]], etc.
 * Entries with an empty target (e.g. "[[#anchor]]" with no page) are dropped.
 */
function extractWikilinksFromText(text: string): ContentLink[] {
  const wikilinkPattern = /(!?)\[\[([^\]]+)\]\]/g;
  const results: ContentLink[] = [];

  let match: RegExpExecArray | null;
  while ((match = wikilinkPattern.exec(text)) !== null) {
    const { target, anchor, displayText } = parseWikilinkInner(match[2]!);
    if (!target) continue;

    results.push({
      text: displayText ?? target,
      target,
      // A leading '!' marks an embed (transclusion) rather than a navigation link.
      action: match[1] === '!' ? 'embed' : 'link',
      ...(anchor !== undefined ? { anchor } : {}),
      linkSyntax: 'wikilink',
    });
  }

  return results;
}

/**
 * Recursively collect every link in an mdast subtree.
 * Standard markdown link/image nodes are captured directly; text nodes are
 * scanned for Obsidian wikilink syntax, which remark does not parse natively.
 * Reference-style links (linkReference/imageReference nodes) are not collected.
 */
export function extractLinksFromAstNode(node: Node): ContentLink[] {
  const collected: ContentLink[] = [];

  const visit = (current: Node): void => {
    if (current.type === 'link') {
      const linkNode = current as Link;
      collected.push({
        text: mdastToString(linkNode),
        target: linkNode.url,
        action: 'link',
        linkSyntax: 'markdown',
      });
      // Children of a link hold display text only — never descend looking for targets there.
      return;
    }

    if (current.type === 'image') {
      const imageNode = current as Image;
      collected.push({
        text: imageNode.alt ?? '',
        target: imageNode.url,
        action: 'embed',
        linkSyntax: 'markdown',
      });
      return;
    }

    if (current.type === 'text') {
      // Wikilinks survive remark parsing as plain text; pull them out here.
      for (const wikilink of extractWikilinksFromText((current as Text).value)) {
        collected.push(wikilink);
      }
      return;
    }

    if ('children' in current) {
      for (const child of (current as Parent).children) visit(child);
    }
  };

  visit(node);
  return collected;
}

/**
 * Extract all links from a raw markdown body string.
 * Parses the document with remark (GFM enabled) and collects links from
 * every node in the resulting tree.
 */
export function extractLinksFromBody(body: string): ContentLink[] {
  const processor = unified().use(remarkParse).use(remarkGfm);
  const tree = processor.parse(body) as Root;
  return extractLinksFromAstNode(tree);
}

/**
 * Build the set of field names that serve as graph edges
 * (hierarchy parent/self-ref fields plus relationship fields).
 * Used to exclude edge fields from frontmatter link extraction.
 */
export function getEdgeFieldNames(metadata: SchemaMetadata): Set<string> {
  const hierarchyFields = (metadata.hierarchy?.levels ?? []).flatMap((level) =>
    level.selfRefField ? [level.field, level.selfRefField] : [level.field],
  );
  const relationshipFields = (metadata.relationships ?? []).map((rel) => rel.field);
  return new Set([...hierarchyFields, ...relationshipFields]);
}

/**
 * Extract links from frontmatter data fields, excluding known graph edge fields.
 * Each string value (and each string inside a top-level array) is parsed as
 * markdown so link extraction stays consistent with body content parsing —
 * wikilinks, markdown links, images, and bare URLs are all handled.
 * Non-string, non-array values (and non-string array items) are ignored.
 */
export function extractLinksFromFrontmatter(data: Record<string, unknown>, edgeFields: Set<string>): ContentLink[] {
  const collectFromValue = (value: unknown): ContentLink[] => {
    if (typeof value === 'string') return extractLinksFromBody(value);
    if (Array.isArray(value)) {
      return value
        .filter((item): item is string => typeof item === 'string')
        .flatMap((item) => extractLinksFromBody(item));
    }
    return [];
  };

  return Object.entries(data)
    .filter(([key]) => !edgeFields.has(key))
    .flatMap(([, value]) => collectFromValue(value));
}
9 changes: 9 additions & 0 deletions src/plugins/markdown/parse-embedded.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import type {
SchemaMetadata,
SharedEmbeddingFields,
} from '../../api';
import { extractLinksFromAstNode } from './extract-content-links';
import { applyFieldMap, coerceDates } from './util';

/** Type values that identify a space_on_a_page container (not themselves space nodes). */
Expand Down Expand Up @@ -266,6 +267,7 @@ function processListItem(
schemaData,
linkTargets,
type,
contentLinks: [],
};
nodes.push(newNode);

Expand Down Expand Up @@ -347,6 +349,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio
schemaData: { type: 'space_on_a_page' },
linkTargets: [],
type: 'space_on_a_page',
contentLinks: [],
};

const tree = unified().use(remarkParse).use(remarkGfm).parse(body) as Root;
Expand Down Expand Up @@ -618,6 +621,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio
schemaData,
linkTargets,
type,
contentLinks: [],
};

// Push to stack BEFORE resolving semantic parent — stack[-2] is the correct parent.
Expand Down Expand Up @@ -788,6 +792,7 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio
schemaData,
linkTargets,
type: rowTypeStr,
contentLinks: [],
};
nodes.push(rowNode);

Expand Down Expand Up @@ -827,6 +832,8 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio

Object.assign(activeNode.schemaData, allFields);
if (remainingText) appendContent(activeNode, remainingText);
activeNode.contentLinks ??= [];
activeNode.contentLinks.push(...extractLinksFromAstNode(child));
} else if (child.type === 'code' && (child as Code).lang?.trim() === 'yaml') {
const code = child as Code;
const parsed = yamlLoad(code.value);
Expand All @@ -839,6 +846,8 @@ export function extractEmbeddedNodes(body: string, options: ExtractEmbeddedOptio
}
} else {
appendContent(activeNode, mdastToString(child));
activeNode.contentLinks ??= [];
activeNode.contentLinks.push(...extractLinksFromAstNode(child));
}
}
}
Expand Down
Loading