-
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathcatalog.yaml
More file actions
88 lines (88 loc) · 8.43 KB
/
catalog.yaml
File metadata and controls
88 lines (88 loc) · 8.43 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# Schema catalog: each dataset lists its tables, and each table lists its columns.
#
# Every column is written as a one-line flow mapping. Inside a flow mapping an
# unquoted comma ends the current value, so column descriptions are always
# single-quoted to keep commas (and any future punctuation) part of the text.
datasets:
  - name: structure
    description: Markdown files containing structural information about datasets, including their schemas, relationships, and metadata.
    tables:
      # One row per source document.
      - name: documents
        description: One row per source document with extracted metadata, relationships, and embedded content references. Additional document metadata fields are stored either as scalar columns or serialized *_list columns as needed.
        columns:
          - { name: id, type: int, primary: true, autoincrement: true, description: 'Autoincrement row id for the versioned document' }
          - { name: version_id, type: string, description: 'Encoded seconds-since-2000 identifier for the collection run that produced the row' }
          - { name: sid, type: string, description: 'Stable short hash for the document' }
          - { name: uid, type: string, description: 'Human-friendly identifier derived from the file path or slug' }
          - { name: path, type: string, description: 'Relative source path inside the content tree' }
          - { name: url, type: string, description: 'Public route exposed for the document' }
          - { name: url_type, type: string, description: 'Indicates whether the URL maps to a directory or a file' }
          - { name: slug, type: string, description: 'Slug used for building anchors and nested references' }
          - { name: title, type: string, description: 'Document title resolved from folder names or front matter' }
          - { name: level, type: int, description: 'Depth of the document in the navigation tree' }
          - { name: order, type: int, description: 'Ordering index scoped to siblings sharing the same directory and level; defaults to alphabetical order when omitted' }
          - { name: tags, type: string_list, description: 'JSON list of tags assigned to the document via front matter or metadata' }
          - { name: date, type: string, description: 'ISO 8601 date assigned to the document via front matter or metadata' }
          - { name: lastmod, type: string, description: 'ISO 8601 timestamp of the last modification time of the source file' }
          - { name: format, type: string, values: [markdown, markdown_card], description: 'Format of the source document such as standard markdown or markdown_card' }
          - { name: meta_data, type: string, description: 'JSON string of metadata fields not mapped to schema columns' }

      # Document content flattened into ordered items; joins to documents via doc_sid.
      - name: items
        description: Flattened AST items representing headings, paragraphs, and asset-backed nodes in reading order.
        columns:
          - { name: id, type: int, primary: true, autoincrement: true, description: 'Autoincrement row id for the versioned item' }
          - { name: version_id, type: string, description: 'Encoded seconds-since-2000 identifier for the collection run' }
          - { name: doc_sid, type: string, description: 'SID of the parent document' }
          - { name: slug, type: string, description: 'Page-unique slug usable as intra-page anchor' }
          - { name: asset_uid, type: string, description: 'UID of a single referenced asset when applicable' }
          - { name: level, type: int, description: 'Approximate nesting or heading depth derived from the AST' }
          - { name: order_index, type: int, description: 'Order of appearance within the document' }
          - { name: type, type: string, description: 'Item type such as heading, paragraph, table, code, or image' }
          - { name: body_text, type: string, description: 'Textual body for the item; asset-linked items reference their asset via asset_uid' }
          - { name: ast, type: string, description: 'Serialized AST subtree for complex items, otherwise null' }

      # Per-version link rows between documents, assets, and blobs.
      - name: assets
        description: Version-specific rows that connect each document asset to the collection run for downstream joins.
        columns:
          - { name: id, type: int, primary: true, autoincrement: true, description: 'Autoincrement row id for the versioned asset' }
          - { name: version_id, type: string, description: 'Encoded seconds-since-2000 identifier for the collection run' }
          - { name: doc_sid, type: string, description: 'SID of the document that owns the asset' }
          - { name: asset_uid, type: string, description: 'UID of the referenced asset as stored in the asset_info table' }
          - { name: blob_uid, type: string, description: 'UID of the blob row referenced by this asset' }
          - { name: type, type: string, description: 'Asset type recorded for this version (mirrors asset_info.type)' }

      # Asset catalog; blob_uid joins to blob_store.
      - name: asset_info
        description: Asset catalog linking documents to their concrete payloads via blob hashes derived from code blocks, tables, image references, and loose files.
        columns:
          - { name: id, type: int, primary: true, autoincrement: true, description: 'Autoincrement row id for the asset catalog' }
          - { name: uid, type: string, description: 'Stable identifier composed from the document uid and asset slug' }
          - { name: type, type: string, values: [file, codeblock, table, model, image, linked_file, gallery_asset, found], description: 'Asset type such as codeblock, table, file, or model' }
          - { name: blob_uid, type: string, description: 'UID joining to the blob_store table entry' }
          - { name: parent_doc_uid, type: string, description: 'UID of the document that introduced the asset' }
          - { name: path, type: string, description: 'Source-relative path for file-backed assets' }
          - { name: ext, type: string, description: 'File extension for referenced files when available' }
          - { name: params, type: string, description: 'Raw params associated with the asset (e.g., code block meta)' }
          - { name: first_seen, type: string, description: 'ISO timestamp when the asset was first observed' }
          - { name: last_seen, type: string, description: 'ISO timestamp when the asset was most recently observed' }

      # Deduplicated blob payloads keyed by blob_uid.
      - name: blob_store
        description: Unique blob payloads materialized on disk and shared across assets via their SHA-512 hash.
        columns:
          - { name: blob_uid, type: string, primary: true, description: 'Monotonic hex identifier for the blob row' }
          - { name: hash, type: string, description: 'SHA-512 hash identifying the blob' }
          - { name: path, type: string, description: 'Directory path within the blob store hierarchy such as YYYY/MM/ff' }
          - { name: first_seen, type: string, description: 'ISO timestamp when the blob was first observed' }
          - { name: last_seen, type: string, description: 'ISO timestamp when the blob was most recently observed' }
          - { name: size, type: int, description: 'Size of the stored blob in bytes' }
          - { name: compression, type: boolean, description: 'True when payload is gzip-compressed, false otherwise, null for external blobs' }
          - { name: payload, type: blob, description: 'Inline blob payload when stored directly in the table' }

      # Image dimensions and naming for image and gallery assets.
      - name: images
        description: Image metadata derived from on-disk files for image and gallery assets.
        columns:
          - { name: id, type: int, primary: true, autoincrement: true, description: 'Autoincrement row id for the image metadata' }
          - { name: uid, type: string, description: 'UID of the backing asset (image or gallery_asset)' }
          - { name: blob_uid, type: string, description: 'UID of the blob backing this image when available' }
          - { name: type, type: string, description: 'Asset type used when the image was extracted' }
          - { name: name, type: string, description: 'Filename without extension derived from the asset path' }
          - { name: extension, type: string, description: 'File extension for the image' }
          - { name: width, type: int, description: 'Pixel width after orientation correction' }
          - { name: height, type: int, description: 'Pixel height after orientation correction' }
          - { name: ratio, type: string, description: 'Aspect ratio width/height as a decimal string' }

      # One row per collection run; version_id is the key the other tables reference.
      - name: versions
        description: One row per collection run to track version metadata.
        columns:
          - { name: version_id, type: string, primary: true, description: 'Encoded seconds-since-2000 identifier for the collection run' }
          - { name: created_at, type: string, description: 'ISO timestamp of when the version was produced' }
          - { name: type, type: string, values: [daily, weekly, monthly, early, baseline], description: 'Run cadence classification' }
          - { name: tags, type: string_list, description: 'Free-form tags associated with the version' }