diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index f350112..eb7aecf 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -18,7 +18,7 @@ jobs: - run: bun install - name: Extract metadata - run: bun run bin/cli.ts extract-table-metadata examples/v1/metadata.json /tmp/databases + run: bun run bin/cli.ts extract-table-metadata examples/v1/table_metadata.json /tmp/databases - name: Diff examples run: diff -r examples/v1/databases /tmp/databases diff --git a/README.md b/README.md index 50333f7..0b2d270 100644 --- a/README.md +++ b/README.md @@ -2,13 +2,13 @@ Metabase represents database metadata — synced databases, their tables, and their fields — as a tree of YAML files. Files are diff-friendly: numeric IDs are omitted entirely, and foreign keys use natural-key tuples like `["Sample Database", "PUBLIC", "ORDERS"]` instead of database identifiers. -This repository contains the specification, examples, and a CLI that converts the `metadata.json` downloaded from a Metabase instance into YAML. +This repository contains the specification, examples, and a CLI that converts the `table_metadata.json` downloaded from a Metabase instance into YAML. ## Specification The format is defined in **[core-spec/v1/spec.md](core-spec/v1/spec.md)** (v1.0.4). It covers entity keys, field types, folder structure, and the shape of each entity. -Reference output for the Sample Database lives in **[examples/v1/](examples/v1/)** — both the raw `metadata.json` and the extracted YAML tree. +Reference output for the Sample Database lives in **[examples/v1/](examples/v1/)** — both the raw `table_metadata.json` and the extracted YAML tree. 
### Entities @@ -20,7 +20,19 @@ Reference output for the Sample Database lives in **[examples/v1/](examples/v1/) ## Obtaining metadata -Metadata is fetched from Metabase's `GET /api/ee/serialization/metadata/export` endpoint as a `metadata.json` file — a flat JSON document with three arrays (`databases`, `tables`, and `fields`) streamed so even warehouses with very large schemas can be exported without exhausting server memory. +Metadata is fetched from Metabase's `GET /api/ee/serialization/metadata/export` endpoint as a `table_metadata.json` file — a flat JSON document with three arrays (`databases`, `tables`, and `fields`) streamed so even warehouses with very large schemas can be exported without exhausting server memory. + +The endpoint accepts three boolean query parameters that opt sections in or out — they all default to `false`, so requests must explicitly set the sections they want: + +- `with-databases` — include the `databases` array. +- `with-tables` — include the `tables` array. +- `with-fields` — include the `fields` array. + +A typical full export sets all three to `true`: + +``` +GET /api/ee/serialization/metadata/export?with-databases=true&with-tables=true&with-fields=true +``` ### Extracting metadata to YAML @@ -30,7 +42,7 @@ The CLI turns that JSON into the human- and agent-friendly YAML tree described i bunx @metabase/database-metadata extract-table-metadata <input-file> <output-folder> ``` -- `<input-file>` — path to the `metadata.json` downloaded from Metabase. +- `<input-file>` — path to the `table_metadata.json` downloaded from Metabase. - `<output-folder>` — destination directory. Database folders are created directly under it. ### Extracting the spec @@ -49,11 +61,11 @@ The following is the **default** workflow for a project that wants to use Metaba ### 1. A `.metadata/` directory at the repo root -Create a top-level `.metadata/` directory and **add it to `.gitignore`**. 
This is where the raw `metadata.json` and the extracted `databases/` YAML tree live: +Create a top-level `.metadata/` directory and **add it to `.gitignore`**. This is where the raw `table_metadata.json` and the extracted `databases/` YAML tree live: ``` .metadata/ -├── metadata.json +├── table_metadata.json └── databases/ └── … ``` @@ -70,17 +82,17 @@ Each developer (or a CI job) fetches metadata on demand from their own Metabase ### 3. Download from Metabase and extract -Each developer downloads `metadata.json` from their Metabase instance and drops it into `.metadata/`. Then run the extractor: +Each developer downloads `table_metadata.json` from their Metabase instance and drops it into `.metadata/`. Then run the extractor: ```sh mkdir -p .metadata -# Drop metadata.json from Metabase into .metadata/ +# Drop table_metadata.json from Metabase into .metadata/ rm -rf .metadata/databases -bunx @metabase/database-metadata extract-table-metadata .metadata/metadata.json .metadata/databases +bunx @metabase/database-metadata extract-table-metadata .metadata/table_metadata.json .metadata/databases ``` -After this, tools and agents should read the YAML tree under `.metadata/databases/` — not `metadata.json`, which exists only as input to the extractor. +After this, tools and agents should read the YAML tree under `.metadata/databases/` — not `table_metadata.json`, which exists only as input to the extractor. 
## Publishing to NPM @@ -94,7 +106,7 @@ The workflow requires an `NPM_RELEASE_TOKEN` secret with publish access to the ` ```sh bun install -bun bin/cli.ts extract-table-metadata examples/v1/metadata.json /tmp/.metadata/databases +bun bin/cli.ts extract-table-metadata examples/v1/table_metadata.json /tmp/.metadata/databases ``` ### Scripts diff --git a/bin/cli.test.ts b/bin/cli.test.ts index 5ecbf49..660f63c 100644 --- a/bin/cli.test.ts +++ b/bin/cli.test.ts @@ -5,7 +5,7 @@ import { join, resolve } from "path"; const REPO_ROOT = resolve(import.meta.dirname, ".."); const CLI = "bin/cli.ts"; -const EXAMPLE_INPUT = "examples/v1/metadata.json"; +const EXAMPLE_INPUT = "examples/v1/table_metadata.json"; type RunResult = { stdout: string; diff --git a/bin/cli.ts b/bin/cli.ts index 533317b..966eb52 100644 --- a/bin/cli.ts +++ b/bin/cli.ts @@ -33,7 +33,7 @@ function parseArguments() { }); } -async function handleExtractMetadata(positionals: string[]): Promise { +function handleExtractMetadata(positionals: string[]): void { const inputFile = positionals[1]; const outputFolder = positionals[2]; @@ -44,7 +44,7 @@ async function handleExtractMetadata(positionals: string[]): Promise { process.exit(1); } - const stats = await extractTableMetadata({ inputFile, outputFolder }); + const stats = extractTableMetadata({ inputFile, outputFolder }); console.log( `Extracted ${stats.databases} databases, ${stats.tables} tables, ${stats.fields} fields`, ); @@ -57,7 +57,7 @@ function handleExtractSpec(values: ParsedValues): void { process.exit(0); } -async function main(): Promise { +function main(): void { const { values, positionals } = parseArguments(); const command = positionals[0]; @@ -77,4 +77,4 @@ async function main(): Promise { } } -await main(); +main(); diff --git a/bun.lock b/bun.lock index 5b9342d..de5ae96 100644 --- a/bun.lock +++ b/bun.lock @@ -5,7 +5,6 @@ "": { "name": "@metabase/database-metadata", "dependencies": { - "@streamparser/json-node": "^0.0.22", "js-yaml": 
"^4.1.0", }, "devDependencies": { @@ -85,10 +84,6 @@ "@oxfmt/binding-win32-x64-msvc": ["@oxfmt/binding-win32-x64-msvc@0.45.0", "", { "os": "win32", "cpu": "x64" }, "sha512-w5MMTRCK1dpQeRA+HHqXQXyN33DlG/N2LOYxJmaT4fJjcmZrbNnqw7SmIk7I2/a2493PPLZ+2E/Ar6t2iKVMug=="], - "@streamparser/json": ["@streamparser/json@0.0.22", "", {}, "sha512-b6gTSBjJ8G8SuO3Gbbj+zXbVx8NSs1EbpbMKpzGLWMdkR+98McH9bEjSz3+0mPJf68c5nxa3CrJHp5EQNXM6zQ=="], - - "@streamparser/json-node": ["@streamparser/json-node@0.0.22", "", { "dependencies": { "@streamparser/json": "^0.0.22" } }, "sha512-sJT2ptNRwqB1lIsQrQlCoWk5rF4tif9wDh+7yluAGijJamAhrHGYpFB/Zg3hJeceoZypi74ftXk8DHzwYpbZSg=="], - "@types/bun": ["@types/bun@1.3.12", "", { "dependencies": { "bun-types": "1.3.12" } }, "sha512-DBv81elK+/VSwXHDlnH3Qduw+KxkTIWi7TXkAeh24zpi5l0B2kUg9Ga3tb4nJaPcOFswflgi/yAvMVBPrxMB+A=="], "@types/esrecurse": ["@types/esrecurse@4.3.1", "", {}, "sha512-xJBAbDifo5hpffDBuHl0Y8ywswbiAp/Wi7Y/GtAgSlZyIABppyurxVueOPE8LUQOxdlgi6Zqce7uoEpqNTeiUw=="], diff --git a/core-spec/v1/spec.md b/core-spec/v1/spec.md index ec8ef27..329f10d 100644 --- a/core-spec/v1/spec.md +++ b/core-spec/v1/spec.md @@ -8,7 +8,7 @@ Metabase database metadata is a read-only snapshot of databases, tables, and fie The format is designed to be **portable** and **reviewable**: numeric IDs are omitted or replaced with human-readable natural keys (database name, `[database, schema, table]` tuples, etc.). Files can be diffed, grepped, and edited by hand. -The raw `metadata.json` is a single flat JSON document with `databases`, `tables`, and `fields` arrays, optimized for transport rather than reading. It can be arbitrarily large — tens or hundreds of megabytes on warehouses with many tables — and is not intended for direct consumption. Tools and humans should read the extracted YAML tree under `databases/` instead, where each entity lives in its own small file. 
+The raw `table_metadata.json` is a single flat JSON document with `databases`, `tables`, and `fields` arrays, optimized for transport rather than reading. It can be arbitrarily large — tens or hundreds of megabytes on warehouses with many tables — and is not intended for direct consumption. Tools and humans should read the extracted YAML tree under `databases/` instead, where each entity lives in its own small file. ## Table of Contents @@ -252,4 +252,3 @@ parent_id: - DATA - user ``` - diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ACCOUNTS.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ACCOUNTS.yaml index 87d9723..37ce31d 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ACCOUNTS.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ACCOUNTS.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: ACCOUNTS schema: PUBLIC description: Information on customer accounts registered with Piespace. Each account represents a new organization signing up for on-demand pies. +db_id: Sample Database fields: - name: LONGITUDE base_type: type/Float diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ANALYTIC_EVENTS.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ANALYTIC_EVENTS.yaml index f6d9cf4..2f942ab 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ANALYTIC_EVENTS.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ANALYTIC_EVENTS.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: ANALYTIC_EVENTS schema: PUBLIC description: Piespace does some anonymous analytics tracking on how users interact with their platform. They’ve only had time to implement a few events, but you know how it is. Pies come first. 
+db_id: Sample Database fields: - name: BUTTON_LABEL base_type: type/Text diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/FEEDBACK.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/FEEDBACK.yaml index d3724c8..42bb44b 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/FEEDBACK.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/FEEDBACK.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: FEEDBACK schema: PUBLIC description: With each order of pies sent out, Piespace includes a place for customers to submit feedback and review their order. +db_id: Sample Database fields: - name: ID base_type: type/BigInteger diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/INVOICES.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/INVOICES.yaml index 09d8009..11fc50e 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/INVOICES.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/INVOICES.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: INVOICES schema: PUBLIC description: Confirmed payments from Piespace’s customers. Most accounts pay for their pie subscription on a monthly basis. +db_id: Sample Database fields: - name: PLAN base_type: type/Text diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ORDERS.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ORDERS.yaml index 38d94cf..b0c0b40 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ORDERS.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/ORDERS.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: ORDERS schema: PUBLIC description: Confirmed Sample Company orders for a product, from a user. +db_id: Sample Database fields: - name: QUANTITY description: Number of products bought. 
diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PEOPLE.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PEOPLE.yaml index f27375d..4f4ac7b 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PEOPLE.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PEOPLE.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: PEOPLE schema: PUBLIC description: Information on the user accounts registered with Sample Company. +db_id: Sample Database fields: - name: STATE description: The state or province of the account’s billing address diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PRODUCTS.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PRODUCTS.yaml index 0b98b74..e417fa5 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PRODUCTS.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/PRODUCTS.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: PRODUCTS schema: PUBLIC description: Includes a catalog of all the products ever sold by the famed Sample Company. +db_id: Sample Database fields: - name: ID description: The numerical product number. Only used internally. All external communication should use the title or EAN. diff --git a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/REVIEWS.yaml b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/REVIEWS.yaml index 6cd318f..8c77fea 100644 --- a/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/REVIEWS.yaml +++ b/examples/v1/databases/Sample Database/schemas/PUBLIC/tables/REVIEWS.yaml @@ -1,7 +1,7 @@ -db_id: Sample Database name: REVIEWS schema: PUBLIC description: Reviews that Sample Company customers have left on our products. +db_id: Sample Database fields: - name: RATING description: The rating (on a scale of 1-5) the user left. 
diff --git a/examples/v1/metadata.json b/examples/v1/metadata.json deleted file mode 100644 index 91ffc23..0000000 --- a/examples/v1/metadata.json +++ /dev/null @@ -1,1360 +0,0 @@ -{ - "databases": [ - { - "id": "Sample Database", - "name": "Sample Database", - "engine": "postgres" - } - ], - "tables": [ - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "db_id": "Sample Database", - "name": "PEOPLE", - "schema": "PUBLIC", - "description": "Information on the user accounts registered with Sample Company." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "db_id": "Sample Database", - "name": "ORDERS", - "schema": "PUBLIC", - "description": "Confirmed Sample Company orders for a product, from a user." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "db_id": "Sample Database", - "name": "PRODUCTS", - "schema": "PUBLIC", - "description": "Includes a catalog of all the products ever sold by the famed Sample Company." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "db_id": "Sample Database", - "name": "REVIEWS", - "schema": "PUBLIC", - "description": "Reviews that Sample Company customers have left on our products." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "db_id": "Sample Database", - "name": "FEEDBACK", - "schema": "PUBLIC", - "description": "With each order of pies sent out, Piespace includes a place for customers to submit feedback and review their order." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "db_id": "Sample Database", - "name": "ACCOUNTS", - "schema": "PUBLIC", - "description": "Information on customer accounts registered with Piespace. Each account represents a new organization signing up for on-demand pies." 
- }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "db_id": "Sample Database", - "name": "ANALYTIC_EVENTS", - "schema": "PUBLIC", - "description": "Piespace does some anonymous analytics tracking on how users interact with their platform. They\u2019ve only had time to implement a few events, but you know how it is. Pies come first." - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "db_id": "Sample Database", - "name": "INVOICES", - "schema": "PUBLIC", - "description": "Confirmed payments from Piespace\u2019s customers. Most accounts pay for their pie subscription on a monthly basis." - } - ], - "fields": [ - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "STATE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "STATE", - "description": "The state or province of the account\u2019s billing address", - "base_type": "type/Text", - "database_type": "CHARACTER", - "semantic_type": "type/State" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "ID", - "description": "A unique identifier given to each user.", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "EMAIL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "EMAIL", - "description": "The contact email for the account.", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Email" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "BIRTH_DATE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "BIRTH_DATE", - "description": "The date of birth of the user", - "base_type": "type/Date", - "database_type": "DATE" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "SOURCE" - ], - "table_id": [ - 
"Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "SOURCE", - "description": "The channel through which we acquired this user. Valid values include: Affiliate, Facebook, Google, Organic and Twitter", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Source" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "NAME" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "NAME", - "description": "The name of the user who owns an account", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Name" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "CREATED_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "CREATED_AT", - "description": "The date the user record was created. Also referred to as the user\u2019s \"join date\"", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CreationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "ADDRESS" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "ADDRESS", - "description": "The street address of the account\u2019s billing address", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "LATITUDE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "LATITUDE", - "description": "This is the latitude of the user on sign-up. 
It might be updated in the future to the last seen location.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Latitude" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "CITY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "CITY", - "description": "The city of the account\u2019s billing address", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/City" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "PASSWORD" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "PASSWORD", - "description": "This is the salted password of the user. It should not be visible", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "LONGITUDE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "LONGITUDE", - "description": "This is the longitude of the user on sign-up. 
It might be updated in the future to the last seen location.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Longitude" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "ZIP" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE" - ], - "name": "ZIP", - "description": "The postal code of the account\u2019s billing address", - "base_type": "type/Text", - "database_type": "CHARACTER", - "semantic_type": "type/ZipCode" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "QUANTITY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "QUANTITY", - "description": "Number of products bought.", - "base_type": "type/Integer", - "database_type": "INTEGER", - "semantic_type": "type/Quantity" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "DISCOUNT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "DISCOUNT", - "description": "Discount amount.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Discount" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "TOTAL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "TOTAL", - "description": "The total billed amount.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "TAX" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "TAX", - "description": "This is the amount of local and federal taxes that are collected on the purchase. 
Note that other governmental fees on some products are not included here, but instead are accounted for in the subtotal.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "ID", - "description": "This is a unique ID for the product. It is also called the \u201cInvoice number\u201d or \u201cConfirmation number\u201d in customer facing emails and screens.", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "SUBTOTAL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "SUBTOTAL", - "description": "The raw, pre-tax cost of the order. Note that this might be different in the future from the product price due to promotions, credits, etc.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "USER_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "USER_ID", - "description": "The id of the user who made this order. 
Note that in some cases where an order was created on behalf of a customer who phoned the order in, this might be the employee who handled the request.", - "base_type": "type/Integer", - "database_type": "INTEGER", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "PEOPLE", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "CREATED_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "CREATED_AT", - "description": "The date and time an order was submitted.", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CreationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ORDERS", - "PRODUCT_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ORDERS" - ], - "name": "PRODUCT_ID", - "description": "The product ID. This is an internal identifier for the product, NOT the SKU.", - "base_type": "type/Integer", - "database_type": "INTEGER", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "ID", - "description": "The numerical product number. Only used internally. All external communication should use the title or EAN.", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "EAN" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "EAN", - "description": "The international article number. 
A 13 digit number uniquely identifying the product.", - "base_type": "type/Text", - "database_type": "CHARACTER" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "RATING" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "RATING", - "description": "The average rating users have given the product. This ranges from 1 - 5", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Score" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "TITLE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "TITLE", - "description": "The name of the product as it should be displayed to customers.", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Title" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "CATEGORY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "CATEGORY", - "description": "The type of product, valid values include: Doohicky, Gadget, Gizmo and Widget", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "VENDOR" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "VENDOR", - "description": "The source of the product.", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Company" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "PRICE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "PRICE", - "description": "The list price of the product. 
Note that this is not always the price the product sold for due to discounts, promotions, etc.", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "CREATED_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS" - ], - "name": "CREATED_AT", - "description": "The date the product was added to our catalog.", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CreationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "RATING" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "RATING", - "description": "The rating (on a scale of 1-5) the user left.", - "base_type": "type/Integer", - "database_type": "SMALLINT", - "semantic_type": "type/Score" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "CREATED_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "CREATED_AT", - "description": "The day and time a review was written by a user.", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CreationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "ID", - "description": "A unique internal identifier for the review. 
Should not be used externally.", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "PRODUCT_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "PRODUCT_ID", - "description": "The product the review was for", - "base_type": "type/Integer", - "database_type": "INTEGER", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "PRODUCTS", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "REVIEWER" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "REVIEWER", - "description": "The user who left the review", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "REVIEWS", - "BODY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "REVIEWS" - ], - "name": "BODY", - "description": "The review the user left. 
Limited to 2000 characters.", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Description" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "RATING" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "RATING", - "base_type": "type/Integer", - "database_type": "SMALLINT", - "semantic_type": "type/Score" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "RATING_MAPPED" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "RATING_MAPPED", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "ACCOUNT_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "ACCOUNT_ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "EMAIL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "EMAIL", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Email" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "DATE_RECEIVED" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "DATE_RECEIVED", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK", - "BODY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "FEEDBACK" - ], - "name": "BODY", - "base_type": "type/Text", - 
"database_type": "CHARACTER LARGE OBJECT" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "LONGITUDE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "LONGITUDE", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Longitude" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "CREATED_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "CREATED_AT", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CreationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "SEATS" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "SEATS", - "base_type": "type/Integer", - "database_type": "INTEGER" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "LAST_NAME" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "LAST_NAME", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Name" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "FIRST_NAME" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "FIRST_NAME", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Name" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "TRIAL_CONVERTED" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "TRIAL_CONVERTED", - "base_type": "type/Boolean", - "database_type": "BOOLEAN" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "CANCELED_AT" - ], - "table_id": [ - "Sample 
Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "CANCELED_AT", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP", - "semantic_type": "type/CancelationTimestamp" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "TRIAL_ENDS_AT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "TRIAL_ENDS_AT", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "SOURCE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "SOURCE", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Source" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "ACTIVE_SUBSCRIPTION" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "ACTIVE_SUBSCRIPTION", - "base_type": "type/Boolean", - "database_type": "BOOLEAN" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "PLAN" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "PLAN", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "LEGACY_PLAN" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "LEGACY_PLAN", - "base_type": "type/Boolean", - "database_type": "BOOLEAN" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "COUNTRY" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "COUNTRY", - "base_type": "type/Text", - "database_type": "CHARACTER", - "semantic_type": "type/Country" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "LATITUDE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "LATITUDE", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION", - "semantic_type": "type/Latitude" - 
}, - { - "id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "EMAIL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS" - ], - "name": "EMAIL", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Email" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "BUTTON_LABEL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "BUTTON_LABEL", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "EVENT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "EVENT", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "TIMESTAMP" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "TIMESTAMP", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "ACCOUNT_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "ACCOUNT_ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "PAGE_URL" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "PAGE_URL", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/URL" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "ANALYTIC_EVENTS" - ], - "name": "ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - 
"semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "PLAN" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "PLAN", - "base_type": "type/Text", - "database_type": "CHARACTER VARYING", - "semantic_type": "type/Category" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "PAYMENT" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "PAYMENT", - "base_type": "type/Float", - "database_type": "DOUBLE PRECISION" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "ACCOUNT_ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "ACCOUNT_ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/FK", - "fk_target_field_id": [ - "Sample Database", - "PUBLIC", - "ACCOUNTS", - "ID" - ] - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "ID" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "ID", - "base_type": "type/BigInteger", - "database_type": "BIGINT", - "semantic_type": "type/PK" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "EXPECTED_INVOICE" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "EXPECTED_INVOICE", - "base_type": "type/Boolean", - "database_type": "BOOLEAN" - }, - { - "id": [ - "Sample Database", - "PUBLIC", - "INVOICES", - "DATE_RECEIVED" - ], - "table_id": [ - "Sample Database", - "PUBLIC", - "INVOICES" - ], - "name": "DATE_RECEIVED", - "base_type": "type/DateTime", - "database_type": "TIMESTAMP" - } - ] -} diff --git a/examples/v1/table_metadata.json b/examples/v1/table_metadata.json new file mode 100644 index 0000000..b0d47da --- /dev/null +++ b/examples/v1/table_metadata.json @@ -0,0 +1,659 @@ +{ + "databases": [ + { + "id": 1, + "name": "Sample Database", + "engine": "postgres" + } + ], + "tables": [ + { + "id": 1, + "db_id": 1, + "name": "PEOPLE", + 
"schema": "PUBLIC", + "description": "Information on the user accounts registered with Sample Company." + }, + { + "id": 2, + "db_id": 1, + "name": "ORDERS", + "schema": "PUBLIC", + "description": "Confirmed Sample Company orders for a product, from a user." + }, + { + "id": 3, + "db_id": 1, + "name": "PRODUCTS", + "schema": "PUBLIC", + "description": "Includes a catalog of all the products ever sold by the famed Sample Company." + }, + { + "id": 4, + "db_id": 1, + "name": "REVIEWS", + "schema": "PUBLIC", + "description": "Reviews that Sample Company customers have left on our products." + }, + { + "id": 5, + "db_id": 1, + "name": "FEEDBACK", + "schema": "PUBLIC", + "description": "With each order of pies sent out, Piespace includes a place for customers to submit feedback and review their order." + }, + { + "id": 6, + "db_id": 1, + "name": "ACCOUNTS", + "schema": "PUBLIC", + "description": "Information on customer accounts registered with Piespace. Each account represents a new organization signing up for on-demand pies." + }, + { + "id": 7, + "db_id": 1, + "name": "ANALYTIC_EVENTS", + "schema": "PUBLIC", + "description": "Piespace does some anonymous analytics tracking on how users interact with their platform. They’ve only had time to implement a few events, but you know how it is. Pies come first." + }, + { + "id": 8, + "db_id": 1, + "name": "INVOICES", + "schema": "PUBLIC", + "description": "Confirmed payments from Piespace’s customers. Most accounts pay for their pie subscription on a monthly basis." 
+ } + ], + "fields": [ + { + "id": 1, + "table_id": 1, + "name": "STATE", + "description": "The state or province of the account’s billing address", + "base_type": "type/Text", + "database_type": "CHARACTER", + "semantic_type": "type/State" + }, + { + "id": 4, + "table_id": 1, + "name": "ID", + "description": "A unique identifier given to each user.", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 7, + "table_id": 1, + "name": "EMAIL", + "description": "The contact email for the account.", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Email" + }, + { + "id": 12, + "table_id": 1, + "name": "BIRTH_DATE", + "description": "The date of birth of the user", + "base_type": "type/Date", + "database_type": "DATE" + }, + { + "id": 30, + "table_id": 1, + "name": "SOURCE", + "description": "The channel through which we acquired this user. Valid values include: Affiliate, Facebook, Google, Organic and Twitter", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Source" + }, + { + "id": 48, + "table_id": 1, + "name": "NAME", + "description": "The name of the user who owns an account", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Name" + }, + { + "id": 50, + "table_id": 1, + "name": "CREATED_AT", + "description": "The date the user record was created. Also referred to as the user’s \"join date\"", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CreationTimestamp" + }, + { + "id": 51, + "table_id": 1, + "name": "ADDRESS", + "description": "The street address of the account’s billing address", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING" + }, + { + "id": 52, + "table_id": 1, + "name": "LATITUDE", + "description": "This is the latitude of the user on sign-up. 
It might be updated in the future to the last seen location.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Latitude" + }, + { + "id": 53, + "table_id": 1, + "name": "CITY", + "description": "The city of the account’s billing address", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/City" + }, + { + "id": 54, + "table_id": 1, + "name": "PASSWORD", + "description": "This is the salted password of the user. It should not be visible", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING" + }, + { + "id": 58, + "table_id": 1, + "name": "LONGITUDE", + "description": "This is the longitude of the user on sign-up. It might be updated in the future to the last seen location.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Longitude" + }, + { + "id": 61, + "table_id": 1, + "name": "ZIP", + "description": "The postal code of the account’s billing address", + "base_type": "type/Text", + "database_type": "CHARACTER", + "semantic_type": "type/ZipCode" + }, + { + "id": 2, + "table_id": 2, + "name": "QUANTITY", + "description": "Number of products bought.", + "base_type": "type/Integer", + "database_type": "INTEGER", + "semantic_type": "type/Quantity" + }, + { + "id": 3, + "table_id": 2, + "name": "DISCOUNT", + "description": "Discount amount.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Discount" + }, + { + "id": 5, + "table_id": 2, + "name": "TOTAL", + "description": "The total billed amount.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION" + }, + { + "id": 6, + "table_id": 2, + "name": "TAX", + "description": "This is the amount of local and federal taxes that are collected on the purchase. 
Note that other governmental fees on some products are not included here, but instead are accounted for in the subtotal.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION" + }, + { + "id": 9, + "table_id": 2, + "name": "ID", + "description": "This is a unique ID for the product. It is also called the “Invoice number” or “Confirmation number” in customer facing emails and screens.", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 10, + "table_id": 2, + "name": "SUBTOTAL", + "description": "The raw, pre-tax cost of the order. Note that this might be different in the future from the product price due to promotions, credits, etc.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION" + }, + { + "id": 11, + "table_id": 2, + "name": "USER_ID", + "fk_target_field_id": 4, + "description": "The id of the user who made this order. Note that in some cases where an order was created on behalf of a customer who phoned the order in, this might be the employee who handled the request.", + "base_type": "type/Integer", + "database_type": "INTEGER", + "semantic_type": "type/FK" + }, + { + "id": 13, + "table_id": 2, + "name": "CREATED_AT", + "description": "The date and time an order was submitted.", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CreationTimestamp" + }, + { + "id": 14, + "table_id": 2, + "name": "PRODUCT_ID", + "fk_target_field_id": 8, + "description": "The product ID. This is an internal identifier for the product, NOT the SKU.", + "base_type": "type/Integer", + "database_type": "INTEGER", + "semantic_type": "type/FK" + }, + { + "id": 8, + "table_id": 3, + "name": "ID", + "description": "The numerical product number. Only used internally. 
All external communication should use the title or EAN.", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 15, + "table_id": 3, + "name": "EAN", + "description": "The international article number. A 13 digit number uniquely identifying the product.", + "base_type": "type/Text", + "database_type": "CHARACTER" + }, + { + "id": 16, + "table_id": 3, + "name": "RATING", + "description": "The average rating users have given the product. This ranges from 1 - 5", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Score" + }, + { + "id": 17, + "table_id": 3, + "name": "TITLE", + "description": "The name of the product as it should be displayed to customers.", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Title" + }, + { + "id": 18, + "table_id": 3, + "name": "CATEGORY", + "description": "The type of product, valid values include: Doohicky, Gadget, Gizmo and Widget", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 34, + "table_id": 3, + "name": "VENDOR", + "description": "The source of the product.", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Company" + }, + { + "id": 44, + "table_id": 3, + "name": "PRICE", + "description": "The list price of the product. 
Note that this is not always the price the product sold for due to discounts, promotions, etc.", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION" + }, + { + "id": 63, + "table_id": 3, + "name": "CREATED_AT", + "description": "The date the product was added to our catalog.", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CreationTimestamp" + }, + { + "id": 19, + "table_id": 4, + "name": "RATING", + "description": "The rating (on a scale of 1-5) the user left.", + "base_type": "type/Integer", + "database_type": "SMALLINT", + "semantic_type": "type/Score" + }, + { + "id": 55, + "table_id": 4, + "name": "CREATED_AT", + "description": "The day and time a review was written by a user.", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CreationTimestamp" + }, + { + "id": 59, + "table_id": 4, + "name": "ID", + "description": "A unique internal identifier for the review. Should not be used externally.", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 65, + "table_id": 4, + "name": "PRODUCT_ID", + "fk_target_field_id": 8, + "description": "The product the review was for", + "base_type": "type/Integer", + "database_type": "INTEGER", + "semantic_type": "type/FK" + }, + { + "id": 67, + "table_id": 4, + "name": "REVIEWER", + "description": "The user who left the review", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING" + }, + { + "id": 69, + "table_id": 4, + "name": "BODY", + "description": "The review the user left. 
Limited to 2000 characters.", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Description" + }, + { + "id": 20, + "table_id": 5, + "name": "ID", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 22, + "table_id": 5, + "name": "RATING", + "base_type": "type/Integer", + "database_type": "SMALLINT", + "semantic_type": "type/Score" + }, + { + "id": 23, + "table_id": 5, + "name": "RATING_MAPPED", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 25, + "table_id": 5, + "name": "ACCOUNT_ID", + "fk_target_field_id": 24, + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/FK" + }, + { + "id": 26, + "table_id": 5, + "name": "EMAIL", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Email" + }, + { + "id": 27, + "table_id": 5, + "name": "DATE_RECEIVED", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP" + }, + { + "id": 28, + "table_id": 5, + "name": "BODY", + "base_type": "type/Text", + "database_type": "CHARACTER LARGE OBJECT" + }, + { + "id": 21, + "table_id": 6, + "name": "LONGITUDE", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Longitude" + }, + { + "id": 24, + "table_id": 6, + "name": "ID", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 31, + "table_id": 6, + "name": "CREATED_AT", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CreationTimestamp" + }, + { + "id": 33, + "table_id": 6, + "name": "SEATS", + "base_type": "type/Integer", + "database_type": "INTEGER" + }, + { + "id": 36, + "table_id": 6, + "name": "LAST_NAME", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Name" + }, + { + "id": 37, + "table_id": 6, + 
"name": "FIRST_NAME", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Name" + }, + { + "id": 38, + "table_id": 6, + "name": "TRIAL_CONVERTED", + "base_type": "type/Boolean", + "database_type": "BOOLEAN" + }, + { + "id": 39, + "table_id": 6, + "name": "CANCELED_AT", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP", + "semantic_type": "type/CancelationTimestamp" + }, + { + "id": 40, + "table_id": 6, + "name": "TRIAL_ENDS_AT", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP" + }, + { + "id": 41, + "table_id": 6, + "name": "SOURCE", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Source" + }, + { + "id": 42, + "table_id": 6, + "name": "ACTIVE_SUBSCRIPTION", + "base_type": "type/Boolean", + "database_type": "BOOLEAN" + }, + { + "id": 43, + "table_id": 6, + "name": "PLAN", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 47, + "table_id": 6, + "name": "LEGACY_PLAN", + "base_type": "type/Boolean", + "database_type": "BOOLEAN" + }, + { + "id": 56, + "table_id": 6, + "name": "COUNTRY", + "base_type": "type/Text", + "database_type": "CHARACTER", + "semantic_type": "type/Country" + }, + { + "id": 57, + "table_id": 6, + "name": "LATITUDE", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION", + "semantic_type": "type/Latitude" + }, + { + "id": 62, + "table_id": 6, + "name": "EMAIL", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Email" + }, + { + "id": 29, + "table_id": 7, + "name": "BUTTON_LABEL", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 32, + "table_id": 7, + "name": "EVENT", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 35, + "table_id": 7, + "name": "TIMESTAMP", + "base_type": 
"type/DateTime", + "database_type": "TIMESTAMP" + }, + { + "id": 45, + "table_id": 7, + "name": "ACCOUNT_ID", + "fk_target_field_id": 24, + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/FK" + }, + { + "id": 49, + "table_id": 7, + "name": "PAGE_URL", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/URL" + }, + { + "id": 60, + "table_id": 7, + "name": "ID", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 46, + "table_id": 8, + "name": "PLAN", + "base_type": "type/Text", + "database_type": "CHARACTER VARYING", + "semantic_type": "type/Category" + }, + { + "id": 64, + "table_id": 8, + "name": "PAYMENT", + "base_type": "type/Float", + "database_type": "DOUBLE PRECISION" + }, + { + "id": 66, + "table_id": 8, + "name": "ACCOUNT_ID", + "fk_target_field_id": 24, + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/FK" + }, + { + "id": 68, + "table_id": 8, + "name": "ID", + "base_type": "type/BigInteger", + "database_type": "BIGINT", + "semantic_type": "type/PK" + }, + { + "id": 70, + "table_id": 8, + "name": "EXPECTED_INVOICE", + "base_type": "type/Boolean", + "database_type": "BOOLEAN" + }, + { + "id": 71, + "table_id": 8, + "name": "DATE_RECEIVED", + "base_type": "type/DateTime", + "database_type": "TIMESTAMP" + } + ] +} diff --git a/package.json b/package.json index 9786f00..32f4977 100644 --- a/package.json +++ b/package.json @@ -39,7 +39,6 @@ "test": "bun test" }, "dependencies": { - "@streamparser/json-node": "^0.0.22", "js-yaml": "^4.1.0" }, "devDependencies": { diff --git a/src/extract-table-metadata.test.ts b/src/extract-table-metadata.test.ts index 38fb274..ad03ed3 100644 --- a/src/extract-table-metadata.test.ts +++ b/src/extract-table-metadata.test.ts @@ -13,7 +13,7 @@ import yaml from "js-yaml"; import { extractTableMetadata } from "./extract-table-metadata.js"; const REPO_ROOT = 
resolve(import.meta.dirname, ".."); -const EXAMPLE_INPUT = join(REPO_ROOT, "examples/v1/metadata.json"); +const EXAMPLE_INPUT = join(REPO_ROOT, "examples/v1/table_metadata.json"); describe("extractTableMetadata", () => { let workdir: string; @@ -26,8 +26,8 @@ describe("extractTableMetadata", () => { rmSync(workdir, { recursive: true, force: true }); }); - it("extracts the bundled sample database to YAML", async () => { - const stats = await extractTableMetadata({ + it("extracts the bundled sample database to YAML", () => { + const stats = extractTableMetadata({ inputFile: EXAMPLE_INPUT, outputFolder: workdir, }); @@ -48,11 +48,8 @@ describe("extractTableMetadata", () => { expect(existsSync(ordersPath)).toBe(true); }); - it("strips numeric ids and uses natural-key db_id on tables", async () => { - await extractTableMetadata({ - inputFile: EXAMPLE_INPUT, - outputFolder: workdir, - }); + it("strips numeric ids and uses natural-key db_id on tables", () => { + extractTableMetadata({ inputFile: EXAMPLE_INPUT, outputFolder: workdir }); const tablePath = join( workdir, "Sample Database", @@ -71,11 +68,8 @@ describe("extractTableMetadata", () => { expect(Array.isArray(table.fields)).toBe(true); }); - it("rewrites fk_target_field_id as a natural-key tuple", async () => { - await extractTableMetadata({ - inputFile: EXAMPLE_INPUT, - outputFolder: workdir, - }); + it("rewrites fk_target_field_id as a natural-key tuple", () => { + extractTableMetadata({ inputFile: EXAMPLE_INPUT, outputFolder: workdir }); const tablePath = join( workdir, "Sample Database", @@ -98,29 +92,26 @@ describe("extractTableMetadata", () => { ]); }); - it("escapes slashes in entity names", async () => { + it("escapes slashes in entity names", () => { const input = join(workdir, "input.json"); writeFileSync( input, JSON.stringify({ - databases: [{ id: "weird/name", name: "weird/name", engine: "h2" }], + databases: [{ id: 1, name: "weird/name" }], tables: [], fields: [], }), ); const out = join(workdir, 
"out"); - await extractTableMetadata({ inputFile: input, outputFolder: out }); + extractTableMetadata({ inputFile: input, outputFolder: out }); expect( existsSync(join(out, "weird__SLASH__name", "weird__SLASH__name.yaml")), ).toBe(true); }); - it("regenerates output that matches the bundled examples", async () => { - await extractTableMetadata({ - inputFile: EXAMPLE_INPUT, - outputFolder: workdir, - }); + it("regenerates output that matches the bundled examples", () => { + extractTableMetadata({ inputFile: EXAMPLE_INPUT, outputFolder: workdir }); const checkedIn = readFileSync( join( diff --git a/src/extract-table-metadata.ts b/src/extract-table-metadata.ts index 7ddfb68..3c90ac6 100644 --- a/src/extract-table-metadata.ts +++ b/src/extract-table-metadata.ts @@ -1,45 +1,77 @@ -import { - appendFileSync, - createReadStream, - existsSync, - mkdirSync, - writeFileSync, -} from "node:fs"; -import { dirname, join } from "node:path"; +import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { join } from "node:path"; import yaml from "js-yaml"; -import { JSONParser } from "@streamparser/json-node"; -type DatabaseId = string; -type TableId = [DatabaseId, string | null, string]; -type FieldId = [...TableId, string, ...string[]]; +type DatabaseID = number; +type TableID = number; +type FieldID = number; -type Database = { - id: DatabaseId; +type DatabaseKey = string; +type TableKey = [DatabaseKey, string | null, string]; +type FieldKey = [...TableKey, string, ...string[]]; + +type RawDatabase = { + id: DatabaseID; name: string; engine: string; }; -type Table = { - id: TableId; - db_id: DatabaseId; +type RawTable = { + id: TableID; + db_id: DatabaseID; name: string; schema: string | null; description?: string; }; +type RawField = { + id: FieldID; + table_id: TableID; + name: string; + description?: string; + base_type?: string; + database_type?: string; + semantic_type?: string; + parent_id?: FieldID | null; + fk_target_field_id?: FieldID | null; +}; + +type 
RawMetadata = { + databases: RawDatabase[]; + tables: RawTable[]; + fields: RawField[]; +}; + +type MetadataIndex = { + databases: RawDatabase[]; + databasesById: Map; + tablesByDbId: Map; + tablesById: Map; + fieldsByTableId: Map; + fieldsById: Map; +}; + +type Database = { + name: string; + engine: string; +}; + type Field = { - id: FieldId; - table_id: TableId; name: string; description?: string; base_type?: string; database_type?: string; - effective_type?: string; semantic_type?: string; - coercion_strategy?: string; - parent_id?: FieldId; - fk_target_field_id?: FieldId; - nfc_path?: string[]; + parent_id?: FieldKey; + fk_target_field_id?: FieldKey; +}; + +type Table = { + name: string; + schema: string | null; + description?: string; + db_id: DatabaseKey; + fields: Field[]; }; export type ExtractMetadataOptions = { @@ -53,234 +85,182 @@ export type ExtractMetadataResult = { fields: number; }; -type Order = "tables-first" | "fields-first"; - -type TouchState = { - lastTouched: string | null; -}; - -type FieldState = { - buffer: string; - bufferedPath: string | null; -}; - -const YAML_OPTS = { lineWidth: -1, noRefs: true } as const; - -// Per-table field buffer size before flushing -const FIELD_BUFFER_LIMIT = 1024 * 1024; - function escapeFilename(name: string): string { return name.replace(/\//g, "__SLASH__").replace(/\\/g, "__BACKSLASH__"); } -function getDatabasePath(outputFolder: string, dbName: string): string { - const safe = escapeFilename(dbName); - return join(outputFolder, safe, `${safe}.yaml`); +function getDatabaseFolder(outputFolder: string, db: RawDatabase): string { + return join(outputFolder, escapeFilename(db.name)); } -function getTablePath( +function getTablesFolder( outputFolder: string, - dbName: DatabaseId, - tableSchema: string | null, - tableName: string, + db: RawDatabase, + table: RawTable, ): string { - const dbFolder = join(outputFolder, escapeFilename(dbName)); - const tablesFolder = tableSchema - ? 
join(dbFolder, "schemas", escapeFilename(tableSchema), "tables") - : join(dbFolder, "tables"); - return join(tablesFolder, `${escapeFilename(tableName)}.yaml`); -} - -function indentLines(text: string, prefix: string): string { - return text.replace(/^(?=.)/gm, prefix); -} - -function formatDatabase(db: Database) { - const { id: _id, ...rest } = db; - return rest; + const dbFolder = getDatabaseFolder(outputFolder, db); + if (table.schema) { + return join(dbFolder, "schemas", escapeFilename(table.schema), "tables"); + } + return join(dbFolder, "tables"); } -function formatTable(table: Table) { - const { id: _id, ...rest } = table; - return rest; +function getDatabasePath(outputFolder: string, db: RawDatabase): string { + return join( + getDatabaseFolder(outputFolder, db), + `${escapeFilename(db.name)}.yaml`, + ); } -function formatField(field: Field) { - const { id: _id, table_id: _table_id, ...rest } = field; - return rest; +function getTablePath( + outputFolder: string, + db: RawDatabase, + table: RawTable, +): string { + return join( + getTablesFolder(outputFolder, db, table), + `${escapeFilename(table.name)}.yaml`, + ); } -function isDatabase(value: unknown): value is Database { - return typeof value === "object" && value !== null && "engine" in value; +function getDatabaseKey(db: RawDatabase): DatabaseKey { + return db.name; } -function isField(value: unknown): value is Field { - return typeof value === "object" && value !== null && "table_id" in value; +function getTableKey(db: RawDatabase, table: RawTable): TableKey { + return [getDatabaseKey(db), table.schema ?? 
null, table.name]; } -function isTable(value: unknown): value is Table { - return typeof value === "object" && value !== null && "db_id" in value; +function getFieldKey( + db: RawDatabase, + table: RawTable, + field: RawField, + fieldsById: Map, +): FieldKey | null { + if (!field.parent_id) { + return [...getTableKey(db, table), field.name]; + } + const parent = fieldsById.get(field.parent_id); + if (!parent) { + return null; + } + const parentKey = getFieldKey(db, table, parent, fieldsById); + return parentKey && [...parentKey, field.name]; } -// Subpass 1: write a database yaml file. -function writeDatabase( - outputFolder: string, - db: Database, - stats: ExtractMetadataResult, -): void { - mkdirSync(join(outputFolder, escapeFilename(db.name)), { recursive: true }); - writeFileSync( - getDatabasePath(outputFolder, db.name), - yaml.dump(formatDatabase(db), YAML_OPTS), - ); - stats.databases++; +function createFolder(folderPath: string): void { + mkdirSync(folderPath, { recursive: true }); } -// Subpass 2: touch each parent table file so the table phase can detect "has fields" -// via existsSync. Skips the syscall for runs of consecutive fields sharing a path. -function touchTableFile( - outputFolder: string, - field: Field, - state: TouchState, -): void { - const [dbName, tableSchema, tableName] = field.table_id; - const path = getTablePath(outputFolder, dbName, tableSchema, tableName); - if (path === state.lastTouched) { - return; - } - if (!existsSync(path)) { - mkdirSync(dirname(path), { recursive: true }); - writeFileSync(path, ""); - } - state.lastTouched = path; +function writeYaml(filePath: string, data: unknown): void { + writeFileSync(filePath, yaml.dump(data, { lineWidth: -1, noRefs: true })); } -// Subpass 3: write the table yaml; if the file already exists (touched by subpass 2), -// append a bare `fields:` trailer so subpass 4 can stream items underneath. 
-function writeTable( - outputFolder: string, - table: Table, - stats: ExtractMetadataResult, -): void { - const path = getTablePath( - outputFolder, - table.db_id, - table.schema, - table.name, - ); - const hasFields = existsSync(path); - if (!hasFields) { - mkdirSync(dirname(path), { recursive: true }); - } - let content = yaml.dump(formatTable(table), YAML_OPTS); - if (hasFields) { - content += "fields:\n"; +function groupBy(items: T[], keyFn: (item: T) => K): Map { + const result = new Map(); + for (const item of items) { + const key = keyFn(item); + const existing = result.get(key); + if (existing) { + existing.push(item); + } else { + result.set(key, [item]); + } } - writeFileSync(path, content); - stats.tables++; + return result; } -function flushFieldBuffer(state: FieldState): void { - if (state.bufferedPath !== null) { - appendFileSync(state.bufferedPath, state.buffer); - state.buffer = ""; - } +function buildIndex(metadata: RawMetadata): MetadataIndex { + return { + databases: metadata.databases, + databasesById: new Map(metadata.databases.map((d) => [d.id, d])), + tablesByDbId: groupBy(metadata.tables, (t) => t.db_id), + tablesById: new Map(metadata.tables.map((t) => [t.id, t])), + fieldsByTableId: groupBy(metadata.fields, (f) => f.table_id), + fieldsById: new Map(metadata.fields.map((f) => [f.id, f])), + }; } -// Subpass 4: append a field as a 2-space-indented YAML list item, buffering -// consecutive fields sharing a path so they coalesce into one appendFileSync per table. -// Wide tables flush mid-stream once the buffer exceeds FIELD_BUFFER_LIMIT bytes. -// The caller flushes the trailing buffer once the stream ends. 
-function writeField( - outputFolder: string, - field: Field, - state: FieldState, - stats: ExtractMetadataResult, -): void { - const [dbName, tableSchema, tableName] = field.table_id; - const path = getTablePath(outputFolder, dbName, tableSchema, tableName); - if (path !== state.bufferedPath) { - flushFieldBuffer(state); - state.bufferedPath = path; - } - state.buffer += indentLines(yaml.dump([formatField(field)], YAML_OPTS), " "); - if (state.buffer.length >= FIELD_BUFFER_LIMIT) { - flushFieldBuffer(state); - } - stats.fields++; +function formatDatabase(db: RawDatabase): Database { + const { id: _id, ...result } = db; + return result; } -function streamAll(inputFile: string, paths: string[]): JSONParser { - const parser = new JSONParser({ paths, keepStack: false }); - createReadStream(inputFile).pipe(parser); - return parser; +function formatTable(db: RawDatabase, table: RawTable): Omit { + const { id: _id, db_id: _db_id, ...rest } = table; + return { ...rest, db_id: getDatabaseKey(db) }; } -// Pass 1: stream the entire JSON. Always run subpass 1 (dbs) + subpass 2 (touch). -// Detect order from the first non-database hit; if fields appear before tables, also run -// subpass 3 (writeTable) here so pass 2 only has to write fields. -async function firstPass( - inputFile: string, - outputFolder: string, - stats: ExtractMetadataResult, -): Promise { - let order: Order | null = null; - const state: TouchState = { lastTouched: null }; - - for await (const { value } of streamAll(inputFile, [ - "$.databases.*", - "$.tables.*", - "$.fields.*", - ])) { - if (isDatabase(value)) { - writeDatabase(outputFolder, value, stats); - } else if (isField(value)) { - if (order === null) { - order = "fields-first"; - } - touchTableFile(outputFolder, value, state); - } else if (isTable(value)) { - if (order === null) { - order = "tables-first"; - } - if (order === "fields-first") { - writeTable(outputFolder, value, stats); - } - // tables-first: skip — pass 2 will write them. 
+function formatField( + db: RawDatabase, + table: RawTable, + field: RawField, + index: MetadataIndex, +): Field { + const { fieldsById, tablesById, databasesById } = index; + const { + id: _id, + table_id: _table_id, + parent_id, + fk_target_field_id, + ...rest + } = field; + const result: Field = { ...rest }; + // Silently drop parent_id / fk_target_field_id if the referenced entity can't be resolved. + if (parent_id) { + const parent = fieldsById.get(parent_id); + const parentKey = parent && getFieldKey(db, table, parent, fieldsById); + if (parentKey) { + result.parent_id = parentKey; } } - - return order ?? "tables-first"; -} - -// Pass 2: in tables-first mode, run subpass 3 (writeTable) + subpass 4 (writeField). -// In fields-first mode, only subpass 4 — tables were already written in pass 1. -async function secondPass( - inputFile: string, - outputFolder: string, - order: Order, - stats: ExtractMetadataResult, -): Promise { - const state: FieldState = { buffer: "", bufferedPath: null }; - const paths = - order === "tables-first" ? 
["$.tables.*", "$.fields.*"] : ["$.fields.*"]; - - for await (const { value } of streamAll(inputFile, paths)) { - if (isTable(value)) { - writeTable(outputFolder, value, stats); - } else if (isField(value)) { - writeField(outputFolder, value, state, stats); + if (fk_target_field_id) { + const targetField = fieldsById.get(fk_target_field_id); + const targetTable = targetField && tablesById.get(targetField.table_id); + const targetDb = targetTable && databasesById.get(targetTable.db_id); + const targetKey = + targetDb && + targetTable && + targetField && + getFieldKey(targetDb, targetTable, targetField, fieldsById); + if (targetKey) { + result.fk_target_field_id = targetKey; } } - flushFieldBuffer(state); + return result; +} + +function buildStats(metadata: RawMetadata): ExtractMetadataResult { + return { + databases: metadata.databases.length, + tables: metadata.tables.length, + fields: metadata.fields.length, + }; } -export async function extractTableMetadata({ +export function extractTableMetadata({ inputFile, outputFolder, -}: ExtractMetadataOptions): Promise { - const stats: ExtractMetadataResult = { databases: 0, tables: 0, fields: 0 }; - const order = await firstPass(inputFile, outputFolder, stats); - await secondPass(inputFile, outputFolder, order, stats); - return stats; +}: ExtractMetadataOptions): ExtractMetadataResult { + const metadata = JSON.parse(readFileSync(inputFile, "utf-8")) as RawMetadata; + const index = buildIndex(metadata); + const { databases, tablesByDbId, fieldsByTableId } = index; + + for (const db of databases) { + createFolder(getDatabaseFolder(outputFolder, db)); + writeYaml(getDatabasePath(outputFolder, db), formatDatabase(db)); + + for (const table of tablesByDbId.get(db.id) ?? []) { + const fields = (fieldsByTableId.get(table.id) ?? 
[]).map((field) => + formatField(db, table, field, index), + ); + createFolder(getTablesFolder(outputFolder, db, table)); + writeYaml(getTablePath(outputFolder, db, table), { + ...formatTable(db, table), + fields, + }); + } + } + + return buildStats(metadata); }