diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ab834b787..33312884d 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -54,12 +54,31 @@ jobs: working-directory: ${{ github.workspace }} run: pnpm install - - name: Build all + - name: Get postgres-pglite submodule hash + id: postgres-pglite-commit-id + run: echo "hash=$(git rev-parse HEAD:postgres-pglite)" >> "$GITHUB_OUTPUT" + + - name: Cache WASM build artifacts + id: postgres-pglite-cache + uses: actions/cache@v4 + with: + path: | + packages/pglite/release + packages/pglite-tools/release + packages/pglite-postgis/release + key: wasm-build-${{ steps.postgres-pglite-commit-id.outputs.hash }} + + - name: Build all if necessary + if: steps.postgres-pglite-cache.outputs.cache-hit != 'true' working-directory: ${{ github.workspace }} env: PGSRC: ${{ github.workspace }}/postgres-pglite - run: | - pnpm build:all + run: pnpm build:all + + - name: Build only TypeScript packages if postgres-pglite artifacts already available + if: steps.postgres-pglite-cache.outputs.cache-hit == 'true' + working-directory: ${{ github.workspace }} + run: pnpm ts:build - name: Upload PGlite Interim to Github artifacts id: upload-pglite-interim-build-files @@ -156,7 +175,7 @@ jobs: - name: Test Deno run: pnpm test:deno - - name: Pack for distribution + - name: Pack pglite for distribution run: pnpm pack - name: Upload PGlite distribution artifact @@ -194,6 +213,17 @@ jobs: - PGlite with node v${{ matrix.node }}: ${{ steps.upload-pglite-package.outputs.artifact-url }} edit-mode: append + - name: Pack pglite-icu-full for distribution + working-directory: ./packages/pglite-icu-full + run: pnpm build && pnpm pack + + - name: Upload pglite-icu-full distribution artifact + id: upload-pglite-icu-full + uses: actions/upload-artifact@v4 + with: + name: pglite-icu-full-node-v${{ matrix.node }} + path: ./packages/pglite-icu-full/electric-sql-pglite-icu-full-*.tgz +
build-and-test-pglite-dependents: name: Build and Test packages dependent on PGlite runs-on: blacksmith-32vcpu-ubuntu-2204 @@ -258,7 +288,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: pglite-postgis-dist-node-v${{ matrix.node }} - path: ./packages/pglite-postgis/dist/* + path: ./packages/pglite-postgis/dist/* publish-website-with-demos: name: Publish website with demos @@ -303,7 +333,7 @@ jobs: uses: actions/download-artifact@v4 with: name: pglite-postgis-dist-node-v20.x - path: ./packages/pglite-postgis/dist/ + path: ./packages/pglite-postgis/dist/ - name: Install dependencies run: pnpm install --frozen-lockfile diff --git a/packages/pglite-icu-full/.gitignore b/packages/pglite-icu-full/.gitignore new file mode 100644 index 000000000..ae02570c9 --- /dev/null +++ b/packages/pglite-icu-full/.gitignore @@ -0,0 +1 @@ +release/ \ No newline at end of file diff --git a/packages/pglite-icu-full/README.md b/packages/pglite-icu-full/README.md new file mode 100644 index 000000000..0500373ba --- /dev/null +++ b/packages/pglite-icu-full/README.md @@ -0,0 +1,35 @@ +# pglite-icu-full + +A package containing all the resources from [libicu](https://github.com/unicode-org/icu) that can be used with PGlite to build localized applications. + +## Installation + +```bash +npm install @electric-sql/pglite-icu-full +# or +yarn add @electric-sql/pglite-icu-full +# or +pnpm add @electric-sql/pglite-icu-full +``` + +## Usage + +This loads the entire locale set provided by libicu, which might be quite large. 
+ +```typescript +import { PGlite } from '@electric-sql/pglite' +import { icuDataDir } from '@electric-sql/pglite-icu-full' + +// Create a PGlite instance with the icu resources +const pg = await PGlite.create({ + icuDataDir: await icuDataDir(), +}) + +// just an example, query the available collations +const collations = await pg.exec('select * from pg_collation') + +``` + +## Documentation + +https://www.postgresql.org/docs/current/locale.html \ No newline at end of file diff --git a/packages/pglite-icu-full/eslint.config.js b/packages/pglite-icu-full/eslint.config.js new file mode 100644 index 000000000..d85bdb2aa --- /dev/null +++ b/packages/pglite-icu-full/eslint.config.js @@ -0,0 +1,29 @@ +import globals from 'globals' +import rootConfig from '../../eslint.config.js' + +export default [ + ...rootConfig, + { + ignores: ['release/**/*', 'examples/**/*', 'dist/**/*'], + }, + { + languageOptions: { + globals: { + ...globals.browser, + ...globals.node, + }, + }, + rules: { + ...rootConfig.rules, + '@typescript-eslint/no-explicit-any': 'off', + }, + }, + { + files: ['tests/targets/deno/**/*.js'], + languageOptions: { + globals: { + Deno: false, + }, + }, + }, +] diff --git a/packages/pglite-icu-full/examples/README.md b/packages/pglite-icu-full/examples/README.md new file mode 100644 index 000000000..6c63fecd9 --- /dev/null +++ b/packages/pglite-icu-full/examples/README.md @@ -0,0 +1,49 @@ +# Generating an ICU package for PGlite + +This document shows you how to generate your own ICU data file that contains only the locales that you want in your PGlite-enabled application. + +## Download libicu code and data + +Currently PGlite is tested to work with libicu v76.1. Get the source and data for it: + +wget https://github.com/unicode-org/icu/releases/download/release-76-1/icu4c-76_1-src.tgz +wget https://github.com/unicode-org/icu/releases/download/release-76-1/icu4c-76_1-data.zip + +Important: You must have the data sources in order to use the ICU Data Build Tool. 
Check for the file icu4c/source/data/locales/root.txt. If that file is missing, you need to download “icu4c-*-data.zip”, delete the old icu4c/source/data directory, and replace it with the data directory from the zip file. If there is a *.dat file in icu4c/source/data/in, that file will be used even if you gave ICU custom filter rules. + +## Create a filters.json file + +This will allow you to only generate the data that you need. + +Here's a simple example: +``` +{ + "localeFilter": { + "filterType": "locale", + "includelist": [ + "en_US" + ] + } +} +``` + +For more info, see https://unicode-org.github.io/icu/userguide/icu_data/buildtool.html. + +## Build ICU + +$ ICU_DATA_FILTER_FILE=/path/to/filters.json ./icu/source/configure --with-data-packaging=files --disable-shared --enable-static --disable-tests --disable-samples --disable-extras --disable-icuio --disable-layoutex --prefix=/path/to/icu-install + +$ make -j && make install + + +## Create an archive with the ICU data + +The previous steps have installed everything related to ICU in the `--prefix` directory (`/path/to/icu-install` above). You only need the data files: + +$ cd /path/to/icu-install/share/icu/76.1/ && tar cvfz icu_76.tgz icudt76l/ + +Now `icu_76.tgz` contains the localisation data that you can use with PGlite. + +## Example + +The subfolder `Switzerland` contains an example filter file (`filters_switzerland.json`) and the generated data archive (`icu_76_ch.tgz`) that can be used with PGlite. 
\ No newline at end of file diff --git a/packages/pglite-icu-full/examples/Switzerland/filters_switzerland.json b/packages/pglite-icu-full/examples/Switzerland/filters_switzerland.json new file mode 100644 index 000000000..822e0b83b --- /dev/null +++ b/packages/pglite-icu-full/examples/Switzerland/filters_switzerland.json @@ -0,0 +1,29 @@ +{ + "localeFilter": { + "filterType": "locale", + "includelist": ["root", "de_CH", "fr_CH", "it_CH", "rm"], + "includeChildren": false + }, + "featureFilters": { + "brkitr_rules": "exclude", + "brkitr_dictionaries": "exclude", + "brkitr_tree": "exclude", + "conversion_mappings": "exclude", + "confusables": "exclude", + "curr_supplemental": "exclude", + "curr_tree": "exclude", + "lang_tree": "exclude", + "normalization": "exclude", + "region_tree": "exclude", + "rbnf_tree": "exclude", + "stringprep": "exclude", + "zone_tree": "exclude", + "translit": "exclude", + "unames": "exclude", + "ulayout": "exclude", + "uemoji": "exclude", + "unit_tree": "exclude", + "cnvalias": "exclude" + }, + "collationUCAData": "implicithan" +} \ No newline at end of file diff --git a/packages/pglite-icu-full/examples/Switzerland/icu_76_ch.tgz b/packages/pglite-icu-full/examples/Switzerland/icu_76_ch.tgz new file mode 100644 index 000000000..56b357608 Binary files /dev/null and b/packages/pglite-icu-full/examples/Switzerland/icu_76_ch.tgz differ diff --git a/packages/pglite-icu-full/package.json b/packages/pglite-icu-full/package.json new file mode 100644 index 000000000..74daf92a7 --- /dev/null +++ b/packages/pglite-icu-full/package.json @@ -0,0 +1,63 @@ +{ + "name": "@electric-sql/pglite-icu-full", + "version": "0.0.1", + "description": "ICU support", + "author": "Electric DB Limited", + "homepage": "https://pglite.dev", + "license": "Apache-2.0", + "repository": { + "type": "git", + "url": "git+https://github.com/electric-sql/pglite", + "directory": "packages/pglite-icu-full" + }, + "keywords": [ + "postgres", + "sql", + "database", + "wasm", + 
"pglite", + "initdb" + ], + "private": false, + "publishConfig": { + "access": "public" + }, + "files": [ + "./dist" + ], + "type": "module", + "types": "dist/index.d.ts", + "main": "dist/index.cjs", + "module": "dist/index.js", + "exports": { + ".": { + "import": { + "types": "./dist/index.d.ts", + "default": "./dist/index.js" + }, + "require": { + "types": "./dist/index.d.cts", + "default": "./dist/index.cjs" + } + } + }, + "scripts": { + "build": "tsup && cp static/* ./dist/", + "check:exports": "attw . --pack --profile node16", + "lint": "eslint ./tests --report-unused-disable-directives --max-warnings 0", + "format": "prettier --write ./tests", + "typecheck": "tsc", + "stylecheck": "pnpm lint && prettier --check ./tests", + "test": "vitest", + "prepublishOnly": "pnpm check:exports" + }, + "devDependencies": { + "@arethetypeswrong/cli": "^0.18.1", + "@electric-sql/pglite": "workspace:*", + "@types/emscripten": "^1.41.1", + "@types/node": "^20.16.11", + "tsx": "^4.19.2", + "vitest": "^1.3.1", + "@electric-sql/pglite-utils": "workspace:*" + } +} diff --git a/packages/pglite-icu-full/src/index.ts b/packages/pglite-icu-full/src/index.ts new file mode 100644 index 000000000..ced98f27e --- /dev/null +++ b/packages/pglite-icu-full/src/index.ts @@ -0,0 +1,29 @@ +import { pglUtils } from '@electric-sql/pglite-utils' + +/** + * Returns the bundled full ICU data archive (`icu.76.tgz`) as a Blob that can + * be passed as the `icuDataDir` option of `PGlite.create()`. + * + * When `pglUtils.IN_NODE` is set the archive is read with `fs/promises`; + * otherwise it is fetched relative to this module's URL. + * + * @returns the archive contents + * @throws if the file cannot be read or the HTTP response is not OK + */ +export async function icuDataDir(): Promise<Blob> { + const moduleUrl = new URL('../dist/icu.76.tgz', import.meta.url) + if (pglUtils.IN_NODE) { + const fs = await import('fs/promises') + const buffer = await fs.readFile(moduleUrl) + return new Blob([new Uint8Array(buffer)]) + } + // fetch() resolves on HTTP errors too; fail here instead of handing an + // error page to PGlite as if it were the archive. + const response = await fetch(moduleUrl) + if (!response.ok) { + throw new Error( + `Failed to fetch ICU data from ${moduleUrl}: ${response.status} ${response.statusText}`, + ) + } + return response.blob() +} diff --git a/packages/pglite-icu-full/static/icu.76.tgz b/packages/pglite-icu-full/static/icu.76.tgz new file mode 100644 index 000000000..cf0d4f280 Binary files /dev/null and b/packages/pglite-icu-full/static/icu.76.tgz differ diff --git a/packages/pglite-icu-full/tests/icuDataDir.test.ts 
b/packages/pglite-icu-full/tests/icuDataDir.test.ts new file mode 100644 index 000000000..b762c2f71 --- /dev/null +++ b/packages/pglite-icu-full/tests/icuDataDir.test.ts @@ -0,0 +1,487 @@ +import { describe, it, expect, beforeAll, afterAll } from 'vitest' +import { PGlite } from '@electric-sql/pglite' +import * as fs from 'fs/promises' +import { resolve } from 'path' + +describe('full icu tests', () => { + it('load icu', async () => { + const pg_defIcu = await PGlite.create() + + const defLocales = await pg_defIcu.exec(` +SELECT n.nspname AS schema, c.collname AS name, c.collcollate AS locale, + c.collctype AS ctype, c.collprovider AS provider, c.collversion +FROM pg_collation c +JOIN pg_namespace n ON c.collnamespace = n.oid +ORDER BY schema, name; +`) + + expect(defLocales[0].rows.length).toBeGreaterThanOrEqual(10) + + const icuDataDir = await fs.readFile( + resolve(import.meta.dirname, '../dist/icu.76.tgz'), + ) + const pg_fullIcu = await PGlite.create({ + icuDataDir: new Blob([new Uint8Array(icuDataDir)]), + }) + + const allLocales = await pg_fullIcu.exec(` +SELECT n.nspname AS schema, c.collname AS name, c.collcollate AS locale, + c.collctype AS ctype, c.collprovider AS provider, c.collversion +FROM pg_collation c +JOIN pg_namespace n ON c.collnamespace = n.oid +ORDER BY schema, name; + `) + + expect(allLocales[0].rows.length).toBeGreaterThanOrEqual(879) + expect(defLocales[0].rows.length).toBeLessThan(allLocales[0].rows.length) + }) + + it.skip('use locale-provider icu with german locale', async () => { + const icuDataDir = await fs.readFile( + resolve(import.meta.dirname, '../dist/icu.76.tgz'), + ) + const _pg = await PGlite.create({ + icuDataDir: new Blob([new Uint8Array(icuDataDir)]), + initDbStartParams: ['--locale-provider=icu', '--icu-locale=de'], + }) + }) + + describe('Switzerland custom ICU file', () => { + let pg: PGlite + + beforeAll(async () => { + const icuDataDir = await fs.readFile( + resolve(import.meta.dirname, 
'../examples/Switzerland/icu_76_ch.tgz'), + ) + pg = await PGlite.create({ + icuDataDir: new Blob([new Uint8Array(icuDataDir)]), + }) + }) + + afterAll(async () => { + await pg?.close() + }) + + it('includes Swiss locale collations', async () => { + const locales = await pg.exec(` + SELECT collname FROM pg_collation + WHERE collprovider = 'i' + ORDER BY collname; + `) + const names = locales[0].rows.map((r: any) => r.collname) + expect(names).toContain('de-CH-x-icu') + expect(names).toContain('fr-CH-x-icu') + expect(names).toContain('it-CH-x-icu') + }) + + it('German (CH) collation: ä sorts near a', async () => { + const res = await pg.query<{ b: string }>(` + SELECT val AS b FROM (VALUES ('Birne'), ('Apfel'), ('Ärger'), ('Banane')) + AS t(val) ORDER BY val COLLATE "de-CH-x-icu" + `) + const values = res.rows.map((r) => r.b) + expect(values.indexOf('Ärger')).toBeLessThan(values.indexOf('Banane')) + }) + + it('French (CH) collation: accented characters sort correctly', async () => { + const res = await pg.query<{ b: string }>(` + SELECT val AS b FROM (VALUES ('côte'), ('coté'), ('cote'), ('côté')) + AS t(val) ORDER BY val COLLATE "fr-CH-x-icu" + `) + const values = res.rows.map((r) => r.b) + expect(values[0]).toBe('cote') + }) + + it('Italian (CH) collation: handles accented vowels', async () => { + const res = await pg.query<{ b: string }>(` + SELECT val AS b FROM (VALUES ('perché'), ('pera'), ('perciò'), ('percorso')) + AS t(val) ORDER BY val COLLATE "it-CH-x-icu" + `) + const values = res.rows.map((r) => r.b) + expect(values.indexOf('pera')).toBeLessThan(values.indexOf('perché')) + }) + + it('German phonebook collation for Swiss German: ö expands to oe', async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS ch_de_phonebook + (provider = icu, locale = 'de-CH@collation=phonebook'); + `) + const res = await pg.query<{ std: boolean; phone: boolean }>(` + SELECT + 'Goldmann' < 'Götz' COLLATE "de-CH-x-icu" AS std, + 'Goldmann' > 'Götz' COLLATE 
ch_de_phonebook AS phone + `) + expect(res.rows[0].std).toBe(true) + expect(res.rows[0].phone).toBe(true) + }) + + it('case conversion with Swiss French', async () => { + const res = await pg.query<{ lo: string; up: string }>(` + SELECT + lower('GÉNÈVE' COLLATE "fr-CH-x-icu") AS lo, + upper('génève' COLLATE "fr-CH-x-icu") AS up + `) + expect(res.rows[0].lo).toBe('génève') + expect(res.rows[0].up).toBe('GÉNÈVE') + }) + + it('case conversion with Swiss German', async () => { + const res = await pg.query<{ lo: string; up: string }>(` + SELECT + lower('ZÜRICH' COLLATE "de-CH-x-icu") AS lo, + upper('zürich' COLLATE "de-CH-x-icu") AS up + `) + expect(res.rows[0].lo).toBe('zürich') + expect(res.rows[0].up).toBe('ZÜRICH') + }) + + it('case conversion with Swiss Italian', async () => { + const res = await pg.query<{ lo: string; up: string }>(` + SELECT + lower('LUGANO' COLLATE "it-CH-x-icu") AS lo, + upper('bellinzona' COLLATE "it-CH-x-icu") AS up + `) + expect(res.rows[0].lo).toBe('lugano') + expect(res.rows[0].up).toBe('BELLINZONA') + }) + + it('numeric collation works with Swiss locales', async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS ch_numeric + (provider = icu, locale = 'de-CH@colNumeric=yes'); + `) + const res = await pg.query<{ b: string }>(` + SELECT val AS b FROM (VALUES ('Haus-9'), ('Haus-10'), ('Haus-2'), ('Haus-1')) + AS t(val) ORDER BY val COLLATE ch_numeric + `) + const values = res.rows.map((r) => r.b) + expect(values).toEqual(['Haus-1', 'Haus-2', 'Haus-9', 'Haus-10']) + }) + + it('case-insensitive collation with Swiss locale', async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS ch_ci + (provider = icu, locale = 'de-CH@colStrength=secondary', deterministic = false); + `) + const res = await pg.query<{ eq: boolean }>(` + SELECT 'Grüezi' COLLATE ch_ci = 'grüezi' COLLATE ch_ci AS eq + `) + expect(res.rows[0].eq).toBe(true) + }) + }) +}) + +describe('icu functionality', () => { + let pg: PGlite + + beforeAll(async () => { + const 
icuData = await fs.readFile( + resolve(import.meta.dirname, '../dist/icu.76.tgz'), + ) + pg = await PGlite.create({ + icuDataDir: new Blob([new Uint8Array(icuData)]), + }) + + await pg.exec(` + CREATE TABLE collate_data (a int, b text); + INSERT INTO collate_data VALUES (1, 'abc'), (2, 'äbc'), (3, 'bbc'), (4, 'ABC'); + `) + }) + + afterAll(async () => { + await pg?.close() + }) + + it('icu_unicode_version() returns a value', async () => { + const res = await pg.query<{ icu_unicode_version: string }>( + `SELECT icu_unicode_version() AS icu_unicode_version`, + ) + expect(res.rows[0].icu_unicode_version).toMatch(/^\d+/) + }) + + describe('locale-aware sorting', () => { + it('English collation sorts äbc near abc', async () => { + const res = await pg.query<{ b: string }>( + `SELECT b FROM collate_data ORDER BY b COLLATE "en-x-icu"`, + ) + const values = res.rows.map((r) => r.b) + expect(values.indexOf('äbc')).toBeLessThan(values.indexOf('bbc')) + }) + + it('Swedish collation sorts ä after z', async () => { + const res = await pg.query<{ b: string }>( + `SELECT b FROM collate_data ORDER BY b COLLATE "sv-x-icu"`, + ) + const values = res.rows.map((r) => r.b) + expect(values.indexOf('äbc')).toBe(values.length - 1) + }) + + it('C collation sorts by codepoint (uppercase before lowercase)', async () => { + const res = await pg.query<{ b: string }>( + `SELECT b FROM collate_data ORDER BY b COLLATE "C"`, + ) + const values = res.rows.map((r) => r.b) + expect(values[0]).toBe('ABC') + }) + + it('en-x-icu and sv-x-icu produce different orderings', async () => { + const en = await pg.query<{ b: string }>( + `SELECT b FROM collate_data ORDER BY b COLLATE "en-x-icu"`, + ) + const sv = await pg.query<{ b: string }>( + `SELECT b FROM collate_data ORDER BY b COLLATE "sv-x-icu"`, + ) + expect(en.rows.map((r) => r.b)).not.toEqual(sv.rows.map((r) => r.b)) + }) + + it('constant expression comparison differs by locale', async () => { + const res = await pg.query<{ en: boolean; sv: boolean 
}>(` + SELECT + 'bbc' COLLATE "en-x-icu" > 'äbc' COLLATE "en-x-icu" AS en, + 'bbc' COLLATE "sv-x-icu" > 'äbc' COLLATE "sv-x-icu" AS sv + `) + expect(res.rows[0].en).toBe(true) + expect(res.rows[0].sv).toBe(false) + }) + }) + + describe('upper/lower case conversion', () => { + it('basic upper/lower/initcap with ICU', async () => { + const res = await pg.query<{ + lo: string + up: string + ic: string + }>(` + SELECT + lower('HIJ' COLLATE "en-x-icu") AS lo, + upper('hij' COLLATE "en-x-icu") AS up, + initcap('hello world' COLLATE "en-x-icu") AS ic + `) + expect(res.rows[0].lo).toBe('hij') + expect(res.rows[0].up).toBe('HIJ') + expect(res.rows[0].ic).toBe('Hello World') + }) + + it('Turkish dotless-i: lower(I) produces ı', async () => { + const res = await pg.query<{ en: string; tr: string }>(` + SELECT + lower('I' COLLATE "en-x-icu") AS en, + lower('I' COLLATE "tr-x-icu") AS tr + `) + expect(res.rows[0].en).toBe('i') + expect(res.rows[0].tr).toBe('\u0131') + }) + + it('Turkish upper(i) produces İ', async () => { + const res = await pg.query<{ en: string; tr: string }>(` + SELECT + upper('i' COLLATE "en-x-icu") AS en, + upper('i' COLLATE "tr-x-icu") AS tr + `) + expect(res.rows[0].en).toBe('I') + expect(res.rows[0].tr).toBe('\u0130') + }) + }) + + describe('ILIKE with locale awareness', () => { + it('English ILIKE matches KI in Türkiye', async () => { + const res = await pg.query<{ m: boolean }>( + `SELECT 'Türkiye' COLLATE "en-x-icu" ILIKE '%KI%' AS m`, + ) + expect(res.rows[0].m).toBe(true) + }) + + it('Turkish ILIKE does not match KI in Türkiye', async () => { + const res = await pg.query<{ m: boolean }>( + `SELECT 'Türkiye' COLLATE "tr-x-icu" ILIKE '%KI%' AS m`, + ) + expect(res.rows[0].m).toBe(false) + }) + + it('Turkish dotless-i ILIKE behavior', async () => { + const res = await pg.query<{ en: boolean; tr: boolean }>(` + SELECT + 'bıt' ILIKE 'BIT' COLLATE "en-x-icu" AS en, + 'bıt' ILIKE 'BIT' COLLATE "tr-x-icu" AS tr + `) + expect(res.rows[0].en).toBe(false) + 
expect(res.rows[0].tr).toBe(true) + }) + }) + + describe('custom ICU collation attributes', () => { + beforeAll(async () => { + await pg.exec(` + SET client_min_messages = WARNING; + CREATE COLLATION IF NOT EXISTS testcoll_ignore_accents + (provider = icu, locale = '@colStrength=primary;colCaseLevel=yes'); + CREATE COLLATION IF NOT EXISTS testcoll_backwards + (provider = icu, locale = '@colBackwards=yes'); + CREATE COLLATION IF NOT EXISTS testcoll_lower_first + (provider = icu, locale = '@colCaseFirst=lower'); + CREATE COLLATION IF NOT EXISTS testcoll_upper_first + (provider = icu, locale = '@colCaseFirst=upper'); + CREATE COLLATION IF NOT EXISTS testcoll_shifted + (provider = icu, locale = '@colAlternate=shifted'); + CREATE COLLATION IF NOT EXISTS testcoll_numeric + (provider = icu, locale = '@colNumeric=yes'); + RESET client_min_messages; + `) + }) + + it('ignore accents: aaá treated equal to AAA at primary level', async () => { + const res = await pg.query<{ und: boolean; ign: boolean }>(` + SELECT + 'aaá' > 'AAA' COLLATE "und-x-icu" AS und, + 'aaá' < 'AAA' COLLATE testcoll_ignore_accents AS ign + `) + expect(res.rows[0].und).toBe(true) + expect(res.rows[0].ign).toBe(true) + }) + + it('backwards accents: coté/côte ordering flips', async () => { + const res = await pg.query<{ und: boolean; bw: boolean }>(` + SELECT + 'coté' < 'côte' COLLATE "und-x-icu" AS und, + 'coté' > 'côte' COLLATE testcoll_backwards AS bw + `) + expect(res.rows[0].und).toBe(true) + expect(res.rows[0].bw).toBe(true) + }) + + it('case first: lower vs upper ordering', async () => { + const res = await pg.query<{ lo: boolean; up: boolean }>(` + SELECT + 'aaa' < 'AAA' COLLATE testcoll_lower_first AS lo, + 'aaa' > 'AAA' COLLATE testcoll_upper_first AS up + `) + expect(res.rows[0].lo).toBe(true) + expect(res.rows[0].up).toBe(true) + }) + + it('shifted: punctuation ignored in comparison', async () => { + const res = await pg.query<{ und: boolean; shifted: boolean }>(` + SELECT + 'de-luge' < 'deanza' 
COLLATE "und-x-icu" AS und, + 'de-luge' > 'deanza' COLLATE testcoll_shifted AS shifted + `) + expect(res.rows[0].und).toBe(true) + expect(res.rows[0].shifted).toBe(true) + }) + + it('numeric collation: A-21 sorts before A-123', async () => { + const res = await pg.query<{ und: boolean; num: boolean }>(` + SELECT + 'A-21' > 'A-123' COLLATE "und-x-icu" AS und, + 'A-21' < 'A-123' COLLATE testcoll_numeric AS num + `) + expect(res.rows[0].und).toBe(true) + expect(res.rows[0].num).toBe(true) + }) + }) + + describe('custom collation rules', () => { + it('custom rule &a < g reorders g after a', async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS testcoll_rules1 + (provider = icu, locale = '', rules = '&a < g'); + CREATE TABLE IF NOT EXISTS test_rules (a text); + DELETE FROM test_rules; + INSERT INTO test_rules VALUES + ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); + `) + + const enOrder = await pg.query<{ a: string }>( + `SELECT a FROM test_rules ORDER BY a COLLATE "en-x-icu"`, + ) + const customOrder = await pg.query<{ a: string }>( + `SELECT a FROM test_rules ORDER BY a COLLATE testcoll_rules1`, + ) + + const enValues = enOrder.rows.map((r) => r.a) + const customValues = customOrder.rows.map((r) => r.a) + + expect(enValues).not.toEqual(customValues) + expect(customValues.indexOf('green')).toBeLessThan( + customValues.indexOf('bird'), + ) + }) + }) + + describe('nondeterministic collations', () => { + beforeAll(async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS ctest_det + (provider = icu, locale = '', deterministic = true); + CREATE COLLATION IF NOT EXISTS ctest_nondet + (provider = icu, locale = '', deterministic = false); + CREATE COLLATION IF NOT EXISTS case_insensitive + (provider = icu, locale = '@colStrength=secondary', deterministic = false); + `) + }) + + it('case-insensitive: abc = ABC', async () => { + const res = await pg.query<{ eq: boolean }>( + `SELECT 'abc' COLLATE case_insensitive = 'ABC' COLLATE 
case_insensitive AS eq`, + ) + expect(res.rows[0].eq).toBe(true) + }) + + it('case-sensitive: abc != ABC', async () => { + const res = await pg.query<{ le: boolean; ge: boolean }>(` + SELECT + 'abc' <= 'ABC' COLLATE ctest_det AS le, + 'abc' >= 'ABC' COLLATE ctest_det AS ge + `) + const { le, ge } = res.rows[0] + // In deterministic collation, abc and ABC are not equal (one of le/ge is false) + expect(le && ge).toBe(false) + }) + + it('Unicode normalization: NFC = NFD under nondeterministic collation', async () => { + await pg.exec(` + CREATE TABLE IF NOT EXISTS test_norm (a int, b text); + DELETE FROM test_norm; + INSERT INTO test_norm VALUES (1, U&'\\00E4bc'); + INSERT INTO test_norm VALUES (2, U&'\\0061\\0308bc'); + `) + + const det = await pg.query<{ a: number }>( + `SELECT * FROM test_norm WHERE b = 'äbc' COLLATE ctest_det`, + ) + const nondet = await pg.query<{ a: number }>( + `SELECT * FROM test_norm WHERE b = 'äbc' COLLATE ctest_nondet`, + ) + + expect(det.rows.length).toBe(1) + expect(nondet.rows.length).toBe(2) + }) + + it('Greek sigma: ὀδυσσεύς = ὈΔΥΣΣΕΎΣ case-insensitively', async () => { + const res = await pg.query<{ cs: boolean; ci: boolean }>(` + SELECT + 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE ctest_det AS cs, + 'ὀδυσσεύς' = 'ὈΔΥΣΣΕΎΣ' COLLATE case_insensitive AS ci + `) + expect(res.rows[0].cs).toBe(false) + expect(res.rows[0].ci).toBe(true) + }) + }) + + describe('German phonebook collation', () => { + it('Götz sorts differently in standard vs phonebook order', async () => { + await pg.exec(` + CREATE COLLATION IF NOT EXISTS testcoll_de_phonebook + (provider = icu, locale = 'de@collation=phonebook'); + `) + + const res = await pg.query<{ standard: boolean; phonebook: boolean }>(` + SELECT + 'Goldmann' < 'Götz' COLLATE "de-x-icu" AS standard, + 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook AS phonebook + `) + expect(res.rows[0].standard).toBe(true) + expect(res.rows[0].phonebook).toBe(true) + }) + }) +}) diff --git 
a/packages/pglite-icu-full/tsconfig.json b/packages/pglite-icu-full/tsconfig.json new file mode 100644 index 000000000..ac9f11d02 --- /dev/null +++ b/packages/pglite-icu-full/tsconfig.json @@ -0,0 +1,10 @@ +{ + "extends": "../../tsconfig.json", + "compilerOptions": { + "types": [ + "@types/emscripten", + "node" + ] + }, + "include": ["src", "tsup.config.ts", "vitest.config.ts"] +} diff --git a/packages/pglite-icu-full/tsup.config.ts b/packages/pglite-icu-full/tsup.config.ts new file mode 100644 index 000000000..80ecab45d --- /dev/null +++ b/packages/pglite-icu-full/tsup.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from 'tsup' + +export default defineConfig([ + { + entry: ['src/index.ts'], + format: ['esm', 'cjs'], + outDir: 'dist', + dts: true, + sourcemap: true, + clean: true, + shims: true, + } +]) diff --git a/packages/pglite-icu-full/vitest.config.ts b/packages/pglite-icu-full/vitest.config.ts new file mode 100644 index 000000000..4b363cea5 --- /dev/null +++ b/packages/pglite-icu-full/vitest.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'vitest/config' + +export default defineConfig({ + test: { + globals: true, + environment: 'node', + testTimeout: 30000, + // setupFiles: ['./tests/setup.ts'], + }, +}) diff --git a/packages/pglite-prepopulatedfs/src/index.ts b/packages/pglite-prepopulatedfs/src/index.ts index e607da5ff..6410c527b 100644 --- a/packages/pglite-prepopulatedfs/src/index.ts +++ b/packages/pglite-prepopulatedfs/src/index.ts @@ -7,7 +7,7 @@ export async function dataDir(): Promise { const buffer = await fs.readFile(moduleUrl) return new Blob([new Uint8Array(buffer)]) } else { - const wasmDownloadPromise = await fetch(moduleUrl) - return wasmDownloadPromise.blob() + const downloadPromise = await fetch(moduleUrl) + return downloadPromise.blob() } } diff --git a/packages/pglite-utils/src/utils.ts b/packages/pglite-utils/src/utils.ts index 10fd36915..351c3033b 100644 --- a/packages/pglite-utils/src/utils.ts +++ 
b/packages/pglite-utils/src/utils.ts @@ -126,3 +126,41 @@ export function toPostgresName(input: string): string { } return output } + +interface MinimalFS { + readdir(path: string): string[] + unlink(path: string): void + rmdir(path: string): void +} + +/** + * Best-effort recursive removal of `path`. + * + * A path is treated as a directory when `fs.readdir` succeeds on it and as a + * file otherwise. Errors are swallowed: a missing path is a no-op and an + * entry that cannot be removed is left in place. + */ +export function rmdirRecursive(fs: MinimalFS, path: string) { + let entries: string[] + try { + entries = fs.readdir(path) + } catch (e) { + // readdir failed => not a directory (or already gone): try unlink + try { + fs.unlink(path) + } catch (_) { + /* ignore if already gone */ + } + return + } + for (const name of entries) { + if (name === '.' || name === '..') continue + // The recursive call handles files and directories alike + rmdirRecursive(fs, path + '/' + name) + } + try { + fs.rmdir(path) + } catch (e) { + /* ignore: an entry above could not be removed */ + } +} diff --git a/packages/pglite/src/fs/tarUtils.ts b/packages/pglite/src/fs/tarUtils.ts index fb712cab8..682398cc6 100644 --- a/packages/pglite/src/fs/tarUtils.ts +++ b/packages/pglite/src/fs/tarUtils.ts @@ -61,7 +61,7 @@ export async function loadTar( } for (const file of files) { - const filePath = pgDataDir + file.name + const filePath = `${pgDataDir}/${file.name}` // Ensure the directory structure exists const dirPath = filePath.split('/').slice(0, -1) @@ -81,7 +81,9 @@ export async function loadTar( dateToUnixTimestamp(file.modifyTime), ) } else if (file.type === DIRTYPE) { - FS.mkdir(filePath) + if (!FS.analyzePath(filePath).exists) { + FS.mkdir(filePath) + } } } } diff --git a/packages/pglite/src/initdb.ts b/packages/pglite/src/initdb.ts index 17bccaa93..cf99fb805 100644 --- a/packages/pglite/src/initdb.ts +++ b/packages/pglite/src/initdb.ts @@ -10,8 +10,10 @@ function assert(condition: unknown, message?: string): asserts condition { export const PG_ROOT = '/pglite' export const PGDATA = PG_ROOT + '/data' +export const ICU_DATA_PATH = PG_ROOT + '/icu' +export const INITDB_EXE_PATH = PG_ROOT + '/bin/initdb' +export 
const POSTGRES_EXE_PATH = PG_ROOT + '/bin/postgres' -const initdbExePath = PG_ROOT + '/bin/initdb' const pgstdoutPath = PG_ROOT + '/pgstdout' const pgstdinPath = PG_ROOT + '/pgstdin' @@ -87,7 +89,7 @@ async function execInitdb({ const emscriptenOpts: Partial = { arguments: args, noExitRuntime: false, - thisProgram: initdbExePath, + thisProgram: INITDB_EXE_PATH, // Provide a stdin that returns EOF to avoid browser prompt stdin: () => null, print: (text) => { @@ -114,6 +116,7 @@ async function execInitdb({ mod.ENV.HOME = '/home/postgres' mod.ENV.USER = 'postgres' mod.ENV.LOGNAME = 'postgres' + mod.ENV.ICU_DATA = ICU_DATA_PATH }, (mod: InitdbMod) => { mod.onRuntimeInitialized = () => { diff --git a/packages/pglite/src/interface.ts b/packages/pglite/src/interface.ts index aa4c4a10a..95829e7c2 100644 --- a/packages/pglite/src/interface.ts +++ b/packages/pglite/src/interface.ts @@ -92,6 +92,7 @@ export interface PGliteOptions { relaxedDurability?: boolean extensions?: TExtensions loadDataDir?: Blob | File + icuDataDir?: Blob | File initialMemory?: number pgliteWasmModule?: WebAssembly.Module initdbWasmModule?: WebAssembly.Module @@ -99,6 +100,7 @@ export interface PGliteOptions { parsers?: ParserOptions serializers?: SerializerOptions startParams?: string[] + initDbStartParams?: string[] } export type PGliteInterface = diff --git a/packages/pglite/src/pglite.ts b/packages/pglite/src/pglite.ts index 297efd2b8..6b086cdfa 100644 --- a/packages/pglite/src/pglite.ts +++ b/packages/pglite/src/pglite.ts @@ -30,13 +30,16 @@ import { NotificationResponseMessage, } from '@electric-sql/pg-protocol/messages' -import { initdb, PGDATA } from './initdb' +import { + ICU_DATA_PATH, + initdb, + INITDB_EXE_PATH, + PGDATA, + POSTGRES_EXE_PATH, +} from './initdb' import { pglUtils } from '@electric-sql/pglite-utils' -const postgresExePath = '/pglite/bin/postgres' -const initdbExePath = '/pglite/bin/initdb' - export class PGlite extends BasePGlite implements PGliteInterface, AsyncDisposable 
@@ -308,7 +311,7 @@ export class PGlite }) let emscriptenOpts: Partial = { - thisProgram: postgresExePath, + thisProgram: POSTGRES_EXE_PATH, WASM_PREFIX, arguments: args, noExitRuntime: true, @@ -417,11 +420,11 @@ export class PGlite mod.ENV.PGDATA = PGDATA mod.ENV.PGUSER = options.username ?? 'postgres' mod.ENV.PGDATABASE = options.database ?? 'postgres' - mod.ENV.LC_CTYPE = 'en_US.UTF-8' + mod.ENV.LANG = mod.ENV.LC_COLLATE = mod.ENV.LC_CTYPE = 'en_US.UTF-8' mod.ENV.TZ = 'UTC' mod.ENV.PGTZ = 'UTC' mod.ENV.PGCLIENTENCODING = 'UTF8' - + mod.ENV.ICU_DATA = ICU_DATA_PATH // some extensions might need their own ENV variables // TODO: move this to the extension init function for (const [extName] of Object.entries(this.#extensions)) { @@ -432,8 +435,8 @@ export class PGlite }, (mod: PostgresMod) => { mod.FS.chmod('/home/postgres/.pgpass', 0o0600) // https://www.postgresql.org/docs/current/libpq-pgpass.html - mod.FS.chmod(initdbExePath, 0o0555) - mod.FS.chmod(postgresExePath, 0o0555) + mod.FS.chmod(INITDB_EXE_PATH, 0o0555) + mod.FS.chmod(POSTGRES_EXE_PATH, 0o0555) }, ], } @@ -491,6 +494,10 @@ export class PGlite // Sync the filesystem from any previous store await this.fs!.initialSyncFs() + if (options.icuDataDir) { + await this.#fillIcuDataDir(options.icuDataDir) + } + if (!options.noInitDb) { // If the user has provided a tarball to load the database from, do that now. // We do this after the initial sync so that we can throw if the database @@ -520,6 +527,7 @@ export class PGlite pg: pg_initDb, debug: options.debug, wasmModule: options.initdbWasmModule, + args: options.initDbStartParams, }) if (initdbResult.exitCode !== 0) { @@ -539,7 +547,6 @@ export class PGlite await this.syncToFs() } } - // Start compiling dynamic extensions present in FS. 
await loadExtensions(this.mod, (...args) => this.#log(...args)) @@ -569,6 +576,16 @@ export class PGlite } } + async #fillIcuDataDir(icuDataDir: Blob | File) { + this.#log( + `pglite: icuDataDir specified, removing default icu data dir at ${ICU_DATA_PATH}`, + ) + pglUtils.rmdirRecursive(this.mod!.FS, ICU_DATA_PATH) + this.#log(`pglite: loading icu data from tarball ${icuDataDir}`) + this.mod!.FS.mkdirTree(ICU_DATA_PATH) + await loadTar(this.mod!.FS, icuDataDir, ICU_DATA_PATH) + } + #onRuntimeInitialized(mod: PostgresMod) { // we override system() to intercept any calls that might generate unexpected output this.#system_fn = mod.addFunction((cmd_ptr: number) => { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 94c12888a..78224586b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -211,6 +211,30 @@ importers: specifier: ^2.1.2 version: 2.1.2(@types/node@20.16.11)(jsdom@24.1.3)(terser@5.34.1) + packages/pglite-icu-full: + devDependencies: + '@arethetypeswrong/cli': + specifier: ^0.18.1 + version: 0.18.1 + '@electric-sql/pglite': + specifier: workspace:* + version: link:../pglite + '@electric-sql/pglite-utils': + specifier: workspace:* + version: link:../pglite-utils + '@types/emscripten': + specifier: ^1.41.1 + version: 1.41.1 + '@types/node': + specifier: ^20.16.11 + version: 20.16.11 + tsx: + specifier: ^4.19.2 + version: 4.19.2 + vitest: + specifier: ^1.3.1 + version: 1.6.0(@types/node@20.16.11)(jsdom@24.1.3)(terser@5.34.1) + packages/pglite-postgis: devDependencies: '@arethetypeswrong/cli': diff --git a/postgres-pglite b/postgres-pglite index 01792c31a..b4638a2ce 160000 --- a/postgres-pglite +++ b/postgres-pglite @@ -1 +1 @@ -Subproject commit 01792c31a62b7045eb22e93d7dad022bb64b1184 +Subproject commit b4638a2cef60f61634aa53aba79741146c4f4e0b