From f5db4e13fbfe791581ffbb0fdc1f402262945caf Mon Sep 17 00:00:00 2001 From: chenqiang Date: Sat, 9 May 2026 12:54:32 +0800 Subject: [PATCH 1/2] Fix: FDW OPTIONS encoding accepts symbolic names (issue #1726) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both the FDW catalog reader (src/backend/access/external/external.c) and the gp_exttable_fdw option validator (gpcontrib/gp_exttable_fdw/option.c) parsed the "encoding" OPTIONS value with atoi(). atoi("UTF8") returns 0 (PG_SQL_ASCII) and PG_VALID_ENCODING(0) is true, so symbolic names like 'UTF8', 'utf-8', 'GBK' silently fell through validation and were stored as SQL_ASCII at read time. By contrast, the legacy CREATE EXTERNAL TABLE ... ENCODING ... path resolves names via pg_char_to_encoding() and persists a numeric form into OPTIONS — only the FDW OPTIONS entry point bypassed that translation. Add a small shared helper parse_fdw_encoding_option(const char *) in src/backend/access/external/external.c (declared in src/include/access/external.h): - first try pg_char_to_encoding(name) — same logic as the legacy path; - otherwise try a strict numeric form via strtol() with end-of-string and PG_VALID_ENCODING() checks (atoi is intentionally avoided, since atoi("UTF8")==0 is the bug being fixed); - otherwise ereport(ERROR). Both the validator and GetExtFromForeignTableOptions() call this helper. On-disk values in pg_foreign_table.ftoptions are stored verbatim as the user wrote them; correctness is established at read time. This avoids a ProcessUtility_hook approach, which is unworkable here because the extension's _PG_init runs lazily on the first dlopen, after the current statement's hook check has already passed. Affected scope: gp_exttable_fdw (used by gp_exttable_server). The standalone pxf_fdw is unaffected — its validator already routes encoding through ProcessCopyOptions, which is name-aware. Behavior change on upgrade: existing rows whose ftoptions literally contain encoding= have, until now, been silently interpreted as SQL_ASCII. After this fix they are interpreted as the named encoding. This will be called out in the release notes; a detection query is provided in the PR description for operators who wish to pin specific tables to numeric form before upgrade. Tests added in gpcontrib/gp_exttable_fdw/{input,output}/gp_exttable_fdw.source cover encoding '6' / 'UTF8' / 'utf-8' / 'GBK' / 'bogus' and an ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') path. The pre-existing encoding '-1' error case has its expected error message updated to match the new helper's wording. --- .../input/gp_exttable_fdw.source | 64 ++++++++++++- gpcontrib/gp_exttable_fdw/option.c | 12 ++- .../output/gp_exttable_fdw.source | 92 ++++++++++++++++++- src/backend/access/external/external.c | 43 ++++++++- src/include/access/external.h | 5 + 5 files changed, 206 insertions(+), 10 deletions(-) diff --git a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source index 41012e73c81..81754fbc996 100644 --- a/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/input/gp_exttable_fdw.source @@ -53,12 +53,18 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); + -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -79,3 +85,59 @@ SELECT urilocation FROM pg_exttable WHERE reloid = 'public.ext_special_uri'::reg SELECT ftoptions FROM pg_foreign_table WHERE ftrelid='public.ext_special_uri'::regclass; \a SELECT * FROM ext_special_uri ORDER BY a; + +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== + +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + +-- Case + dash variant resolved by pg_char_to_encoding(). +CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; diff --git a/gpcontrib/gp_exttable_fdw/option.c b/gpcontrib/gp_exttable_fdw/option.c index 04cccfe0e47..59bd6b99014 100644 --- a/gpcontrib/gp_exttable_fdw/option.c +++ b/gpcontrib/gp_exttable_fdw/option.c @@ -135,11 +135,13 @@ gp_exttable_permission_check(PG_FUNCTION_ARGS) } else if(pg_strcasecmp(def->defname, "encoding") == 0) { - char *encoding = (char *) defGetString(def); - if (!PG_VALID_ENCODING(atoi(encoding))) - ereport(ERROR, - (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), - errmsg("%s is not a valid encoding code", encoding))); + /* + * Accept either a symbolic encoding name (e.g. 'UTF8', 'GBK') + * or a numeric encoding ID. Reject anything else explicitly, + * rather than letting atoi() silently mistranslate non-numeric + * names to SQL_ASCII. + */ + (void) parse_fdw_encoding_option((char *) defGetString(def)); } } diff --git a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source index a3191eb0853..4b4903120e4 100644 --- a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source @@ -52,12 +52,18 @@ OPTIONS (format_type 'c', delimiter ',', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv', reject_limit_type 'p', reject_limit '120'); ERROR: segment reject limit in PERCENT must be between 1 and 100 (got 120) (seg1 127.0.0.1:7003 pid=5173) --- Error, invalid encoding +-- Error, invalid encoding (negative numeric ID) CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server OPTIONS (format_type 'c', delimiter ',', encoding '-1', location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); -ERROR: -1 is not a valid encoding code (seg0 127.0.0.1:7002 pid=8289) +ERROR: "-1" is not a valid encoding name or code +-- Error, invalid encoding (unknown name) +CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) +SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', encoding 'bogus', + location_uris 'file://@hostname@@abs_srcdir@/data/tableless.csv'); +ERROR: "bogus" is not a valid encoding name or code -- OK, no execute_on | log_errors | encoding | is_writable option CREATE FOREIGN TABLE tableless_ext_fdw(a int, b int) SERVER gp_exttable_server @@ -89,10 +95,90 @@ ftoptions (1 row) \a SELECT * FROM ext_special_uri ORDER BY a; - a | b + a | b ---+--- 1 | 1 2 | 2 3 | 3 (3 rows) +-- =================================================================== +-- Tests for issue #1726: FDW OPTIONS encoding accepts both numeric IDs +-- and symbolic names (UTF8, utf-8, GBK, ...). Names previously parsed +-- via atoi() and silently degraded to SQL_ASCII. +-- =================================================================== +-- Numeric form (baseline; worked before the fix as well). +CREATE FOREIGN TABLE ext_enc_num (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '6'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_num'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Symbolic name 'UTF8' — used to be silently SQL_ASCII (the bug). +CREATE FOREIGN TABLE ext_enc_utf8 (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Case + dash variant resolved by pg_char_to_encoding(). +CREATE FOREIGN TABLE ext_enc_utf8_dash (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'utf-8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_utf8_dash'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +-- Non-UTF8 symbolic name. +CREATE FOREIGN TABLE ext_enc_gbk (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding 'GBK'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_gbk'::regclass; + pg_encoding_to_char +--------------------- + GBK +(1 row) + +-- ALTER FOREIGN TABLE ... OPTIONS (SET encoding 'UTF8') — same code +-- path, this proves the read-side resolution works after an ALTER too. +CREATE FOREIGN TABLE ext_enc_alter (a int) SERVER gp_exttable_server +OPTIONS (format_type 'c', delimiter ',', + location_uris 'file:///tmp/ext_enc_ignored.csv', + encoding '0'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + SQL_ASCII +(1 row) + +ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); +SELECT pg_encoding_to_char(encoding) FROM pg_exttable +WHERE reloid = 'ext_enc_alter'::regclass; + pg_encoding_to_char +--------------------- + UTF8 +(1 row) + +DROP FOREIGN TABLE ext_enc_num; +DROP FOREIGN TABLE ext_enc_utf8; +DROP FOREIGN TABLE ext_enc_utf8_dash; +DROP FOREIGN TABLE ext_enc_gbk; +DROP FOREIGN TABLE ext_enc_alter; + diff --git a/src/backend/access/external/external.c b/src/backend/access/external/external.c index 62cff75db53..e42b4255af3 100644 --- a/src/backend/access/external/external.c +++ b/src/backend/access/external/external.c @@ -34,6 +34,47 @@ static List *create_external_scan_uri_list(ExtTableEntry *ext, bool *ismasteronly); +/* + * parse_fdw_encoding_option + * + * Parse the value of an "encoding" FDW OPTIONS entry (whether on creation, + * during validation, or when reading back stored ftoptions) into a numeric + * encoding ID. Accepts a symbolic encoding name (e.g. "UTF8", "utf-8", "GBK") + * resolved via pg_char_to_encoding(), or a strictly numeric string (e.g. "6") + * validated via PG_VALID_ENCODING(). Anything else raises ERROR. + * + * Note: atoi() is intentionally avoided in the numeric fallback. atoi("UTF8") + * silently returns 0 (= SQL_ASCII), which is exactly the bug this helper + * exists to fix. strtol() with end-of-string and range checks is strict. + */ +int +parse_fdw_encoding_option(const char *value) +{ + int encoding; + char *endptr; + long n; + + if (value == NULL || *value == '\0') + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("encoding option must not be empty"))); + + encoding = pg_char_to_encoding(value); + if (encoding >= 0) + return encoding; + + errno = 0; + n = strtol(value, &endptr, 10); + if (endptr != value && *endptr == '\0' && errno == 0 && + n >= 0 && PG_VALID_ENCODING((int) n)) + return (int) n; + + ereport(ERROR, + (errcode(ERRCODE_FDW_INVALID_ATTRIBUTE_VALUE), + errmsg("\"%s\" is not a valid encoding name or code", value))); + return -1; /* unreachable, keeps compiler happy */ +} + void gfile_printf_then_putc_newline(const char *format,...) { @@ -277,7 +318,7 @@ GetExtFromForeignTableOptions(List *ftoptons, Oid relid) if (pg_strcasecmp(def->defname, "encoding") == 0) { - extentry->encoding = atoi(defGetString(def)); + extentry->encoding = parse_fdw_encoding_option(defGetString(def)); encoding_found = true; continue; } diff --git a/src/include/access/external.h b/src/include/access/external.h index 35933f54f75..453c3d59179 100644 --- a/src/include/access/external.h +++ b/src/include/access/external.h @@ -48,5 +48,10 @@ extern ExtTableEntry *GetExtFromForeignTableOptions(List *ftoptons, Oid relid); extern ExternalScanInfo *MakeExternalScanInfo(ExtTableEntry *extEntry); +/* + * Parse an "encoding" FDW OPTIONS value (symbolic name or numeric string) + * into a numeric encoding ID. ereports on invalid input. + */ +extern int parse_fdw_encoding_option(const char *value); #endif /* EXTERNAL_H */ From 12e93d1fc3a8be899abc5a25fd99462bb0c77de1 Mon Sep 17 00:00:00 2001 From: chenqiang Date: Sat, 9 May 2026 13:06:02 +0800 Subject: [PATCH 2/2] test: pad expected output headers to match psql separator widths The new tests added in the previous commit had column header lines without the trailing-space padding that psql's aligned output emits to match the separator. The pre-existing ext_special_uri header (' a | b') was also unintentionally stripped of its trailing space during the same edit. Pure whitespace fix. No behavior change. --- .../gp_exttable_fdw/output/gp_exttable_fdw.source | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source index 4b4903120e4..547b925258b 100644 --- a/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source +++ b/gpcontrib/gp_exttable_fdw/output/gp_exttable_fdw.source @@ -95,7 +95,7 @@ ftoptions (1 row) \a SELECT * FROM ext_special_uri ORDER BY a; - a | b + a | b ---+--- 1 | 1 2 | 2 @@ -114,7 +114,7 @@ OPTIONS (format_type 'c', delimiter ',', encoding '6'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_num'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- UTF8 (1 row) @@ -126,7 +126,7 @@ OPTIONS (format_type 'c', delimiter ',', encoding 'UTF8'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_utf8'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- UTF8 (1 row) @@ -138,7 +138,7 @@ OPTIONS (format_type 'c', delimiter ',', encoding 'utf-8'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_utf8_dash'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- UTF8 (1 row) @@ -150,7 +150,7 @@ OPTIONS (format_type 'c', delimiter ',', encoding 'GBK'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_gbk'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- GBK (1 row) @@ -163,7 +163,7 @@ OPTIONS (format_type 'c', delimiter ',', encoding '0'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_alter'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- SQL_ASCII (1 row) @@ -171,7 +171,7 @@ WHERE reloid = 'ext_enc_alter'::regclass; ALTER FOREIGN TABLE ext_enc_alter OPTIONS (SET encoding 'UTF8'); SELECT pg_encoding_to_char(encoding) FROM pg_exttable WHERE reloid = 'ext_enc_alter'::regclass; - pg_encoding_to_char + pg_encoding_to_char --------------------- UTF8 (1 row)