From 0520e6583d82c0c29358e7656cdf0a2f8e2b0b40 Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Sat, 2 May 2026 23:50:12 +0530 Subject: [PATCH 1/3] Fix ORCA fallback for unsafe targetlist functions --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 66 +++++++++++++++---- .../gpopt/translate/CTranslatorQueryToDXL.h | 6 +- 2 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index 99d87917b38..312edd7120b 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -20,12 +20,15 @@ extern "C" { #include "access/sysattr.h" #include "catalog/heap.h" #include "catalog/pg_class.h" +#include "catalog/pg_language.h" +#include "catalog/pg_proc.h" #include "nodes/makefuncs.h" #include "nodes/parsenodes.h" #include "nodes/plannodes.h" #include "optimizer/walkers.h" #include "utils/guc.h" #include "utils/rel.h" +#include "utils/syscache.h" } #include "gpos/base.h" @@ -213,8 +216,8 @@ CTranslatorQueryToDXL::CTranslatorQueryToDXL( // check if the query has any unsupported node types CheckUnsupportedNodeTypes(query); - // check if the query has SIRV functions in the targetlist without a FROM clause - CheckSirvFuncsWithoutFromClause(query); + // check if the query has SIRV functions in the targetlist + CheckSirvFuncsInTargetList(query); // first normalize the query m_query = @@ -256,6 +259,37 @@ CTranslatorQueryToDXL::QueryToDXLInstance(CMemoryPool *mp, ); } +static BOOL +HasUnsafeCoordinatorFunc(Oid funcid) +{ + HeapTuple proctup; + HeapTuple langtup; + Form_pg_proc procform; + Form_pg_language langform; + BOOL is_unsafe; + + proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(proctup)) + elog(ERROR, "cache lookup failed for function %u", funcid); + + procform = (Form_pg_proc) GETSTRUCT(proctup); + langtup = SearchSysCache1(LANGOID, ObjectIdGetDatum(procform->prolang)); + if (!HeapTupleIsValid(langtup)) + { + ReleaseSysCache(proctup); + elog(ERROR, "cache lookup failed for language %u", procform->prolang); + } + + langform = (Form_pg_language) GETSTRUCT(langtup); + is_unsafe = (strcmp(NameStr(langform->lanname), "internal") != 0 && + strcmp(NameStr(langform->lanname), "c") != 0); + + ReleaseSysCache(langtup); + ReleaseSysCache(proctup); + + return is_unsafe; +} + //--------------------------------------------------------------------------- // @function: // CTranslatorQueryToDXL::~CTranslatorQueryToDXL @@ -337,20 +371,23 @@ CTranslatorQueryToDXL::CheckUnsupportedNodeTypes(Query *query) //--------------------------------------------------------------------------- // @function: -// CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause +// CTranslatorQueryToDXL::CheckSirvFuncsInTargetList // // @doc: -// Check for SIRV functions in the target list without a FROM clause, and -// throw an exception when found +// Check for SIRV functions in the target list, and throw an exception +// when found. +// +// ORCA can place target list projections above a Motion and execute +// them on the coordinator. That is unsafe for SIRV functions because +// they may perform SPI, dispatch, or other volatile work that must not +// be moved to the coordinator in a distributed query. // //--------------------------------------------------------------------------- void -CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query) +CTranslatorQueryToDXL::CheckSirvFuncsInTargetList(Query *query) { - // if there is a FROM clause or if target list is empty, look no further - if ((nullptr != query->jointree && - 0 < gpdb::ListLength(query->jointree->fromlist)) || - NIL == query->targetList) + // if target list is empty, look no further + if (NIL == query->targetList) { return; } @@ -368,7 +405,11 @@ CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query) // CTranslatorQueryToDXL::HasSirvFunctions // // @doc: -// Check for SIRV functions in the tree rooted at the given node +// Check for SIRV functions in the tree rooted at the given node that +// might be unsafe to execute on the coordinator. ORCA already handles +// volatile built-in/internal functions like random(), but procedural +// or SQL-language functions can execute SPI and must not be moved above +// a Motion. // //--------------------------------------------------------------------------- BOOL @@ -385,7 +426,8 @@ CTranslatorQueryToDXL::HasSirvFunctions(Node *node) const { FuncExpr *func_expr = (FuncExpr *) lfirst(lc); if (CTranslatorUtils::IsSirvFunc(m_mp, m_md_accessor, - func_expr->funcid)) + func_expr->funcid) && + HasUnsafeCoordinatorFunc(func_expr->funcid)) { has_sirv = true; break; diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h b/src/include/gpopt/translate/CTranslatorQueryToDXL.h index c74892e5df2..cc324c90ef5 100644 --- a/src/include/gpopt/translate/CTranslatorQueryToDXL.h +++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h @@ -150,9 +150,9 @@ class CTranslatorQueryToDXL // walker to check if SUBLINK node is present in the security quals static BOOL CheckSublinkInSecurityQuals(Node *node, void *context); - // check for SIRV functions in the targetlist without a FROM clause and - // throw an exception when found - void CheckSirvFuncsWithoutFromClause(Query *query); + // check for SIRV functions in the targetlist and throw an exception + // when found + void CheckSirvFuncsInTargetList(Query *query); // check for SIRV functions in the tree rooted at the given node BOOL HasSirvFunctions(Node *node) const; From afab32dd79d2117b11f634731153b87a696c3f4e Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Sun, 3 May 2026 19:06:27 +0530 Subject: [PATCH 2/3] gpopt: narrow sirv fallback for targetlist functions --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 110 ++++++++++++++---- .../gpopt/translate/CTranslatorQueryToDXL.h | 13 ++- 2 files changed, 99 insertions(+), 24 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index 312edd7120b..d6c0f5fcff5 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -216,8 +216,18 @@ CTranslatorQueryToDXL::CTranslatorQueryToDXL( // check if the query has any unsupported node types CheckUnsupportedNodeTypes(query); - // check if the query has SIRV functions in the targetlist - CheckSirvFuncsInTargetList(query); + // Preserve the historical fallback for SIRV functions in targetlists + // without a FROM clause for statement-level and derived-table queries. + // Scalar subqueries use a copied outer var mapping and should retain + // ORCA's existing handling for expressions such as nested random(). + if (nullptr == var_colid_mapping) + { + CheckSirvFuncsWithoutFromClause(query); + } + + // Add a separate guard for targetlist functions that are unsafe to + // execute on the coordinator. + CheckUnsafeSirvFuncsInTargetList(query); // first normalize the query m_query = @@ -371,20 +381,76 @@ CTranslatorQueryToDXL::CheckUnsupportedNodeTypes(Query *query) //--------------------------------------------------------------------------- // @function: -// CTranslatorQueryToDXL::CheckSirvFuncsInTargetList +// CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause +// +// @doc: +// Check for SIRV functions in the target list without a FROM clause, and +// throw an exception when found +// +//--------------------------------------------------------------------------- +void +CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query) +{ + ListCell *lc = nullptr; + + // if there is a FROM clause or if target list is empty, look no further + if (!((nullptr != query->jointree && + 0 < gpdb::ListLength(query->jointree->fromlist)) || + NIL == query->targetList)) + { + // see if we have SIRV functions in the immediate target list + if (HasSirvFunctions((Node *) query->targetList, + false /*descend_into_subqueries*/, + false /*check_coordinator_safety*/)) + { + GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, + GPOS_WSZ_LIT("SIRV functions")); + } + } + + // Derived tables should keep the legacy fallback behavior, but scalar + // subqueries in targetlist expressions are handled separately and must + // not force an outer-query fallback. + ForEach(lc, query->rtable) + { + RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc); + + if (RTE_SUBQUERY == rte->rtekind && nullptr != rte->subquery) + { + CheckSirvFuncsWithoutFromClause(rte->subquery); + } + } + + ForEach(lc, query->cteList) + { + CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); + + if (nullptr != cte->ctequery) + { + CheckSirvFuncsWithoutFromClause((Query *) cte->ctequery); + } + } +} + +//--------------------------------------------------------------------------- +// @function: +// CTranslatorQueryToDXL::CheckUnsafeSirvFuncsInTargetList // // @doc: -// Check for SIRV functions in the target list, and throw an exception -// when found. +// Check for SIRV functions in the target list that may be unsafe to +// execute on the coordinator and throw an exception when found. // // ORCA can place target list projections above a Motion and execute -// them on the coordinator. That is unsafe for SIRV functions because -// they may perform SPI, dispatch, or other volatile work that must not -// be moved to the coordinator in a distributed query. +// them on the coordinator. That is unsafe for procedural or SQL-language +// SIRV functions because they may perform SPI, dispatch, or other +// volatile work that must not be moved to the coordinator in a +// distributed query. Only inspect the immediate target list expression +// tree here; functions inside nested subqueries are planned within their +// own query context and should not force a fallback for the outer query. // //--------------------------------------------------------------------------- void -CTranslatorQueryToDXL::CheckSirvFuncsInTargetList(Query *query) +CTranslatorQueryToDXL::CheckUnsafeSirvFuncsInTargetList(Query *query) { // if target list is empty, look no further if (NIL == query->targetList) @@ -392,8 +458,10 @@ CTranslatorQueryToDXL::CheckSirvFuncsInTargetList(Query *query) return; } - // see if we have SIRV functions in the target list - if (HasSirvFunctions((Node *) query->targetList)) + // see if we have unsafe coordinator functions in the target list + if (HasSirvFunctions((Node *) query->targetList, + false /*descend_into_subqueries*/, + true /*check_coordinator_safety*/)) { GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("SIRV functions")); @@ -405,20 +473,18 @@ CTranslatorQueryToDXL::CheckSirvFuncsInTargetList(Query *query) // CTranslatorQueryToDXL::HasSirvFunctions // // @doc: -// Check for SIRV functions in the tree rooted at the given node that -// might be unsafe to execute on the coordinator. ORCA already handles -// volatile built-in/internal functions like random(), but procedural -// or SQL-language functions can execute SPI and must not be moved above -// a Motion. +// Check for SIRV functions in the tree rooted at the given node // //--------------------------------------------------------------------------- BOOL -CTranslatorQueryToDXL::HasSirvFunctions(Node *node) const +CTranslatorQueryToDXL::HasSirvFunctions(Node *node, + bool descend_into_subqueries, + bool check_coordinator_safety) const { GPOS_ASSERT(nullptr != node); List *function_list = gpdb::ExtractNodesExpression( - node, T_FuncExpr, true /*descendIntoSubqueries*/); + node, T_FuncExpr, descend_into_subqueries); ListCell *lc = nullptr; BOOL has_sirv = false; @@ -427,7 +493,8 @@ CTranslatorQueryToDXL::HasSirvFunctions(Node *node) const FuncExpr *func_expr = (FuncExpr *) lfirst(lc); if (CTranslatorUtils::IsSirvFunc(m_mp, m_md_accessor, func_expr->funcid) && - HasUnsafeCoordinatorFunc(func_expr->funcid)) + (!check_coordinator_safety || + HasUnsafeCoordinatorFunc(func_expr->funcid))) { has_sirv = true; break; @@ -3949,7 +4016,10 @@ CTranslatorQueryToDXL::TranslateTVFToDXL(const RangeTblEntry *rte, FuncExpr *funcexpr = (FuncExpr *) rtfunc->funcexpr; // check if arguments contain SIRV functions - if (NIL != funcexpr->args && HasSirvFunctions((Node *) funcexpr->args)) + if (NIL != funcexpr->args && + HasSirvFunctions((Node *) funcexpr->args, + true /*descend_into_subqueries*/, + false /*check_coordinator_safety*/)) { GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("SIRV functions")); diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h b/src/include/gpopt/translate/CTranslatorQueryToDXL.h index cc324c90ef5..7753c946369 100644 --- a/src/include/gpopt/translate/CTranslatorQueryToDXL.h +++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h @@ -150,12 +150,17 @@ class CTranslatorQueryToDXL // walker to check if SUBLINK node is present in the security quals static BOOL CheckSublinkInSecurityQuals(Node *node, void *context); - // check for SIRV functions in the targetlist and throw an exception - // when found - void CheckSirvFuncsInTargetList(Query *query); + // check for SIRV functions in the targetlist without a FROM clause and + // throw an exception when found + void CheckSirvFuncsWithoutFromClause(Query *query); + + // check for targetlist SIRV functions that may be unsafe to execute on the + // coordinator, and throw an exception when found + void CheckUnsafeSirvFuncsInTargetList(Query *query); // check for SIRV functions in the tree rooted at the given node - BOOL HasSirvFunctions(Node *node) const; + BOOL HasSirvFunctions(Node *node, bool descend_into_subqueries, + bool check_coordinator_safety) const; // translate FromExpr (in the GPDB query) into a CDXLLogicalJoin or CDXLLogicalGet CDXLNode *TranslateFromExprToDXL(FromExpr *from_expr); From e9663ec08d26c7b66df87ad88337f78969ba10a0 Mon Sep 17 00:00:00 2001 From: aviralgarg05 Date: Sat, 9 May 2026 12:37:07 +0530 Subject: [PATCH 3/3] gpopt: fallback for row-dependent sirv targetlists --- .../gpopt/translate/CTranslatorQueryToDXL.cpp | 127 ++++++++++-------- .../gpopt/translate/CTranslatorQueryToDXL.h | 13 +- 2 files changed, 76 insertions(+), 64 deletions(-) diff --git a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp index d6c0f5fcff5..743b5f2ebfb 100644 --- a/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp +++ b/src/backend/gpopt/translate/CTranslatorQueryToDXL.cpp @@ -225,9 +225,10 @@ CTranslatorQueryToDXL::CTranslatorQueryToDXL( CheckSirvFuncsWithoutFromClause(query); } - // Add a separate guard for targetlist functions that are unsafe to - // execute on the coordinator. - CheckUnsafeSirvFuncsInTargetList(query); + // Add a separate guard for targetlist expressions that mix SIRV + // functions with current-level Vars from the FROM clause. Those + // expressions are row-dependent and must stay on the QEs. + CheckSirvFuncsWithCurrentLevelVarsInTargetList(query); // first normalize the query m_query = @@ -269,37 +270,6 @@ CTranslatorQueryToDXL::QueryToDXLInstance(CMemoryPool *mp, ); } -static BOOL -HasUnsafeCoordinatorFunc(Oid funcid) -{ - HeapTuple proctup; - HeapTuple langtup; - Form_pg_proc procform; - Form_pg_language langform; - BOOL is_unsafe; - - proctup = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); - if (!HeapTupleIsValid(proctup)) - elog(ERROR, "cache lookup failed for function %u", funcid); - - procform = (Form_pg_proc) GETSTRUCT(proctup); - langtup = SearchSysCache1(LANGOID, ObjectIdGetDatum(procform->prolang)); - if (!HeapTupleIsValid(langtup)) - { - ReleaseSysCache(proctup); - elog(ERROR, "cache lookup failed for language %u", procform->prolang); - } - - langform = (Form_pg_language) GETSTRUCT(langtup); - is_unsafe = (strcmp(NameStr(langform->lanname), "internal") != 0 && - strcmp(NameStr(langform->lanname), "c") != 0); - - ReleaseSysCache(langtup); - ReleaseSysCache(proctup); - - return is_unsafe; -} - //--------------------------------------------------------------------------- // @function: // CTranslatorQueryToDXL::~CTranslatorQueryToDXL @@ -400,8 +370,7 @@ CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query) { // see if we have SIRV functions in the immediate target list if (HasSirvFunctions((Node *) query->targetList, - false /*descend_into_subqueries*/, - false /*check_coordinator_safety*/)) + false /*descend_into_subqueries*/)) { GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("SIRV functions")); @@ -434,37 +403,46 @@ CTranslatorQueryToDXL::CheckSirvFuncsWithoutFromClause(Query *query) //--------------------------------------------------------------------------- // @function: -// CTranslatorQueryToDXL::CheckUnsafeSirvFuncsInTargetList +// CTranslatorQueryToDXL::CheckSirvFuncsWithCurrentLevelVarsInTargetList // // @doc: -// Check for SIRV functions in the target list that may be unsafe to -// execute on the coordinator and throw an exception when found. +// Check for target list expressions that contain both SIRV functions and +// current-level Vars from the FROM clause, and throw an exception when +// found. // // ORCA can place target list projections above a Motion and execute -// them on the coordinator. That is unsafe for procedural or SQL-language -// SIRV functions because they may perform SPI, dispatch, or other -// volatile work that must not be moved to the coordinator in a -// distributed query. Only inspect the immediate target list expression -// tree here; functions inside nested subqueries are planned within their -// own query context and should not force a fallback for the outer query. +// them on the coordinator. If a SIRV expression also depends on columns +// produced by the current query's FROM clause, it is row-dependent and +// must stay on the segment workers alongside those rows. Only inspect +// the immediate target list expression tree here; functions or Vars +// inside nested subqueries are planned within their own query context +// and should not force a fallback for the outer query. // //--------------------------------------------------------------------------- void -CTranslatorQueryToDXL::CheckUnsafeSirvFuncsInTargetList(Query *query) +CTranslatorQueryToDXL::CheckSirvFuncsWithCurrentLevelVarsInTargetList( + Query *query) { + ListCell *lc = nullptr; + // if target list is empty, look no further if (NIL == query->targetList) { return; } - // see if we have unsafe coordinator functions in the target list - if (HasSirvFunctions((Node *) query->targetList, - false /*descend_into_subqueries*/, - true /*check_coordinator_safety*/)) + ForEach(lc, query->targetList) { - GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, - GPOS_WSZ_LIT("SIRV functions")); + TargetEntry *target_entry = (TargetEntry *) lfirst(lc); + + if (HasSirvFunctions((Node *) target_entry->expr, + false /*descend_into_subqueries*/) && + HasCurrentLevelVars((Node *) target_entry->expr, + false /*descend_into_subqueries*/)) + { + GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, + GPOS_WSZ_LIT("SIRV functions")); + } } } @@ -478,8 +456,7 @@ CTranslatorQueryToDXL::CheckUnsafeSirvFuncsInTargetList(Query *query) //--------------------------------------------------------------------------- BOOL CTranslatorQueryToDXL::HasSirvFunctions(Node *node, - bool descend_into_subqueries, - bool check_coordinator_safety) const + bool descend_into_subqueries) const { GPOS_ASSERT(nullptr != node); @@ -492,9 +469,7 @@ CTranslatorQueryToDXL::HasSirvFunctions(Node *node, { FuncExpr *func_expr = (FuncExpr *) lfirst(lc); if (CTranslatorUtils::IsSirvFunc(m_mp, m_md_accessor, - func_expr->funcid) && - (!check_coordinator_safety || - HasUnsafeCoordinatorFunc(func_expr->funcid))) + func_expr->funcid)) { has_sirv = true; break; @@ -505,6 +480,41 @@ CTranslatorQueryToDXL::HasSirvFunctions(Node *node, return has_sirv; } +//--------------------------------------------------------------------------- +// @function: +// CTranslatorQueryToDXL::HasCurrentLevelVars +// +// @doc: +// Check for Vars from the current query level in the tree rooted at the +// given node +// +//--------------------------------------------------------------------------- +BOOL +CTranslatorQueryToDXL::HasCurrentLevelVars(Node *node, + bool descend_into_subqueries) const +{ + GPOS_ASSERT(nullptr != node); + + List *var_list = gpdb::ExtractNodesExpression(node, T_Var, + descend_into_subqueries); + ListCell *lc = nullptr; + + BOOL has_current_level_var = false; + ForEach(lc, var_list) + { + Var *var = (Var *) lfirst(lc); + + if (0 == var->varlevelsup) + { + has_current_level_var = true; + break; + } + } + gpdb::ListFree(var_list); + + return has_current_level_var; +} + //--------------------------------------------------------------------------- // @function: // CTranslatorQueryToDXL::CheckSupportedCmdType @@ -4018,8 +4028,7 @@ CTranslatorQueryToDXL::TranslateTVFToDXL(const RangeTblEntry *rte, // check if arguments contain SIRV functions if (NIL != funcexpr->args && HasSirvFunctions((Node *) funcexpr->args, - true /*descend_into_subqueries*/, - false /*check_coordinator_safety*/)) + true /*descend_into_subqueries*/)) { GPOS_RAISE(gpdxl::ExmaDXL, gpdxl::ExmiQuery2DXLUnsupportedFeature, GPOS_WSZ_LIT("SIRV functions")); diff --git a/src/include/gpopt/translate/CTranslatorQueryToDXL.h b/src/include/gpopt/translate/CTranslatorQueryToDXL.h index 7753c946369..5bce3501f71 100644 --- a/src/include/gpopt/translate/CTranslatorQueryToDXL.h +++ b/src/include/gpopt/translate/CTranslatorQueryToDXL.h @@ -154,13 +154,16 @@ class CTranslatorQueryToDXL // throw an exception when found void CheckSirvFuncsWithoutFromClause(Query *query); - // check for targetlist SIRV functions that may be unsafe to execute on the - // coordinator, and throw an exception when found - void CheckUnsafeSirvFuncsInTargetList(Query *query); + // check for targetlist expressions that contain both SIRV functions and + // current-level Vars from the query's FROM clause, and throw an exception + // when found + void CheckSirvFuncsWithCurrentLevelVarsInTargetList(Query *query); // check for SIRV functions in the tree rooted at the given node - BOOL HasSirvFunctions(Node *node, bool descend_into_subqueries, - bool check_coordinator_safety) const; + BOOL HasSirvFunctions(Node *node, bool descend_into_subqueries) const; + + // check for Vars from the current query level in the given tree + BOOL HasCurrentLevelVars(Node *node, bool descend_into_subqueries) const; // translate FromExpr (in the GPDB query) into a CDXLLogicalJoin or CDXLLogicalGet CDXLNode *TranslateFromExprToDXL(FromExpr *from_expr);