diff --git a/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php b/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php
index f291064e..c583b8db 100644
--- a/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php
+++ b/packages/mysql-on-sqlite/src/mysql/class-wp-mysql-parser.php
@@ -29,7 +29,7 @@ class WP_MySQL_Parser extends WP_Parser {
 	 * @return bool Whether a query was successfully parsed.
 	 */
 	public function next_query(): bool {
-		if ( $this->position >= count( $this->tokens ) ) {
+		if ( $this->position >= $this->token_count ) {
 			return false;
 		}
 		$this->current_ast = $this->parse();
diff --git a/packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php b/packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php
index 8c17b458..754ee6c9 100644
--- a/packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php
+++ b/packages/mysql-on-sqlite/src/parser/class-wp-parser-grammar.php
@@ -29,10 +29,58 @@ class WP_Parser_Grammar {
 	public $rules;
 	public $rule_names;
 	public $fragment_ids;
-	public $lookahead_is_match_possible = array();
+
+	/**
+	 * Per-rule branch selector keyed by the next token id.
+	 *
+	 * When set, `$branches_for_token[$rule_id][$token_id]` is the ordered list
+	 * of branches (symbol sequences copied from `$rules[$rule_id]`) that can
+	 * match when the current token has that id. Nullable branches are included.
+	 *
+	 * If an entry does not exist for the current token, `$nullable_branches`
+	 * is consulted: a nullable rule matches empty, and any other rule cannot
+	 * match, so the parser returns immediately either way.
+	 *
+	 * Rules whose branches have no FIRST tokens do not appear in the map;
+	 * for those only the `$nullable_branches` check applies.
+	 *
+	 * @var array<int,array<int,int[][]>>
+	 */
+	public $branches_for_token = array();
+
+	/**
+	 * Per-rule flag set when the rule has at least one nullable branch.
+	 *
+	 * @var array<int,true>
+	 */
+	public $nullable_branches = array();
+
+	/**
+	 * Per-rule flag indicating every (rule, token) selector entry points
+	 * to exactly one branch. The parser uses this to skip the outer
+	 * foreach when a single candidate is the only possibility.
+	 *
+	 * @var array<int,true>
+	 */
+	public $single_candidate_rules = array();
+
 	public $lowest_non_terminal_id;
 	public $highest_terminal_id;
 
+	/**
+	 * Cached id of the grammar's start rule, populated lazily on first parse.
+	 *
+	 * @var int|null
+	 */
+	public $start_rule_id;
+
+	/**
+	 * Cached id of the selectStatement rule, populated lazily by the first parser constructed.
+	 *
+	 * @var int|null
+	 */
+	public $select_statement_rule_id;
+
 	public function __construct( array $rules ) {
 		$this->inflate( $rules );
 	}
@@ -56,8 +104,8 @@ private function inflate( $grammar ) {
 		$this->highest_terminal_id    = $this->lowest_non_terminal_id - 1;
 
 		foreach ( $grammar['rules_names'] as $rule_index => $rule_name ) {
-			$this->rule_names[ $rule_index + $grammar['rules_offset'] ] = $rule_name;
-			$this->rules[ $rule_index + $grammar['rules_offset'] ]      = array();
+			$rule_id                      = $rule_index + $grammar['rules_offset'];
+			$this->rule_names[ $rule_id ] = $rule_name;
 
 			/**
 			 * Treat all intermediate rules as fragments to inline before returning
@@ -75,7 +123,7 @@ private function inflate( $grammar ) {
 			 * They are prefixed with a "%" to be distinguished from the original rules.
 			 */
 			if ( '%' === $rule_name[0] ) {
-				$this->fragment_ids[ $rule_index + $grammar['rules_offset'] ] = true;
+				$this->fragment_ids[ $rule_id ] = true;
 			}
 		}
 
@@ -85,55 +133,284 @@ private function inflate( $grammar ) {
 			$this->rules[ $rule_id ] = $branches;
 		}
 
-		/**
-		 * Compute a rule => [token => true] lookup table for each rule
-		 * that starts with a terminal OR with another rule that already
-		 * has a lookahead mapping.
-		 *
-		 * This is similar to left-factoring the grammar, even if not quite
-		 * the same.
-		 *
-		 * This enables us to quickly bail out from checking branches that
-		 * cannot possibly match the current token. This increased the parser
-		 * speed by a whopping 80%!
-		 *
-		 * @TODO: Explore these possible next steps:
-		 *
-		 * * Compute a rule => [token => branch[]] list lookup table and only
-		 *   process the branches that have a chance of matching the current token.
-		 * * Actually left-factor the grammar as much as possible. This, however,
-		 *   could inflate the serialized grammar size.
-		 */
-		// 5 iterations seem to give us all the speed gains we can get from this.
-		for ( $i = 0; $i < 5; $i++ ) {
-			foreach ( $grammar['grammar'] as $rule_index => $branches ) {
-				$rule_id = $rule_index + $grammar['rules_offset'];
-				if ( isset( $this->lookahead_is_match_possible[ $rule_id ] ) ) {
+		$this->inline_single_branch_fragments();
+		$this->strip_epsilon_markers();
+		$this->build_branch_selectors();
+	}
+
+	/**
+	 * Remove explicit `EMPTY_RULE_ID` markers from branches.
+	 *
+	 * The epsilon marker is a zero-width, always-matching symbol used in the
+	 * grammar to express optional productions. At parse time it would still
+	 * be walked and "continued" over for no effect, so stripping it ahead of
+	 * time removes a per-symbol branch in the hot loop.
+	 *
+	 * A pure-epsilon branch (`[EMPTY_RULE_ID]`) becomes an empty branch (`[]`)
+	 * which the parser already handles: the inner symbol loop does nothing and
+	 * the rule returns a successful empty match.
+	 */
+	private function strip_epsilon_markers() {
+		foreach ( $this->rules as $rule_id => $branches ) {
+			foreach ( $branches as $i => $branch ) {
+				if ( in_array( self::EMPTY_RULE_ID, $branch, true ) ) {
+					$this->rules[ $rule_id ][ $i ] = array_values(
+						array_filter(
+							$branch,
+							static function ( $s ) {
+								return self::EMPTY_RULE_ID !== $s;
+							}
+						)
+					);
+				}
+			}
+		}
+	}
+
+	/**
+	 * Inline single-branch fragment rules into their call sites.
+	 *
+	 * The grammar contains many single-branch fragment rules that exist only
+	 * to factor shared sub-sequences out of larger productions. At runtime
+	 * the parser would descend into each such fragment via a recursive call
+	 * just to walk the same symbol sequence and splice the results back into
+	 * the parent. Expanding them in-place at build time eliminates that call
+	 * chain without changing the resulting AST because fragment children are
+	 * already flattened into the parent node.
+	 *
+	 * Fragments with two or more alternatives (e.g., `%EOF_zero_or_one`) are
+	 * left intact because they represent real choices that must be evaluated
+	 * against the current token.
+	 */
+	private function inline_single_branch_fragments() {
+		$rules        = $this->rules;
+		$fragment_ids = $this->fragment_ids ?? array();
+		$low_nt       = $this->lowest_non_terminal_id;
+
+		// Precompute the set of single-branch fragments that are candidates
+		// for inlining.
+		$inlinable = array();
+		foreach ( $fragment_ids as $rule_id => $_ ) {
+			if ( isset( $rules[ $rule_id ] ) && 1 === count( $rules[ $rule_id ] ) ) {
+				$inlinable[ $rule_id ] = true;
+			}
+		}
+
+		// Depth-first expansion memoized per rule, with cycle detection.
+		$expanded      = array();
+		$visiting      = array();
+		$expand_branch = function ( array $branch ) use ( &$expand_branch, &$expanded, &$visiting, $rules, $low_nt, $inlinable ) {
+			$out = array();
+			foreach ( $branch as $sym ) {
+				if ( $sym < $low_nt ) {
+					$out[] = $sym;
 					continue;
 				}
-				$rule_lookup                                   = array();
-				$first_symbol_can_be_expanded_to_all_terminals = true;
+				if ( ! isset( $inlinable[ $sym ] ) ) {
+					$out[] = $sym;
+					continue;
+				}
+				if ( isset( $visiting[ $sym ] ) ) {
+					// Cycle: leave the reference in place.
+					$out[] = $sym;
+					continue;
+				}
+				if ( ! isset( $expanded[ $sym ] ) ) {
+					$visiting[ $sym ] = true;
+					$expanded[ $sym ] = $expand_branch( $rules[ $sym ][0] );
+					unset( $visiting[ $sym ] );
+				}
+				foreach ( $expanded[ $sym ] as $s ) {
+					$out[] = $s;
+				}
+			}
+			return $out;
+		};
+
+		// Rewrite every rule's branches with fragments inlined.
+		foreach ( $this->rules as $rule_id => $branches ) {
+			$new_branches = array();
+			foreach ( $branches as $branch ) {
+				$new_branches[] = $expand_branch( $branch );
+			}
+			$this->rules[ $rule_id ] = $new_branches;
+		}
+	}
+
+	/**
+	 * Compute FIRST and NULLABLE sets for every non-terminal, then denormalize
+	 * them into a per-rule map of `token_id => branch_index[]` so the parser
+	 * can jump straight to the branches that can possibly match the current
+	 * token.
+	 *
+	 * This replaces the previous coarse "can any branch match this token?"
+	 * lookahead. On the MySQL corpus the fine-grained selector skips ~60%
+	 * of the branch attempts that the parser used to try and fail.
+	 */
+	private function build_branch_selectors() {
+		$rules      = $this->rules;
+		$low_nt     = $this->lowest_non_terminal_id;
+		$empty_rule = self::EMPTY_RULE_ID;
+		$rule_ids   = array_keys( $rules );
+		$nullable   = array();
+		$first_sets = array();
+
+		foreach ( $rule_ids as $rule_id ) {
+			$nullable[ $rule_id ]   = false;
+			$first_sets[ $rule_id ] = array();
+		}
+
+		// Iterate to fixpoint. FIRST and NULLABLE sets only grow, so this terminates.
+		do {
+			$changed = false;
+			foreach ( $rule_ids as $rule_id ) {
+				$branches = $rules[ $rule_id ];
 				foreach ( $branches as $branch ) {
-					$terminals                   = false;
-					$branch_starts_with_terminal = $branch[0] < $this->lowest_non_terminal_id;
-					if ( $branch_starts_with_terminal ) {
-						$terminals = array( $branch[0] );
-					} elseif ( isset( $this->lookahead_is_match_possible[ $branch[0] ] ) ) {
-						$terminals = array_keys( $this->lookahead_is_match_possible[ $branch[0] ] );
+					$branch_nullable = true;
+					foreach ( $branch as $symbol ) {
+						if ( $empty_rule === $symbol ) {
+							// ε: contributes nothing to FIRST, stays nullable.
+							continue;
+						}
+						if ( $symbol < $low_nt ) {
+							// Terminal.
+							if ( ! isset( $first_sets[ $rule_id ][ $symbol ] ) ) {
+								$first_sets[ $rule_id ][ $symbol ] = true;
+								$changed                           = true;
+							}
+							$branch_nullable = false;
+							break;
+						}
+						// Non-terminal.
+						foreach ( $first_sets[ $symbol ] as $tid => $_ ) {
+							if ( ! isset( $first_sets[ $rule_id ][ $tid ] ) ) {
+								$first_sets[ $rule_id ][ $tid ] = true;
+								$changed                        = true;
+							}
+						}
+						if ( ! $nullable[ $symbol ] ) {
+							$branch_nullable = false;
+							break;
+						}
+					}
+					if ( $branch_nullable && ! $nullable[ $rule_id ] ) {
+						$nullable[ $rule_id ] = true;
+						$changed              = true;
 					}
+				}
+			}
+		} while ( $changed );
 
-					if ( false === $terminals ) {
-						$first_symbol_can_be_expanded_to_all_terminals = false;
+		// Build per-(rule, token) branch indices.
+		foreach ( $rule_ids as $rule_id ) {
+			$branches            = $rules[ $rule_id ];
+			$selector            = array();
+			$nullable_branch_ids = array();
+			foreach ( $branches as $idx => $branch ) {
+				$branch_first    = array();
+				$branch_nullable = true;
+				foreach ( $branch as $symbol ) {
+					if ( $empty_rule === $symbol ) {
+						continue;
+					}
+					if ( $symbol < $low_nt ) {
+						$branch_first[ $symbol ] = true;
+						$branch_nullable         = false;
 						break;
 					}
-					foreach ( $terminals as $terminal ) {
-						$rule_lookup[ $terminal ] = true;
+					foreach ( $first_sets[ $symbol ] as $tid => $_ ) {
+						$branch_first[ $tid ] = true;
+					}
+					if ( ! $nullable[ $symbol ] ) {
+						$branch_nullable = false;
+						break;
 					}
 				}
-				if ( $first_symbol_can_be_expanded_to_all_terminals ) {
-					$this->lookahead_is_match_possible[ $rule_id ] = $rule_lookup;
+				foreach ( $branch_first as $tid => $_ ) {
+					$selector[ $tid ][] = $idx;
+				}
+				if ( $branch_nullable ) {
+					$nullable_branch_ids[] = $idx;
 				}
 			}
+
+			// Nullable branches also match when the current token is not in
+			// any branch's FIRST set. Fold them into every populated entry
+			// so the runtime lookup is a single array access.
+			if ( $nullable_branch_ids ) {
+				$merged = array();
+				foreach ( $selector as $tid => $idx_list ) {
+					$merged[ $tid ] = self::merge_sorted( $idx_list, $nullable_branch_ids );
+				}
+				$selector                            = $merged;
+				$this->nullable_branches[ $rule_id ] = true;
+			}
+			if ( $selector ) {
+				// Expand branch indexes to the branch symbol sequences so
+				// the parser can foreach candidate branches without an
+				// extra $branches[$idx] indirection on every attempt. Many
+				// tokens inside the same rule end up pointing to the same
+				// branch-id list, so deduplicate by signature and let
+				// copy-on-write share one sequences array across all of
+				// them. Without this the nested table would be ~40 MB; with
+				// it, ~1 MB.
+				$by_signature          = array();
+				$all_single_candidates = true;
+				foreach ( $selector as $tid => $idx_list ) {
+					if ( 1 !== count( $idx_list ) ) {
+						$all_single_candidates = false;
+					}
+					$sig = implode( ',', $idx_list );
+					if ( isset( $by_signature[ $sig ] ) ) {
+						$selector[ $tid ] = $by_signature[ $sig ];
+					} else {
+						$seqs = array();
+						foreach ( $idx_list as $idx ) {
+							$seqs[] = $branches[ $idx ];
+						}
+						$by_signature[ $sig ] = $seqs;
+						$selector[ $tid ]     = $seqs;
+					}
+				}
+				$this->branches_for_token[ $rule_id ] = $selector;
+				if ( $all_single_candidates ) {
+					$this->single_candidate_rules[ $rule_id ] = true;
+				}
+			}
+		}
+	}
+
+	/**
+	 * Merge two ascending int arrays into one ascending int array without
+	 * duplicates. Preserves original branch order as required by the parser.
+	 *
+	 * @param int[] $a
+	 * @param int[] $b
+	 * @return int[]
+	 */
+	private static function merge_sorted( array $a, array $b ): array {
+		$i   = 0;
+		$j   = 0;
+		$na  = count( $a );
+		$nb  = count( $b );
+		$out = array();
+		while ( $i < $na && $j < $nb ) {
+			if ( $a[ $i ] < $b[ $j ] ) {
+				$out[] = $a[ $i++ ];
+			} elseif ( $a[ $i ] > $b[ $j ] ) {
+				$out[] = $b[ $j++ ];
+			} else {
+				$out[] = $a[ $i ];
+				++$i;
+				++$j;
+			}
+		}
+		while ( $i < $na ) {
+			$out[] = $a[ $i++ ];
+		}
+		while ( $j < $nb ) {
+			$out[] = $b[ $j++ ];
 		}
+		return $out;
 	}
 }
diff --git a/packages/mysql-on-sqlite/src/parser/class-wp-parser-node.php b/packages/mysql-on-sqlite/src/parser/class-wp-parser-node.php
index e2d67018..70fadfd2 100644
--- a/packages/mysql-on-sqlite/src/parser/class-wp-parser-node.php
+++ b/packages/mysql-on-sqlite/src/parser/class-wp-parser-node.php
@@ -9,23 +9,36 @@
  * In this way, a parser node constitutes a recursive structure that represents
  * a parse (sub)tree at each level of the full grammar tree.
  */
-class WP_Parser_Node {
+final class WP_Parser_Node {
 	/**
 	 * @TODO: Review and document these properties and their visibility.
 	 */
 	public $rule_id;
 	public $rule_name;
-	private $children = array();
+	private $children;
 
-	public function __construct( $rule_id, $rule_name ) {
+	public function __construct( $rule_id, $rule_name, array $children = array() ) {
 		$this->rule_id   = $rule_id;
 		$this->rule_name = $rule_name;
+		$this->children  = $children;
 	}
 
 	public function append_child( $node ) {
 		$this->children[] = $node;
 	}
 
+	/**
+	 * Return the children array by reference for efficient fragment inlining.
+	 *
+	 * Returning a reference lets the parser iterate children without copying
+	 * the array. The returned reference must not be mutated by callers.
+	 *
+	 * @return array<WP_Parser_Node|WP_Parser_Token>
+	 */
+	public function &get_children_ref(): array {
+		return $this->children;
+	}
+
 	/**
 	 * Flatten the matched rule fragments as if their children were direct
 	 * descendants of the current rule.
diff --git a/packages/mysql-on-sqlite/src/parser/class-wp-parser.php b/packages/mysql-on-sqlite/src/parser/class-wp-parser.php
index 4436892f..b9c2ba8b 100644
--- a/packages/mysql-on-sqlite/src/parser/class-wp-parser.php
+++ b/packages/mysql-on-sqlite/src/parser/class-wp-parser.php
@@ -11,85 +11,176 @@
 class WP_Parser {
 	protected $grammar;
 	protected $tokens;
+	protected $token_count;
 	protected $position;
 
+	// Grammar data cached as instance fields so the hot path avoids an extra
+	// property hop via $this->grammar on every recursive call.
+	private $rule_names;
+	private $fragment_ids;
+	private $branches_for_token;
+	private $nullable_branches;
+	private $highest_terminal_id;
+	private $select_statement_rule_id;
+	private $single_candidate_rules;
+
 	public function __construct( WP_Parser_Grammar $grammar, array $tokens ) {
-		$this->grammar  = $grammar;
-		$this->tokens   = $tokens;
-		$this->position = 0;
+		$this->grammar     = $grammar;
+		$this->token_count = count( $tokens );
+		// Append an end-of-input sentinel token whose id is EMPTY_RULE_ID
+		// (0). The hot path can then read $tokens[$pos]->id unconditionally
+		// when $pos is the current cursor, because the sentinel naturally
+		// fails to match any real grammar terminal while feeding the
+		// nullable-fallback branch of the selector check.
+		$tokens[]                     = new WP_Parser_Token( WP_Parser_Grammar::EMPTY_RULE_ID, 0, 0, '' );
+		$this->tokens                 = $tokens;
+		$this->position               = 0;
+		$this->rule_names             = $grammar->rule_names;
+		$this->fragment_ids           = $grammar->fragment_ids ?? array();
+		$this->branches_for_token     = $grammar->branches_for_token;
+		$this->nullable_branches      = $grammar->nullable_branches;
+		$this->highest_terminal_id    = $grammar->highest_terminal_id;
+		$this->single_candidate_rules = $grammar->single_candidate_rules ?? array();
+
+		// The INTO negative-lookahead only fires for selectStatement. Cache
+		// the rule id so the per-call check is an int compare instead of a
+		// string compare.
+		if ( null === $grammar->select_statement_rule_id ) {
+			$grammar->select_statement_rule_id = $grammar->get_rule_id( 'selectStatement' );
+		}
+		$this->select_statement_rule_id = $grammar->select_statement_rule_id;
 	}
 
 	public function parse() {
 		// @TODO: Make the starting rule lookup non-grammar-specific.
-		$query_rule_id = $this->grammar->get_rule_id( 'query' );
-		$ast           = $this->parse_recursive( $query_rule_id );
+		// Cache the query rule id on the grammar - get_rule_id() does a
+		// linear array_search over all rule names which, on the MySQL
+		// grammar, costs a few microseconds per lookup.
+		$grammar = $this->grammar;
+		if ( null === $grammar->start_rule_id ) {
+			$grammar->start_rule_id = $grammar->get_rule_id( 'query' );
+		}
+		$ast = $this->parse_recursive( $grammar->start_rule_id );
 		return false === $ast ? null : $ast;
 	}
 
+	/**
+	 * Parse a single non-terminal rule.
+	 *
+	 * This function is only called for non-terminal rule ids. Terminals are
+	 * matched inline inside the branch loop below to avoid a function-call
+	 * round trip per consumed token.
+	 */
 	private function parse_recursive( $rule_id ) {
-		$is_terminal = $rule_id <= $this->grammar->highest_terminal_id;
-		if ( $is_terminal ) {
-			if ( $this->position >= count( $this->tokens ) ) {
-				return false;
-			}
+		$tokens   = $this->tokens;
+		$position = $this->position;
 
-			if ( WP_Parser_Grammar::EMPTY_RULE_ID === $rule_id ) {
-				return true;
-			}
-
-			if ( $this->tokens[ $this->position ]->id === $rule_id ) {
-				++$this->position;
-				return $this->tokens[ $this->position - 1 ];
-			}
+		// Narrow the set of branches worth trying using the precomputed FIRST
+		// sets. When no entry exists for the current token, a nullable rule
+		// can only match empty (return true without entering any branch) and
+		// any other rule cannot match at all (return false).
+		$tid = $tokens[ $position ]->id;
+		if ( isset( $this->branches_for_token[ $rule_id ][ $tid ] ) ) {
+			$candidate_branches = $this->branches_for_token[ $rule_id ][ $tid ];
+		} elseif ( isset( $this->nullable_branches[ $rule_id ] ) ) {
+			return true;
+		} else {
 			return false;
 		}
 
-		$branches = $this->grammar->rules[ $rule_id ];
-		if ( ! count( $branches ) ) {
-			return false;
-		}
+		$highest_terminal_id = $this->highest_terminal_id;
+		$is_fragment         = isset( $this->fragment_ids[ $rule_id ] );
+		$is_select_statement = $rule_id === $this->select_statement_rule_id;
 
-		// Bale out from processing the current branch if none of its rules can
-		// possibly match the current token.
-		if ( isset( $this->grammar->lookahead_is_match_possible[ $rule_id ] ) ) {
-			$token_id = $this->tokens[ $this->position ]->id;
-			if (
-				! isset( $this->grammar->lookahead_is_match_possible[ $rule_id ][ $token_id ] ) &&
-				! isset( $this->grammar->lookahead_is_match_possible[ $rule_id ][ WP_Parser_Grammar::EMPTY_RULE_ID ] )
-			) {
+		// Fast path for rules where every (rule, token) selector entry
+		// points to exactly one branch - about 55% of nonterminal calls
+		// on the MySQL corpus. Skipping the outer foreach avoids the
+		// foreach iterator setup for those calls.
+		if ( isset( $this->single_candidate_rules[ $rule_id ] ) ) {
+			// Only one branch can match this token, so there is no
+			// alternative to fall back to and no $branch_matches
+			// bookkeeping is needed: every failure path just rewinds
+			// the position and returns false directly.
+			$branch   = $candidate_branches[0];
+			$children = array();
+			foreach ( $branch as $subrule_id ) {
+				if ( $subrule_id <= $highest_terminal_id ) {
+					if ( $tokens[ $this->position ]->id === $subrule_id ) {
+						$children[] = $tokens[ $this->position ];
+						++$this->position;
+						continue;
+					}
+					$this->position = $position;
+					return false;
+				}
+
+				$subnode = $this->parse_recursive( $subrule_id );
+				if ( false === $subnode ) {
+					$this->position = $position;
+					return false;
+				}
+				if ( true === $subnode ) {
+					continue;
+				}
+				if ( is_array( $subnode ) ) {
+					foreach ( $subnode as $c ) {
+						$children[] = $c;
+					}
+				} else {
+					$children[] = $subnode;
+				}
+			}
+
+			if ( $is_select_statement && WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id ) {
+				$this->position = $position;
 				return false;
 			}
+			if ( ! $children ) {
+				return true;
+			}
+			if ( $is_fragment ) {
+				return $children;
+			}
+			return new WP_Parser_Node( $rule_id, $this->rule_names[ $rule_id ], $children );
 		}
 
-		$rule_name         = $this->grammar->rule_names[ $rule_id ];
-		$starting_position = $this->position;
-		foreach ( $branches as $branch ) {
-			$this->position = $starting_position;
-			$node           = new WP_Parser_Node( $rule_id, $rule_name );
+		$branch_matches = false;
+		$children       = array();
+		foreach ( $candidate_branches as $branch ) {
+			$this->position = $position;
+			$children       = array();
 			$branch_matches = true;
 			foreach ( $branch as $subrule_id ) {
+				if ( $subrule_id <= $highest_terminal_id ) {
+					// The sentinel at $tokens[$token_count] has id 0 so it
+					// cannot match any real terminal, making the range check
+					// unnecessary here.
+					if ( $tokens[ $this->position ]->id === $subrule_id ) {
+						$children[] = $tokens[ $this->position ];
+						++$this->position;
+						continue;
+					}
+					$branch_matches = false;
+					break;
+				}
+
 				$subnode = $this->parse_recursive( $subrule_id );
 				if ( false === $subnode ) {
 					$branch_matches = false;
 					break;
-				} elseif ( true === $subnode ) {
-					/*
-					 * The subrule was matched without actually matching a token.
-					 * This means a special empty "ε" (epsilon) rule was matched.
-					 * An "ε" rule in a grammar matches an empty input of 0 bytes.
-					 * It is used to represent optional grammar productions.
-					 */
-					continue;
-				} elseif ( is_array( $subnode ) && 0 === count( $subnode ) ) {
-					continue;
 				}
-				if ( is_array( $subnode ) && ! count( $subnode ) ) {
+				if ( true === $subnode ) {
 					continue;
 				}
-				if ( isset( $this->grammar->fragment_ids[ $subrule_id ] ) ) {
-					$node->merge_fragment( $subnode );
+				if ( is_array( $subnode ) ) {
+					// Fragment results are returned directly as a children
+					// array so the parser does not allocate a Parser_Node
+					// that would immediately be unwrapped into the parent.
+					foreach ( $subnode as $c ) {
+						$children[] = $c;
+					}
 				} else {
-					$node->append_child( $subnode );
+					$children[] = $subnode;
 				}
 			}
 
@@ -100,25 +191,36 @@ private function parse_recursive( $rule_id ) {
 			//        for right-associative rules, which could solve this.
 			//        See: https://github.com/mysql/mysql-workbench/blob/8.0.38/library/parsers/grammars/MySQLParser.g4#L994
 			//        See: https://github.com/antlr/antlr4/issues/488
-			$la = $this->tokens[ $this->position ] ?? null;
-			if ( $la && 'selectStatement' === $rule_name && WP_MySQL_Lexer::INTO_SYMBOL === $la->id ) {
+			if (
+				$branch_matches
+				&& $is_select_statement
+				&& WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id
+			) {
 				$branch_matches = false;
 			}
 
-			if ( true === $branch_matches ) {
+			if ( $branch_matches ) {
 				break;
 			}
 		}
 
 		if ( ! $branch_matches ) {
-			$this->position = $starting_position;
+			$this->position = $position;
 			return false;
 		}
 
-		if ( ! $node->has_child() ) {
+		if ( ! $children ) {
 			return true;
 		}
 
-		return $node;
+		// Fragments exist only to group symbols for reuse; their "node" would
+		// get inlined into the parent on the very next step. Return the raw
+		// children array so the caller can splice it without allocating a
+		// throwaway WP_Parser_Node.
+		if ( $is_fragment ) {
+			return $children;
+		}
+
+		return new WP_Parser_Node( $rule_id, $this->rule_names[ $rule_id ], $children );
 	}
 }
diff --git a/packages/mysql-on-sqlite/tests/tools/bench-compiled-parser.php b/packages/mysql-on-sqlite/tests/tools/bench-compiled-parser.php
new file mode 100644
index 00000000..785142e3
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/bench-compiled-parser.php
@@ -0,0 +1,92 @@
+<?php
+/**
+ * Benchmark the compiled MySQL parser against the interpreter.
+ *
+ * Expects a generated parser at /tmp/compiled.php (produced by
+ * tests/tools/compile-grammar.php).
+ */
+
+set_error_handler(
+	function ( $s, $m, $f, $l ) {
+		throw new ErrorException( $m, 0, $s, $f, $l );
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+require_once '/tmp/compiled.php';
+
+$runs  = 5;
+$limit = PHP_INT_MAX;
+foreach ( $argv as $arg ) {
+	if ( preg_match( '/^--runs=(\d+)$/', $arg, $m ) ) {
+		$runs = (int) $m[1];
+	}
+	if ( preg_match( '/^--limit=(\d+)$/', $arg, $m ) ) {
+		$limit = (int) $m[1];
+	}
+}
+
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$queries = array();
+$header  = true;
+while ( ( $record = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( $header ) {
+		$header = false;
+		continue;
+	}
+	if ( null !== $record[0] ) {
+		$queries[] = $record[0];
+	}
+	if ( count( $queries ) >= $limit ) {
+		break;
+	}
+}
+fclose( $handle );
+
+$all_tokens = array();
+foreach ( $queries as $q ) {
+	$all_tokens[] = ( new WP_MySQL_Lexer( $q ) )->remaining_tokens();
+}
+echo 'Loaded ', count( $queries ), " queries\n";
+
+function bench( $label, callable $factory, array $tokens_list, $runs ) {
+	$results = array();
+	for ( $r = 0; $r < $runs; $r++ ) {
+		$fail  = 0;
+		$start = microtime( true );
+		foreach ( $tokens_list as $tokens ) {
+			$parser = $factory( $tokens );
+			$ast    = $parser->parse();
+			if ( null === $ast ) {
+				++$fail;
+			}
+		}
+		$dur       = microtime( true ) - $start;
+		$results[] = $dur;
+		printf( "%-15s run %d: %.4fs, %d QPS, %d failures\n", $label, $r + 1, $dur, count( $tokens_list ) / $dur, $fail );
+	}
+	sort( $results );
+	$best = $results[0];
+	$avg  = array_sum( $results ) / count( $results );
+	printf( "%-15s best %.4fs (%d QPS) avg %.4fs (%d QPS)\n", $label, $best, count( $tokens_list ) / $best, $avg, count( $tokens_list ) / $avg );
+}
+
+bench(
+	'interpreted',
+	fn( $tokens ) => new WP_MySQL_Parser( $grammar, $tokens ),
+	$all_tokens,
+	$runs
+);
+bench(
+	'compiled',
+	fn( $tokens ) => new WP_MySQL_Compiled_Parser( $tokens ),
+	$all_tokens,
+	$runs
+);
diff --git a/packages/mysql-on-sqlite/tests/tools/bench-final.php b/packages/mysql-on-sqlite/tests/tools/bench-final.php
new file mode 100644
index 00000000..1dabebcf
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/bench-final.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Benchmark WP_MySQL_Parser over the query corpus; prints best/median/average.
+ */
+
+set_error_handler(
+	function ( $s, $m, $f, $l ) {
+		throw new ErrorException( $m, 0, $s, $f, $l );
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+
+$runs = (int) ( $argv[1] ?? 10 );
+
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$queries = array();
+$header  = true;
+while ( ( $r = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( $header ) {
+		$header = false;
+		continue; }
+	if ( null !== $r[0] ) {
+		$queries[] = $r[0];
+	}
+}
+fclose( $handle );
+
+$all_tokens = array();
+foreach ( $queries as $q ) {
+	$all_tokens[] = ( new WP_MySQL_Lexer( $q ) )->remaining_tokens();
+}
+$count = count( $queries );
+printf( "Loaded %d queries\n", $count );
+
+$durations = array();
+for ( $i = 0; $i < $runs; $i++ ) {
+	$start = microtime( true );
+	$fail  = 0;
+	foreach ( $all_tokens as $t ) {
+		if ( null === ( new WP_MySQL_Parser( $grammar, $t ) )->parse() ) {
+			++$fail;
+		}
+	}
+	$d           = microtime( true ) - $start;
+	$durations[] = $d;
+}
+sort( $durations );
+$best = $durations[0];
+$med  = $durations[ (int) ( count( $durations ) / 2 ) ];
+$avg  = array_sum( $durations ) / count( $durations );
+printf( "best %.4fs  %6d QPS\n", $best, $count / $best );
+printf( "med  %.4fs  %6d QPS\n", $med, $count / $med );
+printf( "avg  %.4fs  %6d QPS\n", $avg, $count / $avg );
diff --git a/packages/mysql-on-sqlite/tests/tools/bench-hot-rules.php b/packages/mysql-on-sqlite/tests/tools/bench-hot-rules.php
new file mode 100644
index 00000000..c15c5f4e
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/bench-hot-rules.php
@@ -0,0 +1,151 @@
+<?php
+/** Count per-rule call frequency on the MySQL corpus. */
+
+set_error_handler(
+	function ( $s, $m, $f, $l ) {
+		throw new ErrorException( $m, 0, $s, $f, $l );
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+
+class HR_Parser {
+	public static $counts = array();
+	public $grammar;
+	public $tokens;
+	public $token_count;
+	public $position;
+	private $rule_names;
+	private $fragment_ids;
+	private $branches_for_token;
+	private $nullable_branches;
+	private $highest_terminal_id;
+	private $sel_rid;
+
+	public function __construct( WP_Parser_Grammar $g, array $tokens ) {
+		$this->grammar             = $g;
+		$this->token_count         = count( $tokens );
+		$tokens[]                  = new WP_Parser_Token( 0, 0, 0, '' );
+		$this->tokens              = $tokens;
+		$this->position            = 0;
+		$this->rule_names          = $g->rule_names;
+		$this->fragment_ids        = $g->fragment_ids ?? array();
+		$this->branches_for_token  = $g->branches_for_token;
+		$this->nullable_branches   = $g->nullable_branches;
+		$this->highest_terminal_id = $g->highest_terminal_id;
+		$this->sel_rid             = $g->get_rule_id( 'selectStatement' );
+	}
+	public function parse() {
+		$rid = $this->grammar->get_rule_id( 'query' );
+		return $this->r( $rid );
+	}
+	private function r( $rid ) {
+		self::$counts[ $rid ] = ( self::$counts[ $rid ] ?? 0 ) + 1;
+		$tokens               = $this->tokens;
+		$position             = $this->position;
+		$tid                  = $tokens[ $position ]->id;
+		if ( isset( $this->branches_for_token[ $rid ][ $tid ] ) ) {
+			$cb = $this->branches_for_token[ $rid ][ $tid ];
+		} elseif ( isset( $this->nullable_branches[ $rid ] ) ) {
+			return true;
+		} else {
+			return false;
+		}
+		$htid        = $this->highest_terminal_id;
+		$is_fragment = isset( $this->fragment_ids[ $rid ] );
+		$is_sel      = $rid === $this->sel_rid;
+		$ok          = false;
+		$kids        = array();
+		foreach ( $cb as $branch ) {
+			$this->position = $position;
+			$kids           = array();
+			$ok             = true;
+			foreach ( $branch as $sid ) {
+				if ( $sid <= $htid ) {
+					if ( $tokens[ $this->position ]->id === $sid ) {
+						$kids[] = $tokens[ $this->position ];
+						++$this->position;
+						continue;
+					}
+					$ok = false;
+					break;
+				}
+				$sn = $this->r( $sid );
+				if ( false === $sn ) {
+					$ok = false;
+					break;
+				}
+				if ( true === $sn ) {
+					continue;
+				}
+				if ( is_array( $sn ) ) {
+					foreach ( $sn as $c ) {
+						$kids[] = $c;
+					}
+				} else {
+					$kids[] = $sn;
+				}
+			}
+			if ( $ok && $is_sel && WP_MySQL_Lexer::INTO_SYMBOL === $tokens[ $this->position ]->id ) {
+				$ok = false;
+			}
+			if ( $ok ) {
+				break;
+			}
+		}
+		if ( ! $ok ) {
+			$this->position = $position;
+			return false;
+		}
+		if ( ! $kids ) {
+			return true;
+		}
+		if ( $is_fragment ) {
+			return $kids;
+		}
+		return new WP_Parser_Node( $rid, $this->rule_names[ $rid ], $kids );
+	}
+}
+
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$queries = array();
+$header  = true;
+while ( ( $r = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( $header ) {
+		$header = false;
+		continue;
+	}
+	if ( null !== $r[0] ) {
+		$queries[] = $r[0];
+	}
+}
+fclose( $handle );
+$queries    = array_slice( $queries, 0, (int) ( $argv[1] ?? 10000 ) );
+$all_tokens = array();
+foreach ( $queries as $q ) {
+	$all_tokens[] = ( new WP_MySQL_Lexer( $q ) )->remaining_tokens();
+}
+
+foreach ( $all_tokens as $t ) {
+	( new HR_Parser( $grammar, $t ) )->parse();
+}
+// Report cumulative call share at fixed ranks and at every rank past 80%, stopping at 95%.
+arsort( HR_Parser::$counts );
+$total  = array_sum( HR_Parser::$counts );
+$cumsum = 0;
+$i      = 0;
+foreach ( HR_Parser::$counts as $cnt ) {
+	$cumsum += $cnt;
+	$pct     = 100 * $cumsum / $total;
+	if ( in_array( ++$i, array( 10, 25, 50, 100, 200, 500 ), true ) || $pct >= 80 ) {
+		printf( "After top %d rules: cumulative %.1f%% (%s of %s calls)\n", $i, $pct, number_format( $cumsum ), number_format( $total ) );
+		if ( $pct >= 95 ) {
+			break;
+		}
+	}
+}
diff --git a/packages/mysql-on-sqlite/tests/tools/bench-parser-split.php b/packages/mysql-on-sqlite/tests/tools/bench-parser-split.php
new file mode 100644
index 00000000..107f3cbe
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/bench-parser-split.php
@@ -0,0 +1,95 @@
+<?php
+/**
+ * Parser performance benchmark with split timings.
+ *
+ * Pre-tokenizes every query so lexing and parsing are timed separately.
+ * Prints lex and per-run parse time/QPS, plus best/average when --runs > 1.
+ *
+ * Usage:
+ *   php bench-parser-split.php [--runs=N] [--limit=M]
+ */
+
+set_error_handler(
+	function ( $severity, $message, $file, $line ) {
+		throw new ErrorException( $message, 0, $severity, $file, $line );
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+
+$runs  = 1;
+$limit = PHP_INT_MAX;
+foreach ( $argv as $arg ) {
+	if ( preg_match( '/^--runs=(\d+)$/', $arg, $m ) ) {
+		$runs = (int) $m[1];
+	}
+	if ( preg_match( '/^--limit=(\d+)$/', $arg, $m ) ) {
+		$limit = (int) $m[1];
+	}
+}
+
+$grammar_data = include __DIR__ . '/../../src/mysql/mysql-grammar.php';
+$grammar      = new WP_Parser_Grammar( $grammar_data );
+
+$data_dir = __DIR__ . '/../mysql/data';
+$handle   = fopen( "$data_dir/mysql-server-tests-queries.csv", 'r' );
+$queries  = array();
+$header   = true;
+while ( ( $record = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( $header ) {
+		$header = false;
+		continue;
+	}
+	if ( null !== $record[0] ) {
+		$queries[] = $record[0];
+	}
+	if ( count( $queries ) >= $limit ) {
+		break;
+	}
+}
+fclose( $handle );
+echo 'Loaded ', count( $queries ), " queries\n";
+
+// Pre-tokenize all queries once. The tokens are reused across runs, so the
+// parser starts from a cold AST cache each iteration but a warm token cache.
+$lex_start  = microtime( true );
+$all_tokens = array();
+foreach ( $queries as $query ) {
+	$lexer        = new WP_MySQL_Lexer( $query );
+	$all_tokens[] = $lexer->remaining_tokens();
+}
+$lex_duration = microtime( true ) - $lex_start;
+printf( "Lex: %.4fs, %d QPS\n", $lex_duration, count( $queries ) / $lex_duration );
+
+// Parse benchmark.
+$results = array();
+for ( $r = 0; $r < $runs; $r++ ) {
+	$failures = 0;
+	$start    = microtime( true );
+	foreach ( $all_tokens as $tokens ) {
+		$parser = new WP_MySQL_Parser( $grammar, $tokens );
+		$ast    = $parser->parse();
+		if ( null === $ast ) {
+			++$failures;
+		}
+	}
+	$duration  = microtime( true ) - $start;
+	$qps       = count( $queries ) / $duration;
+	$results[] = array( $duration, $qps, $failures );
+	printf( "Run %d: %.4fs, %d QPS, %d failures\n", $r + 1, $duration, $qps, $failures );
+}
+
+if ( $runs > 1 ) {
+	$durations = array_column( $results, 0 );
+	sort( $durations );
+	$best = $durations[0];
+	printf( "Best: %.4fs, %d QPS\n", $best, count( $queries ) / $best );
+	$avg = array_sum( $durations ) / count( $durations );
+	printf( "Avg:  %.4fs, %d QPS\n", $avg, count( $queries ) / $avg );
+}
diff --git a/packages/mysql-on-sqlite/tests/tools/compare-asts.php b/packages/mysql-on-sqlite/tests/tools/compare-asts.php
new file mode 100644
index 00000000..41be0f1d
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/compare-asts.php
@@ -0,0 +1,67 @@
+<?php
+/**
+ * Parse every query in the MySQL test corpus with both parsers and compare
+ * the resulting ASTs. Prints the first five mismatches and a final count.
+ */
+
+set_error_handler(
+	function ( $s, $m, $f, $l ) {
+		throw new ErrorException( $m, 0, $s, $f, $l );
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+require_once '/tmp/compiled.php';
+
+function ast_signature( $n ) {
+	if ( null === $n ) {
+		return 'null';
+	}
+	if ( $n instanceof WP_Parser_Token ) {
+		return 't(' . $n->id . ',' . $n->start . ',' . $n->length . ')';
+	}
+	$out = 'n(' . $n->rule_name;
+	foreach ( $n->get_children() as $c ) {
+		$out .= ',' . ast_signature( $c );
+	}
+	return $out . ')';
+}
+
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$header  = true;
+$limit   = (int) ( $argv[1] ?? PHP_INT_MAX );
+$n       = 0;
+$miss    = 0;
+while ( ( $row = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false && $n < $limit ) {
+	if ( $header ) {
+		$header = false;
+		continue;
+	}
+	if ( null === $row[0] ) {
+		continue;
+	}
+	++$n;
+	$tokens1 = ( new WP_MySQL_Lexer( $row[0] ) )->remaining_tokens();
+	$tokens2 = ( new WP_MySQL_Lexer( $row[0] ) )->remaining_tokens();
+	$s1      = ast_signature( ( new WP_MySQL_Parser( $grammar, $tokens1 ) )->parse() );
+	$s2      = ast_signature( ( new WP_MySQL_Compiled_Parser( $tokens2 ) )->parse() );
+	if ( $s1 !== $s2 ) {
+		++$miss;
+		if ( $miss <= 5 ) {
+			echo "MISMATCH query #$n:\n";
+			echo '  ', substr( $row[0], 0, 200 ), "\n";
+			echo '  interpreter: ', substr( $s1, 0, 300 ), "\n";
+			echo '  compiled:    ', substr( $s2, 0, 300 ), "\n";
+		}
+	}
+}
+fclose( $handle );
+echo "Checked $n queries, $miss mismatches.\n";
+exit( $miss > 0 ? 1 : 0 ); // Non-zero status lets shell callers and CI detect AST mismatches.
diff --git a/packages/mysql-on-sqlite/tests/tools/compile-grammar.php b/packages/mysql-on-sqlite/tests/tools/compile-grammar.php
new file mode 100644
index 00000000..79ddff8b
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/compile-grammar.php
@@ -0,0 +1,364 @@
+<?php
+/**
+ * Compile the MySQL grammar into a dedicated PHP class.
+ *
+ * Emits one method per reachable rule with branch dispatch unrolled as a
+ * switch-on-token-id, terminal matches inlined, and the non-fragment vs
+ * fragment distinction resolved at compile time so every call site gets
+ * minimal per-iteration work.
+ *
+ * Usage:
+ *   php tests/tools/compile-grammar.php \
+ *     > src/mysql/class-wp-mysql-compiled-parser.php
+ */
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+
+$grammar     = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$query_rid   = $grammar->get_rule_id( 'query' );
+$select_rid  = $grammar->get_rule_id( 'selectStatement' );
+$htid        = $grammar->highest_terminal_id;
+$into_symbol = WP_MySQL_Lexer::INTO_SYMBOL;
+
+// Reachability + fragment reference count.
+$visited = array();
+$refs    = array();
+$queue   = array( $query_rid );
+while ( $queue ) {
+	$r = array_pop( $queue );
+	if ( isset( $visited[ $r ] ) ) {
+		continue;
+	}
+	$visited[ $r ] = true;
+	foreach ( $grammar->rules[ $r ] as $branch ) {
+		foreach ( $branch as $sym ) {
+			if ( $sym > $htid ) {
+				$refs[ $sym ] = ( $refs[ $sym ] ?? 0 ) + 1;
+				if ( ! isset( $visited[ $sym ] ) ) {
+					$queue[] = $sym;
+				}
+			}
+		}
+	}
+}
+
+// Decide which rules get inlined.
+// Inline a fragment only if it is reachable AND single-branch (the simple
+// case where we can splice its symbols into the parent branch). Multi-branch
+// fragments require splatting which can explode parent branch counts; keep
+// them as methods for now.
+$inline_fragments = array();
+foreach ( $grammar->fragment_ids as $rid => $_ ) {
+	if (
+		isset( $visited[ $rid ] )
+		&& isset( $grammar->rules[ $rid ] )
+		&& 1 === count( $grammar->rules[ $rid ] )
+	) {
+		$inline_fragments[ $rid ] = true;
+	}
+}
+
+// Rules that will get a method.
+$kept = array();
+foreach ( $visited as $rid => $_ ) {
+	if ( ! isset( $inline_fragments[ $rid ] ) ) {
+		$kept[ $rid ] = true;
+	}
+}
+
+/**
+ * Compute the flattened symbol sequence for a branch, splicing any inlined
+ * single-branch fragments in place. Cycles fall back to leaving the reference.
+ */
+$flatten = function ( array $branch ) use ( &$flatten, $grammar, $inline_fragments, $htid ) {
+	static $expanding = array();
+	$out              = array();
+	foreach ( $branch as $sym ) {
+		if ( $sym <= $htid ) {
+			$out[] = $sym;
+			continue;
+		}
+		if ( ! isset( $inline_fragments[ $sym ] ) ) {
+			$out[] = $sym;
+			continue;
+		}
+		if ( count( $grammar->rules[ $sym ] ) !== 1 ) {
+			// Multi-branch single-use fragment: keep as call to avoid
+			// exponential parent-branch explosion. Future work could splat
+			// selected cases where branch count stays small.
+			$out[] = $sym;
+			continue;
+		}
+		if ( isset( $expanding[ $sym ] ) ) {
+			$out[] = $sym;
+			continue;
+		}
+		$expanding[ $sym ] = true;
+		foreach ( $flatten( $grammar->rules[ $sym ][0] ) as $s ) {
+			$out[] = $s;
+		}
+		unset( $expanding[ $sym ] );
+	}
+	return $out;
+};
+
+/**
+ * PHP-safe method name for a rule id.
+ */
+$method_name = function ( $rid ) use ( $grammar ) {
+	$raw = $grammar->rule_names[ $rid ];
+	// Fragment names start with "%" - turn that into "f_".
+	$clean = '%' === $raw[0] ? 'f_' . substr( $raw, 1 ) : $raw;
+	$clean = preg_replace( '/[^A-Za-z0-9_]/', '_', $clean );
+	return 'r_' . $clean . '_' . $rid;
+};
+
+/**
+ * Emit code that matches a single symbol in a branch, appending the match to
+ * $children on success and executing $fail_stmt on failure. No `goto` is
+ * emitted: the caller chooses the failure statement for its context.
+ *
+ * Multi-branch groups pass `break;` to leave the enclosing do-while(false)
+ * attempt; single-branch groups pass a block that restores the position and
+ * returns false. $skip_check omits the id test for an already-known terminal.
+ */
+$emit_symbol = function ( $sym, $indent, $fail_stmt, $skip_check = false ) use ( $grammar, $htid, $inline_fragments, &$method_name, &$flatten, &$emit_symbol ) {
+	$out = '';
+	if ( $sym <= $htid ) {
+		// Inline terminal match. The caller may tell us the token at the
+		// current position is already known to match (via switch case
+		// dispatch), in which case the check is redundant.
+		if ( ! $skip_check ) {
+			$out .= $indent . "if (\$tokens[\$this->position]->id !== $sym) $fail_stmt\n";
+		}
+		$out .= $indent . "\$children[] = \$tokens[\$this->position];\n";
+		$out .= $indent . "++\$this->position;\n";
+		return $out;
+	}
+
+	$is_fragment = isset( $grammar->fragment_ids[ $sym ] );
+	$method      = $method_name( $sym );
+	$out        .= $indent . "\$sub = \$this->$method();\n";
+	$out        .= $indent . "if (false === \$sub) $fail_stmt\n";
+	$nullable    = isset( $grammar->nullable_branches[ $sym ] );
+	if ( $is_fragment ) {
+		if ( $nullable ) {
+			$out .= $indent . "if (true !== \$sub) { foreach (\$sub as \$c) \$children[] = \$c; }\n";
+		} else {
+			$out .= $indent . "foreach (\$sub as \$c) \$children[] = \$c;\n";
+		}
+	} else {
+		if ( $nullable ) {
+			$out .= $indent . "if (true !== \$sub) \$children[] = \$sub;\n";
+		} else {
+			$out .= $indent . "\$children[] = \$sub;\n";
+		}
+	}
+	return $out;
+};
+
+/**
+ * Emit the body of a rule method.
+ */
+$emit_method = function ( $rid ) use ( $grammar, $htid, $select_rid, $into_symbol, $inline_fragments, &$method_name, &$flatten, &$emit_symbol ) {
+	$name        = $method_name( $rid );
+	$is_fragment = isset( $grammar->fragment_ids[ $rid ] );
+	$is_select   = $rid === $select_rid;
+	$rule_name   = $grammar->rule_names[ $rid ];
+	$nullable    = isset( $grammar->nullable_branches[ $rid ] );
+
+	// Per-token selector. Entries are lists of branch symbol sequences (the
+	// runtime format). Group tokens whose branch list is identical so their
+	// switch cases share a body.
+	$selector = $grammar->branches_for_token[ $rid ] ?? array();
+	$groups   = array();
+	foreach ( $selector as $tid => $branch_seqs ) {
+		$sig_parts = array();
+		foreach ( $branch_seqs as $seq ) {
+			$sig_parts[] = implode( ',', $seq );
+		}
+		$key                        = implode( '|', $sig_parts );
+		$groups[ $key ]['branches'] = $branch_seqs;
+		$groups[ $key ]['tids'][]   = $tid;
+	}
+
+	$code  = "\tprivate function $name() {\n";
+	$code .= "\t\t\$tokens = \$this->tokens;\n";
+	$code .= "\t\t\$position = \$this->position;\n";
+	$code .= "\t\t\$tid = \$tokens[\$position]->id;\n";
+
+	// "One of N terminals" fast path. When every branch is a single
+	// terminal, the entire rule collapses to: check accept set, consume
+	// one token, return. A rule like `%f1282` (406 terminal choices)
+	// compiles to ~8 lines instead of ~2.8k.
+	$all_single_terminal = true;
+	$accept              = array();
+	foreach ( $grammar->rules[ $rid ] as $b ) {
+		if ( 1 !== count( $b ) || $b[0] > $htid || 0 === $b[0] ) {
+			$all_single_terminal = false;
+			break;
+		}
+		$accept[ $b[0] ] = true;
+	}
+	if ( $all_single_terminal && $accept ) {
+		$keys = array_keys( $accept );
+		sort( $keys );
+		$lookup = '[' . implode( '=>1,', $keys ) . '=>1]';
+		$code  .= "\t\tstatic \$ok = $lookup;\n";
+		$code  .= "\t\tif (!isset(\$ok[\$tid])) return " . ( $nullable ? 'true' : 'false' ) . ";\n";
+		$code  .= "\t\t\$t = \$tokens[\$position];\n";
+		$code  .= "\t\t\$this->position = \$position + 1;\n";
+		if ( $is_select ) {
+			// selectStatement is never single-terminal, but guard anyway.
+			$code .= "\t\tif (\$tokens[\$position + 1]->id === $into_symbol) { \$this->position = \$position; return false; }\n";
+		}
+		if ( $is_fragment ) {
+			$code .= "\t\treturn array(\$t);\n";
+		} else {
+			$code .= "\t\treturn new WP_Parser_Node($rid, " . var_export( $rule_name, true ) . ", array(\$t));\n";
+		}
+		$code .= "\t}\n";
+		return $code;
+	}
+
+	if ( count( $groups ) === 1 ) {
+		// All accepting tokens reach the same branch list. A bare isset()
+		// check against a shared lookup table is much smaller than the
+		// equivalent 200-way switch case list and lets PHP resolve
+		// dispatch in a single hash lookup.
+		$only = reset( $groups );
+		$tids = $only['tids'];
+		sort( $tids );
+		$lookup = '[' . implode( '=>1,', $tids ) . '=>1]';
+		$code  .= "\t\tstatic \$first = $lookup;\n";
+		$code  .= "\t\tif (!isset(\$first[\$tid])) return " . ( $nullable ? 'true' : 'false' ) . ";\n";
+		// We cannot hand $known_tids here: the single-branch-group fast
+		// path covers many tokens, so the branch's first symbol may not be
+		// a specific one of them.
+		$code .= emit_group_body( $only['branches'], $grammar, $rid, $rule_name, $is_fragment, $is_select, $into_symbol, $htid, $inline_fragments, $method_name, $flatten, $emit_symbol, false );
+		// All branches failed; emit_group_body already reset the position.
+		$code .= "\t\treturn " . ( $nullable ? 'true' : 'false' ) . ";\n";
+	} else {
+		$code .= "\t\tswitch (\$tid) {\n";
+		foreach ( $groups as $g ) {
+			// Pack case labels onto as few lines as practical (~10 per
+			// line); single-label cases on their own line for readability.
+			$tids   = $g['tids'];
+			$chunks = array_chunk( $tids, 10 );
+			foreach ( $chunks as $chunk ) {
+				$code .= "\t\t\t" . implode( ' ', array_map( fn( $t ) => "case $t:", $chunk ) ) . "\n";
+			}
+			$code .= emit_group_body( $g['branches'], $grammar, $rid, $rule_name, $is_fragment, $is_select, $into_symbol, $htid, $inline_fragments, $method_name, $flatten, $emit_symbol, true, $g['tids'] );
+		}
+		$code .= "\t\t}\n";
+		$code .= "\t\treturn " . ( $nullable ? 'true' : 'false' ) . ";\n";
+	}
+	$code .= "\t}\n";
+	return $code;
+};
+
+function emit_group_body( array $branch_seqs, WP_Parser_Grammar $g, $rid, $rule_name, $is_fragment, $is_select, $into_symbol, $htid, $inline_fragments, $method_name, $flatten, $emit_symbol, $in_switch = true, array $known_tids = array() ) {
+	$indent = $in_switch ? "\t\t\t\t" : "\t\t";
+	$out    = '';
+	$count  = count( $branch_seqs );
+
+	foreach ( $branch_seqs as $n => $raw_branch ) {
+		$branch  = $flatten( $raw_branch );
+		$is_last = ( $n === $count - 1 );
+
+		// The switch dispatch guarantees the current token matches a case
+		// label, so if there's exactly one label and the branch starts
+		// with that same terminal we can skip the redundant id check.
+		$first_is_known_terminal = false;
+		if ( count( $known_tids ) === 1 && $branch && $branch[0] === $known_tids[0] ) {
+			$first_is_known_terminal = true;
+		}
+
+		if ( $count > 1 ) {
+			// Multi-branch: wrap each attempt in do-while(false). Break
+			// falls through to the next attempt; the final break falls
+			// through to the switch-level break / rule-level fall-through.
+			$out         .= $indent . "do {\n";
+			$inner_indent = $indent . "\t";
+			$fail_stmt    = 'break;';
+			$out         .= $inner_indent . "\$children = array();\n";
+			$out         .= $inner_indent . "\$this->position = \$position;\n";
+			foreach ( $branch as $i => $sym ) {
+				$skip_check = ( 0 === $i && $first_is_known_terminal );
+				$out       .= $emit_symbol( $sym, $inner_indent, $fail_stmt, $skip_check );
+			}
+			if ( $is_select ) {
+				$out .= $inner_indent . "if (\$tokens[\$this->position]->id === $into_symbol) break;\n";
+			}
+			$out .= emit_branch_return( $inner_indent, $rid, $rule_name, $is_fragment );
+			$out .= $indent . "} while (false);\n";
+		} else {
+			// Single branch: no alternatives to try, just inline.
+			$out      .= $indent . "\$children = array();\n";
+			$fail_stmt = '{ $this->position = $position; return false; }';
+			foreach ( $branch as $i => $sym ) {
+				$skip_check = ( 0 === $i && $first_is_known_terminal );
+				$out       .= $emit_symbol( $sym, $indent, $fail_stmt, $skip_check );
+			}
+			if ( $is_select ) {
+				$out .= $indent . "if (\$tokens[\$this->position]->id === $into_symbol) { \$this->position = \$position; return false; }\n";
+			}
+			$out .= emit_branch_return( $indent, $rid, $rule_name, $is_fragment );
+			if ( $in_switch ) {
+				$out .= $indent . "break;\n";
+			}
+			return $out;
+		}
+	}
+	// Multi-branch group fell through all do-while attempts: reset and
+	// break out of the switch (or return to the rule-level fallback).
+	$out .= $indent . "\$this->position = \$position;\n";
+	if ( $in_switch ) {
+		$out .= $indent . "break;\n";
+	}
+	return $out;
+}
+
+function emit_branch_return( $indent, $rid, $rule_name, $is_fragment ) {
+	$out  = '';
+	$out .= $indent . "if (!\$children) return true;\n";
+	if ( $is_fragment ) {
+		$out .= $indent . "return \$children;\n";
+	} else {
+		$out .= $indent . 'return new WP_Parser_Node(' . $rid . ', ' . var_export( $rule_name, true ) . ", \$children);\n";
+	}
+	return $out;
+}
+
+// Emit the class. The generated parser is self-contained: it bakes every
+// FIRST set, rule name, and branch structure into the emitted code, so no
+// WP_Parser_Grammar has to be loaded at runtime.
+echo "<?php\n\n";
+echo "/**\n * AUTO-GENERATED. Do not modify by hand.\n * Regenerate with tests/tools/compile-grammar.php.\n */\n";
+echo "class WP_MySQL_Compiled_Parser {\n";
+echo "\tprivate \$tokens;\n";
+echo "\tprivate \$position;\n\n";
+echo "\tpublic function __construct( array \$tokens ) {\n";
+echo "\t\t\$tokens[] = new WP_Parser_Token( 0, 0, 0, '' );\n";
+echo "\t\t\$this->tokens = \$tokens;\n";
+echo "\t\t\$this->position = 0;\n";
+echo "\t}\n\n";
+echo "\tpublic function parse() {\n";
+echo "\t\t\$ast = \$this->" . $method_name( $query_rid ) . "();\n";
+echo "\t\treturn false === \$ast ? null : \$ast;\n";
+echo "\t}\n\n";
+
+// Sort for deterministic output.
+ksort( $kept );
+foreach ( $kept as $rid => $_ ) {
+	echo $emit_method( $rid );
+	echo "\n";
+}
+
+echo "}\n";
diff --git a/packages/mysql-on-sqlite/tests/tools/dump-inflated-grammar.php b/packages/mysql-on-sqlite/tests/tools/dump-inflated-grammar.php
new file mode 100644
index 00000000..88b7f370
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/dump-inflated-grammar.php
@@ -0,0 +1,27 @@
+<?php
+/**
+ * Dump the post-inflation grammar state as a PHP file so the grammar can
+ * be loaded without recomputing FIRST / NULLABLE / branch selectors at
+ * runtime.
+ *
+ * Usage:
+ *   php tests/tools/dump-inflated-grammar.php > /tmp/mysql-grammar-inflated.php
+ */
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+
+$g = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+
+$data = array(
+	'rules'                  => $g->rules,
+	'rule_names'             => $g->rule_names,
+	'fragment_ids'           => $g->fragment_ids ?? array(),
+	'branches_for_token'     => $g->branches_for_token,
+	'nullable_branches'      => $g->nullable_branches,
+	'lowest_non_terminal_id' => $g->lowest_non_terminal_id,
+	'highest_terminal_id'    => $g->highest_terminal_id,
+);
+
+echo "<?php\n// AUTO-GENERATED.\nreturn ";
+echo var_export( $data, true );
+echo ";\n";
diff --git a/packages/mysql-on-sqlite/tests/tools/exp-pcre-ffi.php b/packages/mysql-on-sqlite/tests/tools/exp-pcre-ffi.php
new file mode 100644
index 00000000..df18c859
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/exp-pcre-ffi.php
@@ -0,0 +1,164 @@
+<?php
+/**
+ * Probe whether PHP FFI can expose PCRE2 callouts so the regex match
+ * can record (rule, offset) tuples that we then turn into an AST.
+ *
+ * Conclusion: NO (as far as this probe got - see the review note below).
+ *
+ * pcre2_set_callout_8 takes a function pointer, and this probe did not
+ * manage to supply one from PHP. NOTE(review): the script never actually
+ * passes the closure to pcre2_set_callout_8, and the PHP manual's FFI
+ * "PHP Callbacks" example does pass closures as C function pointers, so
+ * the claim that FFI cannot do this should be re-verified. Without a
+ * callout callback the (?C) callouts have no observable effect.
+ *
+ * Without callouts, PCRE2's match data exposes only the ovector
+ * (one offset pair per numbered group, last-match-wins), which is
+ * what php_pcre.c projects into $matches. That isn't enough to
+ * reconstruct a recursive parse tree.
+ *
+ * The only paths to make this work:
+ *  1. A custom PHP extension wrapping pcre2_set_callout (significant
+ *     C work, out of scope).
+ *  2. Multi-pass extraction with preg_match_all on flat sub-patterns
+ *     - functionally a parser, performance similar to or worse than
+ *     the existing recursive-descent interpreter.
+ *  3. Use the regex purely as a yes/no validator, accept that the
+ *     AST has to come from the parser. Tested in exp-regex-hybrid.php
+ *     and shown to be a net loss for valid-heavy workloads.
+ */
+
+if ( ! extension_loaded( 'ffi' ) ) {
+	echo "FFI extension not loaded\n";
+	exit( 1 );
+}
+
+// Minimal subset of the PCRE2 8-bit C API we need to do a match with a
+// callout callback. From pcre2.h.
+$cdef = <<<'CDEF'
+typedef unsigned char  PCRE2_UCHAR8;
+typedef const PCRE2_UCHAR8 *PCRE2_SPTR8;
+typedef size_t PCRE2_SIZE;
+
+typedef struct pcre2_real_compile_context_8 pcre2_compile_context_8;
+typedef struct pcre2_real_match_context_8   pcre2_match_context_8;
+typedef struct pcre2_real_general_context_8 pcre2_general_context_8;
+typedef struct pcre2_real_code_8            pcre2_code_8;
+typedef struct pcre2_real_match_data_8      pcre2_match_data_8;
+
+typedef struct pcre2_callout_block_8 {
+    uint32_t      version;
+    uint32_t      callout_number;
+    uint32_t      capture_top;
+    uint32_t      capture_last;
+    PCRE2_SIZE   *offset_vector;
+    PCRE2_SPTR8   mark;
+    PCRE2_SPTR8   subject;
+    PCRE2_SIZE    subject_length;
+    PCRE2_SIZE    start_match;
+    PCRE2_SIZE    current_position;
+    PCRE2_SIZE    pattern_position;
+    PCRE2_SIZE    next_item_length;
+    PCRE2_SIZE    callout_string_offset;
+    PCRE2_SIZE    callout_string_length;
+    PCRE2_SPTR8   callout_string;
+    uint32_t      callout_flags;
+} pcre2_callout_block_8;
+
+pcre2_code_8 *pcre2_compile_8(PCRE2_SPTR8 pattern, PCRE2_SIZE length,
+    uint32_t options, int *errorcode, PCRE2_SIZE *erroroffset,
+    pcre2_compile_context_8 *ccontext);
+
+void pcre2_code_free_8(pcre2_code_8 *code);
+
+pcre2_match_data_8 *pcre2_match_data_create_from_pattern_8(
+    const pcre2_code_8 *code, pcre2_general_context_8 *gcontext);
+
+void pcre2_match_data_free_8(pcre2_match_data_8 *match_data);
+
+pcre2_match_context_8 *pcre2_match_context_create_8(pcre2_general_context_8 *gcontext);
+void pcre2_match_context_free_8(pcre2_match_context_8 *mcontext);
+
+int pcre2_set_callout_8(pcre2_match_context_8 *mcontext,
+    int (*callout_function)(pcre2_callout_block_8 *, void *),
+    void *callout_data);
+
+int pcre2_match_8(const pcre2_code_8 *code, PCRE2_SPTR8 subject,
+    PCRE2_SIZE length, PCRE2_SIZE startoffset, uint32_t options,
+    pcre2_match_data_8 *match_data, pcre2_match_context_8 *mcontext);
+
+int pcre2_jit_compile_8(pcre2_code_8 *code, uint32_t options);
+
+PCRE2_SIZE *pcre2_get_ovector_pointer_8(pcre2_match_data_8 *match_data);
+
+void pcre2_get_error_message_8(int errorcode, PCRE2_UCHAR8 *buffer, PCRE2_SIZE bufflen);
+CDEF;
+
+$lib_path = '/opt/homebrew/lib/libpcre2-8.dylib';
+$ffi      = FFI::cdef( $cdef, $lib_path );
+
+// Compile a tiny pattern with two numbered callouts.
+$pattern  = '/(?C1)foo(?C2)bar/';
+$pat_buf  = $pattern;
+$err_code = FFI::new( 'int' );
+$err_off  = FFI::new( 'size_t' );
+
+$code = $ffi->pcre2_compile_8(
+	FFI::cast( 'PCRE2_SPTR8', FFI::addr( FFI::new( 'char[' . strlen( $pat_buf ) . ']' ) ) ),
+	0, // We'll set length below in real code.
+	0,
+	FFI::addr( $err_code ),
+	FFI::addr( $err_off ),
+	null
+);
+
+// The above is wrong because we didn't actually copy the pattern bytes
+// into the buffer. Let's do it properly.
+$pat_arr = $ffi->new( 'char[' . strlen( $pat_buf ) . ']' );
+FFI::memcpy( $pat_arr, $pat_buf, strlen( $pat_buf ) );
+$code = $ffi->pcre2_compile_8(
+	FFI::cast( 'PCRE2_SPTR8', FFI::addr( $pat_arr ) ),
+	strlen( $pat_buf ),
+	0,
+	FFI::addr( $err_code ),
+	FFI::addr( $err_off ),
+	null
+);
+if ( null === $code ) {
+	$buf = $ffi->new( 'char[256]' );
+	$ffi->pcre2_get_error_message_8( $err_code->cdata, FFI::cast( 'PCRE2_UCHAR8 *', FFI::addr( $buf ) ), 256 );
+	echo 'compile failed: code=', $err_code->cdata, ' offset=', $err_off->cdata, ' msg=', FFI::string( FFI::addr( $buf ) ), "\n";
+	exit( 1 );
+}
+echo "Pattern compiled OK\n";
+
+// Try setting up a callout via FFI.
+$callout_log = array();
+$mctx        = $ffi->pcre2_match_context_create_8( null );
+$callout_cb  = function ( $blockptr, $data ) use ( &$callout_log ) {
+	// $blockptr is FFI\CData type pcre2_callout_block_8*.
+	$blk           = $blockptr;
+	$callout_log[] = array(
+		'num' => $blk->callout_number,
+		'pos' => $blk->current_position,
+		'mat' => $blk->start_match,
+	);
+	return 0; // continue matching
+};
+// Allocate a C function-pointer value of the callout type. Note that
+// $callout_cb is never assigned to it or passed to pcre2_set_callout_8.
+$cb_type = 'int (*)(pcre2_callout_block_8 *, void *)';
+echo "Trying to bind callout callback...\n";
+try {
+	$cb_ffi = $ffi->new( $cb_type );
+	echo "Callback type created.\n";
+	// PHP FFI does not directly support binding a closure to a function
+	// pointer in arbitrary C signatures - this typically needs a Zend
+	// FFI extension feature or libffi closures.
+} catch ( \Throwable $e ) {
+	echo 'Could not bind: ', $e->getMessage(), "\n";
+}
+
+// Even attempting to call pcre2_set_callout_8 with a closure tends to
+// fail. Document and stop.
+echo "\nConclusion: PHP FFI cannot bind a PHP callback to a C function pointer in stock PHP, so it cannot supply a PCRE2 callout function.\n";
diff --git a/packages/mysql-on-sqlite/tests/tools/exp-regex-hybrid.php b/packages/mysql-on-sqlite/tests/tools/exp-regex-hybrid.php
new file mode 100644
index 00000000..e7bc5902
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/exp-regex-hybrid.php
@@ -0,0 +1,231 @@
+<?php
+/**
+ * Hybrid: regex pre-validation followed by the AST-building parser.
+ *
+ * Hypothesis: a PCRE2 match is a fast yes/no gate; if regex confirms
+ * the input parses, the AST builder can run. Tests whether this
+ * hybrid is faster than just running the parser.
+ */
+
+set_error_handler(
+	static function ( $severity, $message, $file, $line ) {
+		throw new ErrorException( $message, 0, $severity, $file, $line ); // surface any PHP error as an exception
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-node.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-parser.php';
+
+const TOKEN_OFFSET = 0x4000;
+
+/**
+ * Compile the grammar into a single PCRE pattern that matches a whole query.
+ *
+ * Simplified inline copy of the compiler in exp-regex-v3. The pattern is
+ * meant to be run against an encoded token stream in which every token is
+ * written as the UTF-8 character with code point `token id + TOKEN_OFFSET`.
+ *
+ * The work happens in three passes:
+ *  1. Every rule becomes a non-capturing alternation of its branches.
+ *     Terminals are emitted as token characters, non-terminals as
+ *     `RREF<id>RREF` placeholders.
+ *  2. Rules referenced exactly once, and not referencing themselves
+ *     directly, are spliced into their only caller. This repeats until no
+ *     such rule is left.
+ *  3. Each remaining rule becomes a named group `r<index>` inside a
+ *     `(?(DEFINE)...)` block and every placeholder becomes a `(?&r<index>)`
+ *     subroutine call.
+ *
+ * Two grammar-specific details are encoded on top of that: `(*THEN)` after
+ * the first symbol of each branch of a single-candidate rule, and a negative
+ * lookahead for the INTO token after `selectStatement`.
+ *
+ * No FIRST or NULLABLE sets are computed here: nothing in the generated
+ * pattern depends on them.
+ *
+ * @param WP_Parser_Grammar $grammar Grammar to compile. Reads `rules`,
+ *                                   `lowest_non_terminal_id`, `get_rule_id()`
+ *                                   and `single_candidate_rules` (treated as
+ *                                   empty when unset).
+ * @return string Pattern of the form `/(?(DEFINE)...)\A(?&rN)\z/u`, where
+ *                `rN` is the group of the `query` rule.
+ */
+function compile_regex( WP_Parser_Grammar $grammar ): string {
+	$low_nt                 = $grammar->lowest_non_terminal_id;
+	$rules                  = $grammar->rules;
+	$single_candidate_rules = $grammar->single_candidate_rules ?? array();
+	$select_rid             = $grammar->get_rule_id( 'selectStatement' );
+	$into_char              = mb_chr( WP_MySQL_Lexer::INTO_SYMBOL + TOKEN_OFFSET, 'UTF-8' );
+
+	// Pass 1: one regex body per rule, with placeholders for rule references.
+	$compiled = array();
+	foreach ( $rules as $rid => $branches ) {
+		$alts        = array();
+		$commit_then = isset( $single_candidate_rules[ $rid ] );
+		foreach ( $branches as $branch ) {
+			$alt = '';
+			foreach ( $branch as $i => $sym ) {
+				// Symbols below $low_nt are terminals; the rest are rule ids.
+				if ( $sym < $low_nt ) {
+					$alt .= mb_chr( $sym + TOKEN_OFFSET, 'UTF-8' );
+				} else {
+					$alt .= "RREF{$sym}RREF";
+				}
+				// For rules listed in single_candidate_rules, place (*THEN)
+				// right after the first symbol of every branch. exp-regex-v3
+				// describes this as committing to the branch once its first
+				// symbol has matched.
+				if ( 0 === $i && $commit_then ) {
+					$alt .= '(*THEN)';
+				}
+			}
+			$alts[] = $alt;
+		}
+		$body = '(?:' . implode( '|', $alts ) . ')';
+		if ( $rid === $select_rid ) {
+			// A selectStatement match must not be directly followed by INTO.
+			$body .= '(?!' . $into_char . ')';
+		}
+		$compiled[ $rid ] = $body;
+	}
+
+	// Pass 2: inline single-use rules. Every splice invalidates the reference
+	// counts, so after one splice the scan restarts from scratch (break 2).
+	do {
+		$changed = false;
+		// Count the placeholder occurrences that point at each rule.
+		$refs = array_fill_keys( array_keys( $compiled ), 0 );
+		foreach ( $compiled as $body ) {
+			if ( preg_match_all( '/RREF(\d+)RREF/', $body, $m ) ) {
+				foreach ( $m[1] as $r ) {
+					$refs[ (int) $r ] = ( $refs[ (int) $r ] ?? 0 ) + 1;
+				}
+			}
+		}
+		foreach ( $compiled as $rid => $body ) {
+			$placeholder = "RREF{$rid}RREF";
+			// Skip rules that are not single-use or that call themselves.
+			if ( 1 !== ( $refs[ $rid ] ?? 0 ) || false !== strpos( $body, $placeholder ) ) {
+				continue;
+			}
+			// Exactly one occurrence exists, so the first body that contains
+			// the placeholder is the only caller.
+			foreach ( $compiled as $caller_rid => $caller_body ) {
+				if ( false !== strpos( $caller_body, $placeholder ) ) {
+					$compiled[ $caller_rid ] = str_replace( $placeholder, $body, $caller_body );
+					unset( $compiled[ $rid ] );
+					$changed = true;
+					break 2;
+				}
+			}
+		}
+	} while ( $changed );
+
+	// Pass 3: give every surviving rule a dense index and emit it as a named
+	// group; placeholders turn into subroutine calls to those groups.
+	$rule_to_idx = array_flip( array_keys( $compiled ) );
+	$define      = '';
+	foreach ( $compiled as $rid => $body ) {
+		// Placeholders are plain ASCII, while token characters sit at or above
+		// U+4000 for non-negative ids, so this regex cannot hit a terminal.
+		$body    = preg_replace_callback(
+			'/RREF(\d+)RREF/',
+			function ( $m ) use ( $rule_to_idx ) {
+				return '(?&r' . $rule_to_idx[ (int) $m[1] ] . ')';
+			},
+			$body
+		);
+		$define .= "(?<r{$rule_to_idx[$rid]}>{$body})";
+	}
+
+	// NOTE(review): this assumes the `query` rule survived pass 2, i.e. that no
+	// other rule references it exactly once - confirm against the grammar.
+	$start_rid = $grammar->get_rule_id( 'query' );
+	return '/(?(DEFINE)' . $define . ')\\A(?&r' . $rule_to_idx[ $start_rid ] . ')\\z/u';
+}
+
+// Build the grammar from the value returned by mysql-grammar.php and compile it to one PCRE pattern.
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' );
+$pattern = compile_regex( $grammar );
+
+// Raise PCRE limits and enable the JIT. NOTE(review): pcre.jit_stacksize does not appear among the documented PCRE ini directives - confirm it has any effect.
+ini_set( 'pcre.backtrack_limit', '1000000000' );
+ini_set( 'pcre.recursion_limit', '10000000' );
+ini_set( 'pcre.jit', '1' );
+ini_set( 'pcre.jit_stacksize', '32M' );
+
+// Load the query corpus: first CSV column of every row after the header, then keep the first $argv[1] (default 10000).
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$queries = array();
+fgetcsv( $handle, null, ',', '"', '\\' ); // discard the header row
+while ( ( $r = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( null !== $r[0] ) {
+		$queries[] = $r[0];
+	}
+}
+fclose( $handle ); // release the file handle once the corpus is in memory
+$queries = array_slice( $queries, 0, (int) ( $argv[1] ?? 10000 ) );
+
+// Lex every query once up front and keep both the token list and its encoded form (one UTF-8 character per token id).
+$pairs = array();
+foreach ( $queries as $sql ) {
+	$token_list = ( new WP_MySQL_Lexer( $sql ) )->remaining_tokens();
+	$chars      = array();
+	foreach ( $token_list as $token ) {
+		$chars[] = mb_chr( $token->id + TOKEN_OFFSET, 'UTF-8' );
+	}
+	$pairs[] = array( $token_list, implode( '', $chars ) );
+}
+printf( "Loaded %d queries\n", count( $pairs ) );
+
+// Benchmark 1: regex match on the encoded token string only; no AST is built.
+$began   = microtime( true );
+$matches = 0;
+foreach ( $pairs as $pair ) {
+	if ( 1 === @preg_match( $pattern, $pair[1] ) ) {
+		++$matches;
+	}
+}
+$elapsed = microtime( true ) - $began;
+printf( "regex only:        %.4fs (%d QPS, %d/%d match)\n", $elapsed, count( $pairs ) / $elapsed, $matches, count( $pairs ) );
+
+// Benchmark 2: parser only; a fresh parser per query builds the AST from the token list.
+$began   = microtime( true );
+$matches = 0;
+foreach ( $pairs as $pair ) {
+	if ( ( new WP_MySQL_Parser( $grammar, $pair[0] ) )->parse() ) {
+		++$matches;
+	}
+}
+$elapsed = microtime( true ) - $began;
+printf( "parser only (AST): %.4fs (%d QPS, %d/%d match)\n", $elapsed, count( $pairs ) / $elapsed, $matches, count( $pairs ) );
+
+// Benchmark 3 (hybrid): the regex acts as a gate and the parser runs only when
+//    it matches. For accepted queries this is the parser cost plus the regex cost.
+$began    = microtime( true );
+$matches  = 0;
+$rejected = 0;
+foreach ( $pairs as $pair ) {
+	if ( 1 !== @preg_match( $pattern, $pair[1] ) ) {
+		++$rejected;
+		continue;
+	}
+	if ( ( new WP_MySQL_Parser( $grammar, $pair[0] ) )->parse() ) {
+		++$matches;
+	}
+}
+$elapsed = microtime( true ) - $began;
+printf(
+	"regex + parser:    %.4fs (%d QPS, %d/%d match, %d regex-rejected)\n",
+	$elapsed,
+	count( $pairs ) / $elapsed,
+	$matches,
+	count( $pairs ),
+	$rejected
+);
diff --git a/packages/mysql-on-sqlite/tests/tools/exp-regex-v3.php b/packages/mysql-on-sqlite/tests/tools/exp-regex-v3.php
new file mode 100644
index 00000000..256c51e0
--- /dev/null
+++ b/packages/mysql-on-sqlite/tests/tools/exp-regex-v3.php
@@ -0,0 +1,288 @@
+<?php
+/**
+ * Regex grammar compiler v3: aggressively inline single-use rules and
+ * use (*THEN) on every branch's first symbol so the matcher can't
+ * backtrack into a sibling alternative once a token has been consumed.
+ */
+
+set_error_handler(
+	static function ( $severity, $message, $file, $line ) {
+		throw new ErrorException( $message, 0, $severity, $file, $line ); // surface any PHP error as an exception
+	}
+);
+
+require_once __DIR__ . '/../../src/parser/class-wp-parser-grammar.php';
+require_once __DIR__ . '/../../src/parser/class-wp-parser-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-token.php';
+require_once __DIR__ . '/../../src/mysql/class-wp-mysql-lexer.php';
+
+const TOKEN_OFFSET = 0x4000; // added to a token id to obtain the code point that represents it
+
+function token_char( $tid ) {
+	return mb_chr( TOKEN_OFFSET + $tid, 'UTF-8' ); // one UTF-8 character stands in for one token id
+}
+
+$grammar = new WP_Parser_Grammar( require __DIR__ . '/../../src/mysql/mysql-grammar.php' ); // built from the value mysql-grammar.php returns
+$low_nt  = $grammar->lowest_non_terminal_id; // the code below treats symbols under this id as terminals (tokens)
+
+// Tally, per rule id, how often the rule occurs as a symbol inside any branch.
+// Every key of $g->rules is present in the result (unreferenced rules map to 0);
+// symbols below lowest_non_terminal_id are skipped.
+function ref_counts( WP_Parser_Grammar $g ) {
+	$threshold = $g->lowest_non_terminal_id;
+	$counts    = array_fill_keys( array_keys( $g->rules ), 0 );
+	foreach ( $g->rules as $alternatives ) {
+		foreach ( $alternatives as $sequence ) {
+			foreach ( $sequence as $symbol ) {
+				if ( $symbol < $threshold ) {
+					continue;
+				}
+				$counts[ $symbol ] = ( $counts[ $symbol ] ?? 0 ) + 1;
+			}
+		}
+	}
+	return $counts;
+}
+
+// FIRST and NULLABLE sets, computed by fixpoint iteration. No code later in this script consults the contents of $first or $nullable.
+$rules    = $grammar->rules;
+$nullable = array(); // rule id => bool, set to true once some branch gets through the symbol loop without a break
+$first    = array(); // rule id => set of token ids (token id => true) collected as possible first tokens
+foreach ( $rules as $rid => $_ ) {
+	$nullable[ $rid ] = false;
+	$first[ $rid ]    = array();
+}
+do { // sweep all rules again until one full sweep changes nothing
+	$changed = false;
+	foreach ( $rules as $rid => $branches ) {
+		foreach ( $branches as $branch ) {
+			$bn = true; // "branch nullable": stays true only if the loop below never breaks
+			foreach ( $branch as $sym ) {
+				if ( $sym < $low_nt ) { // terminal: record it as a first token and stop scanning this branch
+					if ( ! isset( $first[ $rid ][ $sym ] ) ) {
+						$first[ $rid ][ $sym ] = true;
+						$changed               = true;
+					}
+					$bn = false;
+					break;
+				}
+				foreach ( $first[ $sym ] as $tid => $_ ) { // non-terminal: copy over its first tokens known so far
+					if ( ! isset( $first[ $rid ][ $tid ] ) ) {
+						$first[ $rid ][ $tid ] = true;
+						$changed               = true;
+					}
+				}
+				if ( ! $nullable[ $sym ] ) { // this symbol cannot be skipped, so later symbols are not examined
+					$bn = false;
+					break;
+				}
+			}
+			if ( $bn && ! $nullable[ $rid ] ) {
+				$nullable[ $rid ] = true;
+				$changed          = true;
+			}
+		}
+	}
+} while ( $changed );
+
+// Compile each rule into a "regex body" string. References to other rules
+// are emitted as RREF<id>RREF placeholders; the later passes of this script
+// either inline them or turn them into named-subroutine calls. Bodies are
+// memoized in $compiled, keyed by rule id.
+$single_candidate_rules = $grammar->single_candidate_rules ?? array();
+$select_rid             = $grammar->get_rule_id( 'selectStatement' );
+$into_char              = token_char( WP_MySQL_Lexer::INTO_SYMBOL );
+$compiled               = array();
+$compile_rule           = function ( $rid ) use ( &$compiled, $rules, $low_nt, $single_candidate_rules, $select_rid, $into_char ) {
+	// Already compiled: reuse the stored body.
+	if ( isset( $compiled[ $rid ] ) ) {
+		return $compiled[ $rid ];
+	}
+	$alts      = array();
+	$safe_then = isset( $single_candidate_rules[ $rid ] );
+	foreach ( $rules[ $rid ] as $branch ) {
+		$alt = '';
+		foreach ( $branch as $i => $sym ) {
+			if ( $sym < $low_nt ) {
+				$alt .= token_char( $sym );
+			} else {
+				$alt .= "RREF{$sym}RREF";
+			}
+			// (*THEN) commits the alternative once the first symbol matches.
+			// Only safe when sibling branches of this rule have disjoint
+			// FIRST sets - that property is captured by
+			// $grammar->single_candidate_rules. Outside that set, multiple
+			// branches can share a first token and committing prematurely
+			// would yield spurious match failures.
+			if ( 0 === $i && $safe_then ) {
+				$alt .= '(*THEN)';
+			}
+		}
+		$alts[] = $alt;
+	}
+	$body = '(?:' . implode( '|', $alts ) . ')';
+	if ( $rid === $select_rid ) {
+		// Mirror the negative lookahead the parser uses: a successful
+		// selectStatement match must not be followed by INTO. Otherwise
+		// the surrounding rule should pick a different alternative.
+		$body .= '(?!' . $into_char . ')';
+	}
+	$compiled[ $rid ] = $body;
+	return $compiled[ $rid ];
+};
+
+// First pass: compile every rule once.
+foreach ( array_keys( $rules ) as $rid ) {
+	$compile_rule( $rid );
+}
+
+// Second pass: inline rules that are referenced exactly once and whose own
+// body does not contain their own placeholder (only direct self-reference is
+// checked). Repeat to fixpoint - inlining changes ref counts.
+$inlined_count = 0;
+do {
+	$changed = false;
+	$refs    = array(); // rule id => number of placeholder occurrences pointing at it
+	foreach ( $compiled as $rid => $body ) {
+		$refs[ $rid ] = 0;
+	}
+	foreach ( $compiled as $rid => $body ) {
+		if ( preg_match_all( '/RREF(\d+)RREF/', $body, $m ) ) {
+			foreach ( $m[1] as $r ) {
+				$refs[ (int) $r ] = ( $refs[ (int) $r ] ?? 0 ) + 1;
+			}
+		}
+	}
+	foreach ( $compiled as $rid => $body ) {
+		if ( ( $refs[ $rid ] ?? 0 ) !== 1 ) {
+			continue;
+		}
+		// Don't inline rules whose body directly references itself.
+		if ( strpos( $body, "RREF{$rid}RREF" ) !== false ) {
+			continue;
+		}
+		// Find the body that holds the single reference and splice this rule's body in.
+		foreach ( $compiled as $caller_rid => $caller_body ) {
+			if ( strpos( $caller_body, "RREF{$rid}RREF" ) !== false ) {
+				$compiled[ $caller_rid ] = str_replace( "RREF{$rid}RREF", $body, $caller_body );
+				unset( $compiled[ $rid ] );
+				++$inlined_count;
+				$changed = true;
+				break 2; // restart from top so refs recount with the new state
+			}
+		}
+	}
+} while ( $changed );
+
+// Give each surviving rule a dense index and emit it as a named group
+// r<index>; $idx_to_rule and $rule_to_idx are inverse maps of each other.
+$idx_to_rule = array_keys( $compiled );
+$rule_to_idx = array_flip( $idx_to_rule );
+
+// Turns one RREF<id>RREF placeholder match into a call to that rule's group.
+$to_subroutine_call = static function ( $match ) use ( $rule_to_idx ) {
+	return '(?&r' . $rule_to_idx[ (int) $match[1] ] . ')';
+};
+
+$groups = array();
+foreach ( $idx_to_rule as $index => $rule_id ) {
+	$resolved = preg_replace_callback(
+		'/RREF(\d+)RREF/',
+		$to_subroutine_call,
+		$compiled[ $rule_id ]
+	);
+	// The loop key equals $rule_to_idx[ $rule_id ] because both maps come
+	// from the same key list.
+	$groups[] = "(?<r{$index}>{$resolved})";
+}
+$define = implode( '', $groups );
+
+// Assemble the final pattern: the rule groups sit inside (?(DEFINE)...) and
+// the group of the `query` rule is called between \A and \z.
+$start_rid = $grammar->get_rule_id( 'query' );
+$entry     = '(?&r' . $rule_to_idx[ $start_rid ] . ')';
+$pattern   = '/(?(DEFINE)' . $define . ')\\A' . $entry . '\\z/u';
+$size      = number_format( strlen( $pattern ) );
+printf( "Inlined %d rules. Final rules: %d. Pattern: %s bytes\n", $inlined_count, count( $idx_to_rule ), $size );
+
+// Raise the PCRE limits and enable the JIT before the pattern is first used.
+ini_set( 'pcre.backtrack_limit', '1000000000' );
+ini_set( 'pcre.recursion_limit', '10000000' );
+ini_set( 'pcre.jit', '1' );
+
+// Time a first preg_match() call. The subject "\xff" is not valid UTF-8, so a
+// false result with PREG_BAD_UTF8_ERROR is tolerated; any other false result
+// is treated as a pattern that does not compile.
+$t          = microtime( true );
+$ok         = @preg_match( $pattern, "\xff", $m );
+$elapsed_ms = ( microtime( true ) - $t ) * 1000;
+printf( "Compile: %.2fms, ok=%s, err=%s\n", $elapsed_ms, var_export( $ok, true ), preg_last_error_msg() );
+$bad_utf8   = PREG_BAD_UTF8_ERROR === preg_last_error();
+if ( false === $ok && ! $bad_utf8 ) {
+	echo "Pattern doesn't compile cleanly. Bailing.\n";
+	exit( 1 );
+}
+
+// Load the query corpus: the first CSV column of every row after the header,
+// then keep the first $argv[1] (default 5000) queries.
+$handle  = fopen( __DIR__ . '/../mysql/data/mysql-server-tests-queries.csv', 'r' );
+$queries = array();
+fgetcsv( $handle, null, ',', '"', '\\' ); // discard the header row
+while ( ( $r = fgetcsv( $handle, null, ',', '"', '\\' ) ) !== false ) {
+	if ( null !== $r[0] ) {
+		$queries[] = $r[0];
+	}
+}
+fclose( $handle ); // release the file handle once the corpus is in memory
+$queries = array_slice( $queries, 0, (int) ( $argv[1] ?? 5000 ) );
+
+// Encode every query as a string holding one token_char() per lexer token.
+$encoded = array();
+foreach ( $queries as $sql ) {
+	$chars = array();
+	foreach ( ( new WP_MySQL_Lexer( $sql ) )->remaining_tokens() as $token ) {
+		$chars[] = token_char( $token->id );
+	}
+	$encoded[] = implode( '', $chars );
+}
+
+// Run the pattern over every encoded query, tallying the outcomes and
+// timing each preg_match() call separately.
+$t               = microtime( true );
+$matched         = 0;
+$failed          = 0;
+$errors          = 0;
+$failed_examples = array();
+$slow            = array();
+foreach ( $encoded as $i => $subject ) {
+	$began   = microtime( true );
+	$outcome = @preg_match( $pattern, $subject );
+	$took    = microtime( true ) - $began;
+	// 1 means match, 0 means no match, anything else is counted as an error.
+	if ( 1 === $outcome ) {
+		++$matched;
+	} elseif ( 0 !== $outcome ) {
+		++$errors;
+	} else {
+		++$failed;
+		if ( count( $failed_examples ) < 10 ) {
+			$failed_examples[] = substr( str_replace( "\n", ' ', $queries[ $i ] ), 0, 120 );
+		}
+	}
+	// Remember up to three queries whose match took longer than 5 ms.
+	if ( $took > 0.005 && count( $slow ) < 3 ) {
+		$slow[] = sprintf( '%6.0fms: %s', $took * 1000, substr( str_replace( "\n", ' ', $queries[ $i ] ), 0, 100 ) );
+	}
+}
+// Total wall time of the loop above, including the bookkeeping.
+$d = microtime( true ) - $t;
+printf( "Matched=%d, Failed=%d, Errors=%d, time=%.4fs (%d QPS)\n", $matched, $failed, $errors, $d, count( $encoded ) / $d );
+// Print the collected samples, one per line, indented by two spaces.
+echo "\nFailed queries:\n";
+foreach ( $failed_examples as $sample ) {
+	echo '  ', $sample, "\n";
+}
+echo "\nSlow queries:\n";
+foreach ( $slow as $sample ) {
+	echo '  ', $sample, "\n";
+}