From 46efdc2faff8122eb737102d81bd076a95d65799 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Tue, 24 Feb 2026 16:21:09 -0700 Subject: [PATCH 1/7] Handle Capture nodes in TryGetOrdinalCaseInsensitiveString TryGetOrdinalCaseInsensitiveString iterates the children of a Concatenate node to extract an ordinal case-insensitive prefix string. Previously it did not handle Capture or nested Concatenate nodes, causing patterns like \b(in)\b with IgnoreCase to miss the optimal LeadingString_OrdinalIgnoreCase search path and fall through to the slower FixedDistanceSets path. Unwrap Capture nodes transparently and recurse into Concatenate children, matching the behavior of FindPrefixesCore. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../System/Text/RegularExpressions/RegexNode.cs | 17 +++++++++++++++++ .../UnitTests/RegexFindOptimizationsTests.cs | 4 ++++ 2 files changed, 21 insertions(+) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 78d529b7baee72..2e032a7e2f8a6f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2969,6 +2969,12 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil { RegexNode child = Child(i); + // Unwrap capture groups so their contents can be processed directly. + while (child.Kind is RegexNodeKind.Capture) + { + child = child.Child(0); + } + if (child.Kind is RegexNodeKind.One) { // We only want to include ASCII characters, and only if they don't participate in case conversion @@ -3006,6 +3012,17 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil vsb.Append((char)(twoChars[0] | 0x20), child.Kind is RegexNodeKind.Set ? 1 : child.M); } + else if (child.Kind is RegexNodeKind.Concatenate) + { + // This can occur after unwrapping a Capture whose child is a Concatenate. + // Recurse to extract any case-insensitive string from the inner concatenation. + if (!child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes)) + { + break; + } + + vsb.Append(innerStr); + } else if (child.Kind is RegexNodeKind.Empty) { // Skip over empty nodes, as they're pure nops. They would ideally have been optimized away, diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index ca75e1abd4404f..15d929a1c221fc 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -120,6 +120,10 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex [InlineData(@"(?<=cd)ab", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "ab")] [InlineData(@"\bab(?=\w)(?!=\d)c\b", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")] [InlineData(@"\bab(?=\w)(?!=\d)c\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] + // Capture groups should be transparent to ordinal case-insensitive prefix extraction + [InlineData(@"(abc)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] + [InlineData(@"\b(in)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "in")] + [InlineData(@"\b(from).+(to)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "from")] public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix) { RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options); From def24edf573d2e7772ce408a61bedb425494ba70 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Tue, 24 Feb 2026 20:45:56 -0700 Subject: [PATCH 2/7] Add stack guard for recursive Capture unwrapping Add TryEnsureSufficientExecutionStack check before the recursive call in TryGetOrdinalCaseInsensitiveString to safely handle deeply nested capture patterns like ((((ab)))) without risking a stack overflow. Add OuterLoop test exercising 2000-deep capture nesting. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../System/Text/RegularExpressions/RegexNode.cs | 3 ++- .../UnitTests/RegexFindOptimizationsTests.cs | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 2e032a7e2f8a6f..26f73591208c22 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -3016,7 +3016,8 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil { // This can occur after unwrapping a Capture whose child is a Concatenate. // Recurse to extract any case-insensitive string from the inner concatenation. - if (!child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes)) + if (!StackHelper.TryEnsureSufficientExecutionStack() || + !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes)) { break; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index 15d929a1c221fc..9933fcfa328093 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -131,6 +131,23 @@ public void LeadingPrefix(string pattern, int options, int expectedMode, string Assert.Equal(expectedPrefix, opts.LeadingPrefix); } + [Fact] + [OuterLoop("Stress test for deep nesting")] + public void LeadingPrefix_DeepCapatureNesting_DoesNotStackOverflow() + { + // Deeply nested captures like (((((...))))) with IgnoreCase exercise the recursive + // Capture-unwrapping path in TryGetOrdinalCaseInsensitiveString. Verify it doesn't SO. + const int Depth = 2000; + string pattern = new string('(', Depth) + "ab" + new string(')', Depth); + RegexFindOptimizations opts = ComputeOptimizations(pattern, RegexOptions.IgnoreCase); + // The prefix may or may not be extracted depending on stack limits, but it must not crash. + // If extraction succeeds, it should find "ab". + if (opts.FindMode == FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight) + { + Assert.Equal("ab", opts.LeadingPrefix); + } + } + [Theory] [InlineData(@"[ab]", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "ab")] [InlineData(@"[Aa]", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "Aa")] From 311d473f11fc4fa181c0ad4f63bec997f9314d1d Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Tue, 24 Feb 2026 20:46:21 -0700 Subject: [PATCH 3/7] Guard Capture unwrapping to prefix analysis path only TryGetOrdinalCaseInsensitiveString is also called from the compiler and source generator (EmitConcatenation). Although TryGetJoinableLengthCheckChildRange currently excludes Capture nodes from the joinable range, add an explicit unwrapCaptures parameter (default false) as defense-in-depth so only the prefix analysis caller opts into Capture unwrapping. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../Text/RegularExpressions/RegexNode.cs | 20 ++++++++++++++----- .../RegularExpressions/RegexPrefixAnalyzer.cs | 2 +- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 26f73591208c22..20c5dc9fa442a5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2953,8 +2953,13 @@ RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or /// consumed. true is only valid when used as part of a search to determine where to try a full match, not as part of /// actual matching logic. /// + /// + /// Defaults to false. When true, Capture nodes are transparently unwrapped so the string inside a capture group + /// can be extracted. This must only be set to true for prefix analysis, not for the compiler/source generator, + /// as the compiler must not skip Capture nodes (they have side effects that need to be emitted). + /// /// true if a sequence was found; otherwise, false. - public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChildBound, out int nodesConsumed, [NotNullWhen(true)] out string? caseInsensitiveString, bool consumeZeroWidthNodes = false) + public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChildBound, out int nodesConsumed, [NotNullWhen(true)] out string? caseInsensitiveString, bool consumeZeroWidthNodes = false, bool unwrapCaptures = false) { Debug.Assert(Kind == RegexNodeKind.Concatenate, $"Expected Concatenate, got {Kind}"); @@ -2969,10 +2974,15 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil { RegexNode child = Child(i); - // Unwrap capture groups so their contents can be processed directly. - while (child.Kind is RegexNodeKind.Capture) + // When used for prefix analysis (unwrapCaptures is true), unwrap capture + // groups so their contents can be examined. This must not be done when used by the + // compiler/source generator, as it would cause capture side effects to be skipped. + if (unwrapCaptures) { - child = child.Child(0); + while (child.Kind is RegexNodeKind.Capture) + { + child = child.Child(0); + } } if (child.Kind is RegexNodeKind.One) @@ -3017,7 +3027,7 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil // This can occur after unwrapping a Capture whose child is a Concatenate. // Recurse to extract any case-insensitive string from the inner concatenation. if (!StackHelper.TryEnsureSufficientExecutionStack() || - !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes)) + !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes, unwrapCaptures)) { break; } diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs index 136929aa18eef8..4a3747455c5846 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs @@ -495,7 +495,7 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb) continue; case RegexNodeKind.Concatenate: - node.TryGetOrdinalCaseInsensitiveString(0, node.ChildCount(), out _, out string? caseInsensitiveString, consumeZeroWidthNodes: true); + node.TryGetOrdinalCaseInsensitiveString(0, node.ChildCount(), out _, out string? caseInsensitiveString, consumeZeroWidthNodes: true, unwrapCaptures: true); return caseInsensitiveString; default: From deaf9c9ba754fff94a13a605185d4e41a52ab16c Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Wed, 25 Feb 2026 11:29:52 -0700 Subject: [PATCH 4/7] Break after partially consumed inner Concatenation When TryGetOrdinalCaseInsensitiveString recurses into an inner Concatenation (from an unwrapped Capture) and only partially consumes it, stop iterating the outer Concatenation. Otherwise subsequent siblings are incorrectly appended to the prefix string. For example (abcde|abcfg)\( was producing 'abc(' instead of 'abc'. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/System/Text/RegularExpressions/RegexNode.cs | 10 +++++++++- .../tests/UnitTests/RegexFindOptimizationsTests.cs | 2 ++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 20c5dc9fa442a5..d242c44bb6fd89 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -3027,12 +3027,20 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil // This can occur after unwrapping a Capture whose child is a Concatenate. // Recurse to extract any case-insensitive string from the inner concatenation. if (!StackHelper.TryEnsureSufficientExecutionStack() || - !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out _, out string? innerStr, consumeZeroWidthNodes, unwrapCaptures)) + !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out int innerNodesConsumed, out string? innerStr, consumeZeroWidthNodes, unwrapCaptures)) { break; } vsb.Append(innerStr); + + // If the inner concatenation wasn't fully consumed, we can't continue past it + // as subsequent siblings aren't guaranteed to immediately follow the extracted prefix. + if (innerNodesConsumed < child.ChildCount()) + { + i++; + break; + } } else if (child.Kind is RegexNodeKind.Empty) { diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index 9933fcfa328093..4b76569c28a7d3 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -124,6 +124,8 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex [InlineData(@"(abc)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] [InlineData(@"\b(in)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "in")] [InlineData(@"\b(from).+(to)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "from")] + [InlineData(@"(abcde|abcfg)\(", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] // partial capture followed by non-letter One('(') + [InlineData(@"(abc|abd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] // partial capture followed by letter Set([Ee]) public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix) { RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options); From 213bbc3f99f689ecdd20c46c298d73afbf483bee Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Wed, 25 Feb 2026 11:51:44 -0700 Subject: [PATCH 5/7] Add test variations for Capture unwrapping edge cases Add tests for adjacent captures, captures at non-zero position, single-char captures (unwrap to Set), empty captures (unwrap to Empty), partial inner Concatenate consumption with different trailing node kinds, and Atomic groups (documents current conservative behavior). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../UnitTests/RegexFindOptimizationsTests.cs | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index 4b76569c28a7d3..73cb95cedb3477 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -124,8 +124,20 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex [InlineData(@"(abc)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] [InlineData(@"\b(in)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "in")] [InlineData(@"\b(from).+(to)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "from")] - [InlineData(@"(abcde|abcfg)\(", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] // partial capture followed by non-letter One('(') - [InlineData(@"(abc|abd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] // partial capture followed by letter Set([Ee]) + // Partial capture: inner Concatenate not fully consumed, followed by non-letter One('(') + [InlineData(@"(abcde|abcfg)\(", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] + // Partial capture: inner Concatenate not fully consumed, followed by letter Set([Ee]) + [InlineData(@"(abc|abd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] + // Adjacent captures: both fully consumed via inner Concatenate recursion, extraction continues across capture boundaries + [InlineData(@"(ab)(cd)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")] + // Non-capture content before capture: tests Set processing then Capture unwrap in same Concatenate iteration + [InlineData(@"ab(cd)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")] + // Single-char capture unwraps to Set (not Concatenate), exercises direct Set handling after Capture unwrap + [InlineData(@"a(b)c", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] + // Empty capture unwraps to Empty node, which is skipped; extraction continues with subsequent content + [InlineData(@"()ab", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] + // Atomic groups inside a Concatenate are not unwrapped (conservative); extraction stops before them + [InlineData(@"ab(?>cd)ef", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix) { RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options); From eba05b973787ee1c71e440759238467f69c76380 Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Wed, 25 Feb 2026 11:53:51 -0700 Subject: [PATCH 6/7] Also unwrap Atomic groups in prefix extraction Atomic groups only affect backtracking behavior, not what text is matched, so they can safely be unwrapped during prefix analysis just like Capture nodes. This allows patterns like ab(?>cd)ef with IgnoreCase to extract the full prefix 'abcdef'. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../src/System/Text/RegularExpressions/RegexNode.cs | 8 +++++--- .../tests/UnitTests/RegexFindOptimizationsTests.cs | 9 +++++++-- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index d242c44bb6fd89..2f98dcf982cf8f 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -2975,11 +2975,13 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil RegexNode child = Child(i); // When used for prefix analysis (unwrapCaptures is true), unwrap capture - // groups so their contents can be examined. This must not be done when used by the - // compiler/source generator, as it would cause capture side effects to be skipped. + // groups and atomic groups so their contents can be examined. Capture unwrapping + // must not be done when used by the compiler/source generator, as it would cause + // capture side effects to be skipped. Atomic groups only affect backtracking, not + // what text is matched, so they are safe to unwrap for prefix analysis as well. if (unwrapCaptures) { - while (child.Kind is RegexNodeKind.Capture) + while (child.Kind is RegexNodeKind.Capture or RegexNodeKind.Atomic) { child = child.Child(0); } diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index 73cb95cedb3477..d34d67215c7088 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -136,8 +136,13 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex [InlineData(@"a(b)c", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")] // Empty capture unwraps to Empty node, which is skipped; extraction continues with subsequent content [InlineData(@"()ab", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] - // Atomic groups inside a Concatenate are not unwrapped (conservative); extraction stops before them - [InlineData(@"ab(?>cd)ef", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")] + // Atomic groups inside a Concatenate are unwrapped like Capture (atomicity only affects backtracking, not what's matched) + [InlineData(@"ab(?>cd)ef", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcdef")] + // Capture wrapping Atomic (and vice versa): while loop peels multiple wrapper layers + [InlineData(@"a((?>bc))d", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")] + [InlineData(@"a(?>(bc))d", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")] + // Capture containing fixed-count repeater: Setloop with M==N is extractable + [InlineData(@"(ab{3}c)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abbbc")] public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix) { RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options); From 4e606ac713d728db679f991ba0f7bad33287cd1a Mon Sep 17 00:00:00 2001 From: Dan Moseley Date: Wed, 25 Feb 2026 12:10:04 -0700 Subject: [PATCH 7/7] Fix typo: Capature -> Capture in test name Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- .../tests/UnitTests/RegexFindOptimizationsTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index d34d67215c7088..b08a8bbe46aa5e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -152,7 +152,7 @@ public void LeadingPrefix(string pattern, int options, int expectedMode, string [Fact] [OuterLoop("Stress test for deep nesting")] - public void LeadingPrefix_DeepCapatureNesting_DoesNotStackOverflow() + public void LeadingPrefix_DeepCaptureNesting_DoesNotStackOverflow() { // Deeply nested captures like (((((...))))) with IgnoreCase exercise the recursive // Capture-unwrapping path in TryGetOrdinalCaseInsensitiveString. Verify it doesn't SO.