dotnet · danmoseley · Feb 24, 2026 · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -2953,8 +2953,13 @@ RegexNodeKind.Onelazy or RegexNodeKind.Oneloop or RegexNodeKind.Oneloopatomic or
         /// consumed. true is only valid when used as part of a search to determine where to try a full match, not as part of
         /// actual matching logic.
         /// </param>
+        /// <param name="unwrapCaptures">
+        /// Defaults to false. When true, Capture and Atomic nodes are transparently unwrapped so the string inside such a
+        /// group can be extracted. This must only be set to true for prefix analysis, not for the compiler/source generator,
+        /// as the compiler must not skip Capture nodes (they have side effects that need to be emitted).
+        /// </param>
         /// <returns>true if a sequence was found; otherwise, false.</returns>
-        public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChildBound, out int nodesConsumed, [NotNullWhen(true)] out string? caseInsensitiveString, bool consumeZeroWidthNodes = false)
+        public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChildBound, out int nodesConsumed, [NotNullWhen(true)] out string? caseInsensitiveString, bool consumeZeroWidthNodes = false, bool unwrapCaptures = false)
         {
             Debug.Assert(Kind == RegexNodeKind.Concatenate, $"Expected Concatenate, got {Kind}");
 
@@ -2969,6 +2974,19 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil
             {
                 RegexNode child = Child(i);
 
+                // When used for prefix analysis (unwrapCaptures is true), unwrap capture
+                // groups and atomic groups so their contents can be examined. Capture unwrapping
+                // must not be done when used by the compiler/source generator, as it would cause
+                // capture side effects to be skipped. Atomic groups only affect backtracking, not
+                // what text is matched, so they are safe to unwrap for prefix analysis as well.
+                if (unwrapCaptures)
+                {
+                    while (child.Kind is RegexNodeKind.Capture or RegexNodeKind.Atomic)
+                    {
+                        child = child.Child(0);
+                    }
+                }
+
                 if (child.Kind is RegexNodeKind.One)
                 {
                     // We only want to include ASCII characters, and only if they don't participate in case conversion
@@ -3006,6 +3024,26 @@ public bool TryGetOrdinalCaseInsensitiveString(int childIndex, int exclusiveChil
 
                     vsb.Append((char)(twoChars[0] | 0x20), child.Kind is RegexNodeKind.Set ? 1 : child.M);
                 }
+                else if (child.Kind is RegexNodeKind.Concatenate)
+                {
+                    // This can occur after unwrapping a Capture or Atomic whose child is a Concatenate.
+                    // Recurse to extract any case-insensitive string from the inner concatenation.
+                    if (!StackHelper.TryEnsureSufficientExecutionStack() ||
+                        !child.TryGetOrdinalCaseInsensitiveString(0, child.ChildCount(), out int innerNodesConsumed, out string? innerStr, consumeZeroWidthNodes, unwrapCaptures))
+                    {
+                        break;
+                    }
+
+                    vsb.Append(innerStr);
+
+                    // If the inner concatenation wasn't fully consumed, we can't continue past it
+                    // as subsequent siblings aren't guaranteed to immediately follow the extracted prefix.
+                    if (innerNodesConsumed < child.ChildCount())
+                    {
+                        i++;
+                        break;
+                    }
+                }
                 else if (child.Kind is RegexNodeKind.Empty)
                 {
                     // Skip over empty nodes, as they're pure nops. They would ideally have been optimized away,

diff --git a/.../System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/.../System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
@@ -495,7 +495,7 @@ static bool Process(RegexNode node, ref ValueStringBuilder vsb)
                         continue;
 
                     case RegexNodeKind.Concatenate:
-                        node.TryGetOrdinalCaseInsensitiveString(0, node.ChildCount(), out _, out string? caseInsensitiveString, consumeZeroWidthNodes: true);
+                        node.TryGetOrdinalCaseInsensitiveString(0, node.ChildCount(), out _, out string? caseInsensitiveString, consumeZeroWidthNodes: true, unwrapCaptures: true);
                         return caseInsensitiveString;
 
                     default:

diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs
@@ -120,13 +120,53 @@ public void TrailingAnchor(string pattern, int options, int expectedMode, int ex
         [InlineData(@"(?<=cd)ab", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingString_RightToLeft, "ab")]
         [InlineData(@"\bab(?=\w)(?!=\d)c\b", 0, (int)FindNextStartingPositionMode.LeadingString_LeftToRight, "abc")]
         [InlineData(@"\bab(?=\w)(?!=\d)c\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        // Capture groups should be transparent to ordinal case-insensitive prefix extraction
+        [InlineData(@"(abc)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        [InlineData(@"\b(in)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "in")]
+        [InlineData(@"\b(from).+(to)\b", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "from")]
+        // Partial capture: inner Concatenate not fully consumed, followed by non-letter One('(')
+        [InlineData(@"(abcde|abcfg)\(", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        // Partial capture: inner Concatenate not fully consumed, followed by letter Set([Ee])
+        [InlineData(@"(abc|abd)e", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")]
+        // Adjacent captures: both fully consumed via inner Concatenate recursion, extraction continues across capture boundaries
+        [InlineData(@"(ab)(cd)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")]
+        // Non-capture content before capture: tests Set processing then Capture unwrap in same Concatenate iteration
+        [InlineData(@"ab(cd)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")]
+        // Single-char capture unwraps to Set (not Concatenate), exercises direct Set handling after Capture unwrap
+        [InlineData(@"a(b)c", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abc")]
+        // Empty capture unwraps to Empty node, which is skipped; extraction continues with subsequent content
+        [InlineData(@"()ab", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "ab")]
+        // Atomic groups inside a Concatenate are unwrapped like Capture (atomicity only affects backtracking, not what's matched)
+        [InlineData(@"ab(?>cd)ef", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcdef")]
+        // Capture wrapping Atomic (and vice versa): while loop peels multiple wrapper layers
+        [InlineData(@"a((?>bc))d", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")]
+        [InlineData(@"a(?>(bc))d", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abcd")]
+        // Capture containing fixed-count repeater: Setloop with M==N is extractable
+        [InlineData(@"(ab{3}c)", (int)RegexOptions.IgnoreCase, (int)FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight, "abbbc")]
         public void LeadingPrefix(string pattern, int options, int expectedMode, string expectedPrefix)
         {
             RegexFindOptimizations opts = ComputeOptimizations(pattern, (RegexOptions)options);
             Assert.Equal((FindNextStartingPositionMode)expectedMode, opts.FindMode);
             Assert.Equal(expectedPrefix, opts.LeadingPrefix);
         }
 
+        [Fact]
+        [OuterLoop("Stress test for deep nesting")]
+        public void LeadingPrefix_DeepCaptureNesting_DoesNotStackOverflow()
+        {
+            // Wrap "ab" in many directly nested capture groups and compute the find optimizations with
+            // IgnoreCase. The primary requirement is that the analysis finishes without a stack overflow.
+            const int NestingDepth = 2000;
+            string nestedPattern = $"{new string('(', NestingDepth)}ab{new string(')', NestingDepth)}";
+            RegexFindOptimizations findOptimizations = ComputeOptimizations(nestedPattern, RegexOptions.IgnoreCase);
+
+            // Extraction is permitted to give up (e.g. on stack limits); if it succeeds, the prefix must be "ab".
+            if (findOptimizations.FindMode is FindNextStartingPositionMode.LeadingString_OrdinalIgnoreCase_LeftToRight)
+            {
+                Assert.Equal("ab", findOptimizations.LeadingPrefix);
+            }
+        }
+
         [Theory]
         [InlineData(@"[ab]", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "ab")]
         [InlineData(@"[Aa]", 0, (int)FindNextStartingPositionMode.LeadingSet_LeftToRight, "Aa")]