Skip to content

[RegexDiff X64] [danmoseley] Handle Capture nodes in TryGetOrdinalCaseInsens ... #1792

@MihuBot

Description

@MihuBot

Job completed in 18 minutes 25 seconds (remote runner delay: 1 minute 5 seconds).
dotnet/runtime#124842
Using arguments: regexdiff
Main commit: dotnet/runtime@9511672
PR commit: danmoseley/runtime@311d473

180 out of 18857 patterns have generated source code changes.

Examples of GeneratedRegex source diffs
"\\b(in)\\b" (658 uses)
[GeneratedRegex("\\b(in)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 2 characters.
                     if (pos <= inputSpan.Length - 2)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_409072BF36F03A4496ACC585815833300ABA306360D979616ACDCED385DDC8FB);
+                        // The pattern has the literal "in" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_in_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_409072BF36F03A4496ACC585815833300ABA306360D979616ACDCED385DDC8FB = SearchValues.Create(["IN", "iN", "In", "in"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "in".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_in_OrdinalIgnoreCase = SearchValues.Create(["in"], StringComparison.OrdinalIgnoreCase);
     }
 }
"\\b(from).+(to)\\b.+" (316 uses)
[GeneratedRegex("\\b(from).+(to)\\b.+", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 8 characters.
                     if (pos <= inputSpan.Length - 8)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_DA0DF7757216159252C4FA00AB5982AAA4403D2C43304873401C53E36F92CA04);
+                        // The pattern has the literal "from" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_from_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_DA0DF7757216159252C4FA00AB5982AAA4403D2C43304873401C53E36F92CA04 = SearchValues.Create(["FROM", "fROM", "FrOM", "frOM", "FRoM", "fRoM", "FroM", "froM", "FROm", "fROm", "FrOm", "frOm", "FRom", "fRom", "From", "from"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "from".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_from_OrdinalIgnoreCase = SearchValues.Create(["from"], StringComparison.OrdinalIgnoreCase);
     }
 }
"(DATEADD|DATEPART)\\(\\s*(YEAR|Y|YY|YYYY|MON ..." (294 uses)
[GeneratedRegex("(DATEADD|DATEPART)\\(\\s*(YEAR|Y|YY|YYYY|MONTH|MM|M|DAYOFYEAR|DY|DAY|DD|D|WEEKDAY|DW|HOUR|HH|MINUTE|MI|N|SECOND|SS|S|MILLISECOND|MS)\\s*\\,", RegexOptions.IgnoreCase | RegexOptions.CultureInvariant)]
                     // Any possible match is at least 10 characters.
                     if (pos <= inputSpan.Length - 10)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_OrdinalIgnoreCase_2AC5E9CD8492EE9AF8BE2E7D112B6E7B0E2EB16F4F0FF47ECAA2B811EE26A081);
+                        // The pattern has the literal "date(" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_1DE7C48BB4BC0E30E65E38B4F39A75CA57C22461AE122A6380A42312C9E67BCA);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_OrdinalIgnoreCase_2AC5E9CD8492EE9AF8BE2E7D112B6E7B0E2EB16F4F0FF47ECAA2B811EE26A081 = SearchValues.Create(["dateadd", "datepart"], StringComparison.OrdinalIgnoreCase);
+        /// <summary>Supports searching for the string "date(".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_1DE7C48BB4BC0E30E65E38B4F39A75CA57C22461AE122A6380A42312C9E67BCA = SearchValues.Create(["date("], StringComparison.OrdinalIgnoreCase);
     }
 }
"\\b(et\\s*(le|la(s)?)?)\\b.+" (291 uses)
[GeneratedRegex("\\b(et\\s*(le|la(s)?)?)\\b.+", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 3 characters.
                     if (pos <= inputSpan.Length - 3)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_40190A5AE82B92C9577FE9A45CD09B22413116F9859390E6536F6EF2E5085EA1);
+                        // The pattern has the literal "et" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_et_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_40190A5AE82B92C9577FE9A45CD09B22413116F9859390E6536F6EF2E5085EA1 = SearchValues.Create(["ET", "eT", "Et", "et"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "et".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_et_OrdinalIgnoreCase = SearchValues.Create(["et"], StringComparison.OrdinalIgnoreCase);
     }
 }
"\\b(em)\\b" (200 uses)
[GeneratedRegex("\\b(em)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 2 characters.
                     if (pos <= inputSpan.Length - 2)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_00298CB1C9B37035848F363BE27E1EB54A4FE98FE07EEFB24B812417AC25856B);
+                        // The pattern has the literal "em" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_em_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_00298CB1C9B37035848F363BE27E1EB54A4FE98FE07EEFB24B812417AC25856B = SearchValues.Create(["EM", "eM", "Em", "em"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "em".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_em_OrdinalIgnoreCase = SearchValues.Create(["em"], StringComparison.OrdinalIgnoreCase);
     }
 }
"\\b(avant)\\b" (195 uses)
[GeneratedRegex("\\b(avant)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 5 characters.
                     if (pos <= inputSpan.Length - 5)
                     {
-                        // The pattern matches a character in the set [Vv] at index 1.
-                        // Find the next occurrence. If it can't be found, there's no match.
-                        ReadOnlySpan<char> span = inputSpan.Slice(pos);
-                        for (int i = 0; i < span.Length - 4; i++)
+                        // The pattern has the literal "avant" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_avant_OrdinalIgnoreCase);
+                        if (i >= 0)
                         {
-                            int indexOfPos = span.Slice(i + 1).IndexOfAny('V', 'v');
-                            if (indexOfPos < 0)
-                            {
-                                goto NoMatchFound;
-                            }
-                            i += indexOfPos;
-                            
-                            // The primary set being searched for was found. 2 more sets will be checked so as
-                            // to minimize the number of places TryMatchAtCurrentPosition is run unnecessarily.
-                            // Make sure they fit in the remainder of the input.
-                            if ((uint)(i + 3) >= (uint)span.Length)
-                            {
-                                goto NoMatchFound;
-                            }
-                            
-                            if (((span[i + 3] | 0x20) == 'n') &&
-                                ((span[i] | 0x20) == 'a'))
-                            {
-                                base.runtextpos = pos + i;
-                                return true;
-                            }
+                            base.runtextpos = pos + i;
+                            return true;
                         }
                     }
                     
                     // No match found.
-                    NoMatchFound:
                     base.runtextpos = inputSpan.Length;
                     return false;
                 }
             0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
+        
+        /// <summary>Supports searching for the string "avant".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_avant_OrdinalIgnoreCase = SearchValues.Create(["avant"], StringComparison.OrdinalIgnoreCase);
     }
 }
"(week)(\\s*)(?<number>\\d\\d|\\d|0\\d)" (194 uses)
[GeneratedRegex("(week)(\\s*)(?<number>\\d\\d|\\d|0\\d)", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 5 characters.
                     if (pos <= inputSpan.Length - 5)
                     {
-                        // The pattern matches a character in the set [Kk\u212A] at index 3.
-                        // Find the next occurrence. If it can't be found, there's no match.
-                        ReadOnlySpan<char> span = inputSpan.Slice(pos);
-                        for (int i = 0; i < span.Length - 4; i++)
+                        // The pattern has the literal "wee" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_wee_OrdinalIgnoreCase);
+                        if (i >= 0)
                         {
-                            int indexOfPos = span.Slice(i + 3).IndexOfAny('K', 'k', 'K');
-                            if (indexOfPos < 0)
-                            {
-                                goto NoMatchFound;
-                            }
-                            i += indexOfPos;
-                            
-                            if (((span[i] | 0x20) == 'w') &&
-                                ((span[i + 1] | 0x20) == 'e'))
-                            {
-                                base.runtextpos = pos + i;
-                                return true;
-                            }
+                            base.runtextpos = pos + i;
+                            return true;
                         }
                     }
                     
                     // No match found.
-                    NoMatchFound:
                     base.runtextpos = inputSpan.Length;
                     return false;
                 }
         
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
+        
+        /// <summary>Supports searching for the string "wee".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_wee_OrdinalIgnoreCase = SearchValues.Create(["wee"], StringComparison.OrdinalIgnoreCase);
     }
 }
"\\b(entre\\s*(le|la(s)?)?)\\b" (194 uses)
[GeneratedRegex("\\b(entre\\s*(le|la(s)?)?)\\b", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 5 characters.
                     if (pos <= inputSpan.Length - 5)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_3200475DE471EA58FF8C7B5F0CA4A9515EFACDBAA912EFAC506148E560A6D596);
+                        // The pattern has the literal "entre" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_entre_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
             0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07
         };
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_3200475DE471EA58FF8C7B5F0CA4A9515EFACDBAA912EFAC506148E560A6D596 = SearchValues.Create(["ENTR", "eNTR", "EnTR", "enTR", "ENtR", "eNtR", "EntR", "entR", "ENTr", "eNTr", "EnTr", "enTr", "ENtr", "eNtr", "Entr", "entr"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "entre".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_entre_OrdinalIgnoreCase = SearchValues.Create(["entre"], StringComparison.OrdinalIgnoreCase);
     }
 }
"(mes)(\\s*)((do|da|de))" (193 uses)
[GeneratedRegex("(mes)(\\s*)((do|da|de))", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 5 characters.
                     if (pos <= inputSpan.Length - 5)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_DC6FBF049DFCA75A0085CE45822CFFFBACDEEEF2607AA4096D769AC2377EF021);
+                        // The pattern has the literal "mes" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_mes_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_DC6FBF049DFCA75A0085CE45822CFFFBACDEEEF2607AA4096D769AC2377EF021 = SearchValues.Create(["MES", "mES", "MeS", "meS", "MEs", "mEs", "Mes", "mes"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "mes".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_mes_OrdinalIgnoreCase = SearchValues.Create(["mes"], StringComparison.OrdinalIgnoreCase);
     }
 }
"(semana)(\\s*)((do|da|de))" (193 uses)
[GeneratedRegex("(semana)(\\s*)((do|da|de))", RegexOptions.IgnoreCase | RegexOptions.Singleline)]
                     // Any possible match is at least 8 characters.
                     if (pos <= inputSpan.Length - 8)
                     {
-                        // The pattern has multiple strings that could begin the match. Search for any of them.
-                        // If none can be found, there's no match.
-                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfAnyStrings_Ordinal_1B7E1CD8AF955A2769ABD6F7FC469F9212B5B795E7DC6CF668A8EE08D2419045);
+                        // The pattern has the literal "semana" ordinal case-insensitive at the beginning of the pattern. Find the next occurrence.
+                        // If it can't be found, there's no match.
+                        int i = inputSpan.Slice(pos).IndexOfAny(Utilities.s_indexOfString_semana_OrdinalIgnoreCase);
                         if (i >= 0)
                         {
                             base.runtextpos = pos + i;
         /// <summary>Whether <see cref="s_defaultTimeout"/> is non-infinite.</summary>
         internal static readonly bool s_hasTimeout = s_defaultTimeout != Regex.InfiniteMatchTimeout;
         
-        /// <summary>Supports searching for the specified strings.</summary>
-        internal static readonly SearchValues<string> s_indexOfAnyStrings_Ordinal_1B7E1CD8AF955A2769ABD6F7FC469F9212B5B795E7DC6CF668A8EE08D2419045 = SearchValues.Create(["SEMA", "sEMA", "SeMA", "seMA", "SEmA", "sEmA", "SemA", "semA", "SEMa", "sEMa", "SeMa", "seMa", "SEma", "sEma", "Sema", "sema"], StringComparison.Ordinal);
+        /// <summary>Supports searching for the string "semana".</summary>
+        internal static readonly SearchValues<string> s_indexOfString_semana_OrdinalIgnoreCase = SearchValues.Create(["semana"], StringComparison.OrdinalIgnoreCase);
     }
 }

For more diff examples, see https://gist.github.com/MihuBot/4212adf85284694d34d378af2233fa23

JIT assembly changes
Total bytes of base: 54284087
Total bytes of diff: 54262264
Total bytes of delta: -21823 (-0.04 % of base)
Total relative delta: -31.17
    diff is an improvement.
    relative diff is an improvement.

For a list of JIT diff regressions, see Regressions.md
For a list of JIT diff improvements, see Improvements.md

Sample source code for further analysis
// Downloads the results archive once, caches its "Results.json" entry at JsonPath,
// then loads the cached file and reports how many pattern entries it contains.
const string JsonPath = "RegexResults-1792.json";
if (!File.Exists(JsonPath))
{
    // Own the client so its handler and connections are released once the download is done.
    using var httpClient = new HttpClient();
    await using var archiveStream = await httpClient.GetStreamAsync("https://mihubot.xyz/r/FHwNbpHA");
    using var archive = new ZipArchive(archiveStream, ZipArchiveMode.Read);

    // Fail with an explicit message instead of the generic "sequence contains no matching element".
    ZipArchiveEntry resultsEntry = archive.Entries.FirstOrDefault(e => e.Name == "Results.json")
        ?? throw new InvalidOperationException("The downloaded archive does not contain a 'Results.json' entry.");

    // Extract to a temporary file first: an interrupted extraction must never leave a truncated
    // file at JsonPath, because later runs treat any existing file there as a valid cache.
    string tempPath = JsonPath + ".tmp";
    resultsEntry.ExtractToFile(tempPath, overwrite: true);
    File.Move(tempPath, JsonPath, overwrite: true);
}

using FileStream jsonFileStream = File.OpenRead(JsonPath);

// IncludeFields is required because RegexEntry exposes value tuples, whose elements are public
// fields; System.Text.Json only reads fields when this option is enabled.
RegexEntry[] entries = JsonSerializer.Deserialize<RegexEntry[]>(jsonFileStream, new JsonSerializerOptions { IncludeFields = true })
    ?? throw new InvalidDataException($"'{JsonPath}' deserialized to null; expected a JSON array of entries.");
Console.WriteLine($"Working with {entries.Length} patterns");



/// <summary>
/// Identifies a regex by its pattern text and options, together with an associated count.
/// </summary>
/// <param name="Pattern">The regular expression pattern text.</param>
/// <param name="Options">The <see cref="RegexOptions"/> associated with the pattern.</param>
/// <param name="Count">
/// A count associated with the pattern. NOTE(review): presumably the number of times the
/// pattern is used across the analyzed corpus — confirm against the producer of the data.
/// </param>
record KnownPattern(string Pattern, RegexOptions Options, int Count);

/// <summary>
/// One element of the deserialized results array: a known pattern plus source and diff text for it.
/// </summary>
/// <remarks>
/// NOTE(review): the meaning of each member below is inferred from its name and type only;
/// confirm against whatever produces the JSON before relying on it.
/// </remarks>
sealed class RegexEntry
{
    /// <summary>The pattern, options, and count this entry refers to.</summary>
    public required KnownPattern Regex { get; set; }
    /// <summary>Source text for the pattern; presumably the code generated on the main commit — TODO confirm.</summary>
    public required string MainSource { get; set; }
    /// <summary>Source text for the pattern; presumably the code generated on the PR commit — TODO confirm.</summary>
    public required string PrSource { get; set; }
    /// <summary>Optional diff text; presumably the complete diff between the two sources — TODO confirm.</summary>
    public string? FullDiff { get; set; }
    /// <summary>Optional diff text; presumably an abbreviated form of the diff — TODO confirm.</summary>
    public string? ShortDiff { get; set; }
    /// <summary>Optional (name, values) pairs; presumably character search-value sets referenced by the source — TODO confirm.</summary>
    public (string Name, string Values)[]? SearchValuesOfChar { get; set; }
    /// <summary>Optional (values, comparison type) pairs; presumably string search-value sets referenced by the source — TODO confirm.</summary>
    public (string[] Values, StringComparison ComparisonType)[]? SearchValuesOfString { get; set; }
}

Artifacts:

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions