From 87ad04dcb40fa250800d25b0fd3a3204c1e1a34e Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 14:22:22 +0100 Subject: [PATCH 01/12] Add previous non-accurate but faster version of partial ratio strategy It's available by setting GlobalConfig.PartialRatioAccuracy = PartialRatioAccuracy.Fast; --- FuzzySharp/FuzzySharp.csproj | 8 +-- FuzzySharp/GlobalConfig.cs | 16 ++++++ FuzzySharp/Levenshtein.Static.cs | 4 +- .../PartialTokenDifferenceScorer.cs | 2 +- .../Generic/FastPartialRatioStrategyT.cs | 50 +++++++++++++++++++ .../Strategy/Generic/PartialRatioStrategyT.cs | 6 +-- .../Strategy/PartialRatioStrategy.cs | 41 +++++++++++---- 7 files changed, 107 insertions(+), 20 deletions(-) create mode 100644 FuzzySharp/GlobalConfig.cs create mode 100644 FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 03a4ad4..b5733a3 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,10 +1,10 @@  - 3.0.6.0 - 3.0.6.0 - 3.0.6 - 3.0.6 + 3.0.7.0 + 3.0.7.0 + 3.0.7 + 3.0.7 Jacob Bayer;Yevhen Cherkes diff --git a/FuzzySharp/GlobalConfig.cs b/FuzzySharp/GlobalConfig.cs new file mode 100644 index 0000000..42591f5 --- /dev/null +++ b/FuzzySharp/GlobalConfig.cs @@ -0,0 +1,16 @@ +namespace Raffinert.FuzzySharp; + +public static class GlobalConfig +{ + public static PartialRatioAccuracy PartialRatioAccuracy + { + get => SimilarityRatio.Strategy.PartialRatioStrategy.Accuracy; + set => SimilarityRatio.Strategy.PartialRatioStrategy.Accuracy = value; + } +} + +public enum PartialRatioAccuracy +{ + Strict, + Fast +} \ No newline at end of file diff --git a/FuzzySharp/Levenshtein.Static.cs b/FuzzySharp/Levenshtein.Static.cs index 6b12aac..dcf48bb 100644 --- a/FuzzySharp/Levenshtein.Static.cs +++ b/FuzzySharp/Levenshtein.Static.cs @@ -110,9 +110,9 @@ public static List GetMatchingBlocks(ReadOnlySpan s1, ReadO /// First sequence. /// Second sequence. /// List of matching blocks. - public static List GetMatchingBlocks(T[] s1, T[] s2) where T : IEquatable + public static List GetMatchingBlocks(ReadOnlySpan s1, ReadOnlySpan s2) where T : IEquatable { - var editOps = GetEditOps(new ReadOnlySpan(s1), new ReadOnlySpan(s2)); + var editOps = GetEditOps(s1, s2); var matchingBlocks = editOps.AsMatchingBlocks(s1.Length, s2.Length); return matchingBlocks; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs index 62d418f..45ba63d 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs @@ -5,5 +5,5 @@ namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; public sealed class PartialTokenDifferenceScorer : TokenDifferenceScorerBase { - protected override Func Scorer => PartialRatioStrategy.Calculate; + protected override Func Scorer => static (strings1, strings2) => PartialRatioStrategy.Calculate(strings1.AsSpan(), strings2.AsSpan()); } \ No newline at end of file diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs new file mode 100644 index 0000000..5e73847 --- /dev/null +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs @@ -0,0 +1,50 @@ +using Raffinert.FuzzySharp.Utils; +using System; + +namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; + +internal static class FastPartialRatioStrategyT where T : IEquatable +{ + public static int Calculate(ReadOnlySpan input1, ReadOnlySpan input2) + { + if (input1.Length == 0 || input2.Length == 0) + { + return 0; + } + + var shorter = input1; + var longer = input2; + + SequenceUtils.SwapIfSourceIsLonger(ref shorter, ref longer); + + var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + + double maxScore = 0; + + foreach (var matchingBlock in matchingBlocks) + { + int dist = matchingBlock.DestPos - matchingBlock.SourcePos; + + int longStart = dist > 0 ? dist : 0; + int longEnd = longStart + shorter.Length; + + if (longEnd > longer.Length) longEnd = longer.Length; + + var longSubstr = longer[longStart..longEnd]; + + double ratio = Indel.NormalizedSimilarity(shorter, longSubstr); + + if (ratio > .995) + { + return 100; + } + + if (ratio > maxScore) + { + maxScore = ratio; + } + } + + return (int)Math.Round(100 * maxScore); + } +} \ No newline at end of file diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 7820ed1..f87eb19 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -11,15 +11,15 @@ internal static class PartialRatioStrategy where T : IEquatable /// Searches for the optimal alignment of the shorter span in the longer span /// and returns the partial fuzz.ratio for that alignment, as a value in [0…100]. /// - public static int Calculate(T[] input1, T[] input2) + public static int Calculate(ReadOnlySpan input1, ReadOnlySpan input2) { if (input1.Length == 0 || input2.Length == 0) { return 0; } - var shorter = (ReadOnlySpan)input1; - var longer = (ReadOnlySpan)input2; + var shorter = input1; + var longer = input2; SequenceUtils.SwapIfSourceIsLonger(ref shorter, ref longer); diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 7db9525..5d290b7 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,11 +1,37 @@ -using Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; -using Raffinert.FuzzySharp.Utils; -using System; +using System; +using System.Threading; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy; internal static class PartialRatioStrategy { + private static PartialRatioAccuracy _accuracy = PartialRatioAccuracy.Strict; + + internal delegate int PartialRatio(ReadOnlySpan shorter, ReadOnlySpan longer); + + private static PartialRatio _partialRatioImpl = PartialRatioStrategy.Calculate; + + public static PartialRatioAccuracy Accuracy + { + get => _accuracy; + set + { + if (_accuracy != value) + { + PartialRatio partialRatioImpl = value switch + { + PartialRatioAccuracy.Strict => PartialRatioStrategy.Calculate, + PartialRatioAccuracy.Fast => FastPartialRatioStrategyT.Calculate, + _ => throw new ArgumentOutOfRangeException(nameof(value), "Unsupported accuracy mode.") + }; + + Interlocked.Exchange(ref _partialRatioImpl, partialRatioImpl); + _accuracy = value; + } + } + } + /// /// Searches for the optimal alignment of the shorter span in the longer span /// and returns the partial fuzz.ratio for that alignment, as a value in [0…100]. @@ -17,13 +43,8 @@ public static int Calculate(string input1, string input2) return 0; } - var shorter = input1.AsSpan(); - var longer = input2.AsSpan(); - - SequenceUtils.SwapIfSourceIsLonger(ref shorter, ref longer); - - var alignment = PartialRatioStrategy.PartialRatioAlignment(shorter, longer); + var score = _partialRatioImpl(input1.AsSpan(), input2.AsSpan()); - return (int)Math.Round(alignment.Score); + return score; } } \ No newline at end of file From c11d5cfff8998ea093add9f196d2e3817b4c7f41 Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 14:55:12 +0100 Subject: [PATCH 02/12] add benchmarks --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 8 +++++++- .../Strategy/Generic/FastPartialRatioStrategyT.cs | 5 ----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index f7aea5c..fc26509 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -8,6 +8,12 @@ namespace Raffinert.FuzzySharp.Benchmarks; [MemoryDiagnoser] public class BenchmarkAll { + [GlobalSetup] + public void GlobalSetup() + { + GlobalConfig.PartialRatioAccuracy = PartialRatioAccuracy.Strict; + } + [Benchmark] public int Ratio() { @@ -86,7 +92,7 @@ public int PartialTokenAbbreviationRatio() { return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); } - + [Benchmark] public int RatioClassic() { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs index 5e73847..f33a27d 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs @@ -7,11 +7,6 @@ internal static class FastPartialRatioStrategyT where T : IEquatable { public static int Calculate(ReadOnlySpan input1, ReadOnlySpan input2) { - if (input1.Length == 0 || input2.Length == 0) - { - return 0; - } - var shorter = input1; var longer = input2; From 6ea5c5a7c5360f01890a3d5de055abc062d8880c Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 15:20:01 +0100 Subject: [PATCH 03/12] add new benchmark --- FuzzySharp.Benchmarks/BenchmarkFastPartial.cs | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 FuzzySharp.Benchmarks/BenchmarkFastPartial.cs diff --git a/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs b/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs new file mode 100644 index 0000000..b1bfc80 --- /dev/null +++ b/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs @@ -0,0 +1,50 @@ +using BenchmarkDotNet.Attributes; +using Raffinert.FuzzySharp.PreProcess; + +namespace Raffinert.FuzzySharp.Benchmarks; + +[MemoryDiagnoser] +public class BenchmarkFastPartial +{ + [GlobalSetup] + public void GlobalSetup() + { + GlobalConfig.PartialRatioAccuracy = PartialRatioAccuracy.Fast; + } + + [Benchmark] + public int PartialRatio() + { + return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + } + + [Benchmark] + public int PartialTokenSortRatio() + { + return Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int PartialTokenSetRatio() + { + return Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int WeightedRatio() + { + return Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + } + + [Benchmark] + public int PartialTokenInitialismRatio() + { + return Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int PartialTokenAbbreviationRatio() + { + return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } +} \ No newline at end of file From 8228dd243d5fea0b78fb7fa09f4beca8124f710f Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 16:53:17 +0100 Subject: [PATCH 04/12] further optimizations --- ...p.Benchmarks.BenchmarkAll-report-github.md | 82 +++++++------- ...Distance.LevenshteinLarge-report-github.md | 24 ++-- ...istance.LevenshteinNormal-report-github.md | 20 ++-- ...Distance.LevenshteinSmall-report-github.md | 24 ++-- .../FuzzySharp.Benchmarks.csproj | 2 +- FuzzySharp.Benchmarks/Program.cs | 4 +- FuzzySharp.Test/FuzzySharp.Test.csproj | 2 +- FuzzySharp/FuzzySharp.csproj | 2 +- FuzzySharp/Indel.Static.cs | 2 + FuzzySharp/LongestCommonSequence.Static.cs | 106 ++++++++++-------- .../Generic/FastPartialRatioStrategyT.cs | 53 ++++++--- FuzzySharp/Utils/CharMaskBuffer.cs | 7 ++ 12 files changed, 184 insertions(+), 144 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md index cbdffe3..f23ab1c 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md @@ -1,48 +1,48 @@ ``` -BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) -12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores -.NET SDK 9.0.301 - [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 - ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 +BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) +11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores +.NET SDK 10.0.101 + [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI + ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Median | Gen0 | Gen1 | Allocated | -|------------------------------------- |------------:|--------------:|-------------:|------------:|-------:|-------:|----------:| -| Ratio | 262.0 ns | 164.34 ns | 9.01 ns | 260.3 ns | 0.0191 | - | 120 B | -| PartialRatio | 2,186.5 ns | 2,186.50 ns | 119.85 ns | 2,173.6 ns | 1.5869 | - | 9960 B | -| TokenSortRatio | 869.5 ns | 1,864.63 ns | 102.21 ns | 858.8 ns | 0.1087 | - | 688 B | -| PartialTokenSortRatio | 4,660.6 ns | 5,317.95 ns | 291.49 ns | 4,567.8 ns | 2.9068 | 0.0076 | 18264 B | -| TokenSetRatio | 1,242.0 ns | 1,435.29 ns | 78.67 ns | 1,221.8 ns | 0.3910 | - | 2464 B | -| PartialTokenSetRatio | 5,222.5 ns | 4,743.26 ns | 259.99 ns | 5,223.6 ns | 3.2501 | - | 20392 B | -| WeightedRatio | 5,414.7 ns | 2,906.83 ns | 159.33 ns | 5,489.6 ns | 0.8240 | - | 5184 B | -| TokenInitialismRatio1 | 220.9 ns | 121.77 ns | 6.67 ns | 217.5 ns | 0.0815 | - | 512 B | -| TokenInitialismRatio2 | 265.9 ns | 869.88 ns | 47.68 ns | 259.5 ns | 0.0739 | - | 464 B | -| TokenInitialismRatio3 | 485.0 ns | 2,062.09 ns | 113.03 ns | 466.5 ns | 0.1297 | - | 816 B | -| PartialTokenInitialismRatio | 723.0 ns | 1,349.58 ns | 73.98 ns | 760.4 ns | 0.3500 | - | 2200 B | -| TokenAbbreviationRatio | 694.1 ns | 106.14 ns | 5.82 ns | 696.2 ns | 0.2737 | - | 1720 B | -| PartialTokenAbbreviationRatio | 1,080.5 ns | 4,773.68 ns | 261.66 ns | 1,016.2 ns | 0.3672 | 0.0010 | 2304 B | -| RatioClassic | 276.9 ns | 85.32 ns | 4.68 ns | 278.7 ns | 0.0505 | - | 320 B | -| PartialRatioClassic | 1,172.2 ns | 1,136.42 ns | 62.29 ns | 1,179.3 ns | 0.5360 | 0.0019 | 3368 B | -| TokenSortRatioClassic | 1,462.9 ns | 1,100.31 ns | 60.31 ns | 1,437.7 ns | 0.3414 | - | 2152 B | -| PartialTokenSortRatioClassic | 1,693.2 ns | 3,658.44 ns | 200.53 ns | 1,601.1 ns | 0.3929 | - | 2472 B | -| TokenSetRatioClassic | 2,261.7 ns | 3,267.86 ns | 179.12 ns | 2,178.5 ns | 0.6714 | - | 4224 B | -| PartialTokenSetRatioClassic | 2,710.2 ns | 3,688.19 ns | 202.16 ns | 2,596.2 ns | 0.9079 | - | 5712 B | -| WeightedRatioClassic | 11,356.2 ns | 8,422.49 ns | 461.66 ns | 11,256.2 ns | 2.0294 | - | 12770 B | -| TokenInitialismRatio1Classic | 485.8 ns | 337.46 ns | 18.50 ns | 477.7 ns | 0.1440 | - | 904 B | -| TokenInitialismRatio2Classic | 425.6 ns | 388.93 ns | 21.32 ns | 415.8 ns | 0.1173 | - | 736 B | -| TokenInitialismRatio3Classic | 920.9 ns | 583.42 ns | 31.98 ns | 926.2 ns | 0.2470 | - | 1552 B | -| PartialTokenInitialismRatioClassic | 1,249.5 ns | 1,351.62 ns | 74.09 ns | 1,270.7 ns | 0.3414 | - | 2144 B | -| TokenAbbreviationRatioClassic | 1,380.1 ns | 1,054.49 ns | 57.80 ns | 1,373.7 ns | 0.4749 | - | 2984 B | -| PartialTokenAbbreviationRatioClassic | 1,578.8 ns | 1,061.68 ns | 58.19 ns | 1,558.4 ns | 0.6199 | - | 3896 B | -| ExtractOne | 12,119.2 ns | 10,269.77 ns | 562.92 ns | 11,880.5 ns | 1.9379 | - | 12208 B | -| ExtractOneClassic | 34,581.9 ns | 335,434.41 ns | 18,386.29 ns | 24,937.2 ns | 4.4556 | - | 28003 B | -| FuzzySharpClassicDistance | 1,211.0 ns | 3,749.37 ns | 205.52 ns | 1,152.3 ns | 0.0496 | - | 320 B | -| FuzzySharpDistance | 449.8 ns | 439.37 ns | 24.08 ns | 436.9 ns | 0.0191 | - | 120 B | -| FastenshteinDistance | 672.1 ns | 182.02 ns | 9.98 ns | 668.2 ns | 0.0229 | - | 144 B | -| FuzzySharpDistanceFrom | 125.8 ns | 112.56 ns | 6.17 ns | 123.8 ns | - | - | - | -| FastenshteinDistanceFrom | 810.2 ns | 1,648.46 ns | 90.36 ns | 805.1 ns | - | - | - | -| QuickenshteinDistance | 619.5 ns | 145.74 ns | 7.99 ns | 617.7 ns | - | - | - | +| Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | +|------------------------------------- |------------:|------------:|----------:|-------:|-------:|----------:| +| Ratio | 388.5 ns | 1,136.19 ns | 62.28 ns | 0.0200 | - | 128 B | +| PartialRatio | 2,607.4 ns | 3,614.98 ns | 198.15 ns | 1.5869 | - | 9968 B | +| TokenSortRatio | 882.3 ns | 275.55 ns | 15.10 ns | 0.1106 | - | 696 B | +| PartialTokenSortRatio | 4,868.7 ns | 949.09 ns | 52.02 ns | 2.9068 | 0.0076 | 18272 B | +| TokenSetRatio | 1,335.0 ns | 525.48 ns | 28.80 ns | 0.3395 | - | 2136 B | +| PartialTokenSetRatio | 6,094.1 ns | 5,722.86 ns | 313.69 ns | 3.1967 | - | 20064 B | +| WeightedRatio | 6,620.3 ns | 1,842.75 ns | 101.01 ns | 0.7553 | - | 4768 B | +| TokenInitialismRatio1 | 212.8 ns | 58.15 ns | 3.19 ns | 0.0522 | - | 328 B | +| TokenInitialismRatio2 | 198.3 ns | 47.39 ns | 2.60 ns | 0.0508 | - | 320 B | +| TokenInitialismRatio3 | 267.9 ns | 478.62 ns | 26.23 ns | 0.0701 | - | 440 B | +| PartialTokenInitialismRatio | 677.1 ns | 63.48 ns | 3.48 ns | 0.2899 | - | 1824 B | +| TokenAbbreviationRatio | 856.2 ns | 439.38 ns | 24.08 ns | 0.2747 | - | 1728 B | +| PartialTokenAbbreviationRatio | 1,201.7 ns | 2,338.88 ns | 128.20 ns | 0.3681 | - | 2312 B | +| RatioClassic | 254.4 ns | 38.06 ns | 2.09 ns | 0.0505 | - | 320 B | +| PartialRatioClassic | 1,032.9 ns | 263.34 ns | 14.43 ns | 0.5360 | 0.0019 | 3368 B | +| TokenSortRatioClassic | 1,709.1 ns | 658.39 ns | 36.09 ns | 0.3414 | - | 2152 B | +| PartialTokenSortRatioClassic | 1,750.6 ns | 576.74 ns | 31.61 ns | 0.3929 | - | 2472 B | +| TokenSetRatioClassic | 2,320.4 ns | 363.94 ns | 19.95 ns | 0.6714 | - | 4224 B | +| PartialTokenSetRatioClassic | 2,652.7 ns | 2,332.74 ns | 127.87 ns | 0.9079 | - | 5712 B | +| WeightedRatioClassic | 11,768.8 ns | 1,745.34 ns | 95.67 ns | 2.0294 | - | 12770 B | +| TokenInitialismRatio1Classic | 541.3 ns | 238.84 ns | 13.09 ns | 0.1440 | - | 904 B | +| TokenInitialismRatio2Classic | 441.4 ns | 141.55 ns | 7.76 ns | 0.1173 | - | 736 B | +| TokenInitialismRatio3Classic | 1,064.5 ns | 1,128.52 ns | 61.86 ns | 0.2460 | - | 1552 B | +| PartialTokenInitialismRatioClassic | 1,172.7 ns | 155.41 ns | 8.52 ns | 0.3414 | - | 2144 B | +| TokenAbbreviationRatioClassic | 1,399.5 ns | 615.02 ns | 33.71 ns | 0.4749 | - | 2984 B | +| PartialTokenAbbreviationRatioClassic | 1,718.5 ns | 1,322.21 ns | 72.47 ns | 0.6199 | - | 3896 B | +| ExtractOne | 13,824.4 ns | 8,191.93 ns | 449.03 ns | 1.7700 | - | 11184 B | +| ExtractOneClassic | 23,707.0 ns | 5,805.21 ns | 318.20 ns | 4.4556 | - | 28003 B | +| FuzzySharpClassicDistance | 944.4 ns | 45.50 ns | 2.49 ns | 0.0505 | - | 320 B | +| FuzzySharpDistance | 565.6 ns | 117.29 ns | 6.43 ns | 0.0200 | - | 128 B | +| FastenshteinDistance | 896.9 ns | 147.47 ns | 8.08 ns | 0.0229 | - | 144 B | +| FuzzySharpDistanceFrom | 167.5 ns | 30.73 ns | 1.68 ns | - | - | - | +| FastenshteinDistanceFrom | 790.4 ns | 13.07 ns | 0.72 ns | - | - | - | +| QuickenshteinDistance | 898.5 ns | 3,012.57 ns | 165.13 ns | - | - | - | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md index 58e2154..a548927 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md @@ -1,19 +1,19 @@ ``` -BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) -12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores -.NET SDK 9.0.301 - [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 - ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 +BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) +11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores +.NET SDK 10.0.101 + [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI + ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | -|------------------ |-----------:|-----------:|----------:|------:|--------:|-----------:|-----------:|------------:|------------:| -| NaiveDp | 231.563 ms | 57.5403 ms | 3.1540 ms | 1.00 | 0.02 | 43500.0000 | 34500.0000 | 275312920 B | 1.000 | -| FuzzySharpClassic | 141.820 ms | 4.0905 ms | 0.2242 ms | 0.61 | 0.01 | - | - | 1545732 B | 0.006 | -| Fastenshtein | 123.356 ms | 13.0959 ms | 0.7178 ms | 0.53 | 0.01 | - | - | 34028 B | 0.000 | -| Quickenshtein | 12.918 ms | 12.8046 ms | 0.7019 ms | 0.06 | 0.00 | - | - | 12 B | 0.000 | -| FuzzySharp | 4.970 ms | 0.3311 ms | 0.0181 ms | 0.02 | 0.00 | - | - | 3051 B | 0.000 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | +|------------------ |-----------:|-----------:|-----------:|------:|--------:|-----------:|-----------:|------------:|------------:| +| NaiveDp | 275.644 ms | 328.375 ms | 17.9993 ms | 1.00 | 0.08 | 43500.0000 | 34500.0000 | 275312720 B | 1.000 | +| FuzzySharpClassic | 172.922 ms | 60.489 ms | 3.3156 ms | 0.63 | 0.04 | - | - | 1545632 B | 0.006 | +| Fastenshtein | 138.357 ms | 35.335 ms | 1.9368 ms | 0.50 | 0.03 | - | - | 33928 B | 0.000 | +| Quickenshtein | 12.897 ms | 3.267 ms | 0.1791 ms | 0.05 | 0.00 | - | - | 64 B | 0.000 | +| FuzzySharp | 6.589 ms | 1.923 ms | 0.1054 ms | 0.02 | 0.00 | - | - | 3337 B | 0.000 | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md index 416fd0a..0ddabff 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md @@ -1,10 +1,10 @@ ``` -BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) -12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores -.NET SDK 9.0.301 - [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 - ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 +BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) +11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores +.NET SDK 10.0.101 + [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI + ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 @@ -12,8 +12,8 @@ WarmupCount=3 ``` | Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | |------------------ |-----------:|------------:|----------:|------:|--------:|----------:|---------:|-----------:|------------:| -| NaiveDp | 8,613.1 μs | 4,977.60 μs | 272.84 μs | 1.00 | 0.04 | 1593.7500 | 203.1250 | 10012124 B | 1.000 | -| FuzzySharpClassic | 4,866.5 μs | 866.89 μs | 47.52 μs | 0.57 | 0.02 | 46.8750 | - | 300051 B | 0.030 | -| Fastenshtein | 4,076.7 μs | 1,265.24 μs | 69.35 μs | 0.47 | 0.01 | - | - | 7070 B | 0.001 | -| Quickenshtein | 1,330.2 μs | 111.30 μs | 6.10 μs | 0.15 | 0.00 | - | - | 2 B | 0.000 | -| FuzzySharp | 588.2 μs | 83.65 μs | 4.59 μs | 0.07 | 0.00 | - | - | 3041 B | 0.000 | +| NaiveDp | 9,463.2 μs | 8,455.64 μs | 463.48 μs | 1.00 | 0.06 | 1593.7500 | 203.1250 | 10012112 B | 1.000 | +| FuzzySharpClassic | 5,479.2 μs | 748.67 μs | 41.04 μs | 0.58 | 0.02 | 46.8750 | - | 300048 B | 0.030 | +| Fastenshtein | 4,542.1 μs | 1,550.30 μs | 84.98 μs | 0.48 | 0.02 | - | - | 7064 B | 0.001 | +| Quickenshtein | 1,455.0 μs | 147.24 μs | 8.07 μs | 0.15 | 0.01 | - | - | 1 B | 0.000 | +| FuzzySharp | 497.7 μs | 23.78 μs | 1.30 μs | 0.05 | 0.00 | - | - | 3203 B | 0.000 | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md index e02fa45..a6cdfd6 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md @@ -1,19 +1,19 @@ ``` -BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) -12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores -.NET SDK 9.0.301 - [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 - ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 +BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) +11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores +.NET SDK 10.0.101 + [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI + ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | -|------------------ |-----------:|----------:|---------:|------:|--------:|---------:|-------:|----------:|------------:| -| NaiveDp | 1,841.4 μs | 753.15 μs | 41.28 μs | 1.00 | 0.03 | 371.0938 | 9.7656 | 2335169 B | 1.000 | -| FuzzySharpClassic | 1,090.0 μs | 23.48 μs | 1.29 μs | 0.59 | 0.01 | 23.4375 | - | 149793 B | 0.064 | -| Fastenshtein | 860.4 μs | 80.93 μs | 4.44 μs | 0.47 | 0.01 | - | - | 3728 B | 0.002 | -| Quickenshtein | 531.9 μs | 52.00 μs | 2.85 μs | 0.29 | 0.01 | - | - | 1 B | 0.000 | -| FuzzySharp | 117.7 μs | 11.88 μs | 0.65 μs | 0.06 | 0.00 | 0.3662 | - | 3040 B | 0.001 | +| Method | Mean | Error | StdDev | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio | +|------------------ |-----------:|----------:|---------:|------:|---------:|-------:|----------:|------------:| +| NaiveDp | 1,898.5 μs | 385.79 μs | 21.15 μs | 1.00 | 371.0938 | 9.7656 | 2335168 B | 1.000 | +| FuzzySharpClassic | 1,123.4 μs | 191.99 μs | 10.52 μs | 0.59 | 23.4375 | - | 149792 B | 0.064 | +| Fastenshtein | 889.9 μs | 93.41 μs | 5.12 μs | 0.47 | - | - | 3728 B | 0.002 | +| Quickenshtein | 505.2 μs | 99.17 μs | 5.44 μs | 0.27 | - | - | - | 0.000 | +| FuzzySharp | 147.6 μs | 23.79 μs | 1.30 μs | 0.08 | 0.4883 | - | 3200 B | 0.001 | diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj index bf9a3a3..4fbc563 100644 --- a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -2,7 +2,7 @@ Exe - net9.0 + NET10.0 enable enable $(MSBuildProjectName) diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index 651fb45..b27ef8c 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -1,6 +1,7 @@ using BenchmarkDotNet.Configs; using BenchmarkDotNet.Jobs; using BenchmarkDotNet.Running; +using Raffinert.FuzzySharp.Benchmarks; //using Raffinert.FuzzySharp; //using Raffinert.FuzzySharp.SimilarityRatio; @@ -10,7 +11,8 @@ var config = ManualConfig.Create(DefaultConfig.Instance) .AddJob(Job.ShortRun); // ← built-in short run -BenchmarkRunner.Run(typeof(Program).Assembly, config); +//BenchmarkRunner.Run(typeof(Program).Assembly, config); +BenchmarkRunner.Run(config); //var input1 = "+30.0% Damage to Close Enemies [30.01%"; //var input2Collection = new[] diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 588c006..ebff19a 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -1,7 +1,7 @@  - netframework4.6.2;netframework4.7.2;NET8.0;NET9.0 + netframework4.6.2;netframework4.7.2;NET8.0;NET10.0 false 12.0 Raffinert.$(MSBuildProjectName) diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index b5733a3..1468cac 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -21,7 +21,7 @@ git https://github.com/Raffinert/FuzzySharp snupkg - netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net462;net472;net48;NET60;NET80;NET90 + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net462;net472;net48;NET60;NET80;NET90;NET10.0 Raffinert.$(MSBuildProjectName) Raffinert.$(MSBuildProjectName.Replace(" ", "_")) README.md diff --git a/FuzzySharp/Indel.Static.cs b/FuzzySharp/Indel.Static.cs index 5e9fc7d..b471651 100644 --- a/FuzzySharp/Indel.Static.cs +++ b/FuzzySharp/Indel.Static.cs @@ -43,6 +43,7 @@ public static int BlockDistance( /// Second sequence. /// Optional maximum normalized distance threshold. If the distance exceeds this value, returns 1. /// The normalized Indel distance between the two sequences. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double BlockNormalizedDistance( CharMaskBuffer block, ReadOnlySpan s1, @@ -68,6 +69,7 @@ public static double BlockNormalizedDistance( /// Second sequence. /// Optional minimum similarity threshold. If the similarity is below this value, returns 0. /// The normalized Indel similarity between the two sequences. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static double BlockNormalizedSimilarity( CharMaskBuffer block, ReadOnlySpan s1, diff --git a/FuzzySharp/LongestCommonSequence.Static.cs b/FuzzySharp/LongestCommonSequence.Static.cs index 8609414..218ab54 100644 --- a/FuzzySharp/LongestCommonSequence.Static.cs +++ b/FuzzySharp/LongestCommonSequence.Static.cs @@ -2,6 +2,7 @@ using Raffinert.FuzzySharp.Extensions; using Raffinert.FuzzySharp.Utils; using System; +using System.Buffers; using System.Collections.Generic; using System.Runtime.CompilerServices; @@ -348,6 +349,7 @@ internal static int SimilarityImpl( /// Second sequence (text). /// Optional minimum similarity threshold. /// The length of the longest common subsequence, or 0 if below cutoff. + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int BlockSimilarityMultipleULongs( CharMaskBuffer block, ReadOnlySpan s1, @@ -361,60 +363,65 @@ internal static int BlockSimilarityMultipleULongs( int len1 = s1.Length; int segCount = (len1 + 63) / 64; - // --- 2) prepare the \"all-ones up to len1\" mask and state S --- - ulong[] S = new ulong[segCount]; - for (int i = 0; i < segCount; i++) - S[i] = ulong.MaxValue; - // clear high bits in the final segment if len1 % 64 != 0 - int rem = len1 & 63; - if (rem != 0) - S[segCount - 1] = (1UL << rem) - 1; - - // --- 3) main bit-parallel loop: S = (S + u) | (S - u) --- - foreach (T ch in s2) + var scratch = ArrayPool.Shared.Rent(segCount * 4); + try { - var M = block.GetOrZero(ch); + var S = scratch.AsSpan(0, segCount); + var u = scratch.AsSpan(segCount, segCount); + var add = scratch.AsSpan(segCount * 2, segCount); + var sub = scratch.AsSpan(segCount * 3, segCount); + + // --- 2) prepare the \"all-ones up to len1\" mask and state S --- + S.Fill(ulong.MaxValue); + int rem = len1 & 63; + if (rem != 0) + S[segCount - 1] = (1UL << rem) - 1; + + // --- 3) main bit-parallel loop: S = (S + u) | (S - u) --- + foreach (T ch in s2) + { + var M = block.GetOrZero(ch); - // u = S & M - var u = new ulong[segCount]; - for (int i = 0; i < segCount; i++) - u[i] = S[i] & M[i]; + // u = S & M + for (int i = 0; i < segCount; i++) + u[i] = S[i] & M[i]; - // add = S + u (multi-precision) - var add = new ulong[segCount]; - ulong carry = 0; - for (int i = 0; i < segCount; i++) - { - ulong sum = S[i] + u[i] + carry; - // carry if sum < S[i] or (carry==1 && sum==S[i]) - carry = sum < S[i] || (carry == 1 && sum == S[i]) ? 1UL : 0UL; - add[i] = sum; - } + // add = S + u (multi-precision) + ulong carry = 0; + for (int i = 0; i < segCount; i++) + { + ulong sum = S[i] + u[i] + carry; + carry = sum < S[i] || (carry == 1 && sum == S[i]) ? 1UL : 0UL; + add[i] = sum; + } - // sub = S - u (multi-precision) - var sub = new ulong[segCount]; - ulong borrow = 0; - for (int i = 0; i < segCount; i++) - { - ulong diff = S[i] - u[i] - borrow; - // borrow if original S[i] < u[i] + borrow - borrow = S[i] < u[i] + borrow ? 1UL : 0UL; - sub[i] = diff; - } + // sub = S - u (multi-precision) + ulong borrow = 0; + for (int i = 0; i < segCount; i++) + { + ulong diff = S[i] - u[i] - borrow; + borrow = S[i] < u[i] + borrow ? 1UL : 0UL; + sub[i] = diff; + } - // new S = add | sub - for (int i = 0; i < segCount; i++) - S[i] = add[i] | sub[i]; - } + // new S = add | sub + for (int i = 0; i < segCount; i++) + S[i] = add[i] | sub[i]; + } - // --- 4) count zero bits in the lower len1 positions of S --- - int lcs = CountZeroBits(S, len1); + // --- 4) count zero bits in the lower len1 positions of S --- + int lcs = CountZeroBits(S, len1); - var result = scoreCutoff == null || lcs >= scoreCutoff.Value - ? lcs - : 0; + var result = scoreCutoff == null || lcs >= scoreCutoff.Value + ? lcs + : 0; - return result; + return result; + } + finally + { + ArrayPool.Shared.Return(scratch); + } } private static int CountZeroBits(ulong x, int length) @@ -425,6 +432,11 @@ private static int CountZeroBits(ulong x, int length) } private static int CountZeroBits(ulong[] S, int length) + { + return CountZeroBits((ReadOnlySpan)S, length); + } + + private static int CountZeroBits(ReadOnlySpan S, int length) { int fullBlocks = length / 64; int remBits = length % 64; @@ -625,4 +637,4 @@ private static int SimilaritySingleULong(ReadOnlySpan s1, ReadOnlySpan int res = CountZeroBits(S, len1); return res; } -} \ No newline at end of file +} diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs index f33a27d..ab39e50 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs @@ -1,5 +1,6 @@ -using Raffinert.FuzzySharp.Utils; using System; +using System.Runtime.CompilerServices; +using Raffinert.FuzzySharp.Utils; namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; @@ -7,39 +8,55 @@ internal static class FastPartialRatioStrategyT where T : IEquatable { public static int Calculate(ReadOnlySpan input1, ReadOnlySpan input2) { + if (input1.Length == 0 || input2.Length == 0) + { + return 0; + } + var shorter = input1; var longer = input2; SequenceUtils.SwapIfSourceIsLonger(ref shorter, ref longer); - var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - - double maxScore = 0; - - foreach (var matchingBlock in matchingBlocks) - { - int dist = matchingBlock.DestPos - matchingBlock.SourcePos; + using var charMask = CharMask.Create(shorter); - int longStart = dist > 0 ? dist : 0; - int longEnd = longStart + shorter.Length; + var maxScore = ComputeMaxScore(shorter, longer, charMask); - if (longEnd > longer.Length) longEnd = longer.Length; - - var longSubstr = longer[longStart..longEnd]; + return (int)Math.Round(100 * maxScore); + } - double ratio = Indel.NormalizedSimilarity(shorter, longSubstr); + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static double ComputeMaxScore( + ReadOnlySpan shorter, + ReadOnlySpan longer, + CharMaskBuffer charMask) + { + double maxScore = 0; + var len1 = shorter.Length; + var len2 = longer.Length; - if (ratio > .995) + // Only full-length windows are required for partial ratio once strings are normalized. + for (var i = 0; i <= len2 - len1; i++) + { + // Cheap filter to skip windows that cannot improve the score. + if (!charMask.ContainsKey(longer[i + len1 - 1])) { - return 100; + continue; } + var window = longer.Slice(i, len1); + var ratio = Indel.BlockNormalizedSimilarity(charMask, shorter, window); + if (ratio > maxScore) { maxScore = ratio; + if (ratio >= 0.995) + { + return 1.0; + } } } - return (int)Math.Round(100 * maxScore); + return maxScore; } -} \ No newline at end of file +} diff --git a/FuzzySharp/Utils/CharMaskBuffer.cs b/FuzzySharp/Utils/CharMaskBuffer.cs index cecafce..a4bc92c 100644 --- a/FuzzySharp/Utils/CharMaskBuffer.cs +++ b/FuzzySharp/Utils/CharMaskBuffer.cs @@ -67,6 +67,13 @@ private void GrowBuffer() _capacity = newCapacity; } + public bool ContainsKey(T key) + { + if (_disposed) throw new ObjectDisposedException(nameof(CharMaskBuffer)); + + return _indexMap.ContainsKey(key); + } + public bool TryGetMask(T key, out ReadOnlySpan mask) { if (_disposed) throw new ObjectDisposedException(nameof(CharMaskBuffer)); From 337963d03045903cddee93ebd20e3a1e01e7d56a Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 16:55:42 +0100 Subject: [PATCH 05/12] revert benchmarks --- ...p.Benchmarks.BenchmarkAll-report-github.md | 82 +++++++++---------- ...Distance.LevenshteinLarge-report-github.md | 24 +++--- ...istance.LevenshteinNormal-report-github.md | 20 ++--- ...Distance.LevenshteinSmall-report-github.md | 24 +++--- 4 files changed, 75 insertions(+), 75 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md index f23ab1c..cbdffe3 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md @@ -1,48 +1,48 @@ ``` -BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) -11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores -.NET SDK 10.0.101 - [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI - ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI +BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) +12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores +.NET SDK 9.0.301 + [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 + ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Gen0 | Gen1 | Allocated | -|------------------------------------- |------------:|------------:|----------:|-------:|-------:|----------:| -| Ratio | 388.5 ns | 1,136.19 ns | 62.28 ns | 0.0200 | - | 128 B | -| PartialRatio | 2,607.4 ns | 3,614.98 ns | 198.15 ns | 1.5869 | - | 9968 B | -| TokenSortRatio | 882.3 ns | 275.55 ns | 15.10 ns | 0.1106 | - | 696 B | -| PartialTokenSortRatio | 4,868.7 ns | 949.09 ns | 52.02 ns | 2.9068 | 0.0076 | 18272 B | -| TokenSetRatio | 1,335.0 ns | 525.48 ns | 28.80 ns | 0.3395 | - | 2136 B | -| PartialTokenSetRatio | 6,094.1 ns | 5,722.86 ns | 313.69 ns | 3.1967 | - | 20064 B | -| WeightedRatio | 6,620.3 ns | 1,842.75 ns | 101.01 ns | 0.7553 | - | 4768 B | -| TokenInitialismRatio1 | 212.8 ns | 58.15 ns | 3.19 ns | 0.0522 | - | 328 B | -| TokenInitialismRatio2 | 198.3 ns | 47.39 ns | 2.60 ns | 0.0508 | - | 320 B | -| TokenInitialismRatio3 | 267.9 ns | 478.62 ns | 26.23 ns | 0.0701 | - | 440 B | -| PartialTokenInitialismRatio | 677.1 ns | 63.48 ns | 3.48 ns | 0.2899 | - | 1824 B | -| TokenAbbreviationRatio | 856.2 ns | 439.38 ns | 24.08 ns | 0.2747 | - | 1728 B | -| PartialTokenAbbreviationRatio | 1,201.7 ns | 2,338.88 ns | 128.20 ns | 0.3681 | - | 2312 B | -| RatioClassic | 254.4 ns | 38.06 ns | 2.09 ns | 0.0505 | - | 320 B | -| PartialRatioClassic | 1,032.9 ns | 263.34 ns | 14.43 ns | 0.5360 | 0.0019 | 3368 B | -| TokenSortRatioClassic | 1,709.1 ns | 658.39 ns | 36.09 ns | 0.3414 | - | 2152 B | -| PartialTokenSortRatioClassic | 1,750.6 ns | 576.74 ns | 31.61 ns | 0.3929 | - | 2472 B | -| TokenSetRatioClassic | 2,320.4 ns | 363.94 ns | 19.95 ns | 0.6714 | - | 4224 B | -| PartialTokenSetRatioClassic | 2,652.7 ns | 2,332.74 ns | 127.87 ns | 0.9079 | - | 5712 B | -| WeightedRatioClassic | 11,768.8 ns | 1,745.34 ns | 95.67 ns | 2.0294 | - | 12770 B | -| TokenInitialismRatio1Classic | 541.3 ns | 238.84 ns | 13.09 ns | 0.1440 | - | 904 B | -| TokenInitialismRatio2Classic | 441.4 ns | 141.55 ns | 7.76 ns | 0.1173 | - | 736 B | -| TokenInitialismRatio3Classic | 1,064.5 ns | 1,128.52 ns | 61.86 ns | 0.2460 | - | 1552 B | -| PartialTokenInitialismRatioClassic | 1,172.7 ns | 155.41 ns | 8.52 ns | 0.3414 | - | 2144 B | -| TokenAbbreviationRatioClassic | 1,399.5 ns | 615.02 ns | 33.71 ns | 0.4749 | - | 2984 B | -| PartialTokenAbbreviationRatioClassic | 1,718.5 ns | 1,322.21 ns | 72.47 ns | 0.6199 | - | 3896 B | -| ExtractOne | 13,824.4 ns | 8,191.93 ns | 449.03 ns | 1.7700 | - | 11184 B | -| ExtractOneClassic | 23,707.0 ns | 5,805.21 ns | 318.20 ns | 4.4556 | - | 28003 B | -| FuzzySharpClassicDistance | 944.4 ns | 45.50 ns | 2.49 ns | 0.0505 | - | 320 B | -| FuzzySharpDistance | 565.6 ns | 117.29 ns | 6.43 ns | 0.0200 | - | 128 B | -| FastenshteinDistance | 896.9 ns | 147.47 ns | 8.08 ns | 0.0229 | - | 144 B | -| FuzzySharpDistanceFrom | 167.5 ns | 30.73 ns | 1.68 ns | - | - | - | -| FastenshteinDistanceFrom | 790.4 ns | 13.07 ns | 0.72 ns | - | - | - | -| QuickenshteinDistance | 898.5 ns | 3,012.57 ns | 165.13 ns | - | - | - | +| Method | Mean | Error | StdDev | Median | Gen0 | Gen1 | Allocated | +|------------------------------------- |------------:|--------------:|-------------:|------------:|-------:|-------:|----------:| +| Ratio | 262.0 ns | 164.34 ns | 9.01 ns | 260.3 ns | 0.0191 | - | 120 B | +| PartialRatio | 2,186.5 ns | 2,186.50 ns | 119.85 ns | 2,173.6 ns | 1.5869 | - | 9960 B | +| TokenSortRatio | 869.5 ns | 1,864.63 ns | 102.21 ns | 858.8 ns | 0.1087 | - | 688 B | +| PartialTokenSortRatio | 4,660.6 ns | 5,317.95 ns | 291.49 ns | 4,567.8 ns | 2.9068 | 0.0076 | 18264 B | +| TokenSetRatio | 1,242.0 ns | 1,435.29 ns | 78.67 ns | 1,221.8 ns | 0.3910 | - | 2464 B | +| PartialTokenSetRatio | 5,222.5 ns | 4,743.26 ns | 259.99 ns | 5,223.6 ns | 3.2501 | - | 20392 B | +| WeightedRatio | 5,414.7 ns | 2,906.83 ns | 159.33 ns | 5,489.6 ns | 0.8240 | - | 5184 B | +| TokenInitialismRatio1 | 220.9 ns | 121.77 ns | 6.67 ns | 217.5 ns | 0.0815 | - | 512 B | +| TokenInitialismRatio2 | 265.9 ns | 869.88 ns | 47.68 ns | 259.5 ns | 0.0739 | - | 464 B | +| TokenInitialismRatio3 | 485.0 ns | 2,062.09 ns | 113.03 ns | 466.5 ns | 0.1297 | - | 816 B | +| PartialTokenInitialismRatio | 723.0 ns | 1,349.58 ns | 73.98 ns | 760.4 ns | 0.3500 | - | 2200 B | +| TokenAbbreviationRatio | 694.1 ns | 106.14 ns | 5.82 ns | 696.2 ns | 0.2737 | - | 1720 B | +| PartialTokenAbbreviationRatio | 1,080.5 ns | 4,773.68 ns | 261.66 ns | 1,016.2 ns | 0.3672 | 0.0010 | 2304 B | +| RatioClassic | 276.9 ns | 85.32 ns | 4.68 ns | 278.7 ns | 0.0505 | - | 320 B | +| PartialRatioClassic | 1,172.2 ns | 1,136.42 ns | 62.29 ns | 1,179.3 ns | 0.5360 | 0.0019 | 3368 B | +| TokenSortRatioClassic | 1,462.9 ns | 1,100.31 ns | 60.31 ns | 1,437.7 ns | 0.3414 | - | 2152 B | +| PartialTokenSortRatioClassic | 1,693.2 ns | 3,658.44 ns | 200.53 ns | 1,601.1 ns | 0.3929 | - | 2472 B | +| TokenSetRatioClassic | 2,261.7 ns | 3,267.86 ns | 179.12 ns | 2,178.5 ns | 0.6714 | - | 4224 B | +| PartialTokenSetRatioClassic | 2,710.2 ns | 3,688.19 ns | 202.16 ns | 2,596.2 ns | 0.9079 | - | 5712 B | +| WeightedRatioClassic | 11,356.2 ns | 8,422.49 ns | 461.66 ns | 11,256.2 ns | 2.0294 | - | 12770 B | +| TokenInitialismRatio1Classic | 485.8 ns | 337.46 ns | 18.50 ns | 477.7 ns | 0.1440 | - | 904 B | +| TokenInitialismRatio2Classic | 425.6 ns | 388.93 ns | 21.32 ns | 415.8 ns | 0.1173 | - | 736 B | +| TokenInitialismRatio3Classic | 920.9 ns | 583.42 ns | 31.98 ns | 926.2 ns | 0.2470 | - | 1552 B | +| PartialTokenInitialismRatioClassic | 1,249.5 ns | 1,351.62 ns | 74.09 ns | 1,270.7 ns | 0.3414 | - | 2144 B | +| TokenAbbreviationRatioClassic | 1,380.1 ns | 1,054.49 ns | 57.80 ns | 1,373.7 ns | 0.4749 | - | 2984 B | +| PartialTokenAbbreviationRatioClassic | 1,578.8 ns | 1,061.68 ns | 58.19 ns | 1,558.4 ns | 0.6199 | - | 3896 B | +| ExtractOne | 12,119.2 ns | 10,269.77 ns | 562.92 ns | 11,880.5 ns | 1.9379 | - | 12208 B | +| ExtractOneClassic | 34,581.9 ns | 335,434.41 ns | 18,386.29 ns | 24,937.2 ns | 4.4556 | - | 28003 B | +| FuzzySharpClassicDistance | 1,211.0 ns | 3,749.37 ns | 205.52 ns | 1,152.3 ns | 0.0496 | - | 320 B | +| FuzzySharpDistance | 449.8 ns | 439.37 ns | 24.08 ns | 436.9 ns | 0.0191 | - | 120 B | +| FastenshteinDistance | 672.1 ns | 182.02 ns | 9.98 ns | 668.2 ns | 0.0229 | - | 144 B | +| FuzzySharpDistanceFrom | 125.8 ns | 112.56 ns | 6.17 ns | 123.8 ns | - | - | - | +| FastenshteinDistanceFrom | 810.2 ns | 1,648.46 ns | 90.36 ns | 805.1 ns | - | - | - | +| QuickenshteinDistance | 619.5 ns | 145.74 ns | 7.99 ns | 617.7 ns | - | - | - | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md index a548927..58e2154 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinLarge-report-github.md @@ -1,19 +1,19 @@ ``` -BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) -11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores -.NET SDK 10.0.101 - [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI - ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI +BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) +12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores +.NET SDK 9.0.301 + [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 + ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | -|------------------ |-----------:|-----------:|-----------:|------:|--------:|-----------:|-----------:|------------:|------------:| -| NaiveDp | 275.644 ms | 328.375 ms | 17.9993 ms | 1.00 | 0.08 | 43500.0000 | 34500.0000 | 275312720 B | 1.000 | -| FuzzySharpClassic | 172.922 ms | 60.489 ms | 3.3156 ms | 0.63 | 0.04 | - | - | 1545632 B | 0.006 | -| Fastenshtein | 138.357 ms | 35.335 ms | 1.9368 ms | 0.50 | 0.03 | - | - | 33928 B | 0.000 | -| Quickenshtein | 12.897 ms | 3.267 ms | 0.1791 ms | 0.05 | 0.00 | - | - | 64 B | 0.000 | -| FuzzySharp | 6.589 ms | 1.923 ms | 0.1054 ms | 0.02 | 0.00 | - | - | 3337 B | 0.000 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | +|------------------ |-----------:|-----------:|----------:|------:|--------:|-----------:|-----------:|------------:|------------:| +| NaiveDp | 231.563 ms | 57.5403 ms | 3.1540 ms | 1.00 | 0.02 | 43500.0000 | 34500.0000 | 275312920 B | 1.000 | +| FuzzySharpClassic | 141.820 ms | 4.0905 ms | 0.2242 ms | 0.61 | 0.01 | - | - | 1545732 B | 0.006 | +| Fastenshtein | 123.356 ms | 13.0959 ms | 0.7178 ms | 0.53 | 0.01 | - | - | 34028 B | 0.000 | +| Quickenshtein | 12.918 ms | 12.8046 ms | 0.7019 ms | 0.06 | 0.00 | - | - | 12 B | 0.000 | +| FuzzySharp | 4.970 ms | 0.3311 ms | 0.0181 ms | 0.02 | 0.00 | - | - | 3051 B | 0.000 | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md index 0ddabff..416fd0a 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinNormal-report-github.md @@ -1,10 +1,10 @@ ``` -BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) -11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores -.NET SDK 10.0.101 - [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI - ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI +BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) +12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores +.NET SDK 9.0.301 + [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 + ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 @@ -12,8 +12,8 @@ WarmupCount=3 ``` | Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | |------------------ |-----------:|------------:|----------:|------:|--------:|----------:|---------:|-----------:|------------:| -| NaiveDp | 9,463.2 μs | 8,455.64 μs | 463.48 μs | 1.00 | 0.06 | 1593.7500 | 203.1250 | 10012112 B | 1.000 | -| FuzzySharpClassic | 5,479.2 μs | 748.67 μs | 41.04 μs | 0.58 | 0.02 | 46.8750 | - | 300048 B | 0.030 | -| Fastenshtein | 4,542.1 μs | 1,550.30 μs | 84.98 μs | 0.48 | 0.02 | - | - | 7064 B | 0.001 | -| Quickenshtein | 1,455.0 μs | 147.24 μs | 8.07 μs | 0.15 | 0.01 | - | - | 1 B | 0.000 | -| FuzzySharp | 497.7 μs | 23.78 μs | 1.30 μs | 0.05 | 0.00 | - | - | 3203 B | 0.000 | +| NaiveDp | 8,613.1 μs | 4,977.60 μs | 272.84 μs | 1.00 | 0.04 | 1593.7500 | 203.1250 | 10012124 B | 1.000 | +| FuzzySharpClassic | 4,866.5 μs | 866.89 μs | 47.52 μs | 0.57 | 0.02 | 46.8750 | - | 300051 B | 0.030 | +| Fastenshtein | 4,076.7 μs | 1,265.24 μs | 69.35 μs | 0.47 | 0.01 | - | - | 7070 B | 0.001 | +| Quickenshtein | 1,330.2 μs | 111.30 μs | 6.10 μs | 0.15 | 0.00 | - | - | 2 B | 0.000 | +| FuzzySharp | 588.2 μs | 83.65 μs | 4.59 μs | 0.07 | 0.00 | - | - | 3041 B | 0.000 | diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md index a6cdfd6..e02fa45 100644 --- a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.LevenshteinDistance.LevenshteinSmall-report-github.md @@ -1,19 +1,19 @@ ``` -BenchmarkDotNet v0.15.2, Windows 11 (10.0.22621.6060/22H2/2022Update/SunValley2) -11th Gen Intel Core i7-1185G7 3.00GHz, 1 CPU, 8 logical and 4 physical cores -.NET SDK 10.0.101 - [Host] : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI - ShortRun : .NET 9.0.11 (9.0.1125.51716), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI +BenchmarkDotNet v0.15.1, Windows 11 (10.0.26100.4351/24H2/2024Update/HudsonValley) +12th Gen Intel Core i7-1255U 2.60GHz, 1 CPU, 12 logical and 10 physical cores +.NET SDK 9.0.301 + [Host] : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 + ShortRun : .NET 9.0.6 (9.0.625.26613), X64 RyuJIT AVX2 Job=ShortRun IterationCount=3 LaunchCount=1 WarmupCount=3 ``` -| Method | Mean | Error | StdDev | Ratio | Gen0 | Gen1 | Allocated | Alloc Ratio | -|------------------ |-----------:|----------:|---------:|------:|---------:|-------:|----------:|------------:| -| NaiveDp | 1,898.5 μs | 385.79 μs | 21.15 μs | 1.00 | 371.0938 | 9.7656 | 2335168 B | 1.000 | -| FuzzySharpClassic | 1,123.4 μs | 191.99 μs | 10.52 μs | 0.59 | 23.4375 | - | 149792 B | 0.064 | -| Fastenshtein | 889.9 μs | 93.41 μs | 5.12 μs | 0.47 | - | - | 3728 B | 0.002 | -| Quickenshtein | 505.2 μs | 99.17 μs | 5.44 μs | 0.27 | - | - | - | 0.000 | -| FuzzySharp | 147.6 μs | 23.79 μs | 1.30 μs | 0.08 | 0.4883 | - | 3200 B | 0.001 | +| Method | Mean | Error | StdDev | Ratio | RatioSD | Gen0 | Gen1 | Allocated | Alloc Ratio | +|------------------ |-----------:|----------:|---------:|------:|--------:|---------:|-------:|----------:|------------:| +| NaiveDp | 1,841.4 μs | 753.15 μs | 41.28 μs | 1.00 | 0.03 | 371.0938 | 9.7656 | 2335169 B | 1.000 | +| FuzzySharpClassic | 1,090.0 μs | 23.48 μs | 1.29 μs | 0.59 | 0.01 | 23.4375 | - | 149793 B | 0.064 | +| Fastenshtein | 860.4 μs | 80.93 μs | 4.44 μs | 0.47 | 0.01 | - | - | 3728 B | 0.002 | +| Quickenshtein | 531.9 μs | 52.00 μs | 2.85 μs | 0.29 | 0.01 | - | - | 1 B | 0.000 | +| FuzzySharp | 117.7 μs | 11.88 μs | 0.65 μs | 0.06 | 0.00 | 0.3662 | - | 3040 B | 0.001 | From 06036e0599a115d341e91a33c56d661842810554 Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 17:16:01 +0100 Subject: [PATCH 06/12] reuse ContainsKey from CharMask in PartialRatioStrategyT --- FuzzySharp.Benchmarks/Program.cs | 4 ++-- .../Strategy/Generic/PartialRatioStrategyT.cs | 19 ++++++++----------- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index b27ef8c..a4f83be 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -11,8 +11,8 @@ var config = ManualConfig.Create(DefaultConfig.Instance) .AddJob(Job.ShortRun); // ← built-in short run -//BenchmarkRunner.Run(typeof(Program).Assembly, config); -BenchmarkRunner.Run(config); +BenchmarkRunner.Run(typeof(Program).Assembly, config); +//BenchmarkRunner.Run(config); //var input1 = "+30.0% Damage to Close Enemies [30.01%"; //var input2Collection = new[] diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index f87eb19..c1f9644 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -143,17 +143,14 @@ private static ScoreAlignment PartialRatioImpl( if (len1 == 0 || len2 == 0) return res; - // Precompute s1’s character set for fast Contains - var charSet = new HashSet(s1.ToArray()); - - double? cutoff = scoreCutoff; + double cutoff = scoreCutoff ?? 0.0; // 1) Prefixes shorter than len1 for (int i = 1; i < len1; i++) { - if (!charSet.Contains(s2[i - 1])) continue; + if (!charMask.ContainsKey(s2[i - 1])) continue; var slice = s2[..i]; double sim = Indel.BlockNormalizedSimilarity(charMask, s1, slice); - if (sim > res.Score && (!cutoff.HasValue || sim >= cutoff.Value)) + if (sim > res.Score && sim >= cutoff) { res.Score = sim; cutoff = sim; @@ -166,10 +163,10 @@ private static ScoreAlignment PartialRatioImpl( // 2) Full-width windows of length len1 for (int i = 0; i <= len2 - len1; i++) { - if (!charSet.Contains(s2[i + len1 - 1])) continue; + if (!charMask.ContainsKey(s2[i + len1 - 1])) continue; var window = s2[i..(i + len1)]; double sim = Indel.BlockNormalizedSimilarity(charMask, s1, window); - if (sim > res.Score && (!cutoff.HasValue || sim >= cutoff.Value)) + if (sim > res.Score && sim >= cutoff) { res.Score = sim; cutoff = sim; @@ -182,10 +179,10 @@ private static ScoreAlignment PartialRatioImpl( // 3) Suffixes shorter than len1 for (int i = len2 - len1 + 1; i < len2; i++) { - if (!charSet.Contains(s2[i])) continue; + if (!charMask.ContainsKey(s2[i])) continue; var tail = s2[i..]; double sim = Indel.BlockNormalizedSimilarity(charMask, s1, tail); - if (sim > res.Score && (!cutoff.HasValue || sim >= cutoff.Value)) + if (sim > res.Score && sim >= cutoff) { res.Score = sim; cutoff = sim; @@ -201,4 +198,4 @@ private static ScoreAlignment PartialRatioImpl( } internal record struct ScoreAlignment(double Score, int SrcStart, int SrcEnd, int DestStart, int DestEnd); -} \ No newline at end of file +} From c982eebf1d311f8640f30cd9be6ef1411325e2cb Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 17:31:39 +0100 Subject: [PATCH 07/12] enable 2 benchmarks only --- FuzzySharp.Benchmarks/Program.cs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index a4f83be..ddd2497 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -11,8 +11,9 @@ var config = ManualConfig.Create(DefaultConfig.Instance) .AddJob(Job.ShortRun); // ← built-in short run -BenchmarkRunner.Run(typeof(Program).Assembly, config); +//BenchmarkRunner.Run(typeof(Program).Assembly, config); //BenchmarkRunner.Run(config); +BenchmarkRunner.Run([typeof(BenchmarkFastPartial), typeof(BenchmarkAll)], config); //var input1 = "+30.0% Damage to Close Enemies [30.01%"; //var input2Collection = new[] From d5e4f583f8283139bf1706fd1b5c5c63e0cfc44b Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 17:41:25 +0100 Subject: [PATCH 08/12] improve fast strategy --- .../Generic/FastPartialRatioStrategyT.cs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs index ab39e50..7584cd6 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/FastPartialRatioStrategyT.cs @@ -1,5 +1,6 @@ using System; using System.Runtime.CompilerServices; +using Raffinert.FuzzySharp; using Raffinert.FuzzySharp.Utils; namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; @@ -35,16 +36,20 @@ private static double ComputeMaxScore( var len1 = shorter.Length; var len2 = longer.Length; - // Only full-length windows are required for partial ratio once strings are normalized. - for (var i = 0; i <= len2 - len1; i++) + // Reuse the matching-block candidate generation to avoid scanning every window. + var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + foreach (var block in matchingBlocks) { - // Cheap filter to skip windows that cannot improve the score. - if (!charMask.ContainsKey(longer[i + len1 - 1])) + // Offset between source and destination tells us where the shorter string could align. + var dist = block.DestPos - block.SourcePos; + var windowStart = dist > 0 ? dist : 0; + var windowEnd = windowStart + len1; + if (windowEnd > len2) { - continue; + windowEnd = len2; } - var window = longer.Slice(i, len1); + var window = longer.Slice(windowStart, windowEnd - windowStart); var ratio = Indel.BlockNormalizedSimilarity(charMask, shorter, window); if (ratio > maxScore) From f3f8ef60cd698a83ba151ae2b4200e4f8e4e8161 Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 18:08:06 +0100 Subject: [PATCH 09/12] create lsc for single machine word --- .../FuzzySharp.Benchmarks.csproj | 1 + FuzzySharp.Benchmarks/Program.cs | 41 +--------------- FuzzySharp/FuzzySharp.csproj | 2 +- FuzzySharp/Indel.Static.cs | 2 +- FuzzySharp/LongestCommonSequence.Static.cs | 49 +++++++++++++++++-- 5 files changed, 50 insertions(+), 45 deletions(-) diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj index 4fbc563..0fcb333 100644 --- a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -13,6 +13,7 @@ + diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index ddd2497..d5749f6 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -3,44 +3,7 @@ using BenchmarkDotNet.Running; using Raffinert.FuzzySharp.Benchmarks; -//using Raffinert.FuzzySharp; -//using Raffinert.FuzzySharp.SimilarityRatio; -//using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite; -//using Classic = FuzzySharp; - var config = ManualConfig.Create(DefaultConfig.Instance) - .AddJob(Job.ShortRun); // ← built-in short run - -//BenchmarkRunner.Run(typeof(Program).Assembly, config); -//BenchmarkRunner.Run(config); -BenchmarkRunner.Run([typeof(BenchmarkFastPartial), typeof(BenchmarkAll)], config); - -//var input1 = "+30.0% Damage to Close Enemies [30.01%"; -//var input2Collection = new[] -//{ -// "+#% Damage", -// "+#% Damage to Crowd Controlled Enemies", -// "+#% Damage to Close Enemies", -// "+#% Damage to Chilled Enemies", -// "+#% Damage to Poisoned Enemies", -// "#% Block Chance#% Blocked Damage Reduction", -// "#% Damage Reduction from Bleeding Enemies", -// "#% Damage Reduction", -// "+#% Cold Damage" -//}; - -//var classicScorer = Classic.SimilarityRatio.ScorerCache.Get(); - -//Func classicScorerFunc = input2 => classicScorer.Score(input1, input2); - -//var classicResult = input2Collection.Select(classicScorerFunc).ToList(); - -//var scorer = ScorerCache.Get(); - -//Func scorerFunc = input2 => scorer.Score(input1, input2); - -//var result = input2Collection.Select(scorerFunc).ToList(); - -//Console.WriteLine(); + .AddJob(Job.ShortRun); -//Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog")); \ No newline at end of file +BenchmarkSwitcher.FromAssembly(typeof(BenchmarkFastPartial).Assembly).Run(args, config); \ No newline at end of file diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 1468cac..2bb3407 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -12,7 +12,7 @@ true true - 12.0 + Latest MIT https://github.com/Raffinert/FuzzySharp false diff --git a/FuzzySharp/Indel.Static.cs b/FuzzySharp/Indel.Static.cs index b471651..1931fb7 100644 --- a/FuzzySharp/Indel.Static.cs +++ b/FuzzySharp/Indel.Static.cs @@ -26,7 +26,7 @@ public static int BlockDistance( int? scoreCutoff = null) where T : IEquatable { var maximum = s1.Length + s2.Length; - var lcsSim = LongestCommonSequence.BlockSimilarityMultipleULongs(block, s1, s2); + var lcsSim = LongestCommonSequence.BlockSimilarity(block, s1, s2); var dist = maximum - 2 * lcsSim; var result = scoreCutoff == null || dist <= scoreCutoff.Value ? dist diff --git a/FuzzySharp/LongestCommonSequence.Static.cs b/FuzzySharp/LongestCommonSequence.Static.cs index 218ab54..3a34d21 100644 --- a/FuzzySharp/LongestCommonSequence.Static.cs +++ b/FuzzySharp/LongestCommonSequence.Static.cs @@ -349,6 +349,50 @@ internal static int SimilarityImpl( /// Second sequence (text). /// Optional minimum similarity threshold. /// The length of the longest common subsequence, or 0 if below cutoff. + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static int BlockSimilarity( + CharMaskBuffer block, + ReadOnlySpan s1, + ReadOnlySpan s2, + int? scoreCutoff = null + ) where T : IEquatable + { + return s1.Length <= 64 + ? BlockSimilaritySingleULong(block, s1, s2, scoreCutoff) + : BlockSimilarityMultipleULongs(block, s1, s2, scoreCutoff); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int BlockSimilaritySingleULong( + CharMaskBuffer block, + ReadOnlySpan s1, + ReadOnlySpan s2, + int? scoreCutoff = null + ) where T : IEquatable + { + if (s1.IsEmpty) + return 0; + + int len1 = s1.Length; + ulong mask = len1 == 64 ? ulong.MaxValue : (1UL << len1) - 1UL; + + ulong S = mask; + foreach (T ch in s2) + { + ulong M = block.GetOrZero(ch)[0]; + ulong u = S & M; + unchecked + { + S = (S + u) | (S - u); + } + } + + int lcs = CountZeroBits(S, len1); + return scoreCutoff == null || lcs >= scoreCutoff.Value + ? lcs + : 0; + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int BlockSimilarityMultipleULongs( CharMaskBuffer block, @@ -411,12 +455,9 @@ internal static int BlockSimilarityMultipleULongs( // --- 4) count zero bits in the lower len1 positions of S --- int lcs = CountZeroBits(S, len1); - - var result = scoreCutoff == null || lcs >= scoreCutoff.Value + return scoreCutoff == null || lcs >= scoreCutoff.Value ? lcs : 0; - - return result; } finally { From 29f77b38a3fb55d98cc9a6042170b6c39030c77a Mon Sep 17 00:00:00 2001 From: ycherkes Date: Sun, 4 Jan 2026 18:25:27 +0100 Subject: [PATCH 10/12] add bigger strings for partial ratio --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 6 +++--- FuzzySharp.Benchmarks/BenchmarkFastPartial.cs | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index fc26509..3d89ca3 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -24,7 +24,7 @@ public int Ratio() [Benchmark] public int PartialRatio() { - return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return Fuzz.PartialRatio("Supplier: ACME Corp. International, Address: 221B Baker St., London NW1 6XE", "Order: PO-100923, Supplier: Acme Corporation International, Address: 221B Baker Street, London NW1 6XE, VAT: GB123456789, Contact: accounting@acme.example"); } [Benchmark] @@ -92,7 +92,7 @@ public int PartialTokenAbbreviationRatio() { return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); } - + [Benchmark] public int RatioClassic() { @@ -102,7 +102,7 @@ public int RatioClassic() [Benchmark] public int PartialRatioClassic() { - return Classic.Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return Classic.Fuzz.PartialRatio("Supplier: ACME Corp. International, Address: 221B Baker St., London NW1 6XE", "Order: PO-100923, Supplier: Acme Corporation International, Address: 221B Baker Street, London NW1 6XE, VAT: GB123456789, Contact: accounting@acme.example"); } [Benchmark] diff --git a/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs b/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs index b1bfc80..d219453 100644 --- a/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs +++ b/FuzzySharp.Benchmarks/BenchmarkFastPartial.cs @@ -15,7 +15,7 @@ public void GlobalSetup() [Benchmark] public int PartialRatio() { - return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return Fuzz.PartialRatio("Supplier: ACME Corp. International, Address: 221B Baker St., London NW1 6XE", "Order: PO-100923, Supplier: Acme Corporation International, Address: 221B Baker Street, London NW1 6XE, VAT: GB123456789, Contact: accounting@acme.example"); } [Benchmark] From cb6b339ab9605bb5aafc7d5858687bd53690155c Mon Sep 17 00:00:00 2001 From: ycherkes Date: Tue, 6 Jan 2026 07:40:44 +0100 Subject: [PATCH 11/12] add fast partial ratio tests --- .../FuzzyTests/PartialRatioFastTests.cs | 237 ++++++++++++++++++ 1 file changed, 237 insertions(+) create mode 100644 FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs diff --git a/FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs b/FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs new file mode 100644 index 0000000..12011c4 --- /dev/null +++ b/FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs @@ -0,0 +1,237 @@ +using System; +using NUnit.Framework; +using Raffinert.FuzzySharp.PreProcess; + +namespace Raffinert.FuzzySharp.Test.FuzzyTests; + +[TestFixture] +public class PartialRatioFastTests : IDisposable +{ + #region Private Fields + private string _s1, + _s1A, + _s2, + _s3, + _s4, + _s5, + _s6, + _s7, + _s8, + _s8A, + _s9, + _s9A, + _s10, + _s10A; + + private string[] _cirqueStrings, _baseballStrings; + #endregion + + [SetUp] + public void Setup() + { + GlobalConfig.PartialRatioAccuracy = PartialRatioAccuracy.Fast; + _s1 = "new york mets"; + _s1A = "new york mets"; + _s2 = "new YORK mets"; + _s3 = "the wonderful new york mets"; + _s4 = "new york mets vs atlanta braves"; + _s5 = "atlanta braves vs new york mets"; + _s6 = "new york mets - atlanta braves"; + _s7 = "new york city mets - atlanta braves"; + // Edge cases + _s8 = "{"; + _s8A = "{"; + _s9 = "{a"; + _s9A = "{a"; + _s10 = "a{"; + _s10A = "{b"; + } + + public void Dispose() + { + GlobalConfig.PartialRatioAccuracy = PartialRatioAccuracy.Strict; + } + + [Test] + public void Test_Equal() + { + Assert.AreEqual(Fuzz.Ratio(_s1, _s1A), 100); + Assert.AreEqual(Fuzz.Ratio(_s8, _s8A), 100); + Assert.AreEqual(Fuzz.Ratio(_s9, _s9A), 100); + } + + [Test] + public void Test_Case_Insensitive() + { + Assert.AreNotEqual(Fuzz.Ratio(_s1, _s2), 100); + Assert.AreEqual(Fuzz.Ratio(_s1, _s2, PreprocessMode.Full), 100); + } + + [Test] + public void Test_Partial() + { + Assert.AreEqual(Fuzz.PartialRatio(_s1, _s3), 100); + } + + [Test] + public void TestTokenSortRatio() + { + Assert.AreEqual(Fuzz.TokenSortRatio(_s1, _s1A), 100); + } + + [Test] + public void TestPartialTokenSortRatio() + { + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s1, _s1A, PreprocessMode.Full), 100); + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s4, _s5, PreprocessMode.Full), 100); + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s8, _s8A), 100); + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s9, _s9A, PreprocessMode.Full), 100); + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s9, _s9A), 100); + + //var al = Fuzz1.PartialRatioAlignment("a certain string".AsSpan(), "cetain".AsSpan()); + + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s10, _s10A), 50); // 67 in strict mode + Assert.AreEqual(Fuzz.PartialTokenSortRatio(_s10, _s10A, PreprocessMode.Full), 0); + } + + [Test] + public void TestTokenSetRatio() + { + Assert.AreEqual(Fuzz.TokenSetRatio(_s4, _s5, PreprocessMode.Full), 100); + Assert.AreEqual(Fuzz.TokenSetRatio(_s8, _s8A), 100); + Assert.AreEqual(Fuzz.TokenSetRatio(_s9, _s9A, PreprocessMode.Full), 100); + Assert.AreEqual(Fuzz.TokenSetRatio(_s9, _s9A), 100); + Assert.AreEqual(Fuzz.TokenSetRatio(_s10, _s10A), 50); + } + + [Test] + public void TestTokenAbbreviationRatio() + { + Assert.AreEqual(Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full), 40); + Assert.AreEqual(Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full), 50); // 67 in strict mode + } + + [Test] + public void TestPartialTokenSetRatio() + { + Assert.AreEqual(Fuzz.PartialTokenSetRatio(_s4, _s7), 100); + } + + [Test] + public void TestWeightedRatioEqual() + { + Assert.AreEqual(Fuzz.WeightedRatio(_s1, _s1A), 100); + } + + [Test] + public void TestWeightedRatioCaseInsensitive() + { + Assert.AreEqual(Fuzz.WeightedRatio(_s1, _s2, PreprocessMode.Full), 100); + } + + [Test] + public void TestWeightedRatioPartialMatch() + { + Assert.AreEqual(Fuzz.WeightedRatio(_s1, _s3), 90); + } + + [Test] + public void TestWeightedRatioMisorderedMatch() + { + Assert.AreEqual(Fuzz.WeightedRatio(_s4, _s5), 95); + } + + [Test] + public void TestEmptyStringsScore0() + { + Assert.That(Fuzz.Ratio("test_string", ""), Is.EqualTo(0)); + Assert.That(Fuzz.PartialRatio("test_string", ""), Is.EqualTo(0)); + Assert.That(Fuzz.Ratio("", ""), Is.EqualTo(0)); + Assert.That(Fuzz.PartialRatio("", ""), Is.EqualTo(0)); + } + + [Test] + public void TestIssueSeven() + { + _s1 = "HSINCHUANG"; + _s2 = "SINJHUAN"; + _s3 = "LSINJHUANG DISTRIC"; + _s4 = "SINJHUANG DISTRICT"; + + Assert.IsTrue(Fuzz.PartialRatio(_s1, _s2) > 75); + Assert.IsTrue(Fuzz.PartialRatio(_s1, _s3) > 75); + Assert.IsTrue(Fuzz.PartialRatio(_s1, _s4) > 75); + } + + [Test] + public void TestIssueEight() + { + // https://github.com/JakeBayer/FuzzySharp/issues/8 + Assert.AreEqual(85, Fuzz.PartialRatio("Partnernummer", "Partne\nrnum\nmerASDFPartnernummerASDF")); // 100 in strict mode + Assert.AreEqual(77, Fuzz.PartialRatio("Partnernummer", "PartnerrrrnummerASDFPartnernummerASDF")); // 100 in strict mode + + // https://github.com/xdrop/fuzzywuzzy/issues/39 + Assert.AreEqual(57, Fuzz.PartialRatio("kaution", "kdeffxxxiban:de1110010060046666666datum:16.11.17zeit:01:12uft0000899999tan076601testd.-20-maisonette-z4-jobas-hagkautionauszug")); // 100 in strict mode + + // https://github.com/seatgeek/fuzzywuzzy/issues/79 + Assert.AreEqual(93, Fuzz.PartialRatio("this is a test", "is this is a not really thing this is a test!")); // 100 in strict mode + + // https://github.com/Raffinert/FuzzySharp/issues/2 + Assert.AreEqual(100, Fuzz.PartialRatio("sh", "Growing eshops without a popular platform", PreprocessMode.Full)); + Assert.AreEqual(100, Fuzz.PartialRatio("shop", "Growing eshops without a popular platform", PreprocessMode.Full)); + } + + [Test] + public void MorePartialRatio() + { + Assert.AreEqual(100, Fuzz.PartialRatio("geeks for geeks", "geeks for geeks!")); + Assert.AreEqual(64, Fuzz.PartialRatio("geeks for geeks", "geeks geeks")); // 71 in strict mode + Assert.AreEqual(100, Fuzz.TokenSortRatio("geeks for geeks", "for geeks geeks")); + } + + [Test] + public void TestPartialRatioUnicodeString() + { + _s1 = "\u00C1"; + _s2 = "ABCD"; + var score = Fuzz.PartialRatio(_s1, _s2); + Assert.AreEqual(0, score); + } + + [Test] + public void TestZeroRatio() + { + var ratio = Fuzz.PartialTokenSortRatio("abc", "def"); + + Assert.True(ratio == 0); + } + + [Test] + public void Test03() + { + var ratio = Fuzz.PartialTokenSortRatio("new york mets", "atlanta braves vs new york mets"); + + Assert.True(ratio == 77); + } + + [Test] + public void TestRatioUnicodeString() + { + _s1 = "\u00C1"; + _s2 = "ABCD"; + var score = Fuzz.WeightedRatio(_s1, _s2); + Assert.AreEqual(0, score); + + // Cyrillic. + _s1 = "\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433"; + _s2 = "\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442"; + score = Fuzz.WeightedRatio(_s1, _s2); + Assert.AreNotEqual(0, score); + + // Chinese. + _s1 = "\u6211\u4e86\u89e3\u6570\u5b66"; + _s2 = "\u6211\u5b66\u6570\u5b66"; + score = Fuzz.WeightedRatio(_s1, _s2); + Assert.AreNotEqual(0, score); + } +} \ No newline at end of file From f5a73bf77674ba0f1b0a57578a77090f26989c78 Mon Sep 17 00:00:00 2001 From: ycherkes Date: Tue, 6 Jan 2026 07:43:50 +0100 Subject: [PATCH 12/12] rename tests --- .../{PartialRatioFastTests.cs => FastPartialRatioTests.cs} | 2 +- .../FuzzyTests/{RatioTests.cs => StrictPartialRatioTests.cs} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename FuzzySharp.Test/FuzzyTests/{PartialRatioFastTests.cs => FastPartialRatioTests.cs} (99%) rename FuzzySharp.Test/FuzzyTests/{RatioTests.cs => StrictPartialRatioTests.cs} (99%) diff --git a/FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs b/FuzzySharp.Test/FuzzyTests/FastPartialRatioTests.cs similarity index 99% rename from FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs rename to FuzzySharp.Test/FuzzyTests/FastPartialRatioTests.cs index 12011c4..2fea5f6 100644 --- a/FuzzySharp.Test/FuzzyTests/PartialRatioFastTests.cs +++ b/FuzzySharp.Test/FuzzyTests/FastPartialRatioTests.cs @@ -5,7 +5,7 @@ namespace Raffinert.FuzzySharp.Test.FuzzyTests; [TestFixture] -public class PartialRatioFastTests : IDisposable +public class FastPartialRatioTests : IDisposable { #region Private Fields private string _s1, diff --git a/FuzzySharp.Test/FuzzyTests/RatioTests.cs b/FuzzySharp.Test/FuzzyTests/StrictPartialRatioTests.cs similarity index 99% rename from FuzzySharp.Test/FuzzyTests/RatioTests.cs rename to FuzzySharp.Test/FuzzyTests/StrictPartialRatioTests.cs index 2b6c216..a05b82d 100644 --- a/FuzzySharp.Test/FuzzyTests/RatioTests.cs +++ b/FuzzySharp.Test/FuzzyTests/StrictPartialRatioTests.cs @@ -4,7 +4,7 @@ namespace Raffinert.FuzzySharp.Test.FuzzyTests; [TestFixture] -public class RatioTests +public class StrictPartialRatioTests { #region Private Fields private string _s1,