diff --git a/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs b/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs
new file mode 100644
index 0000000..942eb7d
--- /dev/null
+++ b/benchmark/CDT.Benchmarks/PredicatesBenchmarks.cs
@@ -0,0 +1,418 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+using BenchmarkDotNet.Attributes;
+using BenchmarkDotNet.Configs;
+using BenchmarkDotNet.Diagnosers;
+using BenchmarkDotNet.Running;
+using CDT.Predicates;
+
+// ---------------------------------------------------------------------------
+// Predicates microbenchmarks
+// ---------------------------------------------------------------------------
+
+/// <summary>
+/// Microbenchmarks that compare <see cref="PredicatesAdaptive"/> and
+/// <see cref="PredicatesExact"/> head-to-head for every predicate variant.
+/// </summary>
+/// <remarks>
+/// Two input scenarios are covered for each method:
+/// <list type="bullet">
+/// <item><term>General</term> – random coordinates in general position.
+/// The adaptive fast-path (Stage A) returns immediately without any
+/// expansion arithmetic, so these numbers reflect the absolute best
+/// case for <see cref="PredicatesAdaptive"/>.</item>
+/// <item><term>NearDegenerate</term> – nearly-collinear / nearly-co-circular
+/// points that force the adaptive predicate through Stage B–D (and
+/// therefore all the way to the exact expansion code). These numbers
+/// show the cost of the slow path and are comparable to what
+/// <see cref="PredicatesExact"/> always pays.</item>
+/// </list>
+/// </remarks>
+///
+[MemoryDiagnoser]
+[EventPipeProfiler(EventPipeProfile.CpuSampling)]
+[GroupBenchmarksBy(BenchmarkLogicalGroupRule.ByCategory)]
+[CategoriesColumn]
+[ShortRunJob]
+public class PredicatesBenchmarks
+{
+ // Number of predicate calls per benchmark iteration – large enough to
+ // dominate timer resolution, small enough for a short-run job.
+ private const int N = 4_000;
+
+ // ── General-position input arrays ──────────────────────────────────────
+ // Orient2d uses 3 points (6 doubles / 6 floats) per call.
+ // InCircle uses 4 points (8 doubles / 8 floats) per call.
+ private double[] _orient2dD = null!;
+ private float[] _orient2dF = null!;
+ private double[] _inCircleD = null!;
+ private float[] _inCircleF = null!;
+
+ // ── Near-degenerate input arrays ───────────────────────────────────────
+ // Orient2d: nearly-collinear triples.
+ // InCircle: points nearly on a common circumcircle.
+ private double[] _orient2dDeg = null!;
+ private float[] _orient2dFDeg = null!;
+ private double[] _inCircleDeg = null!;
+ private float[] _inCircleFDeg = null!;
+
+ [GlobalSetup]
+ public void Setup()
+ {
+ var rng = new Random(unchecked((int)0xDEAD_BEEF));
+
+ // ── general-position ───────────────────────────────────────────────
+ _orient2dD = FillDoubles(rng, 6 * N, scale: 1000.0);
+ _orient2dF = ToFloats(_orient2dD);
+ _inCircleD = FillDoubles(rng, 8 * N, scale: 1000.0);
+ _inCircleF = ToFloats(_inCircleD);
+
+ // ── near-degenerate ────────────────────────────────────────────────
+ // Orient2d: triple (A, B, C) where C is nearly on line AB.
+ // We construct A=(0,0), B=(1,1), C=(t, t+ε) for varying t and ε≈1e-14.
+ _orient2dDeg = BuildNearCollinear(N);
+ _orient2dFDeg = ToFloats(_orient2dDeg);
+
+ // InCircle: quadruple (A,B,C,D) where D is nearly on the
+ // circumcircle of A, B, C.
+ _inCircleDeg = BuildNearCircle(N);
+ _inCircleFDeg = ToFloats(_inCircleDeg);
+ }
+
+ // =========================================================================
+ // Orient2d – double
+ // =========================================================================
+
+ [Benchmark(Description = "Adaptive – Orient2d double (general)")]
+ [BenchmarkCategory("Orient2d-double")]
+ public double Adaptive_Orient2d_Double_General()
+ {
+ double acc = 0.0;
+ double[] pts = _orient2dD;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesAdaptive.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – Orient2d double (general)")]
+ [BenchmarkCategory("Orient2d-double")]
+ public double Exact_Orient2d_Double_General()
+ {
+ double acc = 0.0;
+ double[] pts = _orient2dD;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesExact.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Adaptive – Orient2d double (near-degenerate)")]
+ [BenchmarkCategory("Orient2d-double-hard")]
+ public double Adaptive_Orient2d_Double_NearDegenerate()
+ {
+ double acc = 0.0;
+ double[] pts = _orient2dDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesAdaptive.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – Orient2d double (near-degenerate)")]
+ [BenchmarkCategory("Orient2d-double-hard")]
+ public double Exact_Orient2d_Double_NearDegenerate()
+ {
+ double acc = 0.0;
+ double[] pts = _orient2dDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesExact.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ // =========================================================================
+ // Orient2d – float
+ // =========================================================================
+
+ [Benchmark(Description = "Adaptive – Orient2d float (general)")]
+ [BenchmarkCategory("Orient2d-float")]
+ public float Adaptive_Orient2d_Float_General()
+ {
+ float acc = 0f;
+ float[] pts = _orient2dF;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesAdaptive.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – Orient2d float (general)")]
+ [BenchmarkCategory("Orient2d-float")]
+ public float Exact_Orient2d_Float_General()
+ {
+ float acc = 0f;
+ float[] pts = _orient2dF;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesExact.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Adaptive – Orient2d float (near-degenerate)")]
+ [BenchmarkCategory("Orient2d-float-hard")]
+ public float Adaptive_Orient2d_Float_NearDegenerate()
+ {
+ float acc = 0f;
+ float[] pts = _orient2dFDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesAdaptive.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – Orient2d float (near-degenerate)")]
+ [BenchmarkCategory("Orient2d-float-hard")]
+ public float Exact_Orient2d_Float_NearDegenerate()
+ {
+ float acc = 0f;
+ float[] pts = _orient2dFDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 6;
+ acc += PredicatesExact.Orient2d(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3], pts[j + 4], pts[j + 5]);
+ }
+ return acc;
+ }
+
+ // =========================================================================
+ // InCircle – double
+ // =========================================================================
+
+ [Benchmark(Description = "Adaptive – InCircle double (general)")]
+ [BenchmarkCategory("InCircle-double")]
+ public double Adaptive_InCircle_Double_General()
+ {
+ double acc = 0.0;
+ double[] pts = _inCircleD;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesAdaptive.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – InCircle double (general)")]
+ [BenchmarkCategory("InCircle-double")]
+ public double Exact_InCircle_Double_General()
+ {
+ double acc = 0.0;
+ double[] pts = _inCircleD;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesExact.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Adaptive – InCircle double (near-degenerate)")]
+ [BenchmarkCategory("InCircle-double-hard")]
+ public double Adaptive_InCircle_Double_NearDegenerate()
+ {
+ double acc = 0.0;
+ double[] pts = _inCircleDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesAdaptive.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – InCircle double (near-degenerate)")]
+ [BenchmarkCategory("InCircle-double-hard")]
+ public double Exact_InCircle_Double_NearDegenerate()
+ {
+ double acc = 0.0;
+ double[] pts = _inCircleDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesExact.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ // =========================================================================
+ // InCircle – float
+ // =========================================================================
+
+ [Benchmark(Description = "Adaptive – InCircle float (general)")]
+ [BenchmarkCategory("InCircle-float")]
+ public float Adaptive_InCircle_Float_General()
+ {
+ float acc = 0f;
+ float[] pts = _inCircleF;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesAdaptive.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – InCircle float (general)")]
+ [BenchmarkCategory("InCircle-float")]
+ public float Exact_InCircle_Float_General()
+ {
+ float acc = 0f;
+ float[] pts = _inCircleF;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesExact.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Adaptive – InCircle float (near-degenerate)")]
+ [BenchmarkCategory("InCircle-float-hard")]
+ public float Adaptive_InCircle_Float_NearDegenerate()
+ {
+ float acc = 0f;
+ float[] pts = _inCircleFDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesAdaptive.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ [Benchmark(Description = "Exact – InCircle float (near-degenerate)")]
+ [BenchmarkCategory("InCircle-float-hard")]
+ public float Exact_InCircle_Float_NearDegenerate()
+ {
+ float acc = 0f;
+ float[] pts = _inCircleFDeg;
+ for (int i = 0; i < N; i++)
+ {
+ int j = i * 8;
+ acc += PredicatesExact.InCircle(
+ pts[j], pts[j + 1], pts[j + 2], pts[j + 3],
+ pts[j + 4], pts[j + 5], pts[j + 6], pts[j + 7]);
+ }
+ return acc;
+ }
+
+ // =========================================================================
+ // Helpers
+ // =========================================================================
+
+ private static double[] FillDoubles(Random rng, int count, double scale)
+ {
+ var arr = new double[count];
+ for (int i = 0; i < arr.Length; i++)
+ arr[i] = (rng.NextDouble() - 0.5) * 2.0 * scale;
+ return arr;
+ }
+
+ private static float[] ToFloats(double[] src)
+ {
+ var arr = new float[src.Length];
+ for (int i = 0; i < src.Length; i++)
+ arr[i] = (float)src[i];
+ return arr;
+ }
+
+ /// <summary>
+ /// Builds an array of nearly-collinear Orient2d triples.
+ /// A=(0,0), B=(t,t), C=(2t, 2t+ε) – nearly collinear because ε is tiny.
+ /// Varies t across [1,100] and ε across [1e-14, 1e-13] to keep results
+ /// diverse while reliably triggering Stage B/C of the adaptive predicate.
+ /// </summary>
+ private static double[] BuildNearCollinear(int n)
+ {
+ var arr = new double[6 * n];
+ for (int i = 0; i < n; i++)
+ {
+ double t = 1.0 + i * (99.0 / n); // t in [1, 100]
+ double eps = 1e-14 * (1.0 + i % 10); // ε in [1e-14, 1e-13]
+ int j = i * 6;
+ arr[j] = 0.0; arr[j + 1] = 0.0; // A
+ arr[j + 2] = t; arr[j + 3] = t; // B
+ arr[j + 4] = 2 * t; arr[j + 5] = 2 * t + eps; // C (nearly collinear)
+ }
+ return arr;
+ }
+
+ /// <summary>
+ /// Builds nearly-co-circular InCircle quadruples.
+ /// A, B, C are vertices of an equilateral triangle inscribed in the unit
+ /// circle; D is placed at radius 1 + ε so it is just barely outside –
+ /// but close enough that the fast floating-point test cannot decide.
+ /// </summary>
+ private static double[] BuildNearCircle(int n)
+ {
+ var arr = new double[8 * n];
+ for (int i = 0; i < n; i++)
+ {
+ double r = 100.0 + i * (900.0 / n); // radius in [100, 1000]
+ double eps = 1e-11 * r * (1.0 + i % 5); // ε scaled to radius
+
+ double ax = r, ay = 0.0;
+ double bx = r * Math.Cos(2 * Math.PI / 3),
+ by = r * Math.Sin(2 * Math.PI / 3);
+ double cx = r * Math.Cos(4 * Math.PI / 3),
+ cy = r * Math.Sin(4 * Math.PI / 3);
+ double dx = r + eps, dy = 0.0; // D just outside
+
+ int j = i * 8;
+ arr[j] = ax; arr[j + 1] = ay;
+ arr[j + 2] = bx; arr[j + 3] = by;
+ arr[j + 4] = cx; arr[j + 5] = cy;
+ arr[j + 6] = dx; arr[j + 7] = dy;
+ }
+ return arr;
+ }
+}
diff --git a/src/CDT.Core/Predicates/PredicatesAdaptive.cs b/src/CDT.Core/Predicates/PredicatesAdaptive.cs
index 41b19d3..153b709 100644
--- a/src/CDT.Core/Predicates/PredicatesAdaptive.cs
+++ b/src/CDT.Core/Predicates/PredicatesAdaptive.cs
@@ -6,6 +6,8 @@
// artem-ogre/CDT (predicates.h, predicates::adaptive namespace).
using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
namespace CDT.Predicates;
@@ -59,7 +61,8 @@ public static class PredicatesAdaptive
/// zero if collinear, or a negative value if to the right.
///
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-15: aggressive JIT opt for fast-path Stage A
+ [SkipLocalsInit]
public static double Orient2d(
double ax, double ay, double bx, double by, double cx, double cy)
{
@@ -192,7 +195,8 @@ public static float Orient2d(
/// zero if on, or a negative value if outside.
///
///
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // opt-16: keep only AggressiveInlining (AggressiveOptimization inflated the Stage-B frame and hurt Stage-A)
+ [SkipLocalsInit]
public static double InCircle(
double ax, double ay, double bx, double by,
double cx, double cy, double dx, double dy)
@@ -367,8 +371,14 @@ internal static (double aHi, double aLo) Split(double a)
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static double MultTail(double a, double b, double p)
{
- var (aHi, aLo) = Split(a);
- var (bHi, bLo) = Split(b);
+ double c = SplitterD * a;
+ double aBig = c - a;
+ double aHi = c - aBig;
+ double aLo = a - aHi;
+ c = SplitterD * b;
+ double bBig = c - b;
+ double bHi = c - bBig;
+ double bLo = b - bHi;
double y = p - aHi * bHi;
y -= aLo * bHi;
y -= aHi * bLo;
@@ -379,6 +389,8 @@ internal static double MultTail(double a, double b, double p)
/// Exact expansion of ax*by - ay*bx (up to 4 non-zero terms).
/// Matches Lenthe ExpansionBase::TwoTwoDiff.
///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [SkipLocalsInit] // opt-11: x0..x3 are all unconditionally computed before conditional write
internal static int TwoTwoDiff(double ax, double by, double ay, double bx, Span<double> h)
{
double axby1 = ax * by;
@@ -408,6 +420,8 @@ internal static int TwoTwoDiff(double ax, double by, double ay, double bx, Span<double> h)
/// Matches Lenthe ExpansionBase::ScaleExpansion.
/// Output has up to 2*elen terms.
///
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-7, opt-18
+ [SkipLocalsInit] // opt-8: locals (hIdx, Q, hh, Ti, ti, Qi) are all written before read
internal static int ScaleExpansion(Span<double> e, int elen, double b, Span<double> h)
{
if (elen == 0 || b == 0.0)
@@ -416,31 +430,40 @@ internal static int ScaleExpansion(Span<double> e, int elen, double b, Span<double> h)
/// <summary>
/// Computes e*s*s + e*t*t as an expansion (two ScaleExpansion calls each, then sum).
/// Max output: 32 terms for 4-term input (used for InCircle Stage B lift terms).
/// </summary>
+ [SkipLocalsInit] // keep SkipLocalsInit; remove AggressiveInlining (inlining 3× into AdaptiveInCircle bloats Stage-A JIT frame)
internal static int ScaleExpansionSum(Span<double> e, int elen, double s, double t, Span<double> h)
{
Span<double> es = stackalloc double[8];
@@ -470,63 +494,148 @@ internal static int ScaleExpansionSum(Span e, int elen, double s, double
/// Merge-then-accumulate two expansions. Matches Lenthe ExpansionBase::ExpansionSum:
/// std::merge by |value| (stable), then sequential grow-expansion accumulation.
///
+ [MethodImpl(MethodImplOptions.AggressiveOptimization)] // opt-20: aggressive JIT optimization for this hot method
+ [SkipLocalsInit]
internal static int ExpansionSum(Span<double> e, int elen, Span<double> f, int flen, Span<double> h)
{
if (elen == 0 && flen == 0) { return 0; }
- if (elen == 0) { f[..flen].CopyTo(h); return flen; }
- if (flen == 0) { e[..elen].CopyTo(h); return elen; }
+ if (elen == 0)
+ {
+ // opt-5: Unsafe.CopyBlockUnaligned replaces Span.CopyTo (eliminates Memmove call overhead)
+ Unsafe.CopyBlockUnaligned(
+ ref Unsafe.As<double, byte>(ref MemoryMarshal.GetReference(h)),
+ ref Unsafe.As<double, byte>(ref MemoryMarshal.GetReference(f)),
+ (uint)(flen * sizeof(double)));
+ return flen;
+ }
+ if (flen == 0)
+ {
+ // opt-5: same as above for flen==0 fast path
+ Unsafe.CopyBlockUnaligned(
+ ref Unsafe.As<double, byte>(ref MemoryMarshal.GetReference(h)),
+ ref Unsafe.As<double, byte>(ref MemoryMarshal.GetReference(e)),
+ (uint)(elen * sizeof(double)));
+ return elen;
+ }
int total = elen + flen;
- // Merge sorted by |value| into temporary buffer.
- // Maximum merged size for InCircle Stage D is 192+192=384 ≤ 400, so
- // the stackalloc path is always taken for that call site.
- Span<double> merged = total <= 400 ? stackalloc double[400] : new double[total];
+ // opt-1: Tiered stackalloc — allocate only as much as the actual input size requires.
+ // Using unsafe ref to the first element lets us hold the pointer across the branches
+ // without assigning the Span itself to an outer variable (which Roslyn disallows for
+ // stack-allocated Spans that might escape).
+ if (total <= 16)
+ {
+ Span<double> merged16 = stackalloc double[16];
+ return ExpansionSumCore(e, elen, f, flen, h, merged16);
+ }
+ if (total <= 64)
+ {
+ Span<double> merged64 = stackalloc double[64];
+ return ExpansionSumCore(e, elen, f, flen, h, merged64);
+ }
+ if (total <= 400)
+ {
+ Span<double> merged400 = stackalloc double[400];
+ return ExpansionSumCore(e, elen, f, flen, h, merged400);
+ }
+ return ExpansionSumCore(e, elen, f, flen, h, new double[total]);
+ }
+
+ // opt-2, opt-3, opt-4: Core merge+accumulate logic — receives a pre-sized scratch buffer.
+ // All span accesses use MemoryMarshal.GetReference + Unsafe.Add to eliminate bounds checks.
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)]
+ private static int ExpansionSumCore(
+ Span<double> e, int elen, Span<double> f, int flen, Span<double> h, Span<double> merged)
+ {
+ ref double eRef = ref MemoryMarshal.GetReference(e);
+ ref double fRef = ref MemoryMarshal.GetReference(f);
+ ref double mRef = ref MemoryMarshal.GetReference(merged);
+
int ei = 0, fi = 0, mi = 0;
while (ei < elen && fi < flen)
{
- if (Math.Abs(f[fi]) < Math.Abs(e[ei]))
+ double eVal = Unsafe.Add(ref eRef, ei);
+ double fVal = Unsafe.Add(ref fRef, fi);
+ if (Math.Abs(fVal) < Math.Abs(eVal))
{
- merged[mi++] = f[fi++];
+ Unsafe.Add(ref mRef, mi++) = fVal;
+ fi++;
}
else
{
- merged[mi++] = e[ei++];
+ Unsafe.Add(ref mRef, mi++) = eVal;
+ ei++;
}
}
- while (ei < elen) { merged[mi++] = e[ei++]; }
- while (fi < flen) { merged[mi++] = f[fi++]; }
+ // opt-4: tail copy loops → Unsafe.CopyBlockUnaligned
+ if (ei < elen)
+ {
+ int rem = elen - ei;
+ Unsafe.CopyBlockUnaligned(
+ ref Unsafe.As<double, byte>(ref Unsafe.Add(ref mRef, mi)),
+ ref Unsafe.As<double, byte>(ref Unsafe.Add(ref eRef, ei)),
+ (uint)(rem * sizeof(double)));
+ mi += rem;
+ }
+ if (fi < flen)
+ {
+ int rem = flen - fi;
+ Unsafe.CopyBlockUnaligned(
+ ref Unsafe.As<double, byte>(ref Unsafe.Add(ref mRef, mi)),
+ ref Unsafe.As<double, byte>(ref Unsafe.Add(ref fRef, fi)),
+ (uint)(rem * sizeof(double)));
+ mi += rem;
+ }
- // Sequential accumulation
+ // opt-3: bounds-check-free accumulation loop using ref locals
+ ref double hRef = ref MemoryMarshal.GetReference(h);
int hIdx = 0;
- double Q = merged[0];
- double Qnew = merged[1] + Q;
- double hh = FastPlusTail(merged[1], Q, Qnew);
+ double Q = Unsafe.Add(ref mRef, 0);
+ double m1 = Unsafe.Add(ref mRef, 1);
+ double Qnew = m1 + Q;
+ double hh = FastPlusTail(m1, Q, Qnew);
Q = Qnew;
- if (hh != 0.0) { h[hIdx++] = hh; }
+ if (hh != 0.0) { Unsafe.Add(ref hRef, hIdx++) = hh; }
for (int g = 2; g < mi; g++)
{
- Qnew = Q + merged[g];
- hh = PlusTail(Q, merged[g], Qnew);
+ double mg = Unsafe.Add(ref mRef, g);
+ Qnew = Q + mg;
+ hh = PlusTail(Q, mg, Qnew);
Q = Qnew;
- if (hh != 0.0) { h[hIdx++] = hh; }
+ if (hh != 0.0) { Unsafe.Add(ref hRef, hIdx++) = hh; }
}
- if (Q != 0.0) { h[hIdx++] = Q; }
+ if (Q != 0.0) { Unsafe.Add(ref hRef, hIdx++) = Q; }
return hIdx;
}
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal static double Estimate(Span<double> e, int elen)
{
- double sum = 0.0;
- for (int i = 0; i < elen; i++) { sum += e[i]; }
- return sum;
+ if (Vector256.IsHardwareAccelerated && elen >= 4)
+ {
+ ref double eRef = ref MemoryMarshal.GetReference(e);
+ Vector256<double> acc = Vector256<double>.Zero;
+ int i = 0;
+ for (; i <= elen - 4; i += 4)
+ acc = Vector256.Add(acc, Vector256.LoadUnsafe(ref eRef, (nuint)i));
+ double sum = Vector256.Sum(acc);
+ // opt-13: bounds-check-free scalar tail using Unsafe.Add
+ for (; i < elen; i++) sum += Unsafe.Add(ref eRef, i);
+ return sum;
+ }
+
+ // opt-13: bounds-check-free scalar loop
+ ref double sRef = ref MemoryMarshal.GetReference(e);
+ double s = 0.0;
+ for (int i = 0; i < elen; i++) { s += Unsafe.Add(ref sRef, i); }
+ return s;
}
- [MethodImpl(MethodImplOptions.AggressiveInlining)]
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-12
internal static double MostSignificant(Span<double> e, int elen)
{
for (int i = elen - 1; i >= 0; i--)
@@ -536,8 +645,21 @@ internal static double MostSignificant(Span<double> e, int elen)
return 0.0;
}
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] // opt-14
internal static void NegateInto(Span<double> src, int len, Span<double> dst)
{
+ if (Vector256.IsHardwareAccelerated && len >= 4)
+ {
+ ref double srcRef = ref MemoryMarshal.GetReference(src);
+ ref double dstRef = ref MemoryMarshal.GetReference(dst);
+ int i = 0;
+ for (; i <= len - 4; i += 4)
+ Vector256.Negate(Vector256.LoadUnsafe(ref srcRef, (nuint)i))
+ .StoreUnsafe(ref dstRef, (nuint)i);
+ for (; i < len; i++) dst[i] = -src[i];
+ return;
+ }
+
for (int i = 0; i < len; i++) { dst[i] = -src[i]; }
}
}
diff --git a/src/CDT.Core/Predicates/PredicatesExact.cs b/src/CDT.Core/Predicates/PredicatesExact.cs
index b99ceb1..11f7360 100644
--- a/src/CDT.Core/Predicates/PredicatesExact.cs
+++ b/src/CDT.Core/Predicates/PredicatesExact.cs
@@ -4,6 +4,8 @@
//
// Geometric predicates — exact (arbitrary-precision) paths.
+using System.Runtime.CompilerServices;
+
namespace CDT.Predicates;
///
@@ -41,6 +43,8 @@ public static class PredicatesExact
/// zero if collinear, or a negative value if to the right.
///
///
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] // opt-17: inline into Adaptive.Orient2d Stage D
+ [SkipLocalsInit]
public static double Orient2d(
double ax, double ay, double bx, double by, double cx, double cy)
{
@@ -122,6 +126,8 @@ public static float Orient2d(
/// zero if on, or a negative value if outside.
///
///
+ [MethodImpl(MethodImplOptions.AggressiveOptimization)] // opt-20: aggressive JIT opt for this hot method
+ [SkipLocalsInit]
public static double InCircle(
double ax, double ay, double bx, double by,
double cx, double cy, double dx, double dy)
@@ -172,22 +178,22 @@ public static double InCircle(
int adetLen = PredicatesAdaptive.ScaleExpansionSum(bcd, bcdLen, ax, ay, adet);
// bdet = -(cda*bx*bx + cda*by*by)
- Span<double> bdetPos = stackalloc double[96];
- int bdetPosLen = PredicatesAdaptive.ScaleExpansionSum(cda, cdaLen, bx, by, bdetPos);
+ // opt-18: eliminate bdetPos[96] by computing ScaleExpansionSum directly into bdet,
+ // then negating in-place. Saves 768 bytes of stack per call.
Span<double> bdet = stackalloc double[96];
- PredicatesAdaptive.NegateInto(bdetPos, bdetPosLen, bdet);
- int bdetLen = bdetPosLen;
+ int bdetLen = PredicatesAdaptive.ScaleExpansionSum(cda, cdaLen, bx, by, bdet);
+ PredicatesAdaptive.NegateInto(bdet, bdetLen, bdet);
// cdet = dab*cx*cx + dab*cy*cy
Span<double> cdet = stackalloc double[96];
int cdetLen = PredicatesAdaptive.ScaleExpansionSum(dab, dabLen, cx, cy, cdet);
// ddet = -(abc*dx*dx + abc*dy*dy)
- Span<double> ddetPos = stackalloc double[96];
- int ddetPosLen = PredicatesAdaptive.ScaleExpansionSum(abc, abcLen, dx, dy, ddetPos);
+ // opt-19: same as opt-18 — eliminate ddetPos[96] stackalloc with in-place negate.
+ // Saves another 768 bytes of stack per call (total savings: 1 536 bytes).
Span<double> ddet = stackalloc double[96];
- PredicatesAdaptive.NegateInto(ddetPos, ddetPosLen, ddet);
- int ddetLen = ddetPosLen;
+ int ddetLen = PredicatesAdaptive.ScaleExpansionSum(abc, abcLen, dx, dy, ddet);
+ PredicatesAdaptive.NegateInto(ddet, ddetLen, ddet);
// deter = (adet + bdet) + (cdet + ddet)
Span<double> ab2 = stackalloc double[192];