From 3d5b7c8058ea3c9c7f08d4a15cde83b4cc60a977 Mon Sep 17 00:00:00 2001
From: Daniel Song <xdtsong@gmail.com>
Date: Thu, 12 Mar 2026 21:47:10 -0700
Subject: [PATCH 1/3] fix: complete Collation comparison logic for total
 ordering (#8)

Fix __eq__ to compare languages for ordinal collations and include
absorbs_damage in equality. Fix __gt__ to use deterministic tuple
comparison instead of returning False for all non-trivial cases.
Add comprehensive test suite with 33 tests covering equality,
ordering, total ordering properties, and derived operators.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 data_diff/abcs/database_types.py |  29 ++++-
 tests/test_collation.py          | 210 +++++++++++++++++++++++++++++++
 2 files changed, 236 insertions(+), 3 deletions(-)
 create mode 100644 tests/test_collation.py

diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py
index e05d8f6f..3dae8628 100644
--- a/data_diff/abcs/database_types.py
+++ b/data_diff/abcs/database_types.py
@@ -52,11 +52,19 @@ class Collation:
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, Collation):
             return NotImplemented
+        if self.absorbs_damage != other.absorbs_damage:
+            return False
+        if self.ordinal != other.ordinal:
+            return False
         if self.ordinal and other.ordinal:
-            # TODO: does it depend on language? what does Albanic_BIN mean in MS SQL?
+            # When both have a language, they must match (e.g. Albanian_BIN != Latin_BIN in MS SQL).
+            # When either is None, treat as equal for backward compat (language-agnostic ordinal).
+            if self.language is not None and other.language is not None:
+                return self.language == other.language
             return True
         return (
             self.language == other.language
+            # Country None-tolerance: if either side didn't report a country, don't penalize.
             and (self.country is None or other.country is None or self.country == other.country)
             and self.case_sensitive == other.case_sensitive
             and self.accent_sensitive == other.accent_sensitive
@@ -81,8 +89,23 @@ def __gt__(self, other: object) -> bool:
             return True
         if other.ordinal and not self.ordinal:
             return False
-        # TODO: try to align the languages & countries?
-        return False
+        # Deterministic total ordering: language > country > sensitivity flags.
+        # None is coerced to "" / False so that all collations are comparable.
+        self_key = (
+            self.language or "",
+            self.country or "",
+            self.case_sensitive or False,
+            self.accent_sensitive or False,
+            self.lower_first or False,
+        )
+        other_key = (
+            other.language or "",
+            other.country or "",
+            other.case_sensitive or False,
+            other.accent_sensitive or False,
+            other.lower_first or False,
+        )
+        return self_key > other_key
 
     def __ge__(self, other: object) -> bool:
         if not isinstance(other, Collation):
diff --git a/tests/test_collation.py b/tests/test_collation.py
new file mode 100644
index 00000000..c321219a
--- /dev/null
+++ b/tests/test_collation.py
@@ -0,0 +1,210 @@
+"""Tests for Collation comparison logic in data_diff.abcs.database_types."""
+
+import itertools
+
+from data_diff.abcs.database_types import Collation
+
+# --- Equality ---
+
+
+class TestCollationEquality:
+    def test_ordinal_same_language(self):
+        a = Collation(ordinal=True, language="Albanian")
+        b = Collation(ordinal=True, language="Albanian")
+        assert a == b
+
+    def test_ordinal_different_language(self):
+        a = Collation(ordinal=True, language="Albanian")
+        b = Collation(ordinal=True, language="Latin")
+        assert a != b
+
+    def test_ordinal_none_language_treated_equal(self):
+        a = Collation(ordinal=True, language=None)
+        b = Collation(ordinal=True, language="Latin")
+        assert a == b
+
+    def test_ordinal_both_none_language(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=True)
+        assert a == b
+
+    def test_ordinal_vs_non_ordinal(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=False, language="en")
+        assert a != b
+
+    def test_locale_matching(self):
+        a = Collation(ordinal=False, language="en", case_sensitive=True, accent_sensitive=True)
+        b = Collation(ordinal=False, language="en", case_sensitive=True, accent_sensitive=True)
+        assert a == b
+
+    def test_locale_language_mismatch(self):
+        a = Collation(ordinal=False, language="en")
+        b = Collation(ordinal=False, language="de")
+        assert a != b
+
+    def test_country_none_tolerance(self):
+        a = Collation(ordinal=False, language="en", country="US")
+        b = Collation(ordinal=False, language="en", country=None)
+        assert a == b
+
+    def test_country_mismatch(self):
+        a = Collation(ordinal=False, language="en", country="US")
+        b = Collation(ordinal=False, language="en", country="GB")
+        assert a != b
+
+    def test_sensitivity_difference(self):
+        a = Collation(ordinal=False, language="en", case_sensitive=True)
+        b = Collation(ordinal=False, language="en", case_sensitive=False)
+        assert a != b
+
+    def test_not_implemented_for_non_collation(self):
+        c = Collation()
+        assert c.__eq__("not a collation") is NotImplemented
+
+
+# --- Ordering ---
+
+
+class TestCollationOrdering:
+    def test_absorbs_damage_precedence(self):
+        """absorbs_damage=True makes a collation "lesser" (preferred target) when not otherwise equal."""
+        snowflake = Collation(absorbs_damage=True, ordinal=False, language="en")
+        regular = Collation(absorbs_damage=False, ordinal=False, language="de")
+        # absorbs_damage overrides language ordering: snowflake is always lesser
+        assert regular > snowflake
+        assert not snowflake > regular
+
+    def test_absorbs_damage_differs_not_equal(self):
+        """Collations with different absorbs_damage are not equal."""
+        a = Collation(absorbs_damage=True, ordinal=True)
+        b = Collation(absorbs_damage=False, ordinal=True)
+        assert a != b
+        # The non-absorbing side is "greater" (preferred to absorb)
+        assert b > a
+
+    def test_ordinal_gt_non_ordinal(self):
+        ordinal = Collation(ordinal=True)
+        locale = Collation(ordinal=False, language="en")
+        assert ordinal > locale
+
+    def test_non_ordinal_lt_ordinal(self):
+        ordinal = Collation(ordinal=True)
+        locale = Collation(ordinal=False, language="en")
+        assert locale < ordinal
+
+    def test_language_ordering(self):
+        a = Collation(ordinal=False, language="de")
+        b = Collation(ordinal=False, language="en")
+        assert b > a  # "en" > "de"
+
+    def test_country_ordering(self):
+        a = Collation(ordinal=False, language="en", country="GB")
+        b = Collation(ordinal=False, language="en", country="US")
+        assert b > a  # "US" > "GB"
+
+    def test_sensitivity_tiebreaker(self):
+        a = Collation(ordinal=False, language="en", case_sensitive=False)
+        b = Collation(ordinal=False, language="en", case_sensitive=True)
+        assert b > a  # True > False
+
+    def test_equal_collations_not_gt(self):
+        a = Collation(ordinal=True, language="Latin")
+        b = Collation(ordinal=True, language="Latin")
+        assert not a > b
+        assert not b > a
+
+    def test_gt_not_implemented_for_non_collation(self):
+        c = Collation()
+        assert c.__gt__("not a collation") is NotImplemented
+
+
+# --- Total ordering ---
+
+
+class TestCollationTotalOrdering:
+    """Verify no incomparable pairs exist across a diverse set of collations."""
+
+    DIVERSE_COLLATIONS = [
+        Collation(),
+        Collation(ordinal=True),
+        Collation(ordinal=True, language="Albanian"),
+        Collation(ordinal=True, language="Latin"),
+        Collation(ordinal=False, language="en"),
+        Collation(ordinal=False, language="en", country="US"),
+        Collation(ordinal=False, language="en", country="GB"),
+        Collation(ordinal=False, language="de"),
+        Collation(ordinal=False, language="en", case_sensitive=True),
+        Collation(ordinal=False, language="en", case_sensitive=False),
+        Collation(ordinal=False, language="en", accent_sensitive=True),
+        Collation(absorbs_damage=True, ordinal=True),
+        Collation(absorbs_damage=True, ordinal=False, language="en"),
+        Collation(ordinal=False, language="en", lower_first=True),
+    ]
+
+    def test_no_incomparable_pairs(self):
+        """For every pair, at least one of ==, >, < must hold."""
+        for a, b in itertools.combinations(self.DIVERSE_COLLATIONS, 2):
+            comparable = (a == b) or (a > b) or (a < b)
+            assert comparable, f"Incomparable pair: {a!r} vs {b!r}"
+
+    def test_reflexive(self):
+        for c in self.DIVERSE_COLLATIONS:
+            assert c == c
+
+    def test_antisymmetric(self):
+        """If a > b then not b > a."""
+        for a, b in itertools.combinations(self.DIVERSE_COLLATIONS, 2):
+            if a > b:
+                assert not b > a, f"Antisymmetry violated: {a!r} vs {b!r}"
+
+    def test_transitive_sample(self):
+        """Spot-check transitivity on all triples."""
+        for a, b, c in itertools.combinations(self.DIVERSE_COLLATIONS, 3):
+            if a > b and b > c:
+                assert a > c, f"Transitivity violated: {a!r} > {b!r} > {c!r} but not {a!r} > {c!r}"
+
+
+# --- Derived operators ---
+
+
+class TestCollationDerivedOperators:
+    def test_ne_consistency(self):
+        a = Collation(ordinal=True, language="Albanian")
+        b = Collation(ordinal=True, language="Latin")
+        assert (a != b) is True
+        assert (a == b) is False
+
+    def test_ge_consistency(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=False, language="en")
+        assert (a >= b) == (a > b or a == b)
+
+    def test_le_consistency(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=False, language="en")
+        assert (b <= a) == (b < a or b == a)
+
+    def test_ge_equal(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=True)
+        assert a >= b
+        assert b >= a
+
+    def test_le_equal(self):
+        a = Collation(ordinal=True)
+        b = Collation(ordinal=True)
+        assert a <= b
+        assert b <= a
+
+    def test_ne_not_implemented(self):
+        assert Collation().__ne__("x") is NotImplemented
+
+    def test_ge_not_implemented(self):
+        assert Collation().__ge__("x") is NotImplemented
+
+    def test_le_not_implemented(self):
+        assert Collation().__le__("x") is NotImplemented
+
+    def test_lt_not_implemented(self):
+        assert Collation().__lt__("x") is NotImplemented

From 4ff8268bd8f523fd3fb5bcd4164025ba40857a85 Mon Sep 17 00:00:00 2001
From: Daniel Song <xdtsong@gmail.com>
Date: Thu, 12 Mar 2026 21:54:34 -0700
Subject: [PATCH 2/3] =?UTF-8?q?fix:=20address=20review=20findings=20?=
 =?UTF-8?q?=E2=80=94=20hash=20contract,=20transitivity,=20None=20coercion?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Replace unsafe_hash=True with custom __hash__ via _comparison_key()
  to fix __eq__/__hash__ contract violation (equal objects now always
  have equal hashes)
- Remove country/language None-tolerance from __eq__ to fix equality
  transitivity violation (A==B, B==C but A!=C)
- Use (0,)/(1,val) sentinel wrapping in _ordering_key() so None sorts
  distinctly from "" and False, fixing __eq__/__gt__ inconsistency
- Distinguish ordinal=None from ordinal=False in both equality and
  ordering
- Fix typos in docstring and field comments
- Expand test suite from 33 to 46 tests: hash consistency, equality
  transitivity, None-vs-False edge cases, all sensitivity tiebreakers

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 data_diff/abcs/database_types.py |  86 ++++++++++++------------
 tests/test_collation.py          | 109 ++++++++++++++++++++++++++++---
 2 files changed, 145 insertions(+), 50 deletions(-)

diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py
index 3dae8628..7cdbf817 100644
--- a/data_diff/abcs/database_types.py
+++ b/data_diff/abcs/database_types.py
@@ -15,23 +15,23 @@
 N = TypeVar("N")
 
 
-@attrs.frozen(kw_only=True, eq=False, order=False, unsafe_hash=True)
+@attrs.frozen(kw_only=True, eq=False, order=False, hash=False)
 class Collation:
     """
     A pre-parsed or pre-known record about db collation, per column.
 
     The "greater" collation should be used as a target collation for textual PKs
-    on both sides of the diff — by coverting the "lesser" collation to self.
+    on both sides of the diff — by converting the "lesser" collation to self.
 
     Snowflake easily absorbs the performance losses, so it has a boost to always
     be greater than any other collation in non-Snowflake databases.
     Other databases need to negotiate which side absorbs the performance impact.
     """
 
-    # A boost for special databases that are known to absorb the performance dmaage well.
+    # A boost for special databases that are known to absorb the performance damage well.
     absorbs_damage: bool = False
 
-    # Ordinal soring by ASCII/UTF8 (True), or alphabetic as per locale/country/etc (False).
+    # Ordinal sorting by ASCII/UTF8 (True), or alphabetic as per locale/country/etc (False).
     ordinal: bool | None = None
 
     # Lowercase first (aAbBcC or abcABC). Otherwise, uppercase first (AaBbCc or ABCabc).
@@ -49,27 +49,45 @@ class Collation:
     # Purely informational, for debugging:
     _source: None | str | Collection[str] = None
 
+    def _comparison_key(self) -> tuple:
+        """Key for equality and hashing — keeps __eq__/__hash__ consistent."""
+        if self.ordinal:
+            # Ordinal sorting is by code point; sensitivity flags are irrelevant.
+            return (self.absorbs_damage, True, self.language)
+        return (
+            self.absorbs_damage,
+            self.ordinal,  # None vs False are semantically distinct
+            self.language,
+            self.country,
+            self.case_sensitive,
+            self.accent_sensitive,
+            self.lower_first,
+        )
+
+    def _ordering_key(self) -> tuple:
+        """Key for deterministic total ordering. None sorts before any real value."""
+
+        # (0,) for None sorts before (1, value) for any real value.
+        def _wrap(v: object) -> tuple:
+            return (0,) if v is None else (1, v)
+
+        return (
+            self.absorbs_damage,
+            _wrap(self.ordinal),
+            _wrap(self.language),
+            _wrap(self.country),
+            _wrap(self.case_sensitive),
+            _wrap(self.accent_sensitive),
+            _wrap(self.lower_first),
+        )
+
     def __eq__(self, other: object) -> bool:
         if not isinstance(other, Collation):
             return NotImplemented
-        if self.absorbs_damage != other.absorbs_damage:
-            return False
-        if self.ordinal != other.ordinal:
-            return False
-        if self.ordinal and other.ordinal:
-            # When both have a language, they must match (e.g. Albanian_BIN != Latin_BIN in MS SQL).
-            # When either is None, treat as equal for backward compat (language-agnostic ordinal).
-            if self.language is not None and other.language is not None:
-                return self.language == other.language
-            return True
-        return (
-            self.language == other.language
-            # Country None-tolerance: if either side didn't report a country, don't penalize.
-            and (self.country is None or other.country is None or self.country == other.country)
-            and self.case_sensitive == other.case_sensitive
-            and self.accent_sensitive == other.accent_sensitive
-            and self.lower_first == other.lower_first
-        )
+        return self._comparison_key() == other._comparison_key()
+
+    def __hash__(self) -> int:
+        return hash(self._comparison_key())
 
     def __ne__(self, other: object) -> bool:
         if not isinstance(other, Collation):
@@ -81,31 +99,19 @@ def __gt__(self, other: object) -> bool:
             return NotImplemented
         if self == other:
             return False
+        # absorbs_damage=True means this db absorbs the conversion cost — it is the side that
+        # gets converted (i.e. "lesser"), so the non-absorbing side is "greater" (the target).
         if self.absorbs_damage and not other.absorbs_damage:
             return False
         if other.absorbs_damage and not self.absorbs_damage:
-            return True  # this one is preferred if it cannot absorb damage as its counterpart can
+            return True
         if self.ordinal and not other.ordinal:
             return True
         if other.ordinal and not self.ordinal:
             return False
-        # Deterministic total ordering: language > country > sensitivity flags.
-        # None is coerced to "" / False so that all collations are comparable.
-        self_key = (
-            self.language or "",
-            self.country or "",
-            self.case_sensitive or False,
-            self.accent_sensitive or False,
-            self.lower_first or False,
-        )
-        other_key = (
-            other.language or "",
-            other.country or "",
-            other.case_sensitive or False,
-            other.accent_sensitive or False,
-            other.lower_first or False,
-        )
-        return self_key > other_key
+        # Deterministic tuple ordering: (language, country, sensitivity flags).
+        # None sorts distinctly from "" / False via (0,) vs (1, value) wrapping.
+        return self._ordering_key() > other._ordering_key()
 
     def __ge__(self, other: object) -> bool:
         if not isinstance(other, Collation):
diff --git a/tests/test_collation.py b/tests/test_collation.py
index c321219a..ae767f0e 100644
--- a/tests/test_collation.py
+++ b/tests/test_collation.py
@@ -18,21 +18,34 @@ def test_ordinal_different_language(self):
         b = Collation(ordinal=True, language="Latin")
         assert a != b
 
-    def test_ordinal_none_language_treated_equal(self):
+    def test_ordinal_none_vs_set_language_not_equal(self):
+        """language=None is distinct from language="Latin" — no None-tolerance."""
         a = Collation(ordinal=True, language=None)
         b = Collation(ordinal=True, language="Latin")
-        assert a == b
+        assert a != b
 
     def test_ordinal_both_none_language(self):
         a = Collation(ordinal=True)
         b = Collation(ordinal=True)
         assert a == b
 
+    def test_ordinal_ignores_sensitivity_flags(self):
+        """Ordinal sorting is by code point; sensitivity flags are irrelevant."""
+        a = Collation(ordinal=True, language="Latin", case_sensitive=True)
+        b = Collation(ordinal=True, language="Latin", case_sensitive=False)
+        assert a == b
+
     def test_ordinal_vs_non_ordinal(self):
         a = Collation(ordinal=True)
         b = Collation(ordinal=False, language="en")
         assert a != b
 
+    def test_ordinal_none_vs_false(self):
+        """ordinal=None is distinct from ordinal=False."""
+        a = Collation()  # ordinal=None
+        b = Collation(ordinal=False)
+        assert a != b
+
     def test_locale_matching(self):
         a = Collation(ordinal=False, language="en", case_sensitive=True, accent_sensitive=True)
         b = Collation(ordinal=False, language="en", case_sensitive=True, accent_sensitive=True)
@@ -43,10 +56,11 @@ def test_locale_language_mismatch(self):
         b = Collation(ordinal=False, language="de")
         assert a != b
 
-    def test_country_none_tolerance(self):
+    def test_country_none_vs_set_not_equal(self):
+        """country=None is distinct from country="US" — no None-tolerance."""
         a = Collation(ordinal=False, language="en", country="US")
         b = Collation(ordinal=False, language="en", country=None)
-        assert a == b
+        assert a != b
 
     def test_country_mismatch(self):
         a = Collation(ordinal=False, language="en", country="US")
@@ -58,11 +72,54 @@ def test_sensitivity_difference(self):
         b = Collation(ordinal=False, language="en", case_sensitive=False)
         assert a != b
 
+    def test_case_sensitive_none_vs_false(self):
+        """case_sensitive=None (unknown) is distinct from case_sensitive=False."""
+        a = Collation(ordinal=False, language="en", case_sensitive=None)
+        b = Collation(ordinal=False, language="en", case_sensitive=False)
+        assert a != b
+
+    def test_absorbs_damage_in_equality(self):
+        a = Collation(absorbs_damage=True, ordinal=True)
+        b = Collation(absorbs_damage=False, ordinal=True)
+        assert a != b
+
     def test_not_implemented_for_non_collation(self):
         c = Collation()
         assert c.__eq__("not a collation") is NotImplemented
 
 
+# --- Hash consistency ---
+
+
+class TestCollationHash:
+    def test_equal_objects_same_hash(self):
+        a = Collation(ordinal=True, language="Latin")
+        b = Collation(ordinal=True, language="Latin")
+        assert a == b
+        assert hash(a) == hash(b)
+
+    def test_equal_ordinal_ignoring_sensitivity_same_hash(self):
+        """Ordinals with same language but different sensitivity must hash the same."""
+        a = Collation(ordinal=True, language="Latin", case_sensitive=True)
+        b = Collation(ordinal=True, language="Latin", case_sensitive=False)
+        assert a == b
+        assert hash(a) == hash(b)
+
+    def test_unequal_objects_different_hash(self):
+        """Not guaranteed but should hold for obviously different collations."""
+        a = Collation(ordinal=True, language="Albanian")
+        b = Collation(ordinal=True, language="Latin")
+        assert a != b
+        assert hash(a) != hash(b)
+
+    def test_hash_consistency_across_diverse_set(self):
+        """For all pairs in a diverse set: if a == b then hash(a) == hash(b)."""
+        collations = TestCollationTotalOrdering.DIVERSE_COLLATIONS
+        for a, b in itertools.combinations(collations, 2):
+            if a == b:
+                assert hash(a) == hash(b), f"Hash mismatch for equal pair: {a!r} vs {b!r}"
+
+
 # --- Ordering ---
 
 
@@ -71,18 +128,20 @@ def test_absorbs_damage_precedence(self):
         """absorbs_damage=True makes a collation "lesser" (preferred target) when not otherwise equal."""
         snowflake = Collation(absorbs_damage=True, ordinal=False, language="en")
         regular = Collation(absorbs_damage=False, ordinal=False, language="de")
-        # absorbs_damage overrides language ordering: snowflake is always lesser
         assert regular > snowflake
         assert not snowflake > regular
 
     def test_absorbs_damage_differs_not_equal(self):
-        """Collations with different absorbs_damage are not equal."""
         a = Collation(absorbs_damage=True, ordinal=True)
         b = Collation(absorbs_damage=False, ordinal=True)
         assert a != b
-        # The non-absorbing side is "greater" (preferred to absorb)
         assert b > a
 
+    def test_both_absorbs_damage_falls_through_to_tuple(self):
+        a = Collation(absorbs_damage=True, ordinal=False, language="de")
+        b = Collation(absorbs_damage=True, ordinal=False, language="en")
+        assert b > a  # "en" > "de" by tuple comparison
+
     def test_ordinal_gt_non_ordinal(self):
         ordinal = Collation(ordinal=True)
         locale = Collation(ordinal=False, language="en")
@@ -103,11 +162,27 @@ def test_country_ordering(self):
         b = Collation(ordinal=False, language="en", country="US")
         assert b > a  # "US" > "GB"
 
-    def test_sensitivity_tiebreaker(self):
+    def test_sensitivity_tiebreaker_case(self):
         a = Collation(ordinal=False, language="en", case_sensitive=False)
         b = Collation(ordinal=False, language="en", case_sensitive=True)
         assert b > a  # True > False
 
+    def test_sensitivity_tiebreaker_accent(self):
+        a = Collation(ordinal=False, language="en", accent_sensitive=False)
+        b = Collation(ordinal=False, language="en", accent_sensitive=True)
+        assert b > a
+
+    def test_sensitivity_tiebreaker_lower_first(self):
+        a = Collation(ordinal=False, language="en", lower_first=False)
+        b = Collation(ordinal=False, language="en", lower_first=True)
+        assert b > a
+
+    def test_none_sorts_before_real_value(self):
+        """None sorts distinctly from False / empty string."""
+        a = Collation(ordinal=False, language="en", case_sensitive=None)
+        b = Collation(ordinal=False, language="en", case_sensitive=False)
+        assert b > a  # (1, False) > (0,)
+
     def test_equal_collations_not_gt(self):
         a = Collation(ordinal=True, language="Latin")
         b = Collation(ordinal=True, language="Latin")
@@ -130,16 +205,24 @@ class TestCollationTotalOrdering:
         Collation(ordinal=True),
         Collation(ordinal=True, language="Albanian"),
         Collation(ordinal=True, language="Latin"),
+        Collation(ordinal=False),
         Collation(ordinal=False, language="en"),
         Collation(ordinal=False, language="en", country="US"),
         Collation(ordinal=False, language="en", country="GB"),
+        Collation(ordinal=False, language="en", country=None),
         Collation(ordinal=False, language="de"),
+        Collation(ordinal=False, language=""),
+        Collation(ordinal=False, language=None),
         Collation(ordinal=False, language="en", case_sensitive=True),
         Collation(ordinal=False, language="en", case_sensitive=False),
+        Collation(ordinal=False, language="en", case_sensitive=None),
         Collation(ordinal=False, language="en", accent_sensitive=True),
+        Collation(ordinal=False, language="en", accent_sensitive=False),
         Collation(absorbs_damage=True, ordinal=True),
+        Collation(absorbs_damage=True, ordinal=True, language="Latin"),
         Collation(absorbs_damage=True, ordinal=False, language="en"),
         Collation(ordinal=False, language="en", lower_first=True),
+        Collation(ordinal=False, language="en", lower_first=False),
     ]
 
     def test_no_incomparable_pairs(self):
@@ -158,12 +241,18 @@ def test_antisymmetric(self):
             if a > b:
                 assert not b > a, f"Antisymmetry violated: {a!r} vs {b!r}"
 
-    def test_transitive_sample(self):
-        """Spot-check transitivity on all triples."""
+    def test_transitive_gt(self):
+        """Spot-check transitivity of > over each 3-combination (in list order) of the diverse set."""
         for a, b, c in itertools.combinations(self.DIVERSE_COLLATIONS, 3):
             if a > b and b > c:
                 assert a > c, f"Transitivity violated: {a!r} > {b!r} > {c!r} but not {a!r} > {c!r}"
 
+    def test_transitive_eq(self):
+        """Spot-check transitivity of == over each 3-combination (in list order) of the diverse set."""
+        for a, b, c in itertools.combinations(self.DIVERSE_COLLATIONS, 3):
+            if a == b and b == c:
+                assert a == c, f"Equality transitivity violated: {a!r} == {b!r} == {c!r} but {a!r} != {c!r}"
+
 
 # --- Derived operators ---
 

From c2a7b49a8125f0a4e9327ebf6739dad0c19f18a8 Mon Sep 17 00:00:00 2001
From: Daniel Song <xdtsong@gmail.com>
Date: Thu, 12 Mar 2026 23:08:41 -0700
Subject: [PATCH 3/3] fix: correct docstring semantics and improve comment
 accuracy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Fix class docstring: Snowflake is "lesser" (the side that gets converted),
  not "greater" — matches actual absorbs_damage logic
- Clarify __gt__ fallthrough comment: absorbs_damage and ordinal are
  already resolved by that point
- Document _ordering_key divergence from _comparison_key for ordinals

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 data_diff/abcs/database_types.py | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/data_diff/abcs/database_types.py b/data_diff/abcs/database_types.py
index 7cdbf817..94986f6a 100644
--- a/data_diff/abcs/database_types.py
+++ b/data_diff/abcs/database_types.py
@@ -23,8 +23,8 @@ class Collation:
     The "greater" collation should be used as a target collation for textual PKs
     on both sides of the diff — by converting the "lesser" collation to self.
 
-    Snowflake easily absorbs the performance losses, so it has a boost to always
-    be greater than any other collation in non-Snowflake databases.
+    Snowflake easily absorbs the performance losses, so it is always the "lesser"
+    collation (the side that gets converted), ensuring the non-Snowflake side is "greater".
     Other databases need to negotiate which side absorbs the performance impact.
     """
 
@@ -65,7 +65,12 @@ def _comparison_key(self) -> tuple:
         )
 
     def _ordering_key(self) -> tuple:
-        """Key for deterministic total ordering. None sorts before any real value."""
+        """Key for deterministic total ordering. Only use via __gt__, not as a standalone sort key.
+
+        Unlike _comparison_key, this does not collapse fields for ordinals, so two
+        ordinals that are equal by __eq__ may have different ordering keys. The __gt__
+        method guards against this by checking equality first.
+        """
 
         # (0,) for None sorts before (1, value) for any real value.
         def _wrap(v: object) -> tuple:
@@ -109,7 +114,8 @@ def __gt__(self, other: object) -> bool:
             return True
         if other.ordinal and not self.ordinal:
             return False
-        # Deterministic tuple ordering: (language, country, sensitivity flags).
+        # Deterministic tuple ordering; by this point absorbs_damage and ordinal
+        # are resolved, so language, country, and sensitivity flags decide.
         # None sorts distinctly from "" / False via (0,) vs (1, value) wrapping.
         return self._ordering_key() > other._ordering_key()