From 9da6796883b2684df5ab6c33cd5defa66fd41239 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 09:57:01 +0100
Subject: [PATCH 01/20] refactor: motives are now instances of own classes

---
 src/ms_blocking/ms_blocking.py | 240 ++++++++++++++++++++++--
 src/ms_blocking/utils.py       | 326 +++++++++++----------------------
 2 files changed, 332 insertions(+), 234 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index fffbcc8..ac33759 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -1,5 +1,7 @@
 from ms_blocking.utils import *  # noqa: F403
 
+import networkx as nx
+
 
 class BlockerNode:
     """Abstract class from which derive all classes in the module"""
@@ -76,7 +78,6 @@ def __init__(self, left, right):
     def __repr__(self):
         return f"OrNode{{{self.left}, {self.right}}}"
 
-
     def block(self, df, motives=False):
         # Note: for performance, it would be wise to remove rows that are already paired with all other rows, though this case should be pretty rare in real situations
         coords_left = self.left.block(df, motives=motives)
@@ -185,9 +186,7 @@ def block(self, data, motives=False):
         }
 
         if motives:
-            explanations = {
-                f"Same '{column_name}'" for column_name in self.blocking_columns
-            }
+            explanations = [EquivalenceMotive(self.blocking_columns)]
             return add_motives_to_coords(coords, explanations)
         else:
             return set(coords)  # set is unnnecessary
@@ -276,10 +275,9 @@ def block(self, data, motives=False):
         coords = block_overlap(groups=groups, overlap=self.overlap)
 
         if motives:
-            explanations = {
-                f">={self.overlap}{' word_level' if self.word_level else ''} overlap in '{column_name}'"
-                for column_name in self.blocking_columns
-            }
+            explanations = [
+                OverlapMotive(self.blocking_columns, self.overlap, self.word_level)
+            ]
             return add_motives_to_coords(coords, explanations)
         else:
             return set(coords)
@@ -287,7 +285,7 @@ def block(self, data, motives=False):
 
 class MixedBlocker(BlockerNode):  # Leaf; For ANDs and RAM
     """Represent the intersection of an AttributeEquivalenceBlocker and an OverlapBlocker.
-    Designed for performance and RAM efficiency.
+    Used for performance and RAM efficiency.
     """
 
     def __init__(
@@ -426,17 +424,229 @@ def block(self, data, motives=False):
         coords = coords_equivalence.intersection(coords_overlap)
 
         if motives:
-            explanations = {
-                f"Same '{column_name}'" for column_name in self.equivalence_columns
-            } | {
-                f">={self.overlap}{' word_level' if self.word_level else ''} overlap in '{column_name}'"
-                for column_name in self.overlap_columns
-            }
+            explanations = [
+                EquivalenceMotive(self.equivalence_columns),
+                OverlapMotive(self.overlap_columns, self.overlap, self.word_level),
+            ]
+
             return add_motives_to_coords(coords, explanations)
         else:
             return set(coords)
 
 
+def add_blocks_to_dataset(
+    data: pd.DataFrame,
+    coords: Coords,
+    sort: bool = True,
+    keep_ungrouped_rows: bool = False,
+    merge_blocks: bool = True,
+    motives: bool = False,
+    show_as_pairs: bool = False,
+    output_columns: Columns = None,
+) -> pd.DataFrame:
+    """Returns the DataFrame annotated with the blocks described by coords
+
+    Takes a DataFrame and blocked coordinates, with or without motives, returns its rows (or pairs of rows) labelled with a '_block' column
+
+    Parameters
+    ----------
+       data : DataFrame
+           DataFrame for blocking
+       coords : Array
+           Blocked coordinates
+       sort : bool
+           Whether to sort the result by block, thereby regrouping rows of the same block
+       keep_ungrouped_rows : bool
+           Whether to display rows that do not belong to any block
+       merge_blocks : bool
+           Whether to transitively merge blocks
+       motives : bool
+           Whether to display the reason behind each block
+       show_as_pairs : bool
+           Whether to show the output as pairs of rows rather than simply reordering the initial DataFrame
+       output_columns : list
+           Columns to show. Useful in combination with show_as_pairs as column names are altered
+
+    Returns
+    -------
+    DataFrame
+      Blocked DataFrame
+
+    Examples
+    --------
+    >>> add_blocks_to_dataset(data=pd.DataFrame(
+       [
+           [0, 'first', 4],
+           [1, 'second', 6],
+           [2, 'first', 2],
+           [3, 'third', 5]
+       ],
+       columns=['id', 'rank', 'score']),
+       coords=np.array([{0, 2}]),
+       show_as_pairs=True,
+       output_columns=['id', 'rank'])
+        id_l rank_l  id_r rank_r  block
+       0     0  first     2  first      0
+    """
+
+    if show_as_pairs and keep_ungrouped_rows:
+        raise ValueError("Cannot both return pairs and keep ungrouped rows")
+
+    if motives:
+        if type(coords) is not dict:
+            raise TypeError("Cannot specify motives=True without passing motives")
+
+    # Ensure the index is a unique identifier
+    if not data.index.is_unique:
+        raise ValueError("DataFrame index must be unique to be used as an identifier.")
+
+    if "_motive" in data.columns:
+        if motives:
+            raise ValueError(
+                "Please rename existing '_motive' column OR do not pass 'motives=True'"
+            )
+
+    if "_block" in data.columns:
+        raise ValueError("Please rename existing '_block' column")
+
+    if output_columns is None:
+        output_columns = data.columns
+    data = data[output_columns].copy()
+
+    if len(coords) == 0 and not keep_ungrouped_rows:  # Empty graph
+        if show_as_pairs:
+            columns = [col + "_l" for col in data.columns] + [
+                col + "_r" for col in data.columns
+            ]
+            output_data = pd.DataFrame(columns=columns)
+        else:
+            output_data = pd.DataFrame(columns=data.columns)
+    else:
+        output_data = data
+        # Map coords to connected component labels
+        if merge_blocks:  # We solve the connected components problem
+            cc_labels = solve_connected_components_from_coords(coords)
+            # Match original index to new block ID
+            matcher = {
+                idx: label
+                for idx, label in enumerate(cc_labels)
+                if label != -1 and idx in data.index
+            }
+        else:  # We solve the cliques problem
+            g = nx.Graph()
+            # noinspection PyTypeChecker
+            g.add_edges_from(coords)
+            complete_subgraphs = list(nx.find_cliques(g))
+            complete_subgraphs = sorted(complete_subgraphs)
+            # matcher = {row_id:([i for i in range(len(complete_subgraphs)) if row_id in complete_subgraphs[i]]) for row_id in set(flatten(complete_subgraphs))}
+            matcher = dict()
+            for i, clique in enumerate(complete_subgraphs):
+                for node_idx in clique:
+                    if node_idx in matcher.keys():
+                        matcher[node_idx].append(i)
+                    else:
+                        matcher[node_idx] = [i]
+
+        if show_as_pairs:
+            output_data = pd.DataFrame()
+            for pair in coords:
+                left_row = data.loc[[tuple(pair)[0]]].copy()
+                current_index = left_row.index
+                right_row = data.loc[[tuple(pair)[1]]].copy()
+                left_row.columns = [col + "_l" for col in left_row.columns]
+                right_row.columns = [col + "_r" for col in right_row.columns]
+                current_row = pd.concat(
+                    [left_row.reset_index(drop=True), right_row.reset_index(drop=True)],
+                    axis=1,
+                )
+                current_row.index = current_index
+                if motives:
+                    current_row["_motive"] = str(solve_motives(coords[pair]))
+                output_data = pd.concat([output_data, current_row])
+
+        # Assign blocks to rows based on their original index
+        output_data["_block"] = output_data.index.map(matcher)
+        if not merge_blocks:
+            output_data = output_data.explode("_block")
+
+        if keep_ungrouped_rows:
+            output_data["_block"] = output_data["_block"].fillna(-1)
+            matcher_ungrouped_rows = {}
+            block_temp = []
+            i = 0  # Track # of blocks processed
+            for b in output_data["_block"]:
+                if b == -1:
+                    block_temp.append(i)
+                    i += 1
+                elif b not in matcher_ungrouped_rows:
+                    matcher_ungrouped_rows[b] = i
+                    block_temp.append(i)
+                    i += 1
+                else:
+                    block_temp.append(matcher_ungrouped_rows[b])
+            output_data["_block"] = block_temp
+        else:
+            if not show_as_pairs:
+                output_data = output_data[
+                    output_data["_block"].duplicated(keep=False)
+                    & output_data["_block"].notna()
+                ]
+
+        output_data.loc[:, ["_block"]] = start_from_zero(output_data["_block"])
+
+        if sort:
+            # Sort by block, then by original index
+            sort_cols = ["_block"]
+            if output_data.index.name:
+                output_data = output_data.sort_values(
+                    sort_cols + [output_data.index.name]
+                )
+            else:
+                # If no named index, use the first column of the DataFrame
+                output_data = output_data.reset_index()
+                output_data = output_data.sort_values(
+                    sort_cols + [output_data.columns[0]]
+                )
+                output_data = output_data.set_index(output_data.columns[0])
+
+        if not show_as_pairs and motives:
+            id_list = flatten(coords.keys())
+            motive_matcher = {
+                row_id: frozenset(
+                    str(solve_motives(coords[pair]))
+                    for pair in coords.keys()
+                    if row_id in pair
+                )
+                for row_id in id_list
+            }
+            output_data["_motive"] = output_data.index.map(motive_matcher)
+
+    if "_block" not in output_data.columns:  # Empty coords
+        output_data["_block"] = -1
+
+    output_data = output_data.reset_index(drop=True)
+    output_data["_block"] = output_data["_block"].astype(int)
+
+    return output_data
+
+
+def generate_blocking_report(
+    data: pd.DataFrame, coords: Coords, output_columns: Collection[str] = None
+) -> pd.DataFrame:
+    """
+    Shorthand for add_blocks_to_dataset with below arguments
+    """
+    return add_blocks_to_dataset(
+        data,
+        coords,
+        sort=True,
+        merge_blocks=False,
+        motives=True,
+        show_as_pairs=True,
+        output_columns=output_columns,
+    )
+
+
 def merge_blockers(
     left: BlockerNode, right: BlockerNode
 ) -> AttributeEquivalenceBlocker | OverlapBlocker | MixedBlocker | AndNode:
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 837645f..3ee8ead 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -4,17 +4,53 @@
 from scipy.sparse import coo_matrix
 from scipy.sparse.csgraph import connected_components
 import pandas as pd
-import networkx as nx
 import random
 from collections import Counter
 
 from itertools import combinations
 from typing import List, Set, Iterable, Dict, Collection, Any
 
+
+class EquivalenceMotive:
+    """Motive of pairs that hold the same value in every column of blocking_columns"""
+
+    def __init__(self, blocking_columns):
+        self.blocking_columns = blocking_columns
+
+    def __eq__(self, other):  # Motives of another class are never equal
+        return type(self) is type(other) and vars(self) == vars(other)
+
+    def __repr__(self):
+        return ", ".join(f"Same '{col}'" for col in self.blocking_columns)
+
+
+class OverlapMotive:
+    """Motive of pairs that overlap in every column of blocking_columns
+
+    overlap is the threshold displayed in the motive; word_level flags a word-level overlap
+    """
+
+    def __init__(self, blocking_columns, overlap=1, word_level=False):
+        self.blocking_columns = blocking_columns
+        self.overlap = overlap
+        self.word_level = word_level
+
+    def __eq__(self, other):  # Motives of another class are never equal
+        return type(self) is type(other) and vars(self) == vars(other)
+
+    def __repr__(self):
+        level = " word_level" if self.word_level else ""
+        return ", ".join(
+            f">={self.overlap}{level} overlap in '{column_name}'"
+            for column_name in self.blocking_columns
+        )
+
+
 Columns = List[str]
 Pair = Collection[int]
+Motive = EquivalenceMotive | OverlapMotive
 CoordsBasic = Set[Pair]
-CoordsMotives = Dict[Pair, Set[str]]
+CoordsMotives = Dict[Pair, List[Motive]]
 Coords = CoordsBasic | CoordsMotives
 
 _PUNCT_RE = re.compile(r'[!"#$%&()*+,-./:;<=>?@\[\\\]^_`{|}~]')
@@ -240,7 +276,7 @@ def merge_blocks_or(coords_1: Coords, coords_2: Coords) -> Coords:
     if type(coords_1) is type(coords_2) is dict:  # We have motives
         return {
             pair: (
-                (coords_1[pair] | coords_2[pair])
+                (coords_1[pair] + coords_2[pair])
                 if (pair in coords_1 and pair in coords_2)
                 else coords_1[pair]
                 if (pair in coords_1)
@@ -278,7 +314,7 @@ def merge_blocks_and(coords_1: Coords, coords_2: Coords) -> Coords:
     """
     if type(coords_1) is type(coords_2) is dict:  # We have motives
         return {
-            pair: (coords_1[pair] | coords_2[pair])
+            pair: (coords_1[pair] + coords_2[pair])
             for y in (coords_1, coords_2)
             for pair in y.keys()
             if (pair in coords_1 and pair in coords_2)
@@ -287,219 +323,6 @@ def merge_blocks_and(coords_1: Coords, coords_2: Coords) -> Coords:
         return coords_1.intersection(coords_2)
 
 
-def add_blocks_to_dataset(
-    data: pd.DataFrame,
-    coords: Coords,
-    sort: bool = True,
-    keep_ungrouped_rows: bool = False,
-    merge_blocks: bool = True,
-    motives: bool = False,
-    show_as_pairs: bool = False,
-    output_columns: Columns = None,
-) -> pd.DataFrame:
-    """Returns the intersection of an array of links
-
-    Takes two lists of paired elements, with or without motives, returns their intersection
-
-    Parameters
-    ----------
-       data : DataFrame
-           DataFrame for blocking
-       coords : Array
-           Blocked coordinates
-       sort : bool
-           Whether to sort the result by block, thereby regrouping rows of the same block
-       keep_ungrouped_rows : bool
-           Whether to display rows that do not belong to any block
-       merge_blocks : bool
-           Whether to merge transitively merge blocks
-       motives : bool
-           Whether to display the reason behind each block
-       show_as_pairs : bool
-           Whether to show the output as pairs or rows rather than simply reordering the initial DataFrame
-       output_columns : list
-           Columns to show. Useful in combination with show_as_pairs as column names are altered
-
-    Returns
-    -------
-    DataFrame
-      Blocked DataFrame
-
-    Examples
-    --------
-    >>> add_blocks_to_dataset(data=pd.DataFrame(
-       [
-           [0, 'first', 4],
-           [1, 'second', 6],
-           [2, 'first', 2],
-           [3, 'third', 5]
-       ],
-       columns=['id', 'rank', 'score']),
-       coords=np.array([{0, 2}]),
-       show_as_pairs=True,
-       output_columns=['id', 'rank'])
-        id_l rank_l  id_r rank_r  block
-       0     0  first     2  first      0
-    """
-
-    if show_as_pairs and keep_ungrouped_rows:
-        raise ValueError("Cannot both return pairs and keep ungrouped rows")
-
-    if motives:
-        if type(coords) is not dict:
-            raise TypeError("Cannot specify motives=True without passing motives")
-
-    # Ensure the index is a unique identifier
-    if not data.index.is_unique:
-        raise ValueError("DataFrame index must be unique to be used as an identifier.")
-
-    if "_motive" in data.columns:
-        if motives:
-            raise ValueError(
-                "Please rename existing '_motive' column OR do not pass 'motives=True'"
-            )
-
-    if "_block" in data.columns:
-        raise ValueError("Please rename existing '_block' column")
-
-    if output_columns is None:
-        output_columns = data.columns
-    data = data[output_columns].copy()
-
-    if len(coords) == 0 and not keep_ungrouped_rows:  # Empty graph
-        if show_as_pairs:
-            columns = [col + "_l" for col in data.columns] + [
-                col + "_r" for col in data.columns
-            ]
-            output_data = pd.DataFrame(columns=columns)
-        else:
-            output_data = pd.DataFrame(columns=data.columns)
-    else:
-        output_data = data
-        # Map coords to connected component labels
-        if merge_blocks:  # We solve the connected components problem
-            cc_labels = solve_connected_components_from_coords(coords)
-            # Match original index to new block ID
-            matcher = {
-                idx: label
-                for idx, label in enumerate(cc_labels)
-                if label != -1 and idx in data.index
-            }
-        else:  # We solve the cliques problem
-            g = nx.Graph()
-            # noinspection PyTypeChecker
-            g.add_edges_from(coords)
-            complete_subgraphs = list(nx.find_cliques(g))
-            complete_subgraphs = sorted(complete_subgraphs)
-            # matcher = {row_id:([i for i in range(len(complete_subgraphs)) if row_id in complete_subgraphs[i]]) for row_id in set(flatten(complete_subgraphs))}
-            matcher = dict()
-            for i, clique in enumerate(complete_subgraphs):
-                for node_idx in clique:
-                    if node_idx in matcher.keys():
-                        matcher[node_idx].append(i)
-                    else:
-                        matcher[node_idx] = [i]
-
-        if show_as_pairs:
-            output_data = pd.DataFrame()
-            for pair in coords:
-                left_row = data.loc[[tuple(pair)[0]]].copy()
-                current_index = left_row.index
-                right_row = data.loc[[tuple(pair)[1]]].copy()
-                left_row.columns = [col + "_l" for col in left_row.columns]
-                right_row.columns = [col + "_r" for col in right_row.columns]
-                current_row = pd.concat(
-                    [left_row.reset_index(drop=True), right_row.reset_index(drop=True)],
-                    axis=1,
-                )
-                current_row.index = current_index
-                output_data = pd.concat([output_data, current_row])
-
-        # Assign blocks to rows based on their original index
-        output_data["_block"] = output_data.index.map(matcher)
-        if not merge_blocks:
-            output_data = output_data.explode("_block")
-
-        if keep_ungrouped_rows:
-            output_data["_block"] = output_data["_block"].fillna(-1)
-            matcher_ungrouped_rows = {}
-            block_temp = []
-            i = 0  # Track # of blocks processed
-            for b in output_data["_block"]:
-                if b == -1:
-                    block_temp.append(i)
-                    i += 1
-                elif b not in matcher_ungrouped_rows:
-                    matcher_ungrouped_rows[b] = i
-                    block_temp.append(i)
-                    i += 1
-                else:
-                    block_temp.append(matcher_ungrouped_rows[b])
-            output_data["_block"] = block_temp
-        else:
-            if not show_as_pairs:
-                output_data = output_data[
-                    output_data["_block"].duplicated(keep=False)
-                    & output_data["_block"].notna()
-                ]
-
-        output_data.loc[:, ["_block"]] = start_from_zero(output_data["_block"])
-
-        if sort:
-            # Sort by block, then by original index
-            sort_cols = ["_block"]
-            if output_data.index.name:
-                output_data = output_data.sort_values(
-                    sort_cols + [output_data.index.name]
-                )
-            else:
-                # If no named index, use the first column of the DataFrame
-                output_data = output_data.reset_index()
-                output_data = output_data.sort_values(
-                    sort_cols + [output_data.columns[0]]
-                )
-                output_data = output_data.set_index(output_data.columns[0])
-
-    if motives:
-        output_data["_motive"] = ""
-        id_list = flatten(coords.keys())
-        motive_matcher = {
-            row_id: frozenset(
-                reason
-                for pair in coords.keys()
-                if row_id in pair
-                for reason in coords[pair]
-            )
-            for row_id in id_list
-        }
-        output_data["_motive"] = output_data.index.map(motive_matcher)
-
-    if "_block" not in output_data.columns:  # Empty coords
-        output_data["_block"] = -1
-
-    output_data = output_data.reset_index(drop=True)
-    output_data["_block"] = output_data["_block"].astype(int)
-
-    return output_data
-
-
-def generate_blocking_report(
-    data: pd.DataFrame, coords: Coords, output_columns: Collection[str] = None
-) -> pd.DataFrame:
-    """
-    Shorthand for add_blocks_to_dataset with below arguments
-    """
-    return add_blocks_to_dataset(
-        data,
-        coords,
-        sort=True,
-        merge_blocks=False,
-        motives=True,
-        show_as_pairs=True,
-        output_columns=output_columns,
-    )
-
-
 def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     """Turns a stringified list into an actual python list, taking extra inner quotes into account
 
@@ -682,7 +505,9 @@ def block_overlap(groups: Iterable, overlap: int = 1) -> Coords:
     return coords
 
 
-def add_motives_to_coords(coords: Coords, explanations: Set[str]) -> CoordsMotives:
+def add_motives_to_coords(
+    coords: Coords, explanations: List[Motive]
+) -> Dict[Pair, List[Motive]]:
     """Block a DataFrame based on overlap accross columns
 
     Parameters
@@ -690,7 +515,7 @@ def add_motives_to_coords(coords: Coords, explanations: Set[str]) -> CoordsMotiv
     coords : Coords
       Coords obtained by blocking
 
-    explanations : Set[str]
+    explanations : List[EquivalenceMotive|OverlapMotive]
       Set of explanations
 
     Returns
@@ -718,3 +543,66 @@ def add_motives_to_coords(coords: Coords, explanations: Set[str]) -> CoordsMotiv
     }
     """
     return {pair: explanations for pair in coords}
+
+
+def solve_motives(motives: List[Motive]) -> List[Motive]:
+    """Remove duplicated and redundant motives from a list of motives
+
+    An OverlapMotive is redundant when another OverlapMotive on the same column(s) supersedes it:
+    - same word-level condition: the motive with the bigger overlap is kept
+    - same overlap: the element-level motive (word_level=False) is kept over the word-level one
+    OverlapMotives differing on both overlap and word-level condition are all kept.
+    EquivalenceMotives are only deduplicated.
+
+    Parameters
+    ----------
+    motives : List[Motive]
+      Motives attached to a pair; must not be empty
+
+    Returns
+    -------
+    List[Motive]
+      Remaining motives, in the order they appear in the input
+
+    Raises
+    ------
+    ValueError
+      If motives is empty
+
+    Examples
+    --------
+    >>> solve_motives([OverlapMotive(['websites'], 1), OverlapMotive(['websites'], 2), OverlapMotive(['websites'], 2, word_level=False)])
+    [>=2 overlap in 'websites']
+    >>> solve_motives([EquivalenceMotive(['city']), EquivalenceMotive(['city'])])
+    [Same 'city']
+    """
+    if not motives:
+        raise ValueError("Motives must not be empty")
+
+    def is_superseded_by(weaker: Motive, stronger: Motive) -> bool:
+        # Only OverlapMotives on the same column(s) can make one another redundant
+        if type(weaker) is not OverlapMotive or type(stronger) is not OverlapMotive:
+            return False
+        if weaker.blocking_columns != stronger.blocking_columns:
+            return False
+        if weaker.word_level == stronger.word_level:
+            return weaker.overlap <= stronger.overlap
+        # Different word-level condition: only comparable at equal overlap
+        return weaker.overlap == stronger.overlap and weaker.word_level
+
+    final_motives = []
+    for motive in motives:
+        # Compare types first so that motives of different classes are never deemed equal
+        if any(
+            (type(motive) is type(kept) and motive == kept)
+            or is_superseded_by(motive, kept)
+            for kept in final_motives
+        ):
+            continue  # Duplicate of, or superseded by, a motive that is already kept
+        # The new motive replaces every kept motive it supersedes
+        final_motives = [
+            kept for kept in final_motives if not is_superseded_by(kept, motive)
+        ]
+        final_motives.append(motive)
+
+    return final_motives

From 60a3cc9b2b68750c0180cced574864053ed18874 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 09:57:32 +0100
Subject: [PATCH 02/20] docs: update example notebook with new motive system

---
 docs/example.ipynb | 685 +++++++++++++++++----------------------------
 1 file changed, 252 insertions(+), 433 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index 6b82165..7c44012 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,8 +32,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.010997600Z",
-     "start_time": "2026-01-30T14:21:13.420790Z"
+     "end_time": "2026-02-03T08:45:42.897197100Z",
+     "start_time": "2026-02-03T08:45:42.069366700Z"
     }
    },
    "source": [
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.049404600Z",
-     "start_time": "2026-01-30T14:21:14.010997600Z"
+     "end_time": "2026-02-03T08:45:42.936219800Z",
+     "start_time": "2026-02-03T08:45:42.901218100Z"
     }
    },
    "source": [
@@ -282,8 +282,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.190107400Z",
-     "start_time": "2026-01-30T14:21:14.089762400Z"
+     "end_time": "2026-02-03T08:45:43.089459800Z",
+     "start_time": "2026-02-03T08:45:42.974568800Z"
     }
    },
    "source": [
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.309413300Z",
-     "start_time": "2026-01-30T14:21:14.278545600Z"
+     "end_time": "2026-02-03T08:45:43.241858Z",
+     "start_time": "2026-02-03T08:45:43.164364500Z"
     }
    },
    "source": [
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.378808Z",
-     "start_time": "2026-01-30T14:21:14.349508200Z"
+     "end_time": "2026-02-03T08:45:43.293312300Z",
+     "start_time": "2026-02-03T08:45:43.279951300Z"
     }
    },
    "source": [
@@ -369,8 +369,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.558644200Z",
-     "start_time": "2026-01-30T14:21:14.459573100Z"
+     "end_time": "2026-02-03T08:45:43.491120800Z",
+     "start_time": "2026-02-03T08:45:43.387967900Z"
     }
    },
    "source": [
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.635514Z",
-     "start_time": "2026-01-30T14:21:14.598913Z"
+     "end_time": "2026-02-03T08:45:43.564017300Z",
+     "start_time": "2026-02-03T08:45:43.543375900Z"
     }
    },
    "source": [
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:14.829719100Z",
-     "start_time": "2026-01-30T14:21:14.676157200Z"
+     "end_time": "2026-02-03T08:45:43.730577200Z",
+     "start_time": "2026-02-03T08:45:43.602849600Z"
     }
    },
    "source": [
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:15.027923700Z",
-     "start_time": "2026-01-30T14:21:14.926401Z"
+     "end_time": "2026-02-03T08:45:43.963649700Z",
+     "start_time": "2026-02-03T08:45:43.857183700Z"
     }
    },
    "source": [
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:15.403596500Z",
-     "start_time": "2026-01-30T14:21:15.279120300Z"
+     "end_time": "2026-02-03T08:45:44.258242200Z",
+     "start_time": "2026-02-03T08:45:44.158668200Z"
     }
    },
    "source": [
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:15.686136800Z",
-     "start_time": "2026-01-30T14:21:15.608444400Z"
+     "end_time": "2026-02-03T08:45:44.439022100Z",
+     "start_time": "2026-02-03T08:45:44.392038500Z"
     }
    },
    "source": [
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:15.998425200Z",
-     "start_time": "2026-01-30T14:21:15.931370100Z"
+     "end_time": "2026-02-03T08:45:44.704919900Z",
+     "start_time": "2026-02-03T08:45:44.604905100Z"
     }
    },
    "source": [
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:16.305679100Z",
-     "start_time": "2026-01-30T14:21:16.212470400Z"
+     "end_time": "2026-02-03T08:45:45.167225900Z",
+     "start_time": "2026-02-03T08:45:45.142061100Z"
     }
    },
    "source": [
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:16.678653800Z",
-     "start_time": "2026-01-30T14:21:16.558976200Z"
+     "end_time": "2026-02-03T08:45:45.497760900Z",
+     "start_time": "2026-02-03T08:45:45.335278600Z"
     }
    },
    "source": [
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:17.354294400Z",
-     "start_time": "2026-01-30T14:21:17.316050200Z"
+     "end_time": "2026-02-03T08:45:45.879254300Z",
+     "start_time": "2026-02-03T08:45:45.779256400Z"
     }
    },
    "source": [
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:17.537043700Z",
-     "start_time": "2026-01-30T14:21:17.392490700Z"
+     "end_time": "2026-02-03T08:45:46.232628900Z",
+     "start_time": "2026-02-03T08:45:46.186246600Z"
     }
    },
    "source": [
@@ -1464,8 +1464,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:17.655177300Z",
-     "start_time": "2026-01-30T14:21:17.573776300Z"
+     "end_time": "2026-02-03T08:45:46.440480Z",
+     "start_time": "2026-02-03T08:45:46.391161200Z"
     }
    },
    "source": [
@@ -1589,8 +1589,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:17.910335600Z",
-     "start_time": "2026-01-30T14:21:17.821453400Z"
+     "end_time": "2026-02-03T08:45:46.754986800Z",
+     "start_time": "2026-02-03T08:45:46.666968100Z"
     }
    },
    "source": [
@@ -1804,8 +1804,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:18.279899900Z",
-     "start_time": "2026-01-30T14:21:18.250988900Z"
+     "end_time": "2026-02-03T08:45:47.079529400Z",
+     "start_time": "2026-02-03T08:45:47.029011300Z"
     }
    },
    "source": [
@@ -1828,8 +1828,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:18.481263300Z",
-     "start_time": "2026-01-30T14:21:18.466284300Z"
+     "end_time": "2026-02-03T08:45:47.289177100Z",
+     "start_time": "2026-02-03T08:45:47.270625400Z"
     }
    },
    "source": [
@@ -1849,8 +1849,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:18.562779600Z",
-     "start_time": "2026-01-30T14:21:18.520368200Z"
+     "end_time": "2026-02-03T08:45:47.381218700Z",
+     "start_time": "2026-02-03T08:45:47.334125300Z"
     }
    },
    "source": [
@@ -1990,8 +1990,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:18.843568700Z",
-     "start_time": "2026-01-30T14:21:18.686911500Z"
+     "end_time": "2026-02-03T08:45:47.689050500Z",
+     "start_time": "2026-02-03T08:45:47.511174200Z"
     }
    },
    "source": [
@@ -2034,8 +2034,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:18.967168700Z",
-     "start_time": "2026-01-30T14:21:18.928864500Z"
+     "end_time": "2026-02-03T08:45:47.818974600Z",
+     "start_time": "2026-02-03T08:45:47.771680100Z"
     }
    },
    "source": [
@@ -2213,8 +2213,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:19.276047300Z",
-     "start_time": "2026-01-30T14:21:19.146886900Z"
+     "end_time": "2026-02-03T08:45:48.096706900Z",
+     "start_time": "2026-02-03T08:45:48.012725300Z"
     }
    },
    "source": [
@@ -2443,8 +2443,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:19.820247800Z",
-     "start_time": "2026-01-30T14:21:19.653280100Z"
+     "end_time": "2026-02-03T08:45:48.598207900Z",
+     "start_time": "2026-02-03T08:45:48.541276800Z"
     }
    },
    "source": [
@@ -2593,8 +2593,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:20.335572Z",
-     "start_time": "2026-01-30T14:21:20.302358700Z"
+     "end_time": "2026-02-03T08:45:49.188205Z",
+     "start_time": "2026-02-03T08:45:49.122589100Z"
     }
    },
    "source": [
@@ -2613,12 +2613,12 @@
     {
      "data": {
       "text/plain": [
-       "{frozenset({1, 4}): {\"Same 'City'\"},\n",
-       " frozenset({8, 11}): {\"Same 'City'\"},\n",
-       " frozenset({2, 5}): {\"Same 'City'\"},\n",
-       " frozenset({10, 13}): {\"Same 'City'\"},\n",
-       " frozenset({3, 8}): {\"Same 'City'\"},\n",
-       " frozenset({3, 11}): {\"Same 'City'\"}}"
+       "{frozenset({1, 4}): [Same 'City'],\n",
+       " frozenset({8, 11}): [Same 'City'],\n",
+       " frozenset({2, 5}): [Same 'City'],\n",
+       " frozenset({10, 13}): [Same 'City'],\n",
+       " frozenset({3, 8}): [Same 'City'],\n",
+       " frozenset({3, 11}): [Same 'City']}"
       ]
      },
      "execution_count": 26,
@@ -2631,9 +2631,7 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "Of course, this will induce some overhead."
-   ]
+   "source": "This will induce some overhead."
   },
   {
    "cell_type": "markdown",
@@ -2646,8 +2644,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:20.409405100Z",
-     "start_time": "2026-01-30T14:21:20.374573700Z"
+     "end_time": "2026-02-03T08:45:49.300573800Z",
+     "start_time": "2026-02-03T08:45:49.260624100Z"
     }
    },
    "source": [
@@ -2668,16 +2666,16 @@
        "7  10    Caroline Dufour               Lens   45   \n",
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  _block        _motive  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  (Same 'City')  \n",
-       "1                               ['jacquesdupond.fr']       0  (Same 'City')  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  (Same 'City')  \n",
-       "3                                                 []       1  (Same 'City')  \n",
-       "4                                 ['roubaixlove.fr']       2  (Same 'City')  \n",
-       "5                                                 []       2  (Same 'City')  \n",
-       "6                                                 []       2  (Same 'City')  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  (Same 'City')  \n",
-       "8                                    ['lensfans.fr']       3  (Same 'City')  "
+       "                                            websites  _block          _motive  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  ([Same 'City'])  \n",
+       "1                               ['jacquesdupond.fr']       0  ([Same 'City'])  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  ([Same 'City'])  \n",
+       "3                                                 []       1  ([Same 'City'])  \n",
+       "4                                 ['roubaixlove.fr']       2  ([Same 'City'])  \n",
+       "5                                                 []       2  ([Same 'City'])  \n",
+       "6                                                 []       2  ([Same 'City'])  \n",
+       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  ([Same 'City'])  \n",
+       "8                                    ['lensfans.fr']       3  ([Same 'City'])  "
       ],
       "text/html": [
        "<div>\n",
@@ -2716,7 +2714,7 @@
        "      <td>37</td>\n",
        "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -2726,7 +2724,7 @@
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -2736,7 +2734,7 @@
        "      <td>24</td>\n",
        "      <td>['somewebsite.com/users/rpz59']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -2746,7 +2744,7 @@
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -2756,7 +2754,7 @@
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -2766,7 +2764,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -2776,7 +2774,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -2786,7 +2784,7 @@
        "      <td>45</td>\n",
        "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -2796,7 +2794,7 @@
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>(Same 'City')</td>\n",
+       "      <td>([Same 'City'])</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -2813,23 +2811,19 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "... Though since motives make more sense when considering pairs of rows instead of full blocks, the above visualization is not that interesting..."
-   ]
+   "source": "... though since motives make more sense when considering pairs of rows instead of full blocks, the above visualization is not that interesting..."
   },
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": [
-    "... Which is the reason you can pass `show_as_pairs=True` to `msb.add_blocks_to_dataset` to see the output has a list of pairs:"
-   ]
+   "source": "... which is the reason you can pass `show_as_pairs=True` to `msb.add_blocks_to_dataset` to see the output as a list of pairs:"
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:20.612990700Z",
-     "start_time": "2026-01-30T14:21:20.483928200Z"
+     "end_time": "2026-02-03T08:45:49.556914900Z",
+     "start_time": "2026-02-03T08:45:49.481507100Z"
     }
    },
    "source": [
@@ -2855,13 +2849,13 @@
        "4                                                 []     3       Paul Delarue   \n",
        "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
        "\n",
-       "              City_r  Age_r            websites_r  _block        _motive  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']       0  (Same 'City')  \n",
-       "1          Phalempin     24                    []       1  (Same 'City')  \n",
-       "2            Roubaix     33                    []       2  (Same 'City')  \n",
-       "3            Roubaix     33                    []       2  (Same 'City')  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']       2  (Same 'City')  \n",
-       "5               Lens     15       ['lensfans.fr']       3  (Same 'City')  "
+       "              City_r  Age_r            websites_r        _motive  _block  \n",
+       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [Same 'City']       0  \n",
+       "1          Phalempin     24                    []  [Same 'City']       1  \n",
+       "2            Roubaix     33                    []  [Same 'City']       2  \n",
+       "3            Roubaix     33                    []  [Same 'City']       2  \n",
+       "4            Roubaix     32    ['roubaixlove.fr']  [Same 'City']       2  \n",
+       "5               Lens     15       ['lensfans.fr']  [Same 'City']       3  "
       ],
       "text/html": [
        "<div>\n",
@@ -2892,8 +2886,8 @@
        "      <th>City_r</th>\n",
        "      <th>Age_r</th>\n",
        "      <th>websites_r</th>\n",
-       "      <th>_block</th>\n",
        "      <th>_motive</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -2909,8 +2903,8 @@
        "      <td>Villeneuve d'Ascq</td>\n",
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -2924,8 +2918,8 @@
        "      <td>Phalempin</td>\n",
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -2939,8 +2933,8 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -2954,8 +2948,8 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -2969,8 +2963,8 @@
        "      <td>Roubaix</td>\n",
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -2984,8 +2978,8 @@
        "      <td>Lens</td>\n",
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>3</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -3010,8 +3004,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:20.944670700Z",
-     "start_time": "2026-01-30T14:21:20.834495500Z"
+     "end_time": "2026-02-03T08:45:50.016104Z",
+     "start_time": "2026-02-03T08:45:49.965660200Z"
     }
    },
    "source": [
@@ -3023,13 +3017,13 @@
     {
      "data": {
       "text/plain": [
-       "   id_l             Name_l  id_r             Name_r  _block        _motive\n",
-       "0     1     Jacques Dupond     4     Jacques Dupont       0  (Same 'City')\n",
-       "1     2  Pierre Dusquesnes     5  pierre dusquesnes       1  (Same 'City')\n",
-       "2     3       Paul Delarue    11     sophie_delarue       2  (Same 'City')\n",
-       "3     8     Sophie Delarue    11     sophie_delarue       2  (Same 'City')\n",
-       "4     8     Sophie Delarue     3       Paul Delarue       2  (Same 'City')\n",
-       "5    10    Caroline Dufour    13      Benoît Benoît       3  (Same 'City')"
+       "   id_l             Name_l  id_r             Name_r        _motive  _block\n",
+       "0     1     Jacques Dupond     4     Jacques Dupont  [Same 'City']       0\n",
+       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [Same 'City']       1\n",
+       "2     3       Paul Delarue    11     sophie_delarue  [Same 'City']       2\n",
+       "3     8     Sophie Delarue    11     sophie_delarue  [Same 'City']       2\n",
+       "4     8     Sophie Delarue     3       Paul Delarue  [Same 'City']       2\n",
+       "5    10    Caroline Dufour    13      Benoît Benoît  [Same 'City']       3"
       ],
       "text/html": [
        "<div>\n",
@@ -3054,8 +3048,8 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>_block</th>\n",
        "      <th>_motive</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3065,8 +3059,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -3074,8 +3068,8 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -3083,8 +3077,8 @@
        "      <td>Paul Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -3092,8 +3086,8 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3101,8 +3095,8 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>3</td>\n",
        "      <td>Paul Delarue</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -3110,8 +3104,8 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>3</td>\n",
-       "      <td>(Same 'City')</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -3132,188 +3126,13 @@
     "Motives are dynamic:"
    ]
   },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:21.591044600Z",
-     "start_time": "2026-01-30T14:21:21.517777200Z"
-    }
-   },
-   "source": [
-    "msb.generate_blocking_report(df, links)"
-   ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "   id_l             Name_l             City_l  Age_l  \\\n",
-       "0     1     Jacques Dupond  Villeneuve d'Ascq     37   \n",
-       "1     2  Pierre Dusquesnes          Phalempin     24   \n",
-       "2     3       Paul Delarue            Roubaix     32   \n",
-       "3     8     Sophie Delarue            Roubaix     33   \n",
-       "4     8     Sophie Delarue            Roubaix     33   \n",
-       "5    10    Caroline Dufour               Lens     45   \n",
-       "\n",
-       "                                          websites_l  id_r             Name_r  \\\n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...     4     Jacques Dupont   \n",
-       "1                    ['somewebsite.com/users/rpz59']     5  pierre dusquesnes   \n",
-       "2                                 ['roubaixlove.fr']    11     sophie_delarue   \n",
-       "3                                                 []    11     sophie_delarue   \n",
-       "4                                                 []     3       Paul Delarue   \n",
-       "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
-       "\n",
-       "              City_r  Age_r            websites_r  _block        _motive  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']       0  (Same 'City')  \n",
-       "1          Phalempin     24                    []       1  (Same 'City')  \n",
-       "2            Roubaix     33                    []       2  (Same 'City')  \n",
-       "3            Roubaix     33                    []       2  (Same 'City')  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']       2  (Same 'City')  \n",
-       "5               Lens     15       ['lensfans.fr']       3  (Same 'City')  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id_l</th>\n",
-       "      <th>Name_l</th>\n",
-       "      <th>City_l</th>\n",
-       "      <th>Age_l</th>\n",
-       "      <th>websites_l</th>\n",
-       "      <th>id_r</th>\n",
-       "      <th>Name_r</th>\n",
-       "      <th>City_r</th>\n",
-       "      <th>Age_r</th>\n",
-       "      <th>websites_r</th>\n",
-       "      <th>_block</th>\n",
-       "      <th>_motive</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(Same 'City')</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 30
-  },
   {
    "cell_type": "code",
    "metadata": {
     "scrolled": true,
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:21.867809800Z",
-     "start_time": "2026-01-30T14:21:21.674986800Z"
+     "end_time": "2026-02-03T08:45:50.383366500Z",
+     "start_time": "2026-02-03T08:45:50.220420100Z"
     }
    },
    "source": [
@@ -3344,35 +3163,35 @@
     {
      "data": {
       "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  _block  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont       0   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python       0   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour       0   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont       1   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python       1   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour       1   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python       1   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît       1   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes       2   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue       3   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python       4   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît       4   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python       4   \n",
+       "    id_l             Name_l  id_r              Name_r  \\\n",
+       "0      1     Jacques Dupond     4      Jacques Dupont   \n",
+       "1      1     Jacques Dupond     6  Jean-Michel Python   \n",
+       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
+       "3      1     Jacques Dupond     4      Jacques Dupont   \n",
+       "4      1     Jacques Dupond     6  Jean-Michel Python   \n",
+       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
+       "6     10    Caroline Dufour     6  Jean-Michel Python   \n",
+       "7     10    Caroline Dufour    13       Benoît Benoît   \n",
+       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
+       "9      8     Sophie Delarue    11      sophie_delarue   \n",
+       "10    10    Caroline Dufour     6  Jean-Michel Python   \n",
+       "11    10    Caroline Dufour    13       Benoît Benoît   \n",
+       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
-       "                                              _motive  \n",
-       "0   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "1   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "2   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "3   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "4   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "5   (>=1 overlap in 'websites', Same 'City', Same ...  \n",
-       "6                         (>=1 overlap in 'websites')  \n",
-       "7                         (>=1 overlap in 'websites')  \n",
-       "8                           (Same 'City', Same 'Age')  \n",
-       "9                           (Same 'City', Same 'Age')  \n",
-       "10                        (>=1 overlap in 'websites')  \n",
-       "11                        (>=1 overlap in 'websites')  \n",
-       "12                        (>=1 overlap in 'websites')  "
+       "                                              _motive  _block  \n",
+       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0  \n",
+       "1                         [>=1 overlap in 'websites']       0  \n",
+       "2                         [>=1 overlap in 'websites']       0  \n",
+       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1  \n",
+       "4                         [>=1 overlap in 'websites']       1  \n",
+       "5                         [>=1 overlap in 'websites']       1  \n",
+       "6                         [>=1 overlap in 'websites']       1  \n",
+       "7                         [>=1 overlap in 'websites']       1  \n",
+       "8                           [Same 'Age', Same 'City']       2  \n",
+       "9                           [Same 'Age', Same 'City']       3  \n",
+       "10                        [>=1 overlap in 'websites']       4  \n",
+       "11                        [>=1 overlap in 'websites']       4  \n",
+       "12                        [>=1 overlap in 'websites']       4  "
       ],
       "text/html": [
        "<div>\n",
@@ -3397,8 +3216,8 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>_block</th>\n",
        "      <th>_motive</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3408,8 +3227,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -3417,8 +3236,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -3426,8 +3245,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -3435,8 +3254,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3444,8 +3263,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -3453,8 +3272,8 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -3462,8 +3281,8 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -3471,8 +3290,8 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -3480,8 +3299,8 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>2</td>\n",
-       "      <td>(Same 'City', Same 'Age')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>9</th>\n",
@@ -3489,8 +3308,8 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>3</td>\n",
-       "      <td>(Same 'City', Same 'Age')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
@@ -3498,8 +3317,8 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
@@ -3507,8 +3326,8 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>12</th>\n",
@@ -3516,20 +3335,20 @@
        "      <td>Benoît Benoît</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 31
+   "execution_count": 30
   },
   {
    "cell_type": "markdown",
@@ -3545,47 +3364,47 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-01-30T14:21:22.186415700Z",
-     "start_time": "2026-01-30T14:21:22.127304600Z"
+     "end_time": "2026-02-03T08:45:50.717126Z",
+     "start_time": "2026-02-03T08:45:50.677002100Z"
     }
    },
    "source": [
-    "report[\"score\"] = msb.scoring(report)\n",
-    "report.sort_values(\"score\", ascending=False)"
+    "report[\"_score\"] = msb.scoring(report)\n",
+    "report.sort_values(\"_score\", ascending=False)"
    ],
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  _block  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont       0   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python       0   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour       0   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont       1   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python       1   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour       1   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes       2   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue       3   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python       1   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît       1   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python       4   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît       4   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python       4   \n",
+       "    id_l             Name_l  id_r              Name_r  \\\n",
+       "0      1     Jacques Dupond     4      Jacques Dupont   \n",
+       "3      1     Jacques Dupond     4      Jacques Dupont   \n",
+       "1      1     Jacques Dupond     6  Jean-Michel Python   \n",
+       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
+       "4      1     Jacques Dupond     6  Jean-Michel Python   \n",
+       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
+       "6     10    Caroline Dufour     6  Jean-Michel Python   \n",
+       "7     10    Caroline Dufour    13       Benoît Benoît   \n",
+       "10    10    Caroline Dufour     6  Jean-Michel Python   \n",
+       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
+       "11    10    Caroline Dufour    13       Benoît Benoît   \n",
+       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
+       "9      8     Sophie Delarue    11      sophie_delarue   \n",
        "\n",
-       "                                              _motive  score  \n",
-       "0   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "1   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "2   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "3   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "4   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "5   (>=1 overlap in 'websites', Same 'City', Same ...      3  \n",
-       "8                           (Same 'City', Same 'Age')      2  \n",
-       "9                           (Same 'City', Same 'Age')      2  \n",
-       "6                         (>=1 overlap in 'websites')      1  \n",
-       "7                         (>=1 overlap in 'websites')      1  \n",
-       "10                        (>=1 overlap in 'websites')      1  \n",
-       "11                        (>=1 overlap in 'websites')      1  \n",
-       "12                        (>=1 overlap in 'websites')      1  "
+       "                                              _motive  _block  _score  \n",
+       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0      52  \n",
+       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1      52  \n",
+       "1                         [>=1 overlap in 'websites']       0      27  \n",
+       "2                         [>=1 overlap in 'websites']       0      27  \n",
+       "4                         [>=1 overlap in 'websites']       1      27  \n",
+       "5                         [>=1 overlap in 'websites']       1      27  \n",
+       "6                         [>=1 overlap in 'websites']       1      27  \n",
+       "7                         [>=1 overlap in 'websites']       1      27  \n",
+       "10                        [>=1 overlap in 'websites']       4      27  \n",
+       "12                        [>=1 overlap in 'websites']       4      27  \n",
+       "11                        [>=1 overlap in 'websites']       4      27  \n",
+       "8                           [Same 'Age', Same 'City']       2      25  \n",
+       "9                           [Same 'Age', Same 'City']       3      25  "
       ],
       "text/html": [
        "<div>\n",
@@ -3610,9 +3429,9 @@
        "      <th>Name_l</th>\n",
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
-       "      <th>_block</th>\n",
        "      <th>_motive</th>\n",
-       "      <th>score</th>\n",
+       "      <th>_block</th>\n",
+       "      <th>_score</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3622,9 +3441,19 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
+       "      <td>52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Jacques Dupond</td>\n",
+       "      <td>4</td>\n",
+       "      <td>Jacques Dupont</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>1</td>\n",
+       "      <td>52</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -3632,9 +3461,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -3642,19 +3471,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3662,9 +3481,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -3672,29 +3491,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites', Same 'City', Same ...</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>2</td>\n",
-       "      <td>(Same 'City', Same 'Age')</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>3</td>\n",
-       "      <td>(Same 'City', Same 'Age')</td>\n",
-       "      <td>2</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -3702,9 +3501,9 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
-       "      <td>1</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -3712,9 +3511,9 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
-       "      <td>1</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
@@ -3722,41 +3521,61 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
-       "      <td>1</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
+       "      <th>12</th>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
-       "      <td>1</td>\n",
+       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>12</th>\n",
+       "      <th>11</th>\n",
+       "      <td>10</td>\n",
+       "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
-       "      <td>(&gt;=1 overlap in 'websites')</td>\n",
-       "      <td>1</td>\n",
+       "      <td>27</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Pierre Dusquesnes</td>\n",
+       "      <td>5</td>\n",
+       "      <td>pierre dusquesnes</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>2</td>\n",
+       "      <td>25</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>8</td>\n",
+       "      <td>Sophie Delarue</td>\n",
+       "      <td>11</td>\n",
+       "      <td>sophie_delarue</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>3</td>\n",
+       "      <td>25</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
        "</div>"
       ]
      },
-     "execution_count": 32,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 32
+   "execution_count": 31
   }
  ],
  "metadata": {

From d08173dc432d05ffec1621497f6861e0aee24d93 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 15:54:20 +0100
Subject: [PATCH 03/20] feat: BREAKING CHANGES various improvements and
 bugfixes

---
 src/ms_blocking/ms_blocking.py |  64 +++++++++---
 src/ms_blocking/utils.py       | 174 ++++++++++++++++-----------------
 2 files changed, 134 insertions(+), 104 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index ac33759..9d61832 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -186,7 +186,7 @@ def block(self, data, motives=False):
         }
 
         if motives:
-            explanations = [EquivalenceMotive(self.blocking_columns)]
+            explanations = [EquivalenceMotive(col) for col in self.blocking_columns]
             return add_motives_to_coords(coords, explanations)
         else:
             return set(coords)  # set is unnnecessary
@@ -276,7 +276,8 @@ def block(self, data, motives=False):
 
         if motives:
             explanations = [
-                OverlapMotive(self.blocking_columns, self.overlap, self.word_level)
+                OverlapMotive(col, self.overlap, self.word_level)
+                for col in self.blocking_columns
             ]
             return add_motives_to_coords(coords, explanations)
         else:
@@ -425,8 +426,10 @@ def block(self, data, motives=False):
 
         if motives:
             explanations = [
-                EquivalenceMotive(self.equivalence_columns),
-                OverlapMotive(self.overlap_columns, self.overlap, self.word_level),
+                EquivalenceMotive(col) for col in self.equivalence_columns
+            ] + [
+                OverlapMotive(col, self.overlap, self.word_level)
+                for col in self.overlap_columns
             ]
 
             return add_motives_to_coords(coords, explanations)
@@ -443,6 +446,7 @@ def add_blocks_to_dataset(
     motives: bool = False,
     show_as_pairs: bool = False,
     output_columns: Columns = None,
+    score: bool = False,
 ) -> pd.DataFrame:
     """Returns the intersection of an array of links
 
@@ -466,6 +470,8 @@ def add_blocks_to_dataset(
            Whether to show the output as pairs or rows rather than simply reordering the initial DataFrame
        output_columns : list
            Columns to show. Useful in combination with show_as_pairs as column names are altered
+       score : bool
+           Whether to show a score (computed from the number of motives)
 
     Returns
     -------
@@ -494,23 +500,33 @@ def add_blocks_to_dataset(
 
     if motives:
         if type(coords) is not dict:
-            raise TypeError("Cannot specify motives=True without passing motives")
+            raise TypeError("Cannot specify 'motives=True' without passing motives")
 
     # Ensure the index is a unique identifier
     if not data.index.is_unique:
         raise ValueError("DataFrame index must be unique to be used as an identifier.")
 
+    if score and not motives:
+        raise ValueError("Cannot specify 'score=True' without passing motives")
+
     if "_motive" in data.columns:
         if motives:
             raise ValueError(
                 "Please rename existing '_motive' column OR do not pass 'motives=True'"
             )
 
+    if "_score" in data.columns:  # guard the column actually written when score=True
+        if score:
+            raise ValueError(
+                "Please rename existing '_score' column OR do not pass 'score=True'"
+            )
+
     if "_block" in data.columns:
         raise ValueError("Please rename existing '_block' column")
 
     if output_columns is None:
         output_columns = data.columns
+
     data = data[output_columns].copy()
 
     if len(coords) == 0 and not keep_ungrouped_rows:  # Empty graph
@@ -521,6 +537,13 @@ def add_blocks_to_dataset(
             output_data = pd.DataFrame(columns=columns)
         else:
             output_data = pd.DataFrame(columns=data.columns)
+
+        if motives:
+            output_data["_motive"] = ""
+        if score:
+            output_data["_score"] = 0
+        output_data["_block"] = -1
+
     else:
         output_data = data
         # Map coords to connected component labels
@@ -561,7 +584,12 @@ def add_blocks_to_dataset(
                 )
                 current_row.index = current_index
                 if motives:
-                    current_row["_motive"] = str(solve_motives(coords[pair]))
+                    motives_solved = solve_motives(coords[pair])
+                    current_row["_motive"] = str(list(map(str, motives_solved)))
+                    if score:
+                        current_row["_score"] = len(
+                            motives_solved
+                        )  # Score is simply the number of non-redundant motives
                 output_data = pd.concat([output_data, current_row])
 
         # Assign blocks to rows based on their original index
@@ -612,17 +640,24 @@ def add_blocks_to_dataset(
         if not show_as_pairs and motives:
             id_list = flatten(coords.keys())
             motive_matcher = {
-                row_id: frozenset(
-                    str(solve_motives(coords[pair]))
-                    for pair in coords.keys()
-                    if row_id in pair
-                )
+                row_id: str(list(map(str, solve_motives(coords[pair]))))
+                for pair in coords.keys()
                 for row_id in id_list
+                if row_id in pair
             }
             output_data["_motive"] = output_data.index.map(motive_matcher)
+            if score:
+                output_data["_score"] = 0
+                score_matcher = {  # Horribly repetitive
+                    row_id: len(solve_motives(coords[pair]))
+                    for pair in coords.keys()
+                    for row_id in id_list
+                    if row_id in pair
+                }
+                output_data["_score"] = output_data.index.map(score_matcher)
 
-    if "_block" not in output_data.columns:  # Empty coords
-        output_data["_block"] = -1
+    # if "_block" not in output_data.columns:  # Empty coords
+    #    output_data["_block"] = -1
 
     output_data = output_data.reset_index(drop=True)
     output_data["_block"] = output_data["_block"].astype(int)
@@ -802,3 +837,6 @@ def merge_blockers(
         )
     else:
         return AndNode(left, right)
+
+
+# TODO: deport logic in a way that enables .progress_apply
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 3ee8ead..aaa5e08 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -12,38 +12,45 @@
 
 
 class EquivalenceMotive:
-    def __init__(self, blocking_columns):
-        self.blocking_columns = blocking_columns
+    def __init__(self, blocking_column):
+        if not isinstance(blocking_column, str):
+            raise TypeError("blocking_column for Motive must be a string")
+        self.blocking_column = blocking_column
 
     def __eq__(self, other):
-        return self.blocking_columns == other.blocking_columns
+        return isinstance(other, EquivalenceMotive) and self.blocking_column == other.blocking_column
+
+    def __str__(self):
+        return f"Same '{self.blocking_column}'"
 
     def __repr__(self):
-        return ", ".join(
-            [f"Same '{column_name}'" for column_name in self.blocking_columns]
-        )
+        return f"EquivalenceMotive(['{self.blocking_column}'])"
 
 
 class OverlapMotive:
-    def __init__(self, blocking_columns, overlap=1, word_level=False):
-        self.blocking_columns = blocking_columns
+    def __init__(self, blocking_column, overlap=1, word_level=False):
+        if not isinstance(blocking_column, str):
+            raise TypeError("blocking_column for Motive must be a string")
+        if not isinstance(overlap, int):
+            raise TypeError("overlap must be an int")
+        if not isinstance(word_level, bool):
+            raise TypeError("word_level must be a boolean")
+        self.blocking_column = blocking_column
         self.overlap = overlap
         self.word_level = word_level
 
     def __eq__(self, other):
         return (
-            self.blocking_columns == other.blocking_columns
+            isinstance(other, OverlapMotive) and self.blocking_column == other.blocking_column
             and self.overlap == other.overlap
             and self.word_level == other.word_level
         )
 
+    def __str__(self):
+        return f">={self.overlap}{' word-level' if self.word_level else ''} overlap in '{self.blocking_column}'"
+
     def __repr__(self):
-        return ", ".join(
-            [
-                f">={self.overlap}{' word_level' if self.word_level else ''} overlap in '{column_name}'"
-                for column_name in self.blocking_columns
-            ]
-        )
+        return f"OverlapMotive(['{self.blocking_column}'], {self.overlap}{', word_level=True' if self.word_level else ''})"
 
 
 Columns = List[str]
@@ -276,7 +283,7 @@ def merge_blocks_or(coords_1: Coords, coords_2: Coords) -> Coords:
     if type(coords_1) is type(coords_2) is dict:  # We have motives
         return {
             pair: (
-                (coords_1[pair] + coords_2[pair])
+                coords_1[pair] + coords_2[pair]
                 if (pair in coords_1 and pair in coords_2)
                 else coords_1[pair]
                 if (pair in coords_1)
@@ -287,6 +294,7 @@ def merge_blocks_or(coords_1: Coords, coords_2: Coords) -> Coords:
         }
     else:
         return coords_1.union(coords_2)
+    # TODO: check for merging one with motive and one w/o
 
 
 def merge_blocks_and(coords_1: Coords, coords_2: Coords) -> Coords:
@@ -314,7 +322,7 @@ def merge_blocks_and(coords_1: Coords, coords_2: Coords) -> Coords:
     """
     if type(coords_1) is type(coords_2) is dict:  # We have motives
         return {
-            pair: (coords_1[pair] + coords_2[pair])
+            pair: coords_1[pair] + coords_2[pair]
             for y in (coords_1, coords_2)
             for pair in y.keys()
             if (pair in coords_1 and pair in coords_2)
@@ -376,40 +384,6 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
         return [s for s in cleaned_items if len(s) > 0]
 
 
-def scoring(data: pd.DataFrame, motives_column: str = "_motive") -> pd.Series:
-    """Add a score to a blocked DataFrame based on the number of motives
-
-    Parameters
-    ----------
-    data : DataFrame
-      DataFrame with motives
-
-    motives_column : str
-      Name of the column containing the motives
-
-    Returns
-    -------
-    Series[int]
-      A column of scores
-    """
-
-    # Check that we do have motives
-    if motives_column not in data.columns:
-        if motives_column == "_motive":
-            raise ValueError("No motives in DataFrame")
-        else:
-            raise ValueError(
-                f'Specified motives column "{motives_column}" does not exist'
-            )
-
-    if "score" in data.columns:
-        print("Renaming 'score' column to 'score_old'")
-        data = data.rename(columns={"score": "score_old"})
-
-    scores = data[motives_column].apply(len)
-    return scores
-
-
 def must_not_be_different_apply(  # WIP
     temp_data: pd.DataFrame,
     blocking_columns: List[str],
@@ -558,51 +532,69 @@ def solve_motives(motives: List[Motive]) -> List[Motive]:
     Returns
     -------
     List[Motive]
-      Pairs obtained by blocking
+      A list of Motives whose length should be smaller or equal to the original list of motives
 
     Examples
     --------
     >>> solve_motives([OverlapMotive(['websites'], 1), OverlapMotive(['websites'], 2), OverlapMotive(['websites'], 2, word_level=False)])
-    OverlapMotive(['websites'], 2, word_level=False)
+    [OverlapMotive(['websites'], 2)]
     """
     if not motives:
         raise ValueError("Motives must not be empty")
 
-    final_motives = [motives[0]]
-    for motive in motives[1:]:
-        if motive not in final_motives:
-            final_motives.append(motive)
-            if type(motive) is OverlapMotive:
-                # Look for redundant motives
-                for motive_to_compare in final_motives[:-1]:
-                    if (
-                        type(motive_to_compare) is OverlapMotive
-                    ):  # With EquivalenceMotive, equality check suffices
-                        if (
-                            motive.blocking_columns
-                            == motive_to_compare.blocking_columns
-                        ):
-                            if motive.word_level == motive_to_compare.word_level:
-                                # Replace Blocker with the one with bigger overlap
-                                if motive.overlap < motive_to_compare.overlap:
-                                    final_motives.remove(motive)
-                                    final_motives.append(motive_to_compare)
-                                elif motive.overlap > motive.overlap:
-                                    final_motives.remove(motive_to_compare)
-                                    final_motives.append(motive)
-                            elif motive.overlap == motive_to_compare.overlap:
-                                # Replace Blocker with the one with stricter word/element-level condition
-                                if (
-                                    motive.word_level
-                                    and not motive_to_compare.word_level
-                                ):
-                                    final_motives.remove(motive)
-                                    final_motives.append(motive_to_compare)
-                                elif (
-                                    not motive.word_level
-                                    and motive_to_compare.word_level
-                                ):
-                                    final_motives.remove(motive_to_compare)
-                                    final_motives.append(motive)
-
-    return final_motives
+    # split_motives = []
+    # for motive in motives:
+    #    split_motives += split_motive(motive)
+
+    final_motives = [
+        motive for motive in motives if type(motive) is EquivalenceMotive
+    ]  # With EquivalenceMotive, equality check suffices
+    overlap_motives = [motive for motive in motives if type(motive) is OverlapMotive]
+    overlap_columns = [motive.blocking_column for motive in overlap_motives]
+
+    for column in overlap_columns:
+        overlap_motives_for_column = [
+            motive for motive in overlap_motives if motive.blocking_column == column
+        ]
+
+        # Select Blocker with stricter word/element-level condition
+        word_level_motives_for_column = [
+            motive for motive in overlap_motives_for_column if motive.word_level
+        ]
+        not_word_level_motives_for_column = [
+            motive for motive in overlap_motives_for_column if not motive.word_level
+        ]
+
+        # Find biggest overlap among the non-word_level ones
+        if not_word_level_motives_for_column:
+            max_overlap_not_word_level_for_column = max(not_word_level_motives_for_column, key=lambda m: m.overlap)
+            max_overlap_not_word_level_for_column_overlap = max_overlap_not_word_level_for_column.overlap
+        else:
+            max_overlap_not_word_level_for_column = []
+            max_overlap_not_word_level_for_column_overlap = 0 # Will never be used, left for linter
+
+        # Now find biggest overlap among the word_level ones
+        if word_level_motives_for_column:
+            max_overlap_word_level_for_column = max(word_level_motives_for_column, key=lambda m: m.overlap)
+            max_overlap_word_level_for_column_overlap = max_overlap_word_level_for_column.overlap
+            if not_word_level_motives_for_column:
+                # If there is already an OverlapMotive on same column with equal or greater overlap but not word_level, discard it
+                if max_overlap_word_level_for_column_overlap <= max_overlap_not_word_level_for_column_overlap:
+                    max_overlap_word_level_for_column = []
+        else:
+            max_overlap_word_level_for_column = []
+
+        if max_overlap_not_word_level_for_column:
+            max_overlap_not_word_level_for_column = [max_overlap_not_word_level_for_column]
+        if max_overlap_word_level_for_column:
+            max_overlap_word_level_for_column = [max_overlap_word_level_for_column]
+        final_motives += (
+            max_overlap_word_level_for_column + max_overlap_not_word_level_for_column
+        )
+
+    # Remove duplicates
+    final_motives_no_duplicates = []
+    for motive in final_motives:
+        if motive not in final_motives_no_duplicates:
+            final_motives_no_duplicates.append(motive)
+    return final_motives_no_duplicates

From add6ae54fed5e847452fcf55034cfab59f6f8510 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 15:54:54 +0100
Subject: [PATCH 04/20] docs: fix rendering issue in notebook

---
 docs/example.ipynb | 440 +++++++++++++++++++++++----------------------
 1 file changed, 224 insertions(+), 216 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index 7c44012..45699af 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,8 +32,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:42.897197100Z",
-     "start_time": "2026-02-03T08:45:42.069366700Z"
+     "end_time": "2026-02-03T14:40:28.508876500Z",
+     "start_time": "2026-02-03T14:40:27.761433800Z"
     }
    },
    "source": [
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:42.936219800Z",
-     "start_time": "2026-02-03T08:45:42.901218100Z"
+     "end_time": "2026-02-03T14:40:28.563486200Z",
+     "start_time": "2026-02-03T14:40:28.512916Z"
     }
    },
    "source": [
@@ -282,8 +282,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.089459800Z",
-     "start_time": "2026-02-03T08:45:42.974568800Z"
+     "end_time": "2026-02-03T14:40:28.741020400Z",
+     "start_time": "2026-02-03T14:40:28.615799300Z"
     }
    },
    "source": [
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.241858Z",
-     "start_time": "2026-02-03T08:45:43.164364500Z"
+     "end_time": "2026-02-03T14:40:29.068322900Z",
+     "start_time": "2026-02-03T14:40:28.915502800Z"
     }
    },
    "source": [
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.293312300Z",
-     "start_time": "2026-02-03T08:45:43.279951300Z"
+     "end_time": "2026-02-03T14:40:29.328166900Z",
+     "start_time": "2026-02-03T14:40:29.309785500Z"
     }
    },
    "source": [
@@ -369,8 +369,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.491120800Z",
-     "start_time": "2026-02-03T08:45:43.387967900Z"
+     "end_time": "2026-02-03T14:40:29.547537Z",
+     "start_time": "2026-02-03T14:40:29.397273800Z"
     }
    },
    "source": [
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.564017300Z",
-     "start_time": "2026-02-03T08:45:43.543375900Z"
+     "end_time": "2026-02-03T14:40:29.599221700Z",
+     "start_time": "2026-02-03T14:40:29.572788900Z"
     }
    },
    "source": [
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.730577200Z",
-     "start_time": "2026-02-03T08:45:43.602849600Z"
+     "end_time": "2026-02-03T14:40:29.841412500Z",
+     "start_time": "2026-02-03T14:40:29.660471200Z"
     }
    },
    "source": [
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:43.963649700Z",
-     "start_time": "2026-02-03T08:45:43.857183700Z"
+     "end_time": "2026-02-03T14:40:30.138487100Z",
+     "start_time": "2026-02-03T14:40:30.060590900Z"
     }
    },
    "source": [
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:44.258242200Z",
-     "start_time": "2026-02-03T08:45:44.158668200Z"
+     "end_time": "2026-02-03T14:40:30.619777700Z",
+     "start_time": "2026-02-03T14:40:30.422768900Z"
     }
    },
    "source": [
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:44.439022100Z",
-     "start_time": "2026-02-03T08:45:44.392038500Z"
+     "end_time": "2026-02-03T14:40:30.988164600Z",
+     "start_time": "2026-02-03T14:40:30.834419400Z"
     }
    },
    "source": [
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:44.704919900Z",
-     "start_time": "2026-02-03T08:45:44.604905100Z"
+     "end_time": "2026-02-03T14:40:31.490353400Z",
+     "start_time": "2026-02-03T14:40:31.385134Z"
     }
    },
    "source": [
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:45.167225900Z",
-     "start_time": "2026-02-03T08:45:45.142061100Z"
+     "end_time": "2026-02-03T14:40:32.118722700Z",
+     "start_time": "2026-02-03T14:40:31.995086900Z"
     }
    },
    "source": [
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:45.497760900Z",
-     "start_time": "2026-02-03T08:45:45.335278600Z"
+     "end_time": "2026-02-03T14:40:32.898993200Z",
+     "start_time": "2026-02-03T14:40:32.771388400Z"
     }
    },
    "source": [
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:45.879254300Z",
-     "start_time": "2026-02-03T08:45:45.779256400Z"
+     "end_time": "2026-02-03T14:40:33.431455700Z",
+     "start_time": "2026-02-03T14:40:33.206324Z"
     }
    },
    "source": [
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:46.232628900Z",
-     "start_time": "2026-02-03T08:45:46.186246600Z"
+     "end_time": "2026-02-03T14:40:34.177679600Z",
+     "start_time": "2026-02-03T14:40:34.059417200Z"
     }
    },
    "source": [
@@ -1464,8 +1464,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:46.440480Z",
-     "start_time": "2026-02-03T08:45:46.391161200Z"
+     "end_time": "2026-02-03T14:40:34.323788200Z",
+     "start_time": "2026-02-03T14:40:34.232749100Z"
     }
    },
    "source": [
@@ -1589,8 +1589,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:46.754986800Z",
-     "start_time": "2026-02-03T08:45:46.666968100Z"
+     "end_time": "2026-02-03T14:40:35.068638700Z",
+     "start_time": "2026-02-03T14:40:34.966880900Z"
     }
    },
    "source": [
@@ -1804,8 +1804,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:47.079529400Z",
-     "start_time": "2026-02-03T08:45:47.029011300Z"
+     "end_time": "2026-02-03T14:40:35.421514400Z",
+     "start_time": "2026-02-03T14:40:35.348243100Z"
     }
    },
    "source": [
@@ -1828,8 +1828,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:47.289177100Z",
-     "start_time": "2026-02-03T08:45:47.270625400Z"
+     "end_time": "2026-02-03T14:40:35.616427400Z",
+     "start_time": "2026-02-03T14:40:35.568154600Z"
     }
    },
    "source": [
@@ -1849,8 +1849,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:47.381218700Z",
-     "start_time": "2026-02-03T08:45:47.334125300Z"
+     "end_time": "2026-02-03T14:40:35.856681600Z",
+     "start_time": "2026-02-03T14:40:35.755378800Z"
     }
    },
    "source": [
@@ -1990,8 +1990,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:47.689050500Z",
-     "start_time": "2026-02-03T08:45:47.511174200Z"
+     "end_time": "2026-02-03T14:40:36.225054300Z",
+     "start_time": "2026-02-03T14:40:36.088658200Z"
     }
    },
    "source": [
@@ -2034,8 +2034,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:47.818974600Z",
-     "start_time": "2026-02-03T08:45:47.771680100Z"
+     "end_time": "2026-02-03T14:40:36.546944200Z",
+     "start_time": "2026-02-03T14:40:36.506897100Z"
     }
    },
    "source": [
@@ -2213,8 +2213,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:48.096706900Z",
-     "start_time": "2026-02-03T08:45:48.012725300Z"
+     "end_time": "2026-02-03T14:40:36.899709500Z",
+     "start_time": "2026-02-03T14:40:36.769059100Z"
     }
    },
    "source": [
@@ -2443,8 +2443,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:48.598207900Z",
-     "start_time": "2026-02-03T08:45:48.541276800Z"
+     "end_time": "2026-02-03T14:40:37.743563200Z",
+     "start_time": "2026-02-03T14:40:37.558867900Z"
     }
    },
    "source": [
@@ -2593,8 +2593,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:49.188205Z",
-     "start_time": "2026-02-03T08:45:49.122589100Z"
+     "end_time": "2026-02-03T14:40:38.350268900Z",
+     "start_time": "2026-02-03T14:40:38.156431100Z"
     }
    },
    "source": [
@@ -2613,12 +2613,12 @@
     {
      "data": {
       "text/plain": [
-       "{frozenset({1, 4}): [Same 'City'],\n",
-       " frozenset({8, 11}): [Same 'City'],\n",
-       " frozenset({2, 5}): [Same 'City'],\n",
-       " frozenset({10, 13}): [Same 'City'],\n",
-       " frozenset({3, 8}): [Same 'City'],\n",
-       " frozenset({3, 11}): [Same 'City']}"
+       "{frozenset({1, 4}): [EquivalenceMotive(['City'])],\n",
+       " frozenset({8, 11}): [EquivalenceMotive(['City'])],\n",
+       " frozenset({2, 5}): [EquivalenceMotive(['City'])],\n",
+       " frozenset({10, 13}): [EquivalenceMotive(['City'])],\n",
+       " frozenset({3, 8}): [EquivalenceMotive(['City'])],\n",
+       " frozenset({3, 11}): [EquivalenceMotive(['City'])]}"
       ]
      },
      "execution_count": 26,
@@ -2644,8 +2644,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:49.300573800Z",
-     "start_time": "2026-02-03T08:45:49.260624100Z"
+     "end_time": "2026-02-03T14:40:38.712869Z",
+     "start_time": "2026-02-03T14:40:38.617699300Z"
     }
    },
    "source": [
@@ -2667,15 +2667,15 @@
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
        "                                            websites  _block          _motive  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  ([Same 'City'])  \n",
-       "1                               ['jacquesdupond.fr']       0  ([Same 'City'])  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  ([Same 'City'])  \n",
-       "3                                                 []       1  ([Same 'City'])  \n",
-       "4                                 ['roubaixlove.fr']       2  ([Same 'City'])  \n",
-       "5                                                 []       2  ([Same 'City'])  \n",
-       "6                                                 []       2  ([Same 'City'])  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  ([Same 'City'])  \n",
-       "8                                    ['lensfans.fr']       3  ([Same 'City'])  "
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  [\"Same 'City'\"]  \n",
+       "1                               ['jacquesdupond.fr']       0  [\"Same 'City'\"]  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  [\"Same 'City'\"]  \n",
+       "3                                                 []       1  [\"Same 'City'\"]  \n",
+       "4                                 ['roubaixlove.fr']       2  [\"Same 'City'\"]  \n",
+       "5                                                 []       2  [\"Same 'City'\"]  \n",
+       "6                                                 []       2  [\"Same 'City'\"]  \n",
+       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  [\"Same 'City'\"]  \n",
+       "8                                    ['lensfans.fr']       3  [\"Same 'City'\"]  "
       ],
       "text/html": [
        "<div>\n",
@@ -2714,7 +2714,7 @@
        "      <td>37</td>\n",
        "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
        "      <td>0</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -2724,7 +2724,7 @@
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
        "      <td>0</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -2734,7 +2734,7 @@
        "      <td>24</td>\n",
        "      <td>['somewebsite.com/users/rpz59']</td>\n",
        "      <td>1</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -2744,7 +2744,7 @@
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
        "      <td>1</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -2754,7 +2754,7 @@
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
        "      <td>2</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -2764,7 +2764,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -2774,7 +2774,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -2784,7 +2784,7 @@
        "      <td>45</td>\n",
        "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -2794,7 +2794,7 @@
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>([Same 'City'])</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -2822,8 +2822,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:49.556914900Z",
-     "start_time": "2026-02-03T08:45:49.481507100Z"
+     "end_time": "2026-02-03T14:40:39.016063100Z",
+     "start_time": "2026-02-03T14:40:38.850306600Z"
     }
    },
    "source": [
@@ -2849,13 +2849,13 @@
        "4                                                 []     3       Paul Delarue   \n",
        "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
        "\n",
-       "              City_r  Age_r            websites_r        _motive  _block  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [Same 'City']       0  \n",
-       "1          Phalempin     24                    []  [Same 'City']       1  \n",
-       "2            Roubaix     33                    []  [Same 'City']       2  \n",
-       "3            Roubaix     33                    []  [Same 'City']       2  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']  [Same 'City']       2  \n",
-       "5               Lens     15       ['lensfans.fr']  [Same 'City']       3  "
+       "              City_r  Age_r            websites_r          _motive  _block  \n",
+       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [\"Same 'City'\"]       0  \n",
+       "1          Phalempin     24                    []  [\"Same 'City'\"]       1  \n",
+       "2            Roubaix     33                    []  [\"Same 'City'\"]       2  \n",
+       "3            Roubaix     33                    []  [\"Same 'City'\"]       2  \n",
+       "4            Roubaix     32    ['roubaixlove.fr']  [\"Same 'City'\"]       2  \n",
+       "5               Lens     15       ['lensfans.fr']  [\"Same 'City'\"]       3  "
       ],
       "text/html": [
        "<div>\n",
@@ -2903,7 +2903,7 @@
        "      <td>Villeneuve d'Ascq</td>\n",
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2918,7 +2918,7 @@
        "      <td>Phalempin</td>\n",
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2933,7 +2933,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2948,7 +2948,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2963,7 +2963,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2978,7 +2978,7 @@
        "      <td>Lens</td>\n",
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3004,8 +3004,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:50.016104Z",
-     "start_time": "2026-02-03T08:45:49.965660200Z"
+     "end_time": "2026-02-03T14:40:39.771226400Z",
+     "start_time": "2026-02-03T14:40:39.536276Z"
     }
    },
    "source": [
@@ -3017,13 +3017,13 @@
     {
      "data": {
       "text/plain": [
-       "   id_l             Name_l  id_r             Name_r        _motive  _block\n",
-       "0     1     Jacques Dupond     4     Jacques Dupont  [Same 'City']       0\n",
-       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [Same 'City']       1\n",
-       "2     3       Paul Delarue    11     sophie_delarue  [Same 'City']       2\n",
-       "3     8     Sophie Delarue    11     sophie_delarue  [Same 'City']       2\n",
-       "4     8     Sophie Delarue     3       Paul Delarue  [Same 'City']       2\n",
-       "5    10    Caroline Dufour    13      Benoît Benoît  [Same 'City']       3"
+       "   id_l             Name_l  id_r             Name_r          _motive  _block\n",
+       "0     1     Jacques Dupond     4     Jacques Dupont  [\"Same 'City'\"]       0\n",
+       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [\"Same 'City'\"]       1\n",
+       "2     3       Paul Delarue    11     sophie_delarue  [\"Same 'City'\"]       2\n",
+       "3     8     Sophie Delarue    11     sophie_delarue  [\"Same 'City'\"]       2\n",
+       "4     8     Sophie Delarue     3       Paul Delarue  [\"Same 'City'\"]       2\n",
+       "5    10    Caroline Dufour    13      Benoît Benoît  [\"Same 'City'\"]       3"
       ],
       "text/html": [
        "<div>\n",
@@ -3059,7 +3059,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3068,7 +3068,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3077,7 +3077,7 @@
        "      <td>Paul Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3086,7 +3086,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3095,7 +3095,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>3</td>\n",
        "      <td>Paul Delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3104,7 +3104,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[Same 'City']</td>\n",
+       "      <td>[\"Same 'City'\"]</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3131,8 +3131,8 @@
    "metadata": {
     "scrolled": true,
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:50.383366500Z",
-     "start_time": "2026-02-03T08:45:50.220420100Z"
+     "end_time": "2026-02-03T14:40:41.596196Z",
+     "start_time": "2026-02-03T14:40:41.287210400Z"
     }
    },
    "source": [
@@ -3179,19 +3179,19 @@
        "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
        "                                              _motive  _block  \n",
-       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0  \n",
-       "1                         [>=1 overlap in 'websites']       0  \n",
-       "2                         [>=1 overlap in 'websites']       0  \n",
-       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1  \n",
-       "4                         [>=1 overlap in 'websites']       1  \n",
-       "5                         [>=1 overlap in 'websites']       1  \n",
-       "6                         [>=1 overlap in 'websites']       1  \n",
-       "7                         [>=1 overlap in 'websites']       1  \n",
-       "8                           [Same 'Age', Same 'City']       2  \n",
-       "9                           [Same 'Age', Same 'City']       3  \n",
-       "10                        [>=1 overlap in 'websites']       4  \n",
-       "11                        [>=1 overlap in 'websites']       4  \n",
-       "12                        [>=1 overlap in 'websites']       4  "
+       "0   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       0  \n",
+       "1                       [\">=1 overlap in 'websites'\"]       0  \n",
+       "2                       [\">=1 overlap in 'websites'\"]       0  \n",
+       "3   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       1  \n",
+       "4                       [\">=1 overlap in 'websites'\"]       1  \n",
+       "5                       [\">=1 overlap in 'websites'\"]       1  \n",
+       "6                       [\">=1 overlap in 'websites'\"]       1  \n",
+       "7                       [\">=1 overlap in 'websites'\"]       1  \n",
+       "8                       [\"Same 'Age'\", \"Same 'City'\"]       2  \n",
+       "9                       [\"Same 'Age'\", \"Same 'City'\"]       3  \n",
+       "10                      [\">=1 overlap in 'websites'\"]       4  \n",
+       "11                      [\">=1 overlap in 'websites'\"]       4  \n",
+       "12                      [\">=1 overlap in 'websites'\"]       4  "
       ],
       "text/html": [
        "<div>\n",
@@ -3227,7 +3227,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3236,7 +3236,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3245,7 +3245,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3254,7 +3254,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3263,7 +3263,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3272,7 +3272,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3281,7 +3281,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3290,7 +3290,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3299,7 +3299,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3308,7 +3308,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3317,7 +3317,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3326,7 +3326,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3335,7 +3335,7 @@
        "      <td>Benoît Benoît</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3358,18 +3358,26 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": "For reports, it can be interesting to have numbers to drive decision-making. Using `scoring` gives you an indicator of the likelihood of rows behing duplicates based on the number of motives."
+   "source": "For reports, it can be interesting to have numbers to drive decision-making. Using `scoring` gives you an indicator of the likelihood of rows being duplicates based on the number of distinct motives."
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T08:45:50.717126Z",
-     "start_time": "2026-02-03T08:45:50.677002100Z"
+     "end_time": "2026-02-03T14:40:42.388195200Z",
+     "start_time": "2026-02-03T14:40:42.261220800Z"
     }
    },
    "source": [
-    "report[\"_score\"] = msb.scoring(report)\n",
+    "report = msb.add_blocks_to_dataset(\n",
+    "    df,\n",
+    "    links,\n",
+    "    motives=True,\n",
+    "    show_as_pairs=True,\n",
+    "    output_columns=[\"id\", \"Name\"],\n",
+    "    merge_blocks=False,\n",
+    "    score=True,\n",
+    ")\n",
     "report.sort_values(\"_score\", ascending=False)"
    ],
    "outputs": [
@@ -3379,32 +3387,32 @@
        "    id_l             Name_l  id_r              Name_r  \\\n",
        "0      1     Jacques Dupond     4      Jacques Dupont   \n",
        "3      1     Jacques Dupond     4      Jacques Dupont   \n",
+       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
+       "9      8     Sophie Delarue    11      sophie_delarue   \n",
        "1      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
        "4      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
+       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
        "6     10    Caroline Dufour     6  Jean-Michel Python   \n",
+       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
        "7     10    Caroline Dufour    13       Benoît Benoît   \n",
        "10    10    Caroline Dufour     6  Jean-Michel Python   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "11    10    Caroline Dufour    13       Benoît Benoît   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue   \n",
+       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
-       "                                              _motive  _block  _score  \n",
-       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0      52  \n",
-       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1      52  \n",
-       "1                         [>=1 overlap in 'websites']       0      27  \n",
-       "2                         [>=1 overlap in 'websites']       0      27  \n",
-       "4                         [>=1 overlap in 'websites']       1      27  \n",
-       "5                         [>=1 overlap in 'websites']       1      27  \n",
-       "6                         [>=1 overlap in 'websites']       1      27  \n",
-       "7                         [>=1 overlap in 'websites']       1      27  \n",
-       "10                        [>=1 overlap in 'websites']       4      27  \n",
-       "12                        [>=1 overlap in 'websites']       4      27  \n",
-       "11                        [>=1 overlap in 'websites']       4      27  \n",
-       "8                           [Same 'Age', Same 'City']       2      25  \n",
-       "9                           [Same 'Age', Same 'City']       3      25  "
+       "                                              _motive  _score  _block  \n",
+       "0   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       3       0  \n",
+       "3   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       3       1  \n",
+       "8                       [\"Same 'Age'\", \"Same 'City'\"]       2       2  \n",
+       "9                       [\"Same 'Age'\", \"Same 'City'\"]       2       3  \n",
+       "1                       [\">=1 overlap in 'websites'\"]       1       0  \n",
+       "4                       [\">=1 overlap in 'websites'\"]       1       1  \n",
+       "2                       [\">=1 overlap in 'websites'\"]       1       0  \n",
+       "6                       [\">=1 overlap in 'websites'\"]       1       1  \n",
+       "5                       [\">=1 overlap in 'websites'\"]       1       1  \n",
+       "7                       [\">=1 overlap in 'websites'\"]       1       1  \n",
+       "10                      [\">=1 overlap in 'websites'\"]       1       4  \n",
+       "11                      [\">=1 overlap in 'websites'\"]       1       4  \n",
+       "12                      [\">=1 overlap in 'websites'\"]       1       4  "
       ],
       "text/html": [
        "<div>\n",
@@ -3430,8 +3438,8 @@
        "      <th>id_r</th>\n",
        "      <th>Name_r</th>\n",
        "      <th>_motive</th>\n",
-       "      <th>_block</th>\n",
        "      <th>_score</th>\n",
+       "      <th>_block</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -3441,9 +3449,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>3</td>\n",
        "      <td>0</td>\n",
-       "      <td>52</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -3451,9 +3459,29 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>3</td>\n",
        "      <td>1</td>\n",
-       "      <td>52</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>8</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Pierre Dusquesnes</td>\n",
+       "      <td>5</td>\n",
+       "      <td>pierre dusquesnes</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>2</td>\n",
+       "      <td>2</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>9</th>\n",
+       "      <td>8</td>\n",
+       "      <td>Sophie Delarue</td>\n",
+       "      <td>11</td>\n",
+       "      <td>sophie_delarue</td>\n",
+       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>2</td>\n",
+       "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -3461,19 +3489,9 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>0</td>\n",
-       "      <td>27</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
-       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -3481,19 +3499,19 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
        "      <td>1</td>\n",
-       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>5</th>\n",
+       "      <th>2</th>\n",
        "      <td>1</td>\n",
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
        "      <td>1</td>\n",
-       "      <td>27</td>\n",
+       "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -3501,9 +3519,19 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
+       "      <td>1</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>5</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Jacques Dupond</td>\n",
+       "      <td>10</td>\n",
+       "      <td>Caroline Dufour</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
        "      <td>1</td>\n",
-       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -3511,9 +3539,9 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
        "      <td>1</td>\n",
-       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>10</th>\n",
@@ -3521,19 +3549,9 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>4</td>\n",
-       "      <td>27</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
        "      <td>4</td>\n",
-       "      <td>27</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>11</th>\n",
@@ -3541,29 +3559,19 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
        "      <td>4</td>\n",
-       "      <td>27</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "      <td>25</td>\n",
        "    </tr>\n",
        "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
-       "      <td>3</td>\n",
-       "      <td>25</td>\n",
+       "      <th>12</th>\n",
+       "      <td>13</td>\n",
+       "      <td>Benoît Benoît</td>\n",
+       "      <td>6</td>\n",
+       "      <td>Jean-Michel Python</td>\n",
+       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>1</td>\n",
+       "      <td>4</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",

From 41e154e0ff48b3fed132194f29620334a704f9c9 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 15:55:34 +0100
Subject: [PATCH 05/20] test: update tests to new Motives

---
 tests/test_ms_blocking.py | 117 ++++++++++++++++++++++++++++++--------
 1 file changed, 94 insertions(+), 23 deletions(-)

diff --git a/tests/test_ms_blocking.py b/tests/test_ms_blocking.py
index d3f9ab2..3efb309 100644
--- a/tests/test_ms_blocking.py
+++ b/tests/test_ms_blocking.py
@@ -84,18 +84,29 @@ def attribute_city_keep_ungrouped_rows_false():
 @pytest.fixture
 def attribute_city_motives_true_block():
     return {
-        frozenset({3, 8}): {"Same 'City'"},
-        frozenset({1, 4}): {"Same 'City'"},
-        frozenset({8, 11}): {"Same 'City'"},
-        frozenset({3, 11}): {"Same 'City'"},
-        frozenset({2, 5}): {"Same 'City'"},
-        frozenset({10, 13}): {"Same 'City'"},
+        frozenset({3, 8}): [msb.EquivalenceMotive("City")],
+        frozenset({1, 4}): [msb.EquivalenceMotive("City")],
+        frozenset({8, 11}): [msb.EquivalenceMotive("City")],
+        frozenset({3, 11}): [msb.EquivalenceMotive("City")],
+        frozenset({2, 5}): [msb.EquivalenceMotive("City")],
+        frozenset({10, 13}): [msb.EquivalenceMotive("City")],
     }
 
 
 @pytest.fixture
 def attribute_city_motives_true_add():
-    return [{"Same 'City'"}] * 9
+    return [
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+        "[\"Same 'City'\"]",
+    ]
+    # [msb.EquivalenceMotive("City")] * 9
 
 
 @pytest.fixture
@@ -116,25 +127,65 @@ def city_age_name_websites_pipelining_id():
 @pytest.fixture
 def city_age_websites_pipelining_motives():
     return [
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'", ">=1 overlap in 'websites'"}),
-        frozenset({">=1 overlap in 'websites'"}),
-        frozenset({">=1 overlap in 'websites'"}),
-        frozenset({"Same 'Age'", "Same 'City'"}),
-        frozenset({"Same 'Age'", "Same 'City'"}),
-        frozenset({">=1 overlap in 'websites'"}),
-        frozenset({">=1 overlap in 'websites'"}),
-        frozenset({">=1 overlap in 'websites'"}),
+        "[\"Same 'City'\", \"Same 'Age'\", \">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\"Same 'City'\", \"Same 'Age'\", \">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\"Same 'City'\", \"Same 'Age'\"]",
+        "[\"Same 'City'\", \"Same 'Age'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
+        "[\">=1 overlap in 'websites'\"]",
     ]
 
+    # [
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [
+    #         msb.EquivalenceMotive("Age"),
+    #         msb.EquivalenceMotive("City"),
+    #         msb.OverlapMotive("websites", 1),
+    #     ],
+    #     [msb.OverlapMotive("websites", 1)],
+    #     [msb.OverlapMotive("websites", 1)],
+    #     [msb.EquivalenceMotive("Age"), msb.EquivalenceMotive("City")],
+    #     [msb.EquivalenceMotive("Age"), msb.EquivalenceMotive("City")],
+    #     [msb.OverlapMotive("websites", 1)],
+    #     [msb.OverlapMotive("websites", 1)],
+    #     [msb.OverlapMotive("websites", 1)],
+    # ]
+
 
 @pytest.fixture
 def city_age_websites_pipelining_scores():
-    return [3, 3, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 1]
+    return [3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]
 
 
 @pytest.fixture
@@ -350,12 +401,32 @@ def test_pipelining_scores(city_age_websites_pipelining_scores):
     final_blocker = (city_blocker & age_blocker) | websites_blocker
     links = final_blocker.block(get_users(), motives=True)
     report = msb.add_blocks_to_dataset(
-        get_users(), links, show_as_pairs=True, motives=True, merge_blocks=False
+        get_users(),
+        links,
+        show_as_pairs=True,
+        motives=True,
+        merge_blocks=False,
+        score=True,
     )
-    actual = sorted(msb.scoring(report), reverse=True)
+    actual = sorted(report["_score"], reverse=True)
     assert actual == expected
 
 
+# def test_pipelining_scores_without_show_as_pairs(city_age_websites_pipelining_scores):
+#    """Test that scoring does work as intended"""
+#    expected = city_age_websites_pipelining_scores
+#    city_blocker = msb.AttributeEquivalenceBlocker(["City"])
+#    age_blocker = msb.AttributeEquivalenceBlocker(["Age"])
+#    websites_blocker = msb.OverlapBlocker(["websites"])
+#    final_blocker = (city_blocker & age_blocker) | websites_blocker
+#    links = final_blocker.block(get_users(), motives=True)
+#    report = msb.add_blocks_to_dataset(
+#         get_users(), links, show_as_pairs=True, motives=True, merge_blocks=False, score=True
+#     )
+#    actual = sorted(msb.scoring(report), reverse=True)
+#    assert actual == expected
+
+
 def test_merge_blockers_aa():
     """Test that merging blockers does work as intended"""
     expected = msb.AttributeEquivalenceBlocker(["City", "Age"])

From c502c19864025391dba96e826f0164b416a1c291 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 15:56:27 +0100
Subject: [PATCH 06/20] style: reformat

---
 src/ms_blocking/utils.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index aaa5e08..15c7919 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -567,25 +567,40 @@ def solve_motives(motives: List[Motive]) -> List[Motive]:
 
         # Find biggest overlap among the non-word_level ones
         if not_word_level_motives_for_column:
-            max_overlap_not_word_level_for_column = max(not_word_level_motives_for_column, key=lambda m: m.overlap)
-            max_overlap_not_word_level_for_column_overlap = max_overlap_not_word_level_for_column.overlap
+            max_overlap_not_word_level_for_column = max(
+                not_word_level_motives_for_column, key=lambda m: m.overlap
+            )
+            max_overlap_not_word_level_for_column_overlap = (
+                max_overlap_not_word_level_for_column.overlap
+            )
         else:
             max_overlap_not_word_level_for_column = []
-            max_overlap_not_word_level_for_column_overlap = 0 # Will never be used, left for linter
+            max_overlap_not_word_level_for_column_overlap = (
+                0  # Will never be used, left for linter
+            )
 
         # Now find biggest overlap among the word_level ones
         if word_level_motives_for_column:
-            max_overlap_word_level_for_column = max(word_level_motives_for_column, key=lambda m: m.overlap)
-            max_overlap_word_level_for_column_overlap = max_overlap_word_level_for_column.overlap
+            max_overlap_word_level_for_column = max(
+                word_level_motives_for_column, key=lambda m: m.overlap
+            )
+            max_overlap_word_level_for_column_overlap = (
+                max_overlap_word_level_for_column.overlap
+            )
             if not_word_level_motives_for_column:
                 # If there is already an OverlapMotive on same column with equal or greater overlap but not word_level, discard it
-                if max_overlap_word_level_for_column_overlap <= max_overlap_not_word_level_for_column_overlap:
+                if (
+                    max_overlap_word_level_for_column_overlap
+                    <= max_overlap_not_word_level_for_column_overlap
+                ):
                     max_overlap_word_level_for_column = []
         else:
             max_overlap_word_level_for_column = []
 
         if max_overlap_not_word_level_for_column:
-            max_overlap_not_word_level_for_column = [max_overlap_not_word_level_for_column]
+            max_overlap_not_word_level_for_column = [
+                max_overlap_not_word_level_for_column
+            ]
         if max_overlap_word_level_for_column:
             max_overlap_word_level_for_column = [max_overlap_word_level_for_column]
         final_motives += (

From a1282199a27d7d4b0f42b4fea1947bd7f9cf7e48 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:30:14 +0100
Subject: [PATCH 07/20] docs: fix motives

---
 docs/example.ipynb       | 318 +++++++++++++++++++--------------------
 src/ms_blocking/utils.py |   1 -
 2 files changed, 159 insertions(+), 160 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index 45699af..32ee69d 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,8 +32,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:28.508876500Z",
-     "start_time": "2026-02-03T14:40:27.761433800Z"
+     "end_time": "2026-02-03T15:26:00.408434200Z",
+     "start_time": "2026-02-03T15:25:59.668629400Z"
     }
    },
    "source": [
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:28.563486200Z",
-     "start_time": "2026-02-03T14:40:28.512916Z"
+     "end_time": "2026-02-03T15:26:00.464804400Z",
+     "start_time": "2026-02-03T15:26:00.408434200Z"
     }
    },
    "source": [
@@ -282,8 +282,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:28.741020400Z",
-     "start_time": "2026-02-03T14:40:28.615799300Z"
+     "end_time": "2026-02-03T15:26:00.723249900Z",
+     "start_time": "2026-02-03T15:26:00.545044Z"
     }
    },
    "source": [
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:29.068322900Z",
-     "start_time": "2026-02-03T14:40:28.915502800Z"
+     "end_time": "2026-02-03T15:26:00.930325600Z",
+     "start_time": "2026-02-03T15:26:00.842587Z"
     }
    },
    "source": [
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:29.328166900Z",
-     "start_time": "2026-02-03T14:40:29.309785500Z"
+     "end_time": "2026-02-03T15:26:01.002006Z",
+     "start_time": "2026-02-03T15:26:00.984929700Z"
     }
    },
    "source": [
@@ -369,8 +369,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:29.547537Z",
-     "start_time": "2026-02-03T14:40:29.397273800Z"
+     "end_time": "2026-02-03T15:26:01.389874900Z",
+     "start_time": "2026-02-03T15:26:01.189496400Z"
     }
    },
    "source": [
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:29.599221700Z",
-     "start_time": "2026-02-03T14:40:29.572788900Z"
+     "end_time": "2026-02-03T15:26:01.488509700Z",
+     "start_time": "2026-02-03T15:26:01.458139Z"
     }
    },
    "source": [
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:29.841412500Z",
-     "start_time": "2026-02-03T14:40:29.660471200Z"
+     "end_time": "2026-02-03T15:26:01.849762800Z",
+     "start_time": "2026-02-03T15:26:01.604523100Z"
     }
    },
    "source": [
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:30.138487100Z",
-     "start_time": "2026-02-03T14:40:30.060590900Z"
+     "end_time": "2026-02-03T15:26:02.327630200Z",
+     "start_time": "2026-02-03T15:26:02.082466800Z"
     }
    },
    "source": [
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:30.619777700Z",
-     "start_time": "2026-02-03T14:40:30.422768900Z"
+     "end_time": "2026-02-03T15:26:02.765309400Z",
+     "start_time": "2026-02-03T15:26:02.567839300Z"
     }
    },
    "source": [
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:30.988164600Z",
-     "start_time": "2026-02-03T14:40:30.834419400Z"
+     "end_time": "2026-02-03T15:26:03.163072900Z",
+     "start_time": "2026-02-03T15:26:03.015158500Z"
     }
    },
    "source": [
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:31.490353400Z",
-     "start_time": "2026-02-03T14:40:31.385134Z"
+     "end_time": "2026-02-03T15:26:03.713908200Z",
+     "start_time": "2026-02-03T15:26:03.587227900Z"
     }
    },
    "source": [
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:32.118722700Z",
-     "start_time": "2026-02-03T14:40:31.995086900Z"
+     "end_time": "2026-02-03T15:26:04.512418700Z",
+     "start_time": "2026-02-03T15:26:04.371414700Z"
     }
    },
    "source": [
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:32.898993200Z",
-     "start_time": "2026-02-03T14:40:32.771388400Z"
+     "end_time": "2026-02-03T15:26:05.232296700Z",
+     "start_time": "2026-02-03T15:26:05.138463900Z"
     }
    },
    "source": [
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:33.431455700Z",
-     "start_time": "2026-02-03T14:40:33.206324Z"
+     "end_time": "2026-02-03T15:26:05.746292700Z",
+     "start_time": "2026-02-03T15:26:05.615214500Z"
     }
    },
    "source": [
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:34.177679600Z",
-     "start_time": "2026-02-03T14:40:34.059417200Z"
+     "end_time": "2026-02-03T15:26:06.550041700Z",
+     "start_time": "2026-02-03T15:26:06.378265100Z"
     }
    },
    "source": [
@@ -1464,8 +1464,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:34.323788200Z",
-     "start_time": "2026-02-03T14:40:34.232749100Z"
+     "end_time": "2026-02-03T15:26:06.933740700Z",
+     "start_time": "2026-02-03T15:26:06.700136700Z"
     }
    },
    "source": [
@@ -1589,8 +1589,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:35.068638700Z",
-     "start_time": "2026-02-03T14:40:34.966880900Z"
+     "end_time": "2026-02-03T15:26:07.724806800Z",
+     "start_time": "2026-02-03T15:26:07.416889200Z"
     }
    },
    "source": [
@@ -1804,8 +1804,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:35.421514400Z",
-     "start_time": "2026-02-03T14:40:35.348243100Z"
+     "end_time": "2026-02-03T15:26:08.366574400Z",
+     "start_time": "2026-02-03T15:26:08.287314300Z"
     }
    },
    "source": [
@@ -1828,8 +1828,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:35.616427400Z",
-     "start_time": "2026-02-03T14:40:35.568154600Z"
+     "end_time": "2026-02-03T15:26:08.624518900Z",
+     "start_time": "2026-02-03T15:26:08.604191500Z"
     }
    },
    "source": [
@@ -1849,8 +1849,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:35.856681600Z",
-     "start_time": "2026-02-03T14:40:35.755378800Z"
+     "end_time": "2026-02-03T15:26:08.886089600Z",
+     "start_time": "2026-02-03T15:26:08.721474Z"
     }
    },
    "source": [
@@ -1990,8 +1990,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:36.225054300Z",
-     "start_time": "2026-02-03T14:40:36.088658200Z"
+     "end_time": "2026-02-03T15:26:09.251246500Z",
+     "start_time": "2026-02-03T15:26:09.080396800Z"
     }
    },
    "source": [
@@ -2034,8 +2034,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:36.546944200Z",
-     "start_time": "2026-02-03T14:40:36.506897100Z"
+     "end_time": "2026-02-03T15:26:09.530329Z",
+     "start_time": "2026-02-03T15:26:09.486287900Z"
     }
    },
    "source": [
@@ -2213,8 +2213,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:36.899709500Z",
-     "start_time": "2026-02-03T14:40:36.769059100Z"
+     "end_time": "2026-02-03T15:26:09.985303200Z",
+     "start_time": "2026-02-03T15:26:09.845263800Z"
     }
    },
    "source": [
@@ -2443,8 +2443,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:37.743563200Z",
-     "start_time": "2026-02-03T14:40:37.558867900Z"
+     "end_time": "2026-02-03T15:26:10.930371500Z",
+     "start_time": "2026-02-03T15:26:10.809849600Z"
     }
    },
    "source": [
@@ -2593,8 +2593,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:38.350268900Z",
-     "start_time": "2026-02-03T14:40:38.156431100Z"
+     "end_time": "2026-02-03T15:26:11.634404900Z",
+     "start_time": "2026-02-03T15:26:11.403226800Z"
     }
    },
    "source": [
@@ -2644,8 +2644,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:38.712869Z",
-     "start_time": "2026-02-03T14:40:38.617699300Z"
+     "end_time": "2026-02-03T15:26:12.214789500Z",
+     "start_time": "2026-02-03T15:26:12.007748800Z"
     }
    },
    "source": [
@@ -2666,16 +2666,16 @@
        "7  10    Caroline Dufour               Lens   45   \n",
        "8  13      Benoît Benoît               Lens   15   \n",
        "\n",
-       "                                            websites  _block          _motive  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  [\"Same 'City'\"]  \n",
-       "1                               ['jacquesdupond.fr']       0  [\"Same 'City'\"]  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  [\"Same 'City'\"]  \n",
-       "3                                                 []       1  [\"Same 'City'\"]  \n",
-       "4                                 ['roubaixlove.fr']       2  [\"Same 'City'\"]  \n",
-       "5                                                 []       2  [\"Same 'City'\"]  \n",
-       "6                                                 []       2  [\"Same 'City'\"]  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  [\"Same 'City'\"]  \n",
-       "8                                    ['lensfans.fr']       3  [\"Same 'City'\"]  "
+       "                                            websites  _block        _motive  \n",
+       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  [Same 'City']  \n",
+       "1                               ['jacquesdupond.fr']       0  [Same 'City']  \n",
+       "2                    ['somewebsite.com/users/rpz59']       1  [Same 'City']  \n",
+       "3                                                 []       1  [Same 'City']  \n",
+       "4                                 ['roubaixlove.fr']       2  [Same 'City']  \n",
+       "5                                                 []       2  [Same 'City']  \n",
+       "6                                                 []       2  [Same 'City']  \n",
+       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  [Same 'City']  \n",
+       "8                                    ['lensfans.fr']       3  [Same 'City']  "
       ],
       "text/html": [
        "<div>\n",
@@ -2714,7 +2714,7 @@
        "      <td>37</td>\n",
        "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
        "      <td>0</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -2724,7 +2724,7 @@
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
        "      <td>0</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -2734,7 +2734,7 @@
        "      <td>24</td>\n",
        "      <td>['somewebsite.com/users/rpz59']</td>\n",
        "      <td>1</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -2744,7 +2744,7 @@
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
        "      <td>1</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -2754,7 +2754,7 @@
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
        "      <td>2</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>5</th>\n",
@@ -2764,7 +2764,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>6</th>\n",
@@ -2774,7 +2774,7 @@
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
        "      <td>2</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>7</th>\n",
@@ -2784,7 +2784,7 @@
        "      <td>45</td>\n",
        "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>8</th>\n",
@@ -2794,7 +2794,7 @@
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
        "      <td>3</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -2822,8 +2822,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:39.016063100Z",
-     "start_time": "2026-02-03T14:40:38.850306600Z"
+     "end_time": "2026-02-03T15:26:12.610291100Z",
+     "start_time": "2026-02-03T15:26:12.498335600Z"
     }
    },
    "source": [
@@ -2849,13 +2849,13 @@
        "4                                                 []     3       Paul Delarue   \n",
        "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
        "\n",
-       "              City_r  Age_r            websites_r          _motive  _block  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [\"Same 'City'\"]       0  \n",
-       "1          Phalempin     24                    []  [\"Same 'City'\"]       1  \n",
-       "2            Roubaix     33                    []  [\"Same 'City'\"]       2  \n",
-       "3            Roubaix     33                    []  [\"Same 'City'\"]       2  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']  [\"Same 'City'\"]       2  \n",
-       "5               Lens     15       ['lensfans.fr']  [\"Same 'City'\"]       3  "
+       "              City_r  Age_r            websites_r        _motive  _block  \n",
+       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [Same 'City']       0  \n",
+       "1          Phalempin     24                    []  [Same 'City']       1  \n",
+       "2            Roubaix     33                    []  [Same 'City']       2  \n",
+       "3            Roubaix     33                    []  [Same 'City']       2  \n",
+       "4            Roubaix     32    ['roubaixlove.fr']  [Same 'City']       2  \n",
+       "5               Lens     15       ['lensfans.fr']  [Same 'City']       3  "
       ],
       "text/html": [
        "<div>\n",
@@ -2903,7 +2903,7 @@
        "      <td>Villeneuve d'Ascq</td>\n",
        "      <td>37</td>\n",
        "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2918,7 +2918,7 @@
        "      <td>Phalempin</td>\n",
        "      <td>24</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2933,7 +2933,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2948,7 +2948,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>33</td>\n",
        "      <td>[]</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2963,7 +2963,7 @@
        "      <td>Roubaix</td>\n",
        "      <td>32</td>\n",
        "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -2978,7 +2978,7 @@
        "      <td>Lens</td>\n",
        "      <td>15</td>\n",
        "      <td>['lensfans.fr']</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3004,8 +3004,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:39.771226400Z",
-     "start_time": "2026-02-03T14:40:39.536276Z"
+     "end_time": "2026-02-03T15:26:13.272171600Z",
+     "start_time": "2026-02-03T15:26:13.063070700Z"
     }
    },
    "source": [
@@ -3017,13 +3017,13 @@
     {
      "data": {
       "text/plain": [
-       "   id_l             Name_l  id_r             Name_r          _motive  _block\n",
-       "0     1     Jacques Dupond     4     Jacques Dupont  [\"Same 'City'\"]       0\n",
-       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [\"Same 'City'\"]       1\n",
-       "2     3       Paul Delarue    11     sophie_delarue  [\"Same 'City'\"]       2\n",
-       "3     8     Sophie Delarue    11     sophie_delarue  [\"Same 'City'\"]       2\n",
-       "4     8     Sophie Delarue     3       Paul Delarue  [\"Same 'City'\"]       2\n",
-       "5    10    Caroline Dufour    13      Benoît Benoît  [\"Same 'City'\"]       3"
+       "   id_l             Name_l  id_r             Name_r        _motive  _block\n",
+       "0     1     Jacques Dupond     4     Jacques Dupont  [Same 'City']       0\n",
+       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [Same 'City']       1\n",
+       "2     3       Paul Delarue    11     sophie_delarue  [Same 'City']       2\n",
+       "3     8     Sophie Delarue    11     sophie_delarue  [Same 'City']       2\n",
+       "4     8     Sophie Delarue     3       Paul Delarue  [Same 'City']       2\n",
+       "5    10    Caroline Dufour    13      Benoît Benoît  [Same 'City']       3"
       ],
       "text/html": [
        "<div>\n",
@@ -3059,7 +3059,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3068,7 +3068,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3077,7 +3077,7 @@
        "      <td>Paul Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3086,7 +3086,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3095,7 +3095,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>3</td>\n",
        "      <td>Paul Delarue</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3104,7 +3104,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[\"Same 'City'\"]</td>\n",
+       "      <td>[Same 'City']</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3131,8 +3131,8 @@
    "metadata": {
     "scrolled": true,
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:41.596196Z",
-     "start_time": "2026-02-03T14:40:41.287210400Z"
+     "end_time": "2026-02-03T15:26:14.300257400Z",
+     "start_time": "2026-02-03T15:26:13.981549200Z"
     }
    },
    "source": [
@@ -3179,19 +3179,19 @@
        "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
        "                                              _motive  _block  \n",
-       "0   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       0  \n",
-       "1                       [\">=1 overlap in 'websites'\"]       0  \n",
-       "2                       [\">=1 overlap in 'websites'\"]       0  \n",
-       "3   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       1  \n",
-       "4                       [\">=1 overlap in 'websites'\"]       1  \n",
-       "5                       [\">=1 overlap in 'websites'\"]       1  \n",
-       "6                       [\">=1 overlap in 'websites'\"]       1  \n",
-       "7                       [\">=1 overlap in 'websites'\"]       1  \n",
-       "8                       [\"Same 'Age'\", \"Same 'City'\"]       2  \n",
-       "9                       [\"Same 'Age'\", \"Same 'City'\"]       3  \n",
-       "10                      [\">=1 overlap in 'websites'\"]       4  \n",
-       "11                      [\">=1 overlap in 'websites'\"]       4  \n",
-       "12                      [\">=1 overlap in 'websites'\"]       4  "
+       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0  \n",
+       "1                         [>=1 overlap in 'websites']       0  \n",
+       "2                         [>=1 overlap in 'websites']       0  \n",
+       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1  \n",
+       "4                         [>=1 overlap in 'websites']       1  \n",
+       "5                         [>=1 overlap in 'websites']       1  \n",
+       "6                         [>=1 overlap in 'websites']       1  \n",
+       "7                         [>=1 overlap in 'websites']       1  \n",
+       "8                           [Same 'Age', Same 'City']       2  \n",
+       "9                           [Same 'Age', Same 'City']       3  \n",
+       "10                        [>=1 overlap in 'websites']       4  \n",
+       "11                        [>=1 overlap in 'websites']       4  \n",
+       "12                        [>=1 overlap in 'websites']       4  "
       ],
       "text/html": [
        "<div>\n",
@@ -3227,7 +3227,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3236,7 +3236,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3245,7 +3245,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3254,7 +3254,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3263,7 +3263,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3272,7 +3272,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3281,7 +3281,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3290,7 +3290,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3299,7 +3299,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3308,7 +3308,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3317,7 +3317,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3326,7 +3326,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3335,7 +3335,7 @@
        "      <td>Benoît Benoît</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
@@ -3364,8 +3364,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T14:40:42.388195200Z",
-     "start_time": "2026-02-03T14:40:42.261220800Z"
+     "end_time": "2026-02-03T15:26:15.195066300Z",
+     "start_time": "2026-02-03T15:26:14.996741600Z"
     }
    },
    "source": [
@@ -3400,19 +3400,19 @@
        "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
        "                                              _motive  _score  _block  \n",
-       "0   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       3       0  \n",
-       "3   [\"Same 'Age'\", \"Same 'City'\", \">=1 overlap in ...       3       1  \n",
-       "8                       [\"Same 'Age'\", \"Same 'City'\"]       2       2  \n",
-       "9                       [\"Same 'Age'\", \"Same 'City'\"]       2       3  \n",
-       "1                       [\">=1 overlap in 'websites'\"]       1       0  \n",
-       "4                       [\">=1 overlap in 'websites'\"]       1       1  \n",
-       "2                       [\">=1 overlap in 'websites'\"]       1       0  \n",
-       "6                       [\">=1 overlap in 'websites'\"]       1       1  \n",
-       "5                       [\">=1 overlap in 'websites'\"]       1       1  \n",
-       "7                       [\">=1 overlap in 'websites'\"]       1       1  \n",
-       "10                      [\">=1 overlap in 'websites'\"]       1       4  \n",
-       "11                      [\">=1 overlap in 'websites'\"]       1       4  \n",
-       "12                      [\">=1 overlap in 'websites'\"]       1       4  "
+       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       3       0  \n",
+       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       3       1  \n",
+       "8                           [Same 'Age', Same 'City']       2       2  \n",
+       "9                           [Same 'Age', Same 'City']       2       3  \n",
+       "1                         [>=1 overlap in 'websites']       1       0  \n",
+       "4                         [>=1 overlap in 'websites']       1       1  \n",
+       "2                         [>=1 overlap in 'websites']       1       0  \n",
+       "6                         [>=1 overlap in 'websites']       1       1  \n",
+       "5                         [>=1 overlap in 'websites']       1       1  \n",
+       "7                         [>=1 overlap in 'websites']       1       1  \n",
+       "10                        [>=1 overlap in 'websites']       1       4  \n",
+       "11                        [>=1 overlap in 'websites']       1       4  \n",
+       "12                        [>=1 overlap in 'websites']       1       4  "
       ],
       "text/html": [
        "<div>\n",
@@ -3449,7 +3449,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>3</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -3459,7 +3459,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\", \"&gt;=1 overlap in ...</td>\n",
+       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>3</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3469,7 +3469,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>2</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
@@ -3479,7 +3479,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[\"Same 'Age'\", \"Same 'City'\"]</td>\n",
+       "      <td>[Same 'Age', Same 'City']</td>\n",
        "      <td>2</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
@@ -3489,7 +3489,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -3499,7 +3499,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3509,7 +3509,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -3519,7 +3519,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3529,7 +3529,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>10</td>\n",
        "      <td>Caroline Dufour</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3539,7 +3539,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3549,7 +3549,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
@@ -3559,7 +3559,7 @@
        "      <td>Caroline Dufour</td>\n",
        "      <td>13</td>\n",
        "      <td>Benoît Benoît</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
@@ -3569,7 +3569,7 @@
        "      <td>Benoît Benoît</td>\n",
        "      <td>6</td>\n",
        "      <td>Jean-Michel Python</td>\n",
-       "      <td>[\"&gt;=1 overlap in 'websites'\"]</td>\n",
+       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
        "      <td>1</td>\n",
        "      <td>4</td>\n",
        "    </tr>\n",
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 15c7919..7f271f5 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -294,7 +294,6 @@ def merge_blocks_or(coords_1: Coords, coords_2: Coords) -> Coords:
         }
     else:
         return coords_1.union(coords_2)
-    # TODO: check for merging one with motive and one w/o
 
 
 def merge_blocks_and(coords_1: Coords, coords_2: Coords) -> Coords:

From c0e18911b5eebb2466277f77529fa4e427c2dc6a Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:30:56 +0100
Subject: [PATCH 08/20] refactor: motive as list instead of string

---
 src/ms_blocking/ms_blocking.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index 9d61832..d47ceb7 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -585,7 +585,7 @@ def add_blocks_to_dataset(
                 current_row.index = current_index
                 if motives:
                     motives_solved = solve_motives(coords[pair])
-                    current_row["_motive"] = str(list(map(str, motives_solved)))
+                    current_row["_motive"] = [list(map(str, motives_solved))]
                     if score:
                         current_row["_score"] = len(
                             motives_solved
@@ -640,11 +640,12 @@ def add_blocks_to_dataset(
         if not show_as_pairs and motives:
             id_list = flatten(coords.keys())
             motive_matcher = {
-                row_id: str(list(map(str, solve_motives(coords[pair]))))
+                row_id: list(map(str, solve_motives(coords[pair])))
                 for pair in coords.keys()
                 for row_id in id_list
                 if row_id in pair
             }
+            # noinspection PyTypeChecker
             output_data["_motive"] = output_data.index.map(motive_matcher)
             if score:
                 output_data["_score"] = 0

From 4822c87f342b88f9358c0c5e4ea104013fcb2f64 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:31:39 +0100
Subject: [PATCH 09/20] test: fix checks depending on (random) ordering of
 motives

---
 tests/test_ms_blocking.py | 126 +++++++++++++++-----------------------
 1 file changed, 49 insertions(+), 77 deletions(-)

diff --git a/tests/test_ms_blocking.py b/tests/test_ms_blocking.py
index 3efb309..7fdaaa5 100644
--- a/tests/test_ms_blocking.py
+++ b/tests/test_ms_blocking.py
@@ -96,17 +96,16 @@ def attribute_city_motives_true_block():
 @pytest.fixture
 def attribute_city_motives_true_add():
     return [
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
-        "[\"Same 'City'\"]",
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
+        ["Same 'City'"],
     ]
-    # [msb.EquivalenceMotive("City")] * 9
 
 
 @pytest.fixture
@@ -127,67 +126,32 @@ def city_age_name_websites_pipelining_id():
 @pytest.fixture
 def city_age_websites_pipelining_motives():
     return [
-        "[\"Same 'City'\", \"Same 'Age'\", \">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\"Same 'City'\", \"Same 'Age'\", \">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\"Same 'City'\", \"Same 'Age'\"]",
-        "[\"Same 'City'\", \"Same 'Age'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
-        "[\">=1 overlap in 'websites'\"]",
+        {"Same 'City'", "Same 'Age'", ">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {"Same 'City'", "Same 'Age'", ">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {"Same 'City'", "Same 'Age'"},
+    {"Same 'City'", "Same 'Age'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
+    {">=1 overlap in 'websites'"},
     ]
 
-    # [
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [
-    #         msb.EquivalenceMotive("Age"),
-    #         msb.EquivalenceMotive("City"),
-    #         msb.OverlapMotive("websites", 1),
-    #     ],
-    #     [msb.OverlapMotive("websites", 1)],
-    #     [msb.OverlapMotive("websites", 1)],
-    #     [msb.EquivalenceMotive("Age"), msb.EquivalenceMotive("City")],
-    #     [msb.EquivalenceMotive("Age"), msb.EquivalenceMotive("City")],
-    #     [msb.OverlapMotive("websites", 1)],
-    #     [msb.OverlapMotive("websites", 1)],
-    #     [msb.OverlapMotive("websites", 1)],
-    # ]
-
 
 @pytest.fixture
 def city_age_websites_pipelining_scores():
     return [3, 3, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1]
 
 
+@pytest.fixture
+def city_age_websites_pipelining_scores_not_show_as_pairs():
+    return [3, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1]
+
+
 @pytest.fixture
 def city_age_not_different():
     return {frozenset({1, 4}), frozenset({8, 11}), frozenset({2, 5})}
@@ -386,9 +350,10 @@ def test_pipelining_motives(city_age_websites_pipelining_motives):
     websites_blocker = msb.OverlapBlocker(["websites"])
     final_blocker = (city_blocker & age_blocker) | websites_blocker
     links = final_blocker.block(get_users(), motives=True)
-    actual = msb.add_blocks_to_dataset(
+    motives = msb.add_blocks_to_dataset(  # Use set to ignore ordering
         get_users(), links, show_as_pairs=True, motives=True, merge_blocks=False
     )["_motive"].to_list()
+    actual = [set(motive) for motive in motives]
     assert actual == expected
 
 
@@ -412,19 +377,26 @@ def test_pipelining_scores(city_age_websites_pipelining_scores):
     assert actual == expected
 
 
-# def test_pipelining_scores_without_show_as_pairs(city_age_websites_pipelining_scores):
-#    """Test that scoring does work as intended"""
-#    expected = city_age_websites_pipelining_scores
-#    city_blocker = msb.AttributeEquivalenceBlocker(["City"])
-#    age_blocker = msb.AttributeEquivalenceBlocker(["Age"])
-#    websites_blocker = msb.OverlapBlocker(["websites"])
-#    final_blocker = (city_blocker & age_blocker) | websites_blocker
-#    links = final_blocker.block(get_users(), motives=True)
-#    report = msb.add_blocks_to_dataset(
-#         get_users(), links, show_as_pairs=True, motives=True, merge_blocks=False, score=True
-#     )
-#    actual = sorted(msb.scoring(report), reverse=True)
-#    assert actual == expected
+def test_pipelining_scores_without_show_as_pairs(
+    city_age_websites_pipelining_scores_not_show_as_pairs,
+):
+    """Test that scoring does work as intended"""
+    expected = city_age_websites_pipelining_scores_not_show_as_pairs
+    city_blocker = msb.AttributeEquivalenceBlocker(["City"])
+    age_blocker = msb.AttributeEquivalenceBlocker(["Age"])
+    websites_blocker = msb.OverlapBlocker(["websites"])
+    final_blocker = (city_blocker & age_blocker) | websites_blocker
+    links = final_blocker.block(get_users(), motives=True)
+    report = msb.add_blocks_to_dataset(
+        get_users(),
+        links,
+        show_as_pairs=False,
+        motives=True,
+        merge_blocks=False,
+        score=True,
+    )
+    actual = sorted(report["_score"], reverse=True)
+    assert actual == expected
 
 
 def test_merge_blockers_aa():

From 460808d7e4557bfd6947726a73a4573e376e29a7 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:33:06 +0100
Subject: [PATCH 10/20] style: reformat

---
 tests/test_ms_blocking.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tests/test_ms_blocking.py b/tests/test_ms_blocking.py
index 7fdaaa5..cf92924 100644
--- a/tests/test_ms_blocking.py
+++ b/tests/test_ms_blocking.py
@@ -127,18 +127,18 @@ def city_age_name_websites_pipelining_id():
 def city_age_websites_pipelining_motives():
     return [
         {"Same 'City'", "Same 'Age'", ">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {"Same 'City'", "Same 'Age'", ">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {"Same 'City'", "Same 'Age'"},
-    {"Same 'City'", "Same 'Age'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
-    {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {"Same 'City'", "Same 'Age'", ">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {"Same 'City'", "Same 'Age'"},
+        {"Same 'City'", "Same 'Age'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
+        {">=1 overlap in 'websites'"},
     ]
 
 

From 9e722604e8a27973e01f5a452f57e6aedbf356bf Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:35:00 +0100
Subject: [PATCH 11/20] style: remove obsolete comments

---
 src/ms_blocking/ms_blocking.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index d47ceb7..5571ba0 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -657,9 +657,6 @@ def add_blocks_to_dataset(
                 }
                 output_data["_score"] = output_data.index.map(score_matcher)
 
-    # if "_block" not in output_data.columns:  # Empty coords
-    #    output_data["_block"] = -1
-
     output_data = output_data.reset_index(drop=True)
     output_data["_block"] = output_data["_block"].astype(int)
 

From 41e8defbbc31fec7fad983e70adcb33d8a8954c9 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 16:37:54 +0100
Subject: [PATCH 12/20] docs: fix discarded reference

---
 docs/example.ipynb | 270 ++++++++++++++++++++++-----------------------
 1 file changed, 135 insertions(+), 135 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index 32ee69d..f3d0353 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,15 +32,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:00.408434200Z",
-     "start_time": "2026-02-03T15:25:59.668629400Z"
+     "end_time": "2026-02-03T15:36:04.452948500Z",
+     "start_time": "2026-02-03T15:36:03.131330Z"
     }
    },
    "source": [
     "import ms_blocking.ms_blocking as msb"
    ],
    "outputs": [],
-   "execution_count": 1
+   "execution_count": 2
   },
   {
    "cell_type": "markdown",
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:00.464804400Z",
-     "start_time": "2026-02-03T15:26:00.408434200Z"
+     "end_time": "2026-02-03T15:36:04.676076Z",
+     "start_time": "2026-02-03T15:36:04.488835200Z"
     }
    },
    "source": [
@@ -250,12 +250,12 @@
        "</div>"
       ]
      },
-     "execution_count": 2,
+     "execution_count": 3,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 2
+   "execution_count": 3
   },
   {
    "cell_type": "markdown",
@@ -282,15 +282,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:00.723249900Z",
-     "start_time": "2026-02-03T15:26:00.545044Z"
+     "end_time": "2026-02-03T15:36:04.943687900Z",
+     "start_time": "2026-02-03T15:36:04.758421500Z"
     }
    },
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])"
    ],
    "outputs": [],
-   "execution_count": 3
+   "execution_count": 4
   },
   {
    "cell_type": "markdown",
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:00.930325600Z",
-     "start_time": "2026-02-03T15:26:00.842587Z"
+     "end_time": "2026-02-03T15:36:05.205832200Z",
+     "start_time": "2026-02-03T15:36:05.172076200Z"
     }
    },
    "source": [
@@ -326,7 +326,7 @@
      ]
     }
    ],
-   "execution_count": 4
+   "execution_count": 5
   },
   {
    "cell_type": "markdown",
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:01.002006Z",
-     "start_time": "2026-02-03T15:26:00.984929700Z"
+     "end_time": "2026-02-03T15:36:05.479610700Z",
+     "start_time": "2026-02-03T15:36:05.419422900Z"
     }
    },
    "source": [
@@ -358,19 +358,19 @@
        " frozenset({10, 13})}"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 5
+   "execution_count": 6
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:01.389874900Z",
-     "start_time": "2026-02-03T15:26:01.189496400Z"
+     "end_time": "2026-02-03T15:36:05.687275800Z",
+     "start_time": "2026-02-03T15:36:05.545108700Z"
     }
    },
    "source": [
@@ -396,7 +396,7 @@
      }
     }
    ],
-   "execution_count": 6
+   "execution_count": 7
   },
   {
    "cell_type": "markdown",
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:01.488509700Z",
-     "start_time": "2026-02-03T15:26:01.458139Z"
+     "end_time": "2026-02-03T15:36:05.722622300Z",
+     "start_time": "2026-02-03T15:36:05.695740900Z"
     }
    },
    "source": [
@@ -556,12 +556,12 @@
        "</div>"
       ]
      },
-     "execution_count": 7,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 7
+   "execution_count": 8
   },
   {
    "cell_type": "markdown",
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:01.849762800Z",
-     "start_time": "2026-02-03T15:26:01.604523100Z"
+     "end_time": "2026-02-03T15:36:06.032894900Z",
+     "start_time": "2026-02-03T15:36:05.817497100Z"
     }
    },
    "source": [
@@ -590,12 +590,12 @@
        "array([-1,  0,  1,  2,  0,  1, -1, -1,  2, -1,  3,  2, -1,  3])"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 8
+   "execution_count": 9
   },
   {
    "cell_type": "markdown",
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:02.327630200Z",
-     "start_time": "2026-02-03T15:26:02.082466800Z"
+     "end_time": "2026-02-03T15:36:06.307293Z",
+     "start_time": "2026-02-03T15:36:06.224025900Z"
     }
    },
    "source": [
@@ -734,12 +734,12 @@
        "</div>"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 10,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 9
+   "execution_count": 10
   },
   {
    "cell_type": "markdown",
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:02.765309400Z",
-     "start_time": "2026-02-03T15:26:02.567839300Z"
+     "end_time": "2026-02-03T15:36:07.067841300Z",
+     "start_time": "2026-02-03T15:36:06.923106400Z"
     }
    },
    "source": [
@@ -783,7 +783,7 @@
      }
     }
    ],
-   "execution_count": 10
+   "execution_count": 11
   },
   {
    "cell_type": "markdown",
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:03.163072900Z",
-     "start_time": "2026-02-03T15:26:03.015158500Z"
+     "end_time": "2026-02-03T15:36:07.208452100Z",
+     "start_time": "2026-02-03T15:36:07.146327700Z"
     }
    },
    "source": [
@@ -932,12 +932,12 @@
        "</div>"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 11
+   "execution_count": 12
   },
   {
    "cell_type": "markdown",
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:03.713908200Z",
-     "start_time": "2026-02-03T15:26:03.587227900Z"
+     "end_time": "2026-02-03T15:36:07.531853600Z",
+     "start_time": "2026-02-03T15:36:07.348460600Z"
     }
    },
    "source": [
@@ -1050,12 +1050,12 @@
        "</div>"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 12
+   "execution_count": 13
   },
   {
    "cell_type": "markdown",
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:04.512418700Z",
-     "start_time": "2026-02-03T15:26:04.371414700Z"
+     "end_time": "2026-02-03T15:36:08.439928700Z",
+     "start_time": "2026-02-03T15:36:08.350546900Z"
     }
    },
    "source": [
@@ -1089,7 +1089,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n"
      ]
     },
     {
@@ -1198,12 +1198,12 @@
        "</div>"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 13
+   "execution_count": 14
   },
   {
    "cell_type": "markdown",
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:05.232296700Z",
-     "start_time": "2026-02-03T15:26:05.138463900Z"
+     "end_time": "2026-02-03T15:36:08.853151500Z",
+     "start_time": "2026-02-03T15:36:08.721190500Z"
     }
    },
    "source": [
@@ -1324,12 +1324,12 @@
        "</div>"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 14
+   "execution_count": 15
   },
   {
    "cell_type": "markdown",
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:05.746292700Z",
-     "start_time": "2026-02-03T15:26:05.615214500Z"
+     "end_time": "2026-02-03T15:36:09.489967600Z",
+     "start_time": "2026-02-03T15:36:09.326530600Z"
     }
    },
    "source": [
@@ -1401,12 +1401,12 @@
        "</div>"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 15
+   "execution_count": 16
   },
   {
    "cell_type": "markdown",
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:06.550041700Z",
-     "start_time": "2026-02-03T15:26:06.378265100Z"
+     "end_time": "2026-02-03T15:36:10.401390200Z",
+     "start_time": "2026-02-03T15:36:10.365448300Z"
     }
    },
    "source": [
@@ -1458,14 +1458,14 @@
      ]
     }
    ],
-   "execution_count": 16
+   "execution_count": 17
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:06.933740700Z",
-     "start_time": "2026-02-03T15:26:06.700136700Z"
+     "end_time": "2026-02-03T15:36:10.570875500Z",
+     "start_time": "2026-02-03T15:36:10.489956600Z"
     }
    },
    "source": [
@@ -1564,12 +1564,12 @@
        "</div>"
       ]
      },
-     "execution_count": 17,
+     "execution_count": 18,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 17
+   "execution_count": 18
   },
   {
    "cell_type": "markdown",
@@ -1589,8 +1589,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:07.724806800Z",
-     "start_time": "2026-02-03T15:26:07.416889200Z"
+     "end_time": "2026-02-03T15:36:10.930134200Z",
+     "start_time": "2026-02-03T15:36:10.722500400Z"
     }
    },
    "source": [
@@ -1756,12 +1756,12 @@
        "</div>"
       ]
      },
-     "execution_count": 18,
+     "execution_count": 19,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 18
+   "execution_count": 19
   },
   {
    "cell_type": "markdown",
@@ -1804,8 +1804,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:08.366574400Z",
-     "start_time": "2026-02-03T15:26:08.287314300Z"
+     "end_time": "2026-02-03T15:36:11.472124800Z",
+     "start_time": "2026-02-03T15:36:11.410766500Z"
     }
    },
    "source": [
@@ -1815,7 +1815,7 @@
     "websites_blocker = msb.OverlapBlocker([\"websites\"])"
    ],
    "outputs": [],
-   "execution_count": 19
+   "execution_count": 20
   },
   {
    "cell_type": "markdown",
@@ -1828,15 +1828,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:08.624518900Z",
-     "start_time": "2026-02-03T15:26:08.604191500Z"
+     "end_time": "2026-02-03T15:36:11.730809800Z",
+     "start_time": "2026-02-03T15:36:11.717895300Z"
     }
    },
    "source": [
     "final_blocker = (city_blocker & age_blocker) | (name_blocker & websites_blocker)"
    ],
    "outputs": [],
-   "execution_count": 20
+   "execution_count": 21
   },
   {
    "cell_type": "markdown",
@@ -1849,8 +1849,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:08.886089600Z",
-     "start_time": "2026-02-03T15:26:08.721474Z"
+     "end_time": "2026-02-03T15:36:12.008762600Z",
+     "start_time": "2026-02-03T15:36:11.829817400Z"
     }
    },
    "source": [
@@ -1862,7 +1862,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n",
+      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
       "Processing MixedBlocker(['Name'], ['websites'], 1)\n"
      ]
     },
@@ -1972,12 +1972,12 @@
        "</div>"
       ]
      },
-     "execution_count": 21,
+     "execution_count": 22,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 21
+   "execution_count": 22
   },
   {
    "cell_type": "markdown",
@@ -1990,8 +1990,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:09.251246500Z",
-     "start_time": "2026-02-03T15:26:09.080396800Z"
+     "end_time": "2026-02-03T15:36:12.404335300Z",
+     "start_time": "2026-02-03T15:36:12.172638Z"
     }
    },
    "source": [
@@ -2007,7 +2007,7 @@
      ]
     }
    ],
-   "execution_count": 22
+   "execution_count": 23
   },
   {
    "cell_type": "markdown",
@@ -2034,8 +2034,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:09.530329Z",
-     "start_time": "2026-02-03T15:26:09.486287900Z"
+     "end_time": "2026-02-03T15:36:12.721833200Z",
+     "start_time": "2026-02-03T15:36:12.589340400Z"
     }
    },
    "source": [
@@ -2181,12 +2181,12 @@
        "</div>"
       ]
      },
-     "execution_count": 23,
+     "execution_count": 24,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 23
+   "execution_count": 24
   },
   {
    "cell_type": "markdown",
@@ -2213,8 +2213,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:09.985303200Z",
-     "start_time": "2026-02-03T15:26:09.845263800Z"
+     "end_time": "2026-02-03T15:36:13.208456200Z",
+     "start_time": "2026-02-03T15:36:13.112548200Z"
     }
    },
    "source": [
@@ -2415,12 +2415,12 @@
        "</div>"
       ]
      },
-     "execution_count": 24,
+     "execution_count": 25,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 24
+   "execution_count": 25
   },
   {
    "cell_type": "markdown",
@@ -2443,8 +2443,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:10.930371500Z",
-     "start_time": "2026-02-03T15:26:10.809849600Z"
+     "end_time": "2026-02-03T15:36:14.249378400Z",
+     "start_time": "2026-02-03T15:36:14.008531Z"
     }
    },
    "source": [
@@ -2568,12 +2568,12 @@
        "</div>"
       ]
      },
-     "execution_count": 25,
+     "execution_count": 26,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 25
+   "execution_count": 26
   },
   {
    "cell_type": "markdown",
@@ -2593,8 +2593,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:11.634404900Z",
-     "start_time": "2026-02-03T15:26:11.403226800Z"
+     "end_time": "2026-02-03T15:36:14.709861100Z",
+     "start_time": "2026-02-03T15:36:14.517552400Z"
     }
    },
    "source": [
@@ -2621,12 +2621,12 @@
        " frozenset({3, 11}): [EquivalenceMotive(['City'])]}"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 27,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 26
+   "execution_count": 27
   },
   {
    "cell_type": "markdown",
@@ -2644,8 +2644,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:12.214789500Z",
-     "start_time": "2026-02-03T15:26:12.007748800Z"
+     "end_time": "2026-02-03T15:36:15.116572300Z",
+     "start_time": "2026-02-03T15:36:15.007172300Z"
     }
    },
    "source": [
@@ -2801,12 +2801,12 @@
        "</div>"
       ]
      },
-     "execution_count": 27,
+     "execution_count": 28,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 27
+   "execution_count": 28
   },
   {
    "cell_type": "markdown",
@@ -2822,8 +2822,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:12.610291100Z",
-     "start_time": "2026-02-03T15:26:12.498335600Z"
+     "end_time": "2026-02-03T15:36:15.563997200Z",
+     "start_time": "2026-02-03T15:36:15.425225900Z"
     }
    },
    "source": [
@@ -2986,12 +2986,12 @@
        "</div>"
       ]
      },
-     "execution_count": 28,
+     "execution_count": 29,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 28
+   "execution_count": 29
   },
   {
    "cell_type": "markdown",
@@ -3004,8 +3004,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:13.272171600Z",
-     "start_time": "2026-02-03T15:26:13.063070700Z"
+     "end_time": "2026-02-03T15:36:16.181630Z",
+     "start_time": "2026-02-03T15:36:16.065192300Z"
     }
    },
    "source": [
@@ -3112,12 +3112,12 @@
        "</div>"
       ]
      },
-     "execution_count": 29,
+     "execution_count": 30,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 29
+   "execution_count": 30
   },
   {
    "cell_type": "markdown",
@@ -3131,8 +3131,8 @@
    "metadata": {
     "scrolled": true,
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:14.300257400Z",
-     "start_time": "2026-02-03T15:26:13.981549200Z"
+     "end_time": "2026-02-03T15:36:17.213402500Z",
+     "start_time": "2026-02-03T15:36:17.028434800Z"
     }
    },
    "source": [
@@ -3156,7 +3156,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['Age', 'City'], [])\n",
+      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
       "Processing OverlapBlocker(['websites'], 1)\n"
      ]
     },
@@ -3179,16 +3179,16 @@
        "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
        "                                              _motive  _block  \n",
-       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       0  \n",
+       "0   [Same 'City', Same 'Age', >=1 overlap in 'webs...       0  \n",
        "1                         [>=1 overlap in 'websites']       0  \n",
        "2                         [>=1 overlap in 'websites']       0  \n",
-       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       1  \n",
+       "3   [Same 'City', Same 'Age', >=1 overlap in 'webs...       1  \n",
        "4                         [>=1 overlap in 'websites']       1  \n",
        "5                         [>=1 overlap in 'websites']       1  \n",
        "6                         [>=1 overlap in 'websites']       1  \n",
        "7                         [>=1 overlap in 'websites']       1  \n",
-       "8                           [Same 'Age', Same 'City']       2  \n",
-       "9                           [Same 'Age', Same 'City']       3  \n",
+       "8                           [Same 'City', Same 'Age']       2  \n",
+       "9                           [Same 'City', Same 'Age']       3  \n",
        "10                        [>=1 overlap in 'websites']       4  \n",
        "11                        [>=1 overlap in 'websites']       4  \n",
        "12                        [>=1 overlap in 'websites']       4  "
@@ -3227,7 +3227,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3254,7 +3254,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3299,7 +3299,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[Same 'City', Same 'Age']</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3308,7 +3308,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[Same 'City', Same 'Age']</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
        "    <tr>\n",
@@ -3343,12 +3343,12 @@
        "</div>"
       ]
      },
-     "execution_count": 30,
+     "execution_count": 31,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 30
+   "execution_count": 31
   },
   {
    "cell_type": "markdown",
@@ -3358,14 +3358,14 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": "For reports, it can be interesting to have numbers to drive decision-making. Using `scoring` gives you an indicator of the likelihood of rows behing duplicates based on the number of distinct motives."
+   "source": "For reports, it can be interesting to have numbers to drive decision-making. Using `score=True` gives you an indicator of the likelihood of rows being duplicates based on the number of distinct motives."
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:26:15.195066300Z",
-     "start_time": "2026-02-03T15:26:14.996741600Z"
+     "end_time": "2026-02-03T15:36:17.696557900Z",
+     "start_time": "2026-02-03T15:36:17.550771100Z"
     }
    },
    "source": [
@@ -3400,10 +3400,10 @@
        "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
        "\n",
        "                                              _motive  _score  _block  \n",
-       "0   [Same 'Age', Same 'City', >=1 overlap in 'webs...       3       0  \n",
-       "3   [Same 'Age', Same 'City', >=1 overlap in 'webs...       3       1  \n",
-       "8                           [Same 'Age', Same 'City']       2       2  \n",
-       "9                           [Same 'Age', Same 'City']       2       3  \n",
+       "0   [Same 'City', Same 'Age', >=1 overlap in 'webs...       3       0  \n",
+       "3   [Same 'City', Same 'Age', >=1 overlap in 'webs...       3       1  \n",
+       "8                           [Same 'City', Same 'Age']       2       2  \n",
+       "9                           [Same 'City', Same 'Age']       2       3  \n",
        "1                         [>=1 overlap in 'websites']       1       0  \n",
        "4                         [>=1 overlap in 'websites']       1       1  \n",
        "2                         [>=1 overlap in 'websites']       1       0  \n",
@@ -3449,7 +3449,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>3</td>\n",
        "      <td>0</td>\n",
        "    </tr>\n",
@@ -3459,7 +3459,7 @@
        "      <td>Jacques Dupond</td>\n",
        "      <td>4</td>\n",
        "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'Age', Same 'City', &gt;=1 overlap in 'webs...</td>\n",
+       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
        "      <td>3</td>\n",
        "      <td>1</td>\n",
        "    </tr>\n",
@@ -3469,7 +3469,7 @@
        "      <td>Pierre Dusquesnes</td>\n",
        "      <td>5</td>\n",
        "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[Same 'City', Same 'Age']</td>\n",
        "      <td>2</td>\n",
        "      <td>2</td>\n",
        "    </tr>\n",
@@ -3479,7 +3479,7 @@
        "      <td>Sophie Delarue</td>\n",
        "      <td>11</td>\n",
        "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'Age', Same 'City']</td>\n",
+       "      <td>[Same 'City', Same 'Age']</td>\n",
        "      <td>2</td>\n",
        "      <td>3</td>\n",
        "    </tr>\n",
@@ -3578,12 +3578,12 @@
        "</div>"
       ]
      },
-     "execution_count": 31,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 31
+   "execution_count": 32
   }
  ],
  "metadata": {

From b4441c402fe4d45389a120539c28b2b92628cb88 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 17:07:48 +0100
Subject: [PATCH 13/20] fix: switch must_not_be_different and
 normalize_strings

---
 docs/example.ipynb             | 2087 +++-----------------------------
 src/ms_blocking/ms_blocking.py |   17 +-
 2 files changed, 190 insertions(+), 1914 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index f3d0353..aef6ee6 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -32,15 +32,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:04.452948500Z",
-     "start_time": "2026-02-03T15:36:03.131330Z"
+     "end_time": "2026-02-03T16:02:56.751154300Z",
+     "start_time": "2026-02-03T16:02:55.924397100Z"
     }
    },
    "source": [
     "import ms_blocking.ms_blocking as msb"
    ],
    "outputs": [],
-   "execution_count": 2
+   "execution_count": 1
   },
   {
    "cell_type": "markdown",
@@ -60,8 +60,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:04.676076Z",
-     "start_time": "2026-02-03T15:36:04.488835200Z"
+     "end_time": "2026-02-03T16:02:56.810955300Z",
+     "start_time": "2026-02-03T16:02:56.751154300Z"
     }
    },
    "source": [
@@ -250,12 +250,12 @@
        "</div>"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 2,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 3
+   "execution_count": 2
   },
   {
    "cell_type": "markdown",
@@ -282,15 +282,15 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:04.943687900Z",
-     "start_time": "2026-02-03T15:36:04.758421500Z"
+     "end_time": "2026-02-03T16:02:56.966380500Z",
+     "start_time": "2026-02-03T16:02:56.862834100Z"
     }
    },
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])"
    ],
    "outputs": [],
-   "execution_count": 4
+   "execution_count": 3
   },
   {
    "cell_type": "markdown",
@@ -310,8 +310,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:05.205832200Z",
-     "start_time": "2026-02-03T15:36:05.172076200Z"
+     "end_time": "2026-02-03T16:02:57.285912400Z",
+     "start_time": "2026-02-03T16:02:57.147878900Z"
     }
    },
    "source": [
@@ -322,11 +322,11 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['City'])\n"
      ]
     }
    ],
-   "execution_count": 5
+   "execution_count": 4
   },
   {
    "cell_type": "markdown",
@@ -339,8 +339,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:05.479610700Z",
-     "start_time": "2026-02-03T15:36:05.419422900Z"
+     "end_time": "2026-02-03T16:02:57.479607Z",
+     "start_time": "2026-02-03T16:02:57.418159200Z"
     }
    },
    "source": [
@@ -358,19 +358,19 @@
        " frozenset({10, 13})}"
       ]
      },
-     "execution_count": 6,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 6
+   "execution_count": 5
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:05.687275800Z",
-     "start_time": "2026-02-03T15:36:05.545108700Z"
+     "end_time": "2026-02-03T16:02:57.776512200Z",
+     "start_time": "2026-02-03T16:02:57.565676Z"
     }
    },
    "source": [
@@ -396,7 +396,7 @@
      }
     }
    ],
-   "execution_count": 7
+   "execution_count": 6
   },
   {
    "cell_type": "markdown",
@@ -409,8 +409,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:05.722622300Z",
-     "start_time": "2026-02-03T15:36:05.695740900Z"
+     "end_time": "2026-02-03T16:02:57.810023Z",
+     "start_time": "2026-02-03T16:02:57.778482900Z"
     }
    },
    "source": [
@@ -556,12 +556,12 @@
        "</div>"
       ]
      },
-     "execution_count": 8,
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 8
+   "execution_count": 7
   },
   {
    "cell_type": "markdown",
@@ -574,8 +574,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:06.032894900Z",
-     "start_time": "2026-02-03T15:36:05.817497100Z"
+     "end_time": "2026-02-03T16:02:58.075057800Z",
+     "start_time": "2026-02-03T16:02:57.893294100Z"
     }
    },
    "source": [
@@ -590,12 +590,12 @@
        "array([-1,  0,  1,  2,  0,  1, -1, -1,  2, -1,  3,  2, -1,  3])"
       ]
      },
-     "execution_count": 9,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 9
+   "execution_count": 8
   },
   {
    "cell_type": "markdown",
@@ -622,8 +622,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:06.307293Z",
-     "start_time": "2026-02-03T15:36:06.224025900Z"
+     "end_time": "2026-02-03T16:02:58.413477400Z",
+     "start_time": "2026-02-03T16:02:58.285492900Z"
     }
    },
    "source": [
@@ -734,12 +734,12 @@
        "</div>"
       ]
      },
-     "execution_count": 10,
+     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 10
+   "execution_count": 9
   },
   {
    "cell_type": "markdown",
@@ -759,8 +759,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:07.067841300Z",
-     "start_time": "2026-02-03T15:36:06.923106400Z"
+     "end_time": "2026-02-03T16:02:58.887317800Z",
+     "start_time": "2026-02-03T16:02:58.675247500Z"
     }
    },
    "source": [
@@ -783,7 +783,7 @@
      }
     }
    ],
-   "execution_count": 11
+   "execution_count": 10
   },
   {
    "cell_type": "markdown",
@@ -796,8 +796,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:07.208452100Z",
-     "start_time": "2026-02-03T15:36:07.146327700Z"
+     "end_time": "2026-02-03T16:02:59.272554700Z",
+     "start_time": "2026-02-03T16:02:59.130460300Z"
     }
    },
    "source": [
@@ -932,12 +932,12 @@
        "</div>"
       ]
      },
-     "execution_count": 12,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 12
+   "execution_count": 11
   },
   {
    "cell_type": "markdown",
@@ -971,8 +971,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:07.531853600Z",
-     "start_time": "2026-02-03T15:36:07.348460600Z"
+     "end_time": "2026-02-03T16:02:59.806784300Z",
+     "start_time": "2026-02-03T16:02:59.686250600Z"
     }
    },
    "source": [
@@ -1050,12 +1050,12 @@
        "</div>"
       ]
      },
-     "execution_count": 13,
+     "execution_count": 12,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 13
+   "execution_count": 12
   },
   {
    "cell_type": "markdown",
@@ -1075,8 +1075,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:08.439928700Z",
-     "start_time": "2026-02-03T15:36:08.350546900Z"
+     "end_time": "2026-02-03T16:03:00.721777Z",
+     "start_time": "2026-02-03T16:03:00.603955400Z"
     }
    },
    "source": [
@@ -1089,7 +1089,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['Age', 'City'])\n"
      ]
     },
     {
@@ -1198,12 +1198,12 @@
        "</div>"
       ]
      },
-     "execution_count": 14,
+     "execution_count": 13,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 14
+   "execution_count": 13
   },
   {
    "cell_type": "markdown",
@@ -1223,8 +1223,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:08.853151500Z",
-     "start_time": "2026-02-03T15:36:08.721190500Z"
+     "end_time": "2026-02-03T16:03:01.209432600Z",
+     "start_time": "2026-02-03T16:03:01.048013600Z"
     }
    },
    "source": [
@@ -1237,7 +1237,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['Name'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['Name'])\n"
      ]
     },
     {
@@ -1324,12 +1324,12 @@
        "</div>"
       ]
      },
-     "execution_count": 15,
+     "execution_count": 14,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 15
+   "execution_count": 14
   },
   {
    "cell_type": "markdown",
@@ -1342,8 +1342,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:09.489967600Z",
-     "start_time": "2026-02-03T15:36:09.326530600Z"
+     "end_time": "2026-02-03T16:03:01.834433100Z",
+     "start_time": "2026-02-03T16:03:01.686309100Z"
     }
    },
    "source": [
@@ -1358,7 +1358,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['Name'], [])\n"
+      "Processing AttributeEquivalenceBlocker(['Name'], NON-NORMALIZED)\n"
      ]
     },
     {
@@ -1401,12 +1401,12 @@
        "</div>"
       ]
      },
-     "execution_count": 16,
+     "execution_count": 15,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
-   "execution_count": 16
+   "execution_count": 15
   },
   {
    "cell_type": "markdown",
@@ -1440,8 +1440,8 @@
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:10.401390200Z",
-     "start_time": "2026-02-03T15:36:10.365448300Z"
+     "end_time": "2026-02-03T16:03:02.711968Z",
+     "start_time": "2026-02-03T16:03:02.581163100Z"
     }
    },
    "source": [
@@ -1453,19 +1453,19 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], [])\n",
+      "Processing AttributeEquivalenceBlocker(['City'])\n",
       "Processing OverlapBlocker(['websites'], 1)\n"
      ]
     }
    ],
-   "execution_count": 17
+   "execution_count": 16
   },
   {
    "cell_type": "code",
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:10.570875500Z",
-     "start_time": "2026-02-03T15:36:10.489956600Z"
+     "end_time": "2026-02-03T16:03:03.614029700Z",
+     "start_time": "2026-02-03T16:03:02.835393200Z"
     }
    },
    "source": [
@@ -1477,99 +1477,25 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Processing MixedBlocker(['City'], ['websites'], 1)\n"
+      "Processing "
      ]
     },
     {
-     "data": {
-      "text/plain": [
-       "   id             Name               City  Age  \\\n",
-       "0   1   Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   4   Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "2  10  Caroline Dufour               Lens   45   \n",
-       "3  13    Benoît Benoît               Lens   15   \n",
-       "\n",
-       "                                            websites  _block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
-       "1                               ['jacquesdupond.fr']       0  \n",
-       "2             ['pythonensamusant.fr', 'lensfans.fr']       1  \n",
-       "3                                    ['lensfans.fr']       1  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 18,
-     "metadata": {},
-     "output_type": "execute_result"
+     "ename": "TypeError",
+     "evalue": "object of type 'bool' has no len()",
+     "output_type": "error",
+     "traceback": [
+      "\u001B[31m---------------------------------------------------------------------------\u001B[39m",
+      "\u001B[31mTypeError\u001B[39m                                 Traceback (most recent call last)",
+      "\u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[17]\u001B[39m\u001B[32m, line 1\u001B[39m\n\u001B[32m----> \u001B[39m\u001B[32m1\u001B[39m links = \u001B[43m(\u001B[49m\u001B[43mcity_blocker\u001B[49m\u001B[43m \u001B[49m\u001B[43m&\u001B[49m\u001B[43m \u001B[49m\u001B[43mwebsites_blocker\u001B[49m\u001B[43m)\u001B[49m\u001B[43m.\u001B[49m\u001B[43mblock\u001B[49m\u001B[43m(\u001B[49m\u001B[43mdf\u001B[49m\u001B[43m)\u001B[49m\n\u001B[32m      2\u001B[39m msb.add_blocks_to_dataset(df, links)\n",
+      "\u001B[36mFile \u001B[39m\u001B[32m~\\PycharmProjects\\MSBlock\\ms_blocking\\src\\ms_blocking\\ms_blocking.py:383\u001B[39m, in \u001B[36mMixedBlocker.block\u001B[39m\u001B[34m(self, data, motives)\u001B[39m\n\u001B[32m    380\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34mblock\u001B[39m(\u001B[38;5;28mself\u001B[39m, data, motives=\u001B[38;5;28;01mFalse\u001B[39;00m):\n\u001B[32m    381\u001B[39m \u001B[38;5;250m    \u001B[39m\u001B[33;03m\"\"\"Regroup rows based on overlap of one or more columns\"\"\"\u001B[39;00m\n\u001B[32m--> \u001B[39m\u001B[32m383\u001B[39m     \u001B[38;5;28;43mprint\u001B[39;49m\u001B[43m(\u001B[49m\u001B[33;43m\"\u001B[39;49m\u001B[33;43mProcessing\u001B[39;49m\u001B[33;43m\"\u001B[39;49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m)\u001B[49m\n\u001B[32m    385\u001B[39m     total_columns = \u001B[38;5;28mself\u001B[39m.equivalence_columns + \u001B[38;5;28mself\u001B[39m.overlap_columns\n\u001B[32m    387\u001B[39m     temp_data = data[total_columns].copy()\n",
+      "\u001B[36mFile \u001B[39m\u001B[32m~\\PycharmProjects\\MSBlock\\ms_blocking\\src\\ms_blocking\\ms_blocking.py:345\u001B[39m, in \u001B[36mMixedBlocker.__repr__\u001B[39m\u001B[34m(self)\u001B[39m\n\u001B[32m    342\u001B[39m \u001B[38;5;28;01mdef\u001B[39;00m\u001B[38;5;250m \u001B[39m\u001B[34m__repr__\u001B[39m(\u001B[38;5;28mself\u001B[39m):\n\u001B[32m    343\u001B[39m     \u001B[38;5;28;01mreturn\u001B[39;00m \u001B[38;5;28mstr\u001B[39m(\n\u001B[32m    344\u001B[39m         AndNode(\n\u001B[32m--> \u001B[39m\u001B[32m345\u001B[39m             \u001B[43mAttributeEquivalenceBlocker\u001B[49m\u001B[43m(\u001B[49m\n\u001B[32m    346\u001B[39m \u001B[43m                \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mequivalence_columns\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mmust_not_be_different\u001B[49m\u001B[43m,\u001B[49m\u001B[43m \u001B[49m\u001B[38;5;28;43mself\u001B[39;49m\u001B[43m.\u001B[49m\u001B[43mnormalize\u001B[49m\n\u001B[32m    347\u001B[39m \u001B[43m            \u001B[49m\u001B[43m)\u001B[49m,\n\u001B[32m    348\u001B[39m             OverlapBlocker(\n\u001B[32m    349\u001B[39m                 \u001B[38;5;28mself\u001B[39m.overlap_columns, \u001B[38;5;28mself\u001B[39m.overlap, \u001B[38;5;28mself\u001B[39m.word_level, \u001B[38;5;28mself\u001B[39m.normalize\n\u001B[32m    350\u001B[39m             ),\n\u001B[32m    351\u001B[39m         )\n\u001B[32m    352\u001B[39m     )\n",
+      "\u001B[36mFile \u001B[39m\u001B[32m~\\PycharmProjects\\MSBlock\\ms_blocking\\src\\ms_blocking\\ms_blocking.py:109\u001B[39m, in \u001B[36mAttributeEquivalenceBlocker.__init__\u001B[39m\u001B[34m(self, blocking_columns, normalize_strings, must_not_be_different)\u001B[39m\n\u001B[32m    107\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28mtype\u001B[39m(must_not_be_different) \u001B[38;5;129;01mis\u001B[39;00m \u001B[38;5;28mstr\u001B[39m:\n\u001B[32m    108\u001B[39m     must_not_be_different = [must_not_be_different]\n\u001B[32m--> \u001B[39m\u001B[32m109\u001B[39m \u001B[38;5;28;01mif\u001B[39;00m \u001B[38;5;28;43mlen\u001B[39;49m\u001B[43m(\u001B[49m\u001B[43mmust_not_be_different\u001B[49m\u001B[43m)\u001B[49m > \u001B[32m1\u001B[39m:\n\u001B[32m    110\u001B[39m     \u001B[38;5;28;01mraise\u001B[39;00m \u001B[38;5;167;01mValueError\u001B[39;00m(\u001B[33m\"\u001B[39m\u001B[33mThere must be only one extra column\u001B[39m\u001B[33m\"\u001B[39m)\n\u001B[32m    111\u001B[39m \u001B[38;5;28;01melif\u001B[39;00m (\n\u001B[32m    112\u001B[39m     must_not_be_different\n\u001B[32m    113\u001B[39m     \u001B[38;5;129;01mand\u001B[39;00m must_not_be_different[\u001B[32m0\u001B[39m] \u001B[38;5;129;01min\u001B[39;00m \u001B[38;5;28mself\u001B[39m.blocking_columns\n\u001B[32m    114\u001B[39m ):\n",
+      "\u001B[31mTypeError\u001B[39m: object of type 'bool' has no len()"
+     ]
     }
    ],
-   "execution_count": 18
+   "execution_count": 17
   },
   {
    "cell_type": "markdown",
@@ -1587,181 +1513,13 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:10.930134200Z",
-     "start_time": "2026-02-03T15:36:10.722500400Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "links = (city_blocker | websites_blocker).block(df)\n",
     "msb.add_blocks_to_dataset(df, links)"
    ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], [])\n",
-      "Processing OverlapBlocker(['websites'], 1)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "   id                Name               City  Age  \\\n",
-       "0   1      Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   4      Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "2   6  Jean-Michel Python              Douai   49   \n",
-       "3  10     Caroline Dufour               Lens   45   \n",
-       "4  13       Benoît Benoît               Lens   15   \n",
-       "5   2   Pierre Dusquesnes          Phalempin   24   \n",
-       "6   5   pierre dusquesnes          Phalempin   24   \n",
-       "7   3        Paul Delarue            Roubaix   32   \n",
-       "8   8      Sophie Delarue            Roubaix   33   \n",
-       "9  11      sophie_delarue            Roubaix   33   \n",
-       "\n",
-       "                                            websites  _block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
-       "1                               ['jacquesdupond.fr']       0  \n",
-       "2             ['lensfans.fr', 'pythonensamusant.fr']       0  \n",
-       "3             ['pythonensamusant.fr', 'lensfans.fr']       0  \n",
-       "4                                    ['lensfans.fr']       0  \n",
-       "5                    ['somewebsite.com/users/rpz59']       1  \n",
-       "6                                                 []       1  \n",
-       "7                                 ['roubaixlove.fr']       2  \n",
-       "8                                                 []       2  \n",
-       "9                                                 []       2  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>Douai</td>\n",
-       "      <td>49</td>\n",
-       "      <td>['lensfans.fr', 'pythonensamusant.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 19,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 19
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -1802,12 +1560,7 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:11.472124800Z",
-     "start_time": "2026-02-03T15:36:11.410766500Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])\n",
     "age_blocker = msb.AttributeEquivalenceBlocker([\"Age\"])\n",
@@ -1815,7 +1568,7 @@
     "websites_blocker = msb.OverlapBlocker([\"websites\"])"
    ],
    "outputs": [],
-   "execution_count": 20
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -1826,17 +1579,12 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:11.730809800Z",
-     "start_time": "2026-02-03T15:36:11.717895300Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "final_blocker = (city_blocker & age_blocker) | (name_blocker & websites_blocker)"
    ],
    "outputs": [],
-   "execution_count": 21
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -1847,137 +1595,13 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:12.008762600Z",
-     "start_time": "2026-02-03T15:36:11.829817400Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "links = final_blocker.block(df)\n",
     "msb.add_blocks_to_dataset(df, links)"
    ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
-      "Processing MixedBlocker(['Name'], ['websites'], 1)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "   id               Name               City  Age  \\\n",
-       "0   1     Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   4     Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "2   2  Pierre Dusquesnes          Phalempin   24   \n",
-       "3   5  pierre dusquesnes          Phalempin   24   \n",
-       "4   8     Sophie Delarue            Roubaix   33   \n",
-       "5  11     sophie_delarue            Roubaix   33   \n",
-       "\n",
-       "                                            websites  _block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
-       "1                               ['jacquesdupond.fr']       0  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  \n",
-       "3                                                 []       1  \n",
-       "4                                                 []       2  \n",
-       "5                                                 []       2  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 22,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 22
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -1988,26 +1612,13 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:12.404335300Z",
-     "start_time": "2026-02-03T15:36:12.172638Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])\n",
     "links = city_blocker.block(df)"
    ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], [])\n"
-     ]
-    }
-   ],
-   "execution_count": 23
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -2032,161 +1643,12 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:12.721833200Z",
-     "start_time": "2026-02-03T15:36:12.589340400Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "msb.add_blocks_to_dataset(df, links, sort=False)"
    ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "   id               Name               City  Age  \\\n",
-       "0   1     Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   2  Pierre Dusquesnes          Phalempin   24   \n",
-       "2   3       Paul Delarue            Roubaix   32   \n",
-       "3   4     Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "4   5  pierre dusquesnes          Phalempin   24   \n",
-       "5   8     Sophie Delarue            Roubaix   33   \n",
-       "6  10    Caroline Dufour               Lens   45   \n",
-       "7  11     sophie_delarue            Roubaix   33   \n",
-       "8  13      Benoît Benoît               Lens   15   \n",
-       "\n",
-       "                                            websites  _block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
-       "1                    ['somewebsite.com/users/rpz59']       1  \n",
-       "2                                 ['roubaixlove.fr']       2  \n",
-       "3                               ['jacquesdupond.fr']       0  \n",
-       "4                                                 []       1  \n",
-       "5                                                 []       2  \n",
-       "6             ['pythonensamusant.fr', 'lensfans.fr']       3  \n",
-       "7                                                 []       2  \n",
-       "8                                    ['lensfans.fr']       3  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 24,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 24
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -2211,216 +1673,12 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:13.208456200Z",
-     "start_time": "2026-02-03T15:36:13.112548200Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "msb.add_blocks_to_dataset(df, links, keep_ungrouped_rows=True)"
    ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "    id                 Name               City  Age  \\\n",
-       "0    0           Jean d'Aux              Lille   26   \n",
-       "1    1       Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "2    4       Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "3    2    Pierre Dusquesnes          Phalempin   24   \n",
-       "4    5    pierre dusquesnes          Phalempin   24   \n",
-       "5    3         Paul Delarue            Roubaix   32   \n",
-       "6    8       Sophie Delarue            Roubaix   33   \n",
-       "7   11       sophie_delarue            Roubaix   33   \n",
-       "8    6   Jean-Michel Python              Douai   49   \n",
-       "9    7     Gédéon Glincarné              Paris   53   \n",
-       "10   9     Jeanne Verbrugge       Valenciennes   41   \n",
-       "11  10      Caroline Dufour               Lens   45   \n",
-       "12  13        Benoît Benoît               Lens   15   \n",
-       "13  12  Marcel Vandermersch           Fourmies   48   \n",
-       "\n",
-       "                                             websites  _block  \n",
-       "0                     ['jeandaux.fr', 'lillefans.fr']       0  \n",
-       "1   ['somewebsite.com/users/jacquesdupond', 'jacqu...       1  \n",
-       "2                                ['jacquesdupond.fr']       1  \n",
-       "3                     ['somewebsite.com/users/rpz59']       2  \n",
-       "4                                                  []       2  \n",
-       "5                                  ['roubaixlove.fr']       3  \n",
-       "6                                                  []       3  \n",
-       "7                                                  []       3  \n",
-       "8              ['lensfans.fr', 'pythonensamusant.fr']       4  \n",
-       "9                                        ['lorem.fr']       5  \n",
-       "10                ['somewebsite.com/users/jajanne59']       6  \n",
-       "11             ['pythonensamusant.fr', 'lensfans.fr']       7  \n",
-       "12                                    ['lensfans.fr']       7  \n",
-       "13                         ['lesrecettesdemarcel.fr']       8  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>0</td>\n",
-       "      <td>Jean d'Aux</td>\n",
-       "      <td>Lille</td>\n",
-       "      <td>26</td>\n",
-       "      <td>['jeandaux.fr', 'lillefans.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>Douai</td>\n",
-       "      <td>49</td>\n",
-       "      <td>['lensfans.fr', 'pythonensamusant.fr']</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>7</td>\n",
-       "      <td>Gédéon Glincarné</td>\n",
-       "      <td>Paris</td>\n",
-       "      <td>53</td>\n",
-       "      <td>['lorem.fr']</td>\n",
-       "      <td>5</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>9</td>\n",
-       "      <td>Jeanne Verbrugge</td>\n",
-       "      <td>Valenciennes</td>\n",
-       "      <td>41</td>\n",
-       "      <td>['somewebsite.com/users/jajanne59']</td>\n",
-       "      <td>6</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>7</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>13</th>\n",
-       "      <td>12</td>\n",
-       "      <td>Marcel Vandermersch</td>\n",
-       "      <td>Fourmies</td>\n",
-       "      <td>48</td>\n",
-       "      <td>['lesrecettesdemarcel.fr']</td>\n",
-       "      <td>8</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 25,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 25
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -2441,12 +1699,7 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:14.249378400Z",
-     "start_time": "2026-02-03T15:36:14.008531Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "city_blocker_not_different_age = msb.AttributeEquivalenceBlocker(\n",
     "    [\"City\"], must_not_be_different=[\"Age\"]\n",
@@ -2454,126 +1707,8 @@
     "links = city_blocker_not_different_age.block(df)\n",
     "msb.add_blocks_to_dataset(df, links)"
    ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], ['Age'])\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "   id               Name               City  Age  \\\n",
-       "0   1     Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   4     Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "2   2  Pierre Dusquesnes          Phalempin   24   \n",
-       "3   5  pierre dusquesnes          Phalempin   24   \n",
-       "4   8     Sophie Delarue            Roubaix   33   \n",
-       "5  11     sophie_delarue            Roubaix   33   \n",
-       "\n",
-       "                                            websites  _block  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  \n",
-       "1                               ['jacquesdupond.fr']       0  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  \n",
-       "3                                                 []       1  \n",
-       "4                                                 []       2  \n",
-       "5                                                 []       2  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 26,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 26
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -2591,42 +1726,14 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:14.709861100Z",
-     "start_time": "2026-02-03T15:36:14.517552400Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])\n",
     "links = city_blocker.block(df, motives=True)\n",
     "links"
    ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City'], [])\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "{frozenset({1, 4}): [EquivalenceMotive(['City'])],\n",
-       " frozenset({8, 11}): [EquivalenceMotive(['City'])],\n",
-       " frozenset({2, 5}): [EquivalenceMotive(['City'])],\n",
-       " frozenset({10, 13}): [EquivalenceMotive(['City'])],\n",
-       " frozenset({3, 8}): [EquivalenceMotive(['City'])],\n",
-       " frozenset({3, 11}): [EquivalenceMotive(['City'])]}"
-      ]
-     },
-     "execution_count": 27,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 27
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -2640,715 +1747,82 @@
     "Similarly, you may add `motives=True` to the `msb.add_blocks_to_dataset` function to see said motives:"
    ]
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "msb.add_blocks_to_dataset(df, links, motives=True)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": "... though since motives make more sense when considering pairs of rows instead of full blocks, the above visualization is not that interesting..."
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": "... which is the reason you can pass `show_as_pairs=True` to `msb.add_blocks_to_dataset` to see the output as a list of pairs:"
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "msb.add_blocks_to_dataset(df, links, motives=True, show_as_pairs=True)"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "If our dataset had many columns, the above output would be too large to read easily, so we added the `output_columns` option:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "msb.add_blocks_to_dataset(\n",
+    "    df, links, motives=True, show_as_pairs=True, output_columns=[\"id\", \"Name\"]\n",
+    ")"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Motives are dynamic:"
+   ]
+  },
   {
    "cell_type": "code",
    "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:15.116572300Z",
-     "start_time": "2026-02-03T15:36:15.007172300Z"
-    }
+    "scrolled": true
    },
    "source": [
-    "msb.add_blocks_to_dataset(df, links, motives=True)"
+    "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])\n",
+    "age_blocker = msb.AttributeEquivalenceBlocker([\"Age\"])\n",
+    "websites_blocker = msb.OverlapBlocker([\"websites\"])\n",
+    "final_blocker = (city_blocker & age_blocker) | websites_blocker\n",
+    "links = final_blocker.block(df, motives=True)\n",
+    "report = msb.add_blocks_to_dataset(\n",
+    "    df,\n",
+    "    links,\n",
+    "    motives=True,\n",
+    "    show_as_pairs=True,\n",
+    "    output_columns=[\"id\", \"Name\"],\n",
+    "    merge_blocks=False,\n",
+    ")\n",
+    "report"
    ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "   id               Name               City  Age  \\\n",
-       "0   1     Jacques Dupond  Villeneuve d'Ascq   37   \n",
-       "1   4     Jacques Dupont  Villeneuve d'Ascq   37   \n",
-       "2   2  Pierre Dusquesnes          Phalempin   24   \n",
-       "3   5  pierre dusquesnes          Phalempin   24   \n",
-       "4   3       Paul Delarue            Roubaix   32   \n",
-       "5   8     Sophie Delarue            Roubaix   33   \n",
-       "6  11     sophie_delarue            Roubaix   33   \n",
-       "7  10    Caroline Dufour               Lens   45   \n",
-       "8  13      Benoît Benoît               Lens   15   \n",
-       "\n",
-       "                                            websites  _block        _motive  \n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...       0  [Same 'City']  \n",
-       "1                               ['jacquesdupond.fr']       0  [Same 'City']  \n",
-       "2                    ['somewebsite.com/users/rpz59']       1  [Same 'City']  \n",
-       "3                                                 []       1  [Same 'City']  \n",
-       "4                                 ['roubaixlove.fr']       2  [Same 'City']  \n",
-       "5                                                 []       2  [Same 'City']  \n",
-       "6                                                 []       2  [Same 'City']  \n",
-       "7             ['pythonensamusant.fr', 'lensfans.fr']       3  [Same 'City']  \n",
-       "8                                    ['lensfans.fr']       3  [Same 'City']  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id</th>\n",
-       "      <th>Name</th>\n",
-       "      <th>City</th>\n",
-       "      <th>Age</th>\n",
-       "      <th>websites</th>\n",
-       "      <th>_block</th>\n",
-       "      <th>_motive</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>0</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>1</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>2</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>2</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>3</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>3</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 28,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 28
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": "... though since motives make more sense when considering pairs of rows instead of full blocks, the above visualization is not that interesting..."
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": "... which is the reason you can pass `show_as_pairs=True` to `msb.add_blocks_to_dataset` to see the output has a list of pairs:"
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:15.563997200Z",
-     "start_time": "2026-02-03T15:36:15.425225900Z"
-    }
-   },
-   "source": [
-    "msb.add_blocks_to_dataset(df, links, motives=True, show_as_pairs=True)"
-   ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "   id_l             Name_l             City_l  Age_l  \\\n",
-       "0     1     Jacques Dupond  Villeneuve d'Ascq     37   \n",
-       "1     2  Pierre Dusquesnes          Phalempin     24   \n",
-       "2     3       Paul Delarue            Roubaix     32   \n",
-       "3     8     Sophie Delarue            Roubaix     33   \n",
-       "4     8     Sophie Delarue            Roubaix     33   \n",
-       "5    10    Caroline Dufour               Lens     45   \n",
-       "\n",
-       "                                          websites_l  id_r             Name_r  \\\n",
-       "0  ['somewebsite.com/users/jacquesdupond', 'jacqu...     4     Jacques Dupont   \n",
-       "1                    ['somewebsite.com/users/rpz59']     5  pierre dusquesnes   \n",
-       "2                                 ['roubaixlove.fr']    11     sophie_delarue   \n",
-       "3                                                 []    11     sophie_delarue   \n",
-       "4                                                 []     3       Paul Delarue   \n",
-       "5             ['pythonensamusant.fr', 'lensfans.fr']    13      Benoît Benoît   \n",
-       "\n",
-       "              City_r  Age_r            websites_r        _motive  _block  \n",
-       "0  Villeneuve d'Ascq     37  ['jacquesdupond.fr']  [Same 'City']       0  \n",
-       "1          Phalempin     24                    []  [Same 'City']       1  \n",
-       "2            Roubaix     33                    []  [Same 'City']       2  \n",
-       "3            Roubaix     33                    []  [Same 'City']       2  \n",
-       "4            Roubaix     32    ['roubaixlove.fr']  [Same 'City']       2  \n",
-       "5               Lens     15       ['lensfans.fr']  [Same 'City']       3  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id_l</th>\n",
-       "      <th>Name_l</th>\n",
-       "      <th>City_l</th>\n",
-       "      <th>Age_l</th>\n",
-       "      <th>websites_l</th>\n",
-       "      <th>id_r</th>\n",
-       "      <th>Name_r</th>\n",
-       "      <th>City_r</th>\n",
-       "      <th>Age_r</th>\n",
-       "      <th>websites_r</th>\n",
-       "      <th>_motive</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['somewebsite.com/users/jacquesdupond', 'jacqu...</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>Villeneuve d'Ascq</td>\n",
-       "      <td>37</td>\n",
-       "      <td>['jacquesdupond.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>['somewebsite.com/users/rpz59']</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>Phalempin</td>\n",
-       "      <td>24</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>33</td>\n",
-       "      <td>[]</td>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>Roubaix</td>\n",
-       "      <td>32</td>\n",
-       "      <td>['roubaixlove.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>45</td>\n",
-       "      <td>['pythonensamusant.fr', 'lensfans.fr']</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>Lens</td>\n",
-       "      <td>15</td>\n",
-       "      <td>['lensfans.fr']</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 29,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 29
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "If our dataset had many columns, the above output would be too large to easily be read, so we added the `output_columns` option:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:16.181630Z",
-     "start_time": "2026-02-03T15:36:16.065192300Z"
-    }
-   },
-   "source": [
-    "msb.add_blocks_to_dataset(\n",
-    "    df, links, motives=True, show_as_pairs=True, output_columns=[\"id\", \"Name\"]\n",
-    ")"
-   ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "   id_l             Name_l  id_r             Name_r        _motive  _block\n",
-       "0     1     Jacques Dupond     4     Jacques Dupont  [Same 'City']       0\n",
-       "1     2  Pierre Dusquesnes     5  pierre dusquesnes  [Same 'City']       1\n",
-       "2     3       Paul Delarue    11     sophie_delarue  [Same 'City']       2\n",
-       "3     8     Sophie Delarue    11     sophie_delarue  [Same 'City']       2\n",
-       "4     8     Sophie Delarue     3       Paul Delarue  [Same 'City']       2\n",
-       "5    10    Caroline Dufour    13      Benoît Benoît  [Same 'City']       3"
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id_l</th>\n",
-       "      <th>Name_l</th>\n",
-       "      <th>id_r</th>\n",
-       "      <th>Name_r</th>\n",
-       "      <th>_motive</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>3</td>\n",
-       "      <td>Paul Delarue</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>[Same 'City']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 30,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 30
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Motives are dynamic:"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "metadata": {
-    "scrolled": true,
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:17.213402500Z",
-     "start_time": "2026-02-03T15:36:17.028434800Z"
-    }
-   },
-   "source": [
-    "city_blocker = msb.AttributeEquivalenceBlocker([\"City\"])\n",
-    "age_blocker = msb.AttributeEquivalenceBlocker([\"Age\"])\n",
-    "websites_blocker = msb.OverlapBlocker([\"websites\"])\n",
-    "final_blocker = (city_blocker & age_blocker) | websites_blocker\n",
-    "links = final_blocker.block(df, motives=True)\n",
-    "report = msb.add_blocks_to_dataset(\n",
-    "    df,\n",
-    "    links,\n",
-    "    motives=True,\n",
-    "    show_as_pairs=True,\n",
-    "    output_columns=[\"id\", \"Name\"],\n",
-    "    merge_blocks=False,\n",
-    ")\n",
-    "report"
-   ],
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Processing AttributeEquivalenceBlocker(['City', 'Age'], [])\n",
-      "Processing OverlapBlocker(['websites'], 1)\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
-       "\n",
-       "                                              _motive  _block  \n",
-       "0   [Same 'City', Same 'Age', >=1 overlap in 'webs...       0  \n",
-       "1                         [>=1 overlap in 'websites']       0  \n",
-       "2                         [>=1 overlap in 'websites']       0  \n",
-       "3   [Same 'City', Same 'Age', >=1 overlap in 'webs...       1  \n",
-       "4                         [>=1 overlap in 'websites']       1  \n",
-       "5                         [>=1 overlap in 'websites']       1  \n",
-       "6                         [>=1 overlap in 'websites']       1  \n",
-       "7                         [>=1 overlap in 'websites']       1  \n",
-       "8                           [Same 'City', Same 'Age']       2  \n",
-       "9                           [Same 'City', Same 'Age']       3  \n",
-       "10                        [>=1 overlap in 'websites']       4  \n",
-       "11                        [>=1 overlap in 'websites']       4  \n",
-       "12                        [>=1 overlap in 'websites']       4  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id_l</th>\n",
-       "      <th>Name_l</th>\n",
-       "      <th>id_r</th>\n",
-       "      <th>Name_r</th>\n",
-       "      <th>_motive</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'City', Same 'Age']</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City', Same 'Age']</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 31,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 31
+   "outputs": [],
+   "execution_count": null
   },
   {
    "cell_type": "markdown",
@@ -3362,12 +1836,7 @@
   },
   {
    "cell_type": "code",
-   "metadata": {
-    "ExecuteTime": {
-     "end_time": "2026-02-03T15:36:17.696557900Z",
-     "start_time": "2026-02-03T15:36:17.550771100Z"
-    }
-   },
+   "metadata": {},
    "source": [
     "report = msb.add_blocks_to_dataset(\n",
     "    df,\n",
@@ -3380,210 +1849,8 @@
     ")\n",
     "report.sort_values(\"_score\", ascending=False)"
    ],
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "    id_l             Name_l  id_r              Name_r  \\\n",
-       "0      1     Jacques Dupond     4      Jacques Dupont   \n",
-       "3      1     Jacques Dupond     4      Jacques Dupont   \n",
-       "8      2  Pierre Dusquesnes     5   pierre dusquesnes   \n",
-       "9      8     Sophie Delarue    11      sophie_delarue   \n",
-       "1      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "4      1     Jacques Dupond     6  Jean-Michel Python   \n",
-       "2      1     Jacques Dupond    10     Caroline Dufour   \n",
-       "6     10    Caroline Dufour     6  Jean-Michel Python   \n",
-       "5      1     Jacques Dupond    10     Caroline Dufour   \n",
-       "7     10    Caroline Dufour    13       Benoît Benoît   \n",
-       "10    10    Caroline Dufour     6  Jean-Michel Python   \n",
-       "11    10    Caroline Dufour    13       Benoît Benoît   \n",
-       "12    13      Benoît Benoît     6  Jean-Michel Python   \n",
-       "\n",
-       "                                              _motive  _score  _block  \n",
-       "0   [Same 'City', Same 'Age', >=1 overlap in 'webs...       3       0  \n",
-       "3   [Same 'City', Same 'Age', >=1 overlap in 'webs...       3       1  \n",
-       "8                           [Same 'City', Same 'Age']       2       2  \n",
-       "9                           [Same 'City', Same 'Age']       2       3  \n",
-       "1                         [>=1 overlap in 'websites']       1       0  \n",
-       "4                         [>=1 overlap in 'websites']       1       1  \n",
-       "2                         [>=1 overlap in 'websites']       1       0  \n",
-       "6                         [>=1 overlap in 'websites']       1       1  \n",
-       "5                         [>=1 overlap in 'websites']       1       1  \n",
-       "7                         [>=1 overlap in 'websites']       1       1  \n",
-       "10                        [>=1 overlap in 'websites']       1       4  \n",
-       "11                        [>=1 overlap in 'websites']       1       4  \n",
-       "12                        [>=1 overlap in 'websites']       1       4  "
-      ],
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>id_l</th>\n",
-       "      <th>Name_l</th>\n",
-       "      <th>id_r</th>\n",
-       "      <th>Name_r</th>\n",
-       "      <th>_motive</th>\n",
-       "      <th>_score</th>\n",
-       "      <th>_block</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>3</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>4</td>\n",
-       "      <td>Jacques Dupont</td>\n",
-       "      <td>[Same 'City', Same 'Age', &gt;=1 overlap in 'webs...</td>\n",
-       "      <td>3</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>8</th>\n",
-       "      <td>2</td>\n",
-       "      <td>Pierre Dusquesnes</td>\n",
-       "      <td>5</td>\n",
-       "      <td>pierre dusquesnes</td>\n",
-       "      <td>[Same 'City', Same 'Age']</td>\n",
-       "      <td>2</td>\n",
-       "      <td>2</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>9</th>\n",
-       "      <td>8</td>\n",
-       "      <td>Sophie Delarue</td>\n",
-       "      <td>11</td>\n",
-       "      <td>sophie_delarue</td>\n",
-       "      <td>[Same 'City', Same 'Age']</td>\n",
-       "      <td>2</td>\n",
-       "      <td>3</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>1</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>4</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>2</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>6</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>5</th>\n",
-       "      <td>1</td>\n",
-       "      <td>Jacques Dupond</td>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>7</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>1</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>10</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>11</th>\n",
-       "      <td>10</td>\n",
-       "      <td>Caroline Dufour</td>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>12</th>\n",
-       "      <td>13</td>\n",
-       "      <td>Benoît Benoît</td>\n",
-       "      <td>6</td>\n",
-       "      <td>Jean-Michel Python</td>\n",
-       "      <td>[&gt;=1 overlap in 'websites']</td>\n",
-       "      <td>1</td>\n",
-       "      <td>4</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "execution_count": 32
+   "outputs": [],
+   "execution_count": null
   }
  ],
  "metadata": {
diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index 5571ba0..b27d3d1 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -92,7 +92,7 @@ class AttributeEquivalenceBlocker(BlockerNode):  # Leaf
     """To regroup rows based on equality across columns."""
 
     def __init__(
-        self, blocking_columns, normalize_strings=True, must_not_be_different=None
+        self, blocking_columns, must_not_be_different=None, normalize_strings=True
     ):
         super().__init__()
 
@@ -121,7 +121,7 @@ def __init__(
         self.normalize = normalize_strings  # if True, will casefold+remove punctation+strip spaces for all strings before comparing them
 
     def __repr__(self):
-        return f"AttributeEquivalenceBlocker({self.blocking_columns}, {self.must_not_be_different})"
+        return f"AttributeEquivalenceBlocker({self.blocking_columns}{', ' + str(self.must_not_be_different) if self.must_not_be_different else ''}{', NON-NORMALIZED' if not self.normalize else ''})"
 
     def __eq__(self, other):
         if type(other) is AttributeEquivalenceBlocker:
@@ -216,7 +216,7 @@ def __init__(
         self.normalize = normalize_strings  # if True, will casefold+remove punctation+strip spaces for all strings before comparing them
 
     def __repr__(self):
-        return f"OverlapBlocker({self.blocking_columns}, {self.overlap})"
+        return f"OverlapBlocker({self.blocking_columns}, {self.overlap}{', WORD-LEVEL' if self.word_level else ''}{', NON-NORMALIZED' if not self.normalize else ''})"
 
     def __eq__(self, other):
         if type(other) is OverlapBlocker:
@@ -340,7 +340,16 @@ def __init__(
         self.normalize = normalize_strings  # if True, will casefold+remove punctation+strip spaces for all strings before comparing them
 
     def __repr__(self):
-        return f"MixedBlocker({self.equivalence_columns}, {self.overlap_columns}, {self.overlap})"
+        return str(
+            AndNode(
+                AttributeEquivalenceBlocker(
+                    self.equivalence_columns, self.must_not_be_different, self.normalize
+                ),
+                OverlapBlocker(
+                    self.overlap_columns, self.overlap, self.word_level, self.normalize
+                ),
+            )
+        )
 
     def __eq__(self, other):
         if type(other) is AttributeEquivalenceBlocker:

From 3643a0030afcfb4163a228c81a15a649b4b30bfc Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 17:24:49 +0100
Subject: [PATCH 14/20] docs: fix obsolete type in docstring

---
 src/ms_blocking/utils.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 7f271f5..100ab2e 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -12,12 +12,14 @@
 
 
 class EquivalenceMotive:
-    def __init__(self, blocking_column):
+    def __init__(self, blocking_column: str):
         if not isinstance(blocking_column, str):
             raise TypeError("blocking_column for Motive must be a string")
         self.blocking_column = blocking_column
 
-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, EquivalenceMotive | OverlapMotive):
+            raise TypeError("Can only compare Motives")
         return self.blocking_column == other.blocking_column
 
     def __str__(self):
@@ -28,7 +30,9 @@ def __repr__(self):
 
 
 class OverlapMotive:
-    def __init__(self, blocking_column, overlap=1, word_level=False):
+    def __init__(
+        self, blocking_column: str, overlap: int = 1, word_level: bool = False
+    ):
         if not isinstance(blocking_column, str):
             raise TypeError("blocking_column for Motive must be a string")
         if not isinstance(overlap, int):
@@ -39,7 +43,9 @@ def __init__(self, blocking_column, overlap=1, word_level=False):
         self.overlap = overlap
         self.word_level = word_level
 
-    def __eq__(self, other):
+    def __eq__(self, other: Any) -> bool:
+        if not isinstance(other, EquivalenceMotive | OverlapMotive):
+            raise TypeError("Can only compare Motives")
         return (
             self.blocking_column == other.blocking_column
             and self.overlap == other.overlap
@@ -535,7 +541,7 @@ def solve_motives(motives: List[Motive]) -> List[Motive]:
 
     Examples
     --------
-    >>> solve_motives([OverlapMotive(['websites'], 1), OverlapMotive(['websites'], 2), OverlapMotive(['websites'], 2, word_level=False)])
+    >>> solve_motives([OverlapMotive('websites', 1), OverlapMotive('websites', 2), OverlapMotive('websites', 2, word_level=False)])
     [OverlapMotive(['websites'], 2, word_level=False)]
     """
     if not motives:

From 86a443c9982ffb376ee61f6d0d7d444beda0337c Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Tue, 3 Feb 2026 17:40:18 +0100
Subject: [PATCH 15/20] docs: add typehints

---
 src/ms_blocking/ms_blocking.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index b27d3d1..cdfa664 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -48,7 +48,7 @@ def __init__(self, left, right):
     def __repr__(self):
         return f"AndNode{{{self.left}, {self.right}}}"
 
-    def block(self, df, motives=False):
+    def block(self, df: pd.DataFrame, motives: bool=False) -> Coords:
         # In order not to perform redundant computations, we first filter out the rows that were not considered by the first blocker before running the second blocker
         coords_left = self.left.block(df, motives=motives)
 
@@ -78,7 +78,7 @@ def __init__(self, left, right):
     def __repr__(self):
         return f"OrNode{{{self.left}, {self.right}}}"
 
-    def block(self, df, motives=False):
+    def block(self, df: pd.DataFrame, motives: bool=False) -> Coords:
         # Note: for performance, it would be wise to remove rows that are already paired with all other rows, though this case should be pretty rare in real situations
         coords_left = self.left.block(df, motives=motives)
 
@@ -92,7 +92,7 @@ class AttributeEquivalenceBlocker(BlockerNode):  # Leaf
     """To regroup rows based on equality across columns."""
 
     def __init__(
-        self, blocking_columns, must_not_be_different=None, normalize_strings=True
+        self, blocking_columns: str|Collection[str], must_not_be_different: str|Collection[str]=None, normalize_strings: bool=True
     ):
         super().__init__()
 
@@ -140,7 +140,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data, motives=False):
+    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
         """Regroup rows based on equality of one or more columns"""
 
         print("Processing", self)
@@ -196,7 +196,7 @@ class OverlapBlocker(BlockerNode):  # Leaf
     """To regroup rows based on overlap of one or more columns."""
 
     def __init__(
-        self, blocking_columns, overlap=1, word_level=False, normalize_strings=True
+        self, blocking_columns: str|Collection[str], overlap: int=1, word_level: bool=False, normalize_strings: bool=True
     ):
         super().__init__()
 
@@ -237,7 +237,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data, motives=False):
+    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
         """Regroup rows based on overlap of one or more columns"""
 
         print("Processing", self)
@@ -291,12 +291,12 @@ class MixedBlocker(BlockerNode):  # Leaf; For ANDs and RAM
 
     def __init__(
         self,
-        equivalence_columns,
-        overlap_columns,
-        must_not_be_different=None,
-        overlap=1,
-        word_level=False,
-        normalize_strings=True,
+        equivalence_columns: str|Collection[str],
+        overlap_columns: str|Collection[str],
+        must_not_be_different: str|Collection[str]=None,
+        overlap: int=1,
+        word_level: bool=False,
+        normalize_strings: bool=True,
     ):
         super().__init__()
 
@@ -377,7 +377,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data, motives=False):
+    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
         """Regroup rows based on overlap of one or more columns"""
 
         print("Processing", self)

From 83b193229d03a43f473179ed8be0bb6ef9f6227c Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Wed, 4 Feb 2026 09:54:50 +0100
Subject: [PATCH 16/20] fix: parse_list crashes on strings that do not
 represent lists

---
 docs/example.ipynb             | 45 ++++++++++++++++++++++++++++++++++
 src/ms_blocking/ms_blocking.py | 33 +++++++++++++++----------
 src/ms_blocking/utils.py       | 21 +++++++++++-----
 3 files changed, 80 insertions(+), 19 deletions(-)

diff --git a/docs/example.ipynb b/docs/example.ipynb
index aef6ee6..8243053 100644
--- a/docs/example.ipynb
+++ b/docs/example.ipynb
@@ -1851,6 +1851,51 @@
    ],
    "outputs": [],
    "execution_count": null
+  },
+  {
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-02-03T16:43:50.398834500Z",
+     "start_time": "2026-02-03T16:43:50.048297Z"
+    }
+   },
+   "cell_type": "code",
+   "source": [
+    "city_blocker = msb.OverlapBlocker([\"City\"])\n",
+    "city_blocker.block(df)"
+   ],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Processing OverlapBlocker(['City'], 1)\n"
+     ]
+    },
+    {
+     "ename": "SyntaxError",
+     "evalue": "unterminated string literal (detected at line 1) (<unknown>, line 1)",
+     "output_type": "error",
+     "traceback": [
+      "Traceback \u001B[36m(most recent call last)\u001B[39m:\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\IPython\\core\\interactiveshell.py:3701\u001B[39m in \u001B[95mrun_code\u001B[39m\n    exec(code_obj, self.user_global_ns, self.user_ns)\n",
+      "  Cell \u001B[92mIn[19]\u001B[39m\u001B[92m, line 2\u001B[39m\n    city_blocker.block(df)\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\ms_blocking\\src\\ms_blocking\\ms_blocking.py:250\u001B[39m in \u001B[95mblock\u001B[39m\n    temp_data[col] = temp_data[col].apply(\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\series.py:4943\u001B[39m in \u001B[95mapply\u001B[39m\n    ).apply()\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\apply.py:1422\u001B[39m in \u001B[95mapply\u001B[39m\n    return self.apply_standard()\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\apply.py:1502\u001B[39m in \u001B[95mapply_standard\u001B[39m\n    mapped = obj._map_values(\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\base.py:925\u001B[39m in \u001B[95m_map_values\u001B[39m\n    return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\algorithms.py:1743\u001B[39m in \u001B[95mmap_array\u001B[39m\n    return lib.map_infer(values, mapper, convert=convert)\n",
+      "  File \u001B[92mpandas/_libs/lib.pyx:2999\u001B[39m in \u001B[95mpandas._libs.lib.map_infer\u001B[39m\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\.venv\\Lib\\site-packages\\pandas\\core\\apply.py:1491\u001B[39m in \u001B[95mcurried\u001B[39m\n    return func(x, *self.args, **self.kwargs)\n",
+      "  File \u001B[92m~\\PycharmProjects\\MSBlock\\ms_blocking\\src\\ms_blocking\\utils.py:374\u001B[39m in \u001B[95mparse_list\u001B[39m\n    s = str(s).strip()\n",
+      "  File \u001B[92m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\ast.py:66\u001B[39m in \u001B[95mliteral_eval\u001B[39m\n    node_or_string = parse(node_or_string.lstrip(\" \\t\"), mode='eval')\n",
+      "\u001B[36m  \u001B[39m\u001B[36mFile \u001B[39m\u001B[32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\ast.py:52\u001B[39m\u001B[36m in \u001B[39m\u001B[35mparse\u001B[39m\n\u001B[31m    \u001B[39m\u001B[31mreturn compile(source, filename, mode, flags,\u001B[39m\n",
+      "  \u001B[36mFile \u001B[39m\u001B[32m<unknown>:1\u001B[39m\n\u001B[31m    \u001B[39m\u001B[31mVilleneuve d'Ascq\u001B[39m\n                ^\n\u001B[31mSyntaxError\u001B[39m\u001B[31m:\u001B[39m unterminated string literal (detected at line 1)\n"
+     ]
+    }
+   ],
+   "execution_count": 19
   }
  ],
  "metadata": {
diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index cdfa664..a6a6097 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -48,7 +48,7 @@ def __init__(self, left, right):
     def __repr__(self):
         return f"AndNode{{{self.left}, {self.right}}}"
 
-    def block(self, df: pd.DataFrame, motives: bool=False) -> Coords:
+    def block(self, df: pd.DataFrame, motives: bool = False) -> Coords:
         # In order not to perform redundant computations, we first filter out the rows that were not considered by the first blocker before running the second blocker
         coords_left = self.left.block(df, motives=motives)
 
@@ -78,7 +78,7 @@ def __init__(self, left, right):
     def __repr__(self):
         return f"OrNode{{{self.left}, {self.right}}}"
 
-    def block(self, df: pd.DataFrame, motives: bool=False) -> Coords:
+    def block(self, df: pd.DataFrame, motives: bool = False) -> Coords:
         # Note: for performance, it would be wise to remove rows that are already paired with all other rows, though this case should be pretty rare in real situations
         coords_left = self.left.block(df, motives=motives)
 
@@ -92,7 +92,10 @@ class AttributeEquivalenceBlocker(BlockerNode):  # Leaf
     """To regroup rows based on equality across columns."""
 
     def __init__(
-        self, blocking_columns: str|Collection[str], must_not_be_different: str|Collection[str]=None, normalize_strings: bool=True
+        self,
+        blocking_columns: str | Collection[str],
+        must_not_be_different: str | Collection[str] = None,
+        normalize_strings: bool = True,
     ):
         super().__init__()
 
@@ -140,7 +143,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
+    def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
         """Regroup rows based on equality of one or more columns"""
 
         print("Processing", self)
@@ -196,7 +199,11 @@ class OverlapBlocker(BlockerNode):  # Leaf
     """To regroup rows based on overlap of one or more columns."""
 
     def __init__(
-        self, blocking_columns: str|Collection[str], overlap: int=1, word_level: bool=False, normalize_strings: bool=True
+        self,
+        blocking_columns: str | Collection[str],
+        overlap: int = 1,
+        word_level: bool = False,
+        normalize_strings: bool = True,
     ):
         super().__init__()
 
@@ -237,7 +244,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
+    def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
         """Regroup rows based on overlap of one or more columns"""
 
         print("Processing", self)
@@ -291,12 +298,12 @@ class MixedBlocker(BlockerNode):  # Leaf; For ANDs and RAM
 
     def __init__(
         self,
-        equivalence_columns: str|Collection[str],
-        overlap_columns: str|Collection[str],
-        must_not_be_different: str|Collection[str]=None,
-        overlap: int=1,
-        word_level: bool=False,
-        normalize_strings: bool=True,
+        equivalence_columns: str | Collection[str],
+        overlap_columns: str | Collection[str],
+        must_not_be_different: str | Collection[str] = None,
+        overlap: int = 1,
+        word_level: bool = False,
+        normalize_strings: bool = True,
     ):
         super().__init__()
 
@@ -377,7 +384,7 @@ def __eq__(self, other):
         else:
             return False
 
-    def block(self, data: pd.DataFrame, motives: bool=False) -> Coords:
+    def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
         """Regroup rows based on overlap of one or more columns"""
 
         print("Processing", self)
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 100ab2e..5c7125c 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -347,7 +347,9 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
       Stringified representation of a list e.g. "['string 1', 'string 2', ...]"
 
     word_level : bool
-      Whether to return a list of all words within s instead of a list of each comma-separated element
+      Whether to return a list of all words within s instead of a list of each comma-separated element;
+      Note that if passed a string that does not represent a list, this argument will be ignored and the function
+      will return a list of each word in the string
 
     Returns
     -------
@@ -363,7 +365,9 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     """
 
     if type(s) is list:  # If we already have a list
-        if len(s) == 1 and s[0][0] == "[" and s[0][-1] == "]":
+        if (
+            len(s) == 1 and str(s[0]).startswith("[") and str(s[0]).startswith("]")
+        ):  # In case we have a stringified list INSIDE a normal list
             s = s[0]
         else:
             return s
@@ -376,10 +380,15 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     if not s:
         return []
 
-    try:
-        parts = ast.literal_eval(s)
-    except ValueError:  # doesn't seem to be a stringified list
-        parts = s.split("', '")
+    if s.startswith("[") and s.startswith("]"):  # Stringified list?
+        try:
+            parts = ast.literal_eval(s)
+        except ValueError:  # doesn't seem to be a stringified list
+            parts = s.split("', '")
+        except SyntaxError:  # In case we have a string surrounded by brackets
+            parts = s.split()
+    else:
+        parts = s.split()
 
     cleaned_items = [str(part).strip().strip("''") for part in parts]
 

From b37e0178b4e0dafa77cb9d0c60433f64cc04e94c Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Wed, 4 Feb 2026 10:00:17 +0100
Subject: [PATCH 17/20] fix: parse_list use endswith instead of startswith

---
 src/ms_blocking/utils.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 5c7125c..0e10488 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -366,7 +366,7 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
 
     if type(s) is list:  # If we already have a list
         if (
-            len(s) == 1 and str(s[0]).startswith("[") and str(s[0]).startswith("]")
+            len(s) == 1 and str(s[0]).startswith("[") and str(s[0]).endswith("]")
         ):  # In case we have a stringified list INSIDE a normal list
             s = s[0]
         else:
@@ -380,7 +380,7 @@ def parse_list(s: str | List, word_level: bool = False) -> List[str]:
     if not s:
         return []
 
-    if s.startswith("[") and s.startswith("]"):  # Stringified list?
+    if s.startswith("[") and s.endswith("]"):  # Stringified list?
         try:
             parts = ast.literal_eval(s)
         except ValueError:  # doesn't seem to be a stringified list

From 9fce21d39c1ee6187dd06ea7859a633fd60a0d70 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Wed, 4 Feb 2026 10:49:47 +0100
Subject: [PATCH 18/20] refactor: remove remove_rows_if_value_appears_only_once
 since it was redundant with df.duplicated

---
 src/ms_blocking/ms_blocking.py | 17 +++++------------
 src/ms_blocking/utils.py       | 30 ------------------------------
 2 files changed, 5 insertions(+), 42 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index a6a6097..9301136 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -153,10 +153,7 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
         for col in self.blocking_columns:
             if self.normalize:
                 temp_data[col] = temp_data[col].apply(normalize)
-        temp_data = temp_data.dropna(subset=self.blocking_columns)
-        temp_data = remove_rows_if_value_appears_only_once(
-            temp_data, self.blocking_columns
-        )
+            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
 
         if len(temp_data) == 0:  # No pairs
             if motives:
@@ -249,9 +246,7 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         print("Processing", self)
 
-        temp_data = data.copy()
-
-        temp_data = temp_data[self.blocking_columns].copy()
+        temp_data = data[self.blocking_columns].copy()
 
         for col in self.blocking_columns:
             temp_data[col] = temp_data[col].apply(
@@ -260,12 +255,10 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
             temp_data = temp_data.explode(col)
             if self.normalize:
                 temp_data[col] = temp_data[col].apply(normalize)
+            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
         temp_data = temp_data.dropna(
             subset=self.blocking_columns
         )  # Remove empty objects
-        temp_data = remove_rows_if_value_appears_only_once(
-            temp_data, self.blocking_columns
-        )
 
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
@@ -274,7 +267,7 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
                 return set()
 
         # Use the DataFrame index for grouping and forming pairs
-        # Using frozenset since they are ahshable and thus can be used as dictionary keys
+        # Using frozenset since they are hashable and thus can be used as dictionary keys
         groups = temp_data.groupby(self.blocking_columns).apply(
             lambda x: frozenset(x.index), include_groups=False
         )
@@ -405,9 +398,9 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
                     else parse_list(x, self.word_level)
                 )
                 temp_data = temp_data.explode(col)
+            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
 
         temp_data = temp_data.dropna(subset=total_columns)  # Remove empty objects
-        temp_data = remove_rows_if_value_appears_only_once(temp_data, total_columns)
 
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
diff --git a/src/ms_blocking/utils.py b/src/ms_blocking/utils.py
index 0e10488..b644a43 100644
--- a/src/ms_blocking/utils.py
+++ b/src/ms_blocking/utils.py
@@ -70,36 +70,6 @@ def __repr__(self):
 _SPACE_RE = re.compile(r"\s+")
 
 
-def remove_rows_if_value_appears_only_once(
-    data: pd.DataFrame, cols: Columns
-) -> pd.DataFrame:
-    """Drop rows of a Pandas DataFrame where a certain column's values appears only once.
-
-    Ensures all elements of provided columns appear at least twice in their column
-
-    Parameters
-    ----------
-    data : DataFrame
-      DataFrame to preprocess
-
-    cols : List[str]
-      List of columns where rows that contain non-duplicated elements shall be discarded
-
-    Returns
-    -------
-    DataFrame
-      DataFrame with reduced number of rows
-
-    Examples
-    --------
-    >>> remove_rows_if_value_appears_only_once(data, ['name', 'city'])
-    """
-    for col in cols:
-        counts = data[col].map(data[col].value_counts())
-        data = data[counts >= 2]
-    return data
-
-
 def start_from_zero(figures: Collection[int]) -> List[int]:
     """Turns a list of integers into a same-length list that starts at 0, without gaps
 

From 8cdd3bd30c1666ba23be717808214d044fc5d758 Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Wed, 4 Feb 2026 11:16:07 +0100
Subject: [PATCH 19/20] perf: move dropna higher in the block logic

---
 src/ms_blocking/ms_blocking.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index 9301136..ca99978 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -148,7 +148,11 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         print("Processing", self)
 
-        temp_data = data.copy()
+        temp_data = (
+            data[self.blocking_columns + self.must_not_be_different]
+            .dropna(subset=self.blocking_columns)
+            .copy()
+        )
 
         for col in self.blocking_columns:
             if self.normalize:
@@ -246,7 +250,7 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         print("Processing", self)
 
-        temp_data = data[self.blocking_columns].copy()
+        temp_data = data[self.blocking_columns].dropna().copy()
 
         for col in self.blocking_columns:
             temp_data[col] = temp_data[col].apply(
@@ -256,9 +260,6 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
             if self.normalize:
                 temp_data[col] = temp_data[col].apply(normalize)
             temp_data = temp_data[temp_data[col].duplicated(keep=False)]
-        temp_data = temp_data.dropna(
-            subset=self.blocking_columns
-        )  # Remove empty objects
 
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
@@ -384,7 +385,7 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         total_columns = self.equivalence_columns + self.overlap_columns
 
-        temp_data = data[total_columns].copy()
+        temp_data = data[total_columns].dropna().copy()
 
         for col in total_columns:
             if col in self.equivalence_columns:
@@ -400,8 +401,6 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
                 temp_data = temp_data.explode(col)
             temp_data = temp_data[temp_data[col].duplicated(keep=False)]
 
-        temp_data = temp_data.dropna(subset=total_columns)  # Remove empty objects
-
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
                 return dict()

From 9521338a5b354d29a25b763626c589a0eaeb08dc Mon Sep 17 00:00:00 2001
From: RTiedrez <pokroor@gmail.com>
Date: Wed, 4 Feb 2026 12:04:23 +0100
Subject: [PATCH 20/20] refactor: make preprocessing in .block more compact and
 pandas-esque

---
 src/ms_blocking/ms_blocking.py | 64 +++++++++++++++++++++-------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/src/ms_blocking/ms_blocking.py b/src/ms_blocking/ms_blocking.py
index ca99978..b3552af 100644
--- a/src/ms_blocking/ms_blocking.py
+++ b/src/ms_blocking/ms_blocking.py
@@ -154,11 +154,17 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
             .copy()
         )
 
-        for col in self.blocking_columns:
-            if self.normalize:
-                temp_data[col] = temp_data[col].apply(normalize)
-            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
+        # Normalize strings if required
+        if self.normalize:
+            temp_data[self.blocking_columns] = temp_data[self.blocking_columns].apply(
+                lambda col: col.apply(normalize)
+            )
+        # Non-duplicated values cannot belong to any block; We discard them
+        temp_data = temp_data[
+            temp_data.duplicated(keep=False, subset=self.blocking_columns)
+        ]
 
+        # No need to run anything else if we already ran out of candidates
         if len(temp_data) == 0:  # No pairs
             if motives:
                 return dict()
@@ -252,15 +258,24 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         temp_data = data[self.blocking_columns].dropna().copy()
 
-        for col in self.blocking_columns:
-            temp_data[col] = temp_data[col].apply(
-                parse_list, word_level=self.word_level
+        # Ensure we check for overlap between lists of strings
+        temp_data[self.blocking_columns] = temp_data[self.blocking_columns].apply(
+            lambda col: col.apply(parse_list, word_level=self.word_level)
+        )
+        # Split elements of said lists to compare them one by one
+        temp_data = temp_data.explode(self.blocking_columns)
+        # Normalize strings if required
+        if self.normalize:
+            temp_data[self.blocking_columns] = temp_data[self.blocking_columns].apply(
+                lambda col: col.apply(normalize)
             )
-            temp_data = temp_data.explode(col)
-            if self.normalize:
-                temp_data[col] = temp_data[col].apply(normalize)
-            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
 
+        # Non-duplicated values cannot belong to any block; We discard them
+        temp_data = temp_data[
+            temp_data.duplicated(keep=False, subset=self.blocking_columns)
+        ]
+
+        # No need to run anything else if we already ran out of candidates
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
                 return dict()
@@ -387,20 +402,21 @@ def block(self, data: pd.DataFrame, motives: bool = False) -> Coords:
 
         temp_data = data[total_columns].dropna().copy()
 
-        for col in total_columns:
-            if col in self.equivalence_columns:
-                temp_data[col] = temp_data[col].apply(normalize)
-            elif col in self.overlap_columns:
-                temp_data[col] = temp_data[col].apply(
-                    lambda x: [
-                        normalize(item) for item in parse_list(x, self.word_level)
-                    ]
-                    if self.normalize
-                    else parse_list(x, self.word_level)
-                )
-                temp_data = temp_data.explode(col)
-            temp_data = temp_data[temp_data[col].duplicated(keep=False)]
+        # Ensure we check for overlap between lists of strings
+        temp_data[self.overlap_columns] = temp_data[self.overlap_columns].apply(
+            lambda col: col.apply(parse_list, word_level=self.word_level)
+        )
+        # Split elements of said lists to compare them one by one
+        temp_data = temp_data.explode(self.overlap_columns)
+        # Normalize strings if required
+        if self.normalize:
+            temp_data[total_columns] = temp_data[total_columns].apply(
+                lambda col: col.apply(normalize)
+            )
+        # Non-duplicated values cannot belong to any block; We discard them
+        temp_data = temp_data[temp_data.duplicated(keep=False, subset=total_columns)]
 
+        # No need to run anything else if we already ran out of candidates
         if len(temp_data) == 0:  # No pairs fulfill any overlap
             if motives:
                 return dict()