From 9ce2138ed15bdf5aa493dffefab5be87ba032a01 Mon Sep 17 00:00:00 2001 From: Zach Boldyga Date: Mon, 4 May 2026 21:19:09 -0700 Subject: [PATCH 1/7] illico test fix --- tests/test_rank_genes_groups.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 3f3bed8bbe..2c949b9711 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -355,7 +355,9 @@ def test_mask_not_equal(): @pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) @pytest.mark.parametrize("test", ["ovo", "ovr"]) -@pytest.mark.parametrize("exp_post_agg", [True, False], ids=["post_exp", "pre_exp"]) +@pytest.mark.parametrize( + "mean_in_log_space", [True, False], ids=["log_space_mean", "linear_space_mean"] +) @pytest.mark.parametrize( "tie_correct", [True, False], ids=["tie_correct", "no_tie_correct"] ) @@ -368,7 +370,7 @@ def test_illico( subtests: pytest.Subtests, groups: Literal["all"] | Sequence[str], *, - exp_post_agg: bool, + mean_in_log_space: bool, tie_correct: bool, ): @@ -386,7 +388,7 @@ def test_illico( n_genes=pbmc.n_vars, tie_correct=tie_correct, corr_method=corr_method, - exp_post_agg=exp_post_agg, + mean_in_log_space=mean_in_log_space, groups=groups, ) @@ -398,7 +400,7 @@ def test_illico( n_genes=pbmc.n_vars, tie_correct=tie_correct, corr_method=corr_method, - exp_post_agg=exp_post_agg, + mean_in_log_space=mean_in_log_space, groups=groups, ) scanpy_results = pbmc.uns["rank_genes_groups"] From 8e766117be63c93d892acd7e62c3effcdd107362 Mon Sep 17 00:00:00 2001 From: Zach Boldyga Date: Mon, 4 May 2026 21:34:29 -0700 Subject: [PATCH 2/7] wilcoxon-illico included in test_mean_in_log_space --- tests/test_rank_genes_groups.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 2c949b9711..fb5a5452e7 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -439,10 +439,18 @@ def test_illico( (False, -1.0), ], ) -@pytest.mark.parametrize("method", ["wilcoxon", "t-test", "t-test_overestim_var"]) +@pytest.mark.parametrize( + "method", + [ + "wilcoxon", + "t-test", + "t-test_overestim_var", + pytest.param("wilcoxon_illico", marks=needs.illico), + ], +) def test_mean_in_log_space( expected_logfc: float, - method: Literal["wilcoxon", "t-test", "t-test_overestim_var"], + method: Literal["wilcoxon", "wilcoxon_illico", "t-test", "t-test_overestim_var"], *, mean_in_log_space: bool, ): From 4263b081c384855af4a09d76eb7c7a00e7c67626 Mon Sep 17 00:00:00 2001 From: Zach Boldyga Date: Mon, 4 May 2026 22:36:53 -0700 Subject: [PATCH 3/7] more robust and performant illico dataframe handling logic --- src/scanpy/tools/_rank_genes_groups.py | 66 ++++++++++++--------- tests/test_rank_genes_groups.py | 81 +++++++++++++++++++++++++- 2 files changed, 119 insertions(+), 28 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 910577f781..7ae14254de 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -47,6 +47,37 @@ def _select_top_n(scores: NDArray, n_top: int): return global_indices +def _illico_results_to_iter( + illico_df: pd.DataFrame, + groups_order: NDArray, + ireference: int | None, + *, + feature_order, +): + """Reshape illico's long-form output into per-group ``(index, z, p)`` tuples. + + illico returns a DataFrame with a 2-level MultiIndex ``(group, feature)`` + and columns including ``z_score`` and ``p_value``. We pivot to wide form + via :meth:`pandas.Series.unstack` (label-based, robust to row order or + level renaming), then ``reindex`` the columns to ``feature_order`` so + per-gene values stay aligned with the caller's gene axis (``unstack`` + sorts the unstacked level by default, which would otherwise scramble the + gene→value mapping for non-alphabetical ``var_names``). + + Yields rows in ``groups_order`` order with ``group_index`` set to each + group's position in ``groups_order``. Reference and any group illico + didn't see are filtered out. + """ + z_wide = illico_df["z_score"].unstack().reindex(columns=feature_order) + p_wide = illico_df["p_value"].unstack().reindex(columns=feature_order) + ref_label = None if ireference is None else groups_order[ireference] + return ( + (gi, z_wide.loc[g].to_numpy(), p_wide.loc[g].to_numpy()) + for gi, g in enumerate(groups_order) + if g != ref_label and g in z_wide.index + ) + + @njit def rankdata(data: NDArray[np.number]) -> NDArray[np.float64]: """Parallelized version of scipy.stats.rankdata.""" @@ -469,33 +500,14 @@ def compute_statistics( # noqa: PLR0912, PLR0915 alternative="two-sided", use_rust=False, ) - # Generate a lookup of category -> result excluding the refernece if it is present. - generate_test_results_map = { - group_cat: ( - group["z_score"].to_numpy(copy=True), - group["p_value"].to_numpy(copy=True), - ) - for (_, group) in illico_df.groupby(level="pert") - if ( - group_cat := np.unique( - group.index.get_level_values("pert").to_numpy(copy=True) - ).item() - ) - != ( - None - if self.ireference is None - else self.groups_order[self.ireference] - ) - } - # Create the iterator that is expected by the other method-branches. - groups_order_list = self.groups_order.tolist() - generate_test_results = ( - ( - groups_order_list.index(group_cat), - *generate_test_results_map[group_cat], - ) - for group_cat in self.groups_order - if group_cat in generate_test_results_map + # Reshape illico's long-form output into the per-group iterator + # the rest of compute_statistics expects, aligning per-gene + # values to var_names so they stay consistent with self.X. + generate_test_results = _illico_results_to_iter( + illico_df, + self.groups_order, + self.ireference, + feature_order=self.var_names, ) else: generate_test_results = self.wilcoxon(tie_correct=tie_correct) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index fb5a5452e7..1a71e16f12 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -17,7 +17,7 @@ from scanpy._utils.random import _LegacyRng from scanpy.get import rank_genes_groups_df from scanpy.tools import rank_genes_groups -from scanpy.tools._rank_genes_groups import _RankGenes +from scanpy.tools._rank_genes_groups import _RankGenes, _illico_results_to_iter from testing.scanpy._helpers import random_mask from testing.scanpy._helpers.data import pbmc68k_reduced from testing.scanpy._pytest.marks import needs @@ -79,6 +79,27 @@ def get_true_scores(method: Literal["t-test", "wilcoxon"]) -> Expected: return Expected(names=expected["names"].astype("T"), scores=expected["scores"]) +def get_illico_results_df(n_groups: int, n_genes: int, *, seed: int = 0) -> pd.DataFrame: + """Synthetic illico-shaped output used by the _illico_results_to_iter tests. + + Features are intentionally non-alphabetical so that any silent + reordering by the helper (e.g., `unstack` defaulting to sort=True) + surfaces as a test failure. + """ + rng = np.random.default_rng(seed) + groups = [f"g{i}" for i in range(n_groups)] + features = [f"f{n_genes - 1 - j}" for j in range(n_genes)] # reversed -> not sorted + return pd.DataFrame( + { + "z_score": rng.normal(size=n_groups * n_genes), + "p_value": rng.uniform(size=n_groups * n_genes), + }, + index=pd.MultiIndex.from_product( + [groups, features], names=["pert", "feature"] + ), + ) + + # TODO: Make dask compatible @pytest.mark.parametrize("method", ["t-test", "wilcoxon"]) @pytest.mark.parametrize("array_type", ARRAY_TYPES_MEM) @@ -353,6 +374,64 @@ def test_mask_not_equal(): assert not np.array_equal(no_mask, with_mask) +@pytest.mark.parametrize( + ("groups_order", "ireference", "expected_indices"), + [ + (["g0", "g1", "g2"], None, [0, 1, 2]), + (["g0", "g1", "g2"], 1, [0, 2]), + (["g2", "g0"], None, [0, 1]), + ], + ids=["vs_rest", "vs_reference", "subset"], +) +def test_illico_iter(groups_order, ireference, expected_indices): + df = get_illico_results_df(n_groups=3, n_genes=4) + feature_order = df.index.unique(level="feature") + out = list( + _illico_results_to_iter( + df, np.array(groups_order), ireference, feature_order=feature_order + ) + ) + assert [t[0] for t in out] == expected_indices + for gi, z, p in out: + sub = df.xs(groups_order[gi], level=0).reindex(feature_order) + np.testing.assert_array_equal(z, sub["z_score"].to_numpy()) + np.testing.assert_array_equal(p, sub["p_value"].to_numpy()) + + +def test_illico_iter_missing_group(): + df = get_illico_results_df(n_groups=3, n_genes=4) + out = list( + _illico_results_to_iter( + df, + np.array(["g0", "missing"]), + None, + feature_order=df.index.unique(level="feature"), + ) + ) + assert [t[0] for t in out] == [0] + + +def test_illico_iter_row_order(): + """Pivoting by label means shuffled rows must produce identical output.""" + df = get_illico_results_df(n_groups=3, n_genes=4) + feature_order = df.index.unique(level="feature") + df_shuffled = df.sample(frac=1.0, random_state=42) + groups_order = np.array(["g0", "g1", "g2"]) + out = list( + _illico_results_to_iter(df, groups_order, None, feature_order=feature_order) + ) + out_shuffled = list( + _illico_results_to_iter( + df_shuffled, groups_order, None, feature_order=feature_order + ) + ) + assert len(out) == len(out_shuffled) == 3 + for (gi_a, z_a, p_a), (gi_b, z_b, p_b) in zip(out, out_shuffled, strict=True): + assert gi_a == gi_b + np.testing.assert_array_equal(z_a, z_b) + np.testing.assert_array_equal(p_a, p_b) + + @pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) @pytest.mark.parametrize("test", ["ovo", "ovr"]) @pytest.mark.parametrize( From 2fde37ed013a83571a7cddd0c892115533dd29df Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 5 May 2026 05:38:58 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/scanpy/tools/_rank_genes_groups.py | 2 +- tests/test_rank_genes_groups.py | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 7ae14254de..0ff774025c 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -456,7 +456,7 @@ def logreg( if len(self.groups_order) <= 2: break - def compute_statistics( # noqa: PLR0912, PLR0915 + def compute_statistics( # noqa: PLR0912 self, method: DETest, *, diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 1a71e16f12..5a43ace614 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -17,7 +17,7 @@ from scanpy._utils.random import _LegacyRng from scanpy.get import rank_genes_groups_df from scanpy.tools import rank_genes_groups -from scanpy.tools._rank_genes_groups import _RankGenes, _illico_results_to_iter +from scanpy.tools._rank_genes_groups import _illico_results_to_iter, _RankGenes from testing.scanpy._helpers import random_mask from testing.scanpy._helpers.data import pbmc68k_reduced from testing.scanpy._pytest.marks import needs @@ -79,7 +79,9 @@ def get_true_scores(method: Literal["t-test", "wilcoxon"]) -> Expected: return Expected(names=expected["names"].astype("T"), scores=expected["scores"]) -def get_illico_results_df(n_groups: int, n_genes: int, *, seed: int = 0) -> pd.DataFrame: +def get_illico_results_df( + n_groups: int, n_genes: int, *, seed: int = 0 +) -> pd.DataFrame: """Synthetic illico-shaped output used by the _illico_results_to_iter tests. Features are intentionally non-alphabetical so that any silent @@ -94,9 +96,7 @@ def get_illico_results_df(n_groups: int, n_genes: int, *, seed: int = 0) -> pd.D "z_score": rng.normal(size=n_groups * n_genes), "p_value": rng.uniform(size=n_groups * n_genes), }, - index=pd.MultiIndex.from_product( - [groups, features], names=["pert", "feature"] - ), + index=pd.MultiIndex.from_product([groups, features], names=["pert", "feature"]), ) From 806ea7d17b0c4e1b38286ce3eb030be11f2eac3c Mon Sep 17 00:00:00 2001 From: Zach Boldyga Date: Thu, 7 May 2026 14:04:56 -0700 Subject: [PATCH 5/7] simpler comment, better variable names --- src/scanpy/tools/_rank_genes_groups.py | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 0ff774025c..eb88d705fd 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -58,23 +58,15 @@ def _illico_results_to_iter( illico returns a DataFrame with a 2-level MultiIndex ``(group, feature)`` and columns including ``z_score`` and ``p_value``. We pivot to wide form - via :meth:`pandas.Series.unstack` (label-based, robust to row order or - level renaming), then ``reindex`` the columns to ``feature_order`` so - per-gene values stay aligned with the caller's gene axis (``unstack`` - sorts the unstacked level by default, which would otherwise scramble the - gene→value mapping for non-alphabetical ``var_names``). - - Yields rows in ``groups_order`` order with ``group_index`` set to each - group's position in ``groups_order``. Reference and any group illico - didn't see are filtered out. + via :meth:`pandas.Series.unstack`. """ z_wide = illico_df["z_score"].unstack().reindex(columns=feature_order) p_wide = illico_df["p_value"].unstack().reindex(columns=feature_order) ref_label = None if ireference is None else groups_order[ireference] return ( - (gi, z_wide.loc[g].to_numpy(), p_wide.loc[g].to_numpy()) - for gi, g in enumerate(groups_order) - if g != ref_label and g in z_wide.index + (group_index, z_wide.loc[group_name].to_numpy(), p_wide.loc[group_name].to_numpy()) + for group_index, group_name in enumerate(groups_order) + if group_name != ref_label and group_name in z_wide.index ) From f5741c46a98117ddd8b857638a0dcebcefbb9ede Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 7 May 2026 21:11:11 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/scanpy/tools/_rank_genes_groups.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index eb88d705fd..8bc7170b30 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -64,7 +64,11 @@ def _illico_results_to_iter( p_wide = illico_df["p_value"].unstack().reindex(columns=feature_order) ref_label = None if ireference is None else groups_order[ireference] return ( - (group_index, z_wide.loc[group_name].to_numpy(), p_wide.loc[group_name].to_numpy()) + ( + group_index, + z_wide.loc[group_name].to_numpy(), + p_wide.loc[group_name].to_numpy(), + ) for group_index, group_name in enumerate(groups_order) if group_name != ref_label and group_name in z_wide.index ) From 68caea9b5673b85affa0d8a270130299fe5fc69c Mon Sep 17 00:00:00 2001 From: Zach Boldyga Date: Fri, 8 May 2026 14:56:16 -0700 Subject: [PATCH 7/7] more optimal approach, simplified tests. --- src/scanpy/tools/_rank_genes_groups.py | 26 ++++++-------- tests/test_rank_genes_groups.py | 49 +++----------------------- 2 files changed, 15 insertions(+), 60 deletions(-) diff --git a/src/scanpy/tools/_rank_genes_groups.py b/src/scanpy/tools/_rank_genes_groups.py index 8bc7170b30..e555c6c602 100644 --- a/src/scanpy/tools/_rank_genes_groups.py +++ b/src/scanpy/tools/_rank_genes_groups.py @@ -51,26 +51,26 @@ def _illico_results_to_iter( illico_df: pd.DataFrame, groups_order: NDArray, ireference: int | None, - *, - feature_order, ): - """Reshape illico's long-form output into per-group ``(index, z, p)`` tuples. + """Yield per-group ``(index, z, p)`` tuples from illico's long-form output. - illico returns a DataFrame with a 2-level MultiIndex ``(group, feature)`` - and columns including ``z_score`` and ``p_value``. We pivot to wide form - via :meth:`pandas.Series.unstack`. + illico returns a DataFrame with a 2-level MultiIndex ``(pert, feature)`` + and columns including ``z_score`` and ``p_value``. We stream one group + at a time via `pandas.Series.loc`, trusting illico_df groups are ordered + by ``var_name``. """ - z_wide = illico_df["z_score"].unstack().reindex(columns=feature_order) - p_wide = illico_df["p_value"].unstack().reindex(columns=feature_order) ref_label = None if ireference is None else groups_order[ireference] + z_series = illico_df["z_score"] + p_series = illico_df["p_value"] + illico_groups = set(illico_df.index.unique(level="pert")) return ( ( group_index, - z_wide.loc[group_name].to_numpy(), - p_wide.loc[group_name].to_numpy(), + z_series.loc[group_name].to_numpy(), + p_series.loc[group_name].to_numpy(), ) for group_index, group_name in enumerate(groups_order) - if group_name != ref_label and group_name in z_wide.index + if group_name != ref_label and group_name in illico_groups ) @@ -496,14 +496,10 @@ def compute_statistics( # noqa: PLR0912 alternative="two-sided", use_rust=False, ) - # Reshape illico's long-form output into the per-group iterator - # the rest of compute_statistics expects, aligning per-gene - # values to var_names so they stay consistent with self.X. generate_test_results = _illico_results_to_iter( illico_df, self.groups_order, self.ireference, - feature_order=self.var_names, ) else: generate_test_results = self.wilcoxon(tie_correct=tie_correct) diff --git a/tests/test_rank_genes_groups.py b/tests/test_rank_genes_groups.py index 5a43ace614..6c32d45dbc 100644 --- a/tests/test_rank_genes_groups.py +++ b/tests/test_rank_genes_groups.py @@ -84,9 +84,7 @@ def get_illico_results_df( ) -> pd.DataFrame: """Synthetic illico-shaped output used by the _illico_results_to_iter tests. - Features are intentionally non-alphabetical so that any silent - reordering by the helper (e.g., `unstack` defaulting to sort=True) - surfaces as a test failure. + Features are in deliberately non-ascending to test ``var_name`` ordering. """ rng = np.random.default_rng(seed) groups = [f"g{i}" for i in range(n_groups)] @@ -379,59 +377,20 @@ def test_mask_not_equal(): [ (["g0", "g1", "g2"], None, [0, 1, 2]), (["g0", "g1", "g2"], 1, [0, 2]), - (["g2", "g0"], None, [0, 1]), ], - ids=["vs_rest", "vs_reference", "subset"], + ids=["vs_rest", "vs_reference"], ) def test_illico_iter(groups_order, ireference, expected_indices): df = get_illico_results_df(n_groups=3, n_genes=4) feature_order = df.index.unique(level="feature") - out = list( - _illico_results_to_iter( - df, np.array(groups_order), ireference, feature_order=feature_order - ) - ) - assert [t[0] for t in out] == expected_indices + out = list(_illico_results_to_iter(df, np.array(groups_order), ireference)) + assert sorted(t[0] for t in out) == sorted(expected_indices) for gi, z, p in out: sub = df.xs(groups_order[gi], level=0).reindex(feature_order) np.testing.assert_array_equal(z, sub["z_score"].to_numpy()) np.testing.assert_array_equal(p, sub["p_value"].to_numpy()) -def test_illico_iter_missing_group(): - df = get_illico_results_df(n_groups=3, n_genes=4) - out = list( - _illico_results_to_iter( - df, - np.array(["g0", "missing"]), - None, - feature_order=df.index.unique(level="feature"), - ) - ) - assert [t[0] for t in out] == [0] - - -def test_illico_iter_row_order(): - """Pivoting by label means shuffled rows must produce identical output.""" - df = get_illico_results_df(n_groups=3, n_genes=4) - feature_order = df.index.unique(level="feature") - df_shuffled = df.sample(frac=1.0, random_state=42) - groups_order = np.array(["g0", "g1", "g2"]) - out = list( - _illico_results_to_iter(df, groups_order, None, feature_order=feature_order) - ) - out_shuffled = list( - _illico_results_to_iter( - df_shuffled, groups_order, None, feature_order=feature_order - ) - ) - assert len(out) == len(out_shuffled) == 3 - for (gi_a, z_a, p_a), (gi_b, z_b, p_b) in zip(out, out_shuffled, strict=True): - assert gi_a == gi_b - np.testing.assert_array_equal(z_a, z_b) - np.testing.assert_array_equal(p_a, p_b) - - @pytest.mark.parametrize("corr_method", ["benjamini-hochberg", "bonferroni"]) @pytest.mark.parametrize("test", ["ovo", "ovr"]) @pytest.mark.parametrize(