diff --git a/CHANGELOG.md b/CHANGELOG.md index 416bc97..32575c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ and this project adheres to [Semantic Versioning][]. [keep a changelog]: https://keepachangelog.com/en/1.0.0/ [semantic versioning]: https://semver.org/spec/v2.0.0.html +## 2.1.5 + +### Added +- `pl.volcano` now accepts a gene name (`str`) or list of gene names (`list[str]`) for the `top` parameter to annotate specific features on volcano plots + ## 2.1.4 ### Changes diff --git a/pyproject.toml b/pyproject.toml index 63f2b1f..ca5fd2f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ requires = [ "hatchling" ] [project] name = "decoupler" -version = "2.1.4" +version = "2.1.5" description = "Python package to perform enrichment analysis from omics data." readme = "README.md" license = { file = "LICENSE" } diff --git a/src/decoupler/pl/_volcano.py b/src/decoupler/pl/_volcano.py index 2035bd7..8afa379 100644 --- a/src/decoupler/pl/_volcano.py +++ b/src/decoupler/pl/_volcano.py @@ -15,7 +15,7 @@ def volcano( y: str, net: pd.DataFrame | None = None, name: str | None = None, - top: int = 5, + top: int | str | list[str] = 5, thr_stat: float = 0.5, thr_sign: float = 0.05, max_stat: float | None = None, @@ -40,7 +40,8 @@ def volcano( name Name of the source to subset ``net``. top - Number of top differentially abundant features to show. + Number of top differentially abundant features to show. Can also be a gene name (``str``) or a list + of gene names (``list[str]``) to annotate specific features; every name must be in ``data.index``. thr_stat Significance threshold for change statitsics. 
thr_sign @@ -76,7 +77,14 @@ def volcano( assert isinstance(data, pd.DataFrame), m assert {x, y}.issubset(data.columns), m assert (net is None) == (name is None), "net and name must be both defined or both None" - assert isinstance(top, int) and top > 0, "top must be int and > 0" + if isinstance(top, str): + top = [top] + if isinstance(top, list): + assert all(isinstance(g, str) for g in top), "top must contain only str gene names" + missing = [g for g in top if g not in data.index] + assert not missing, f"gene names not found in data.index: {missing}" + else: + assert isinstance(top, int) and top > 0, "top must be int, str, list[str], and int must be > 0" assert isinstance(thr_stat, int | float) and thr_stat > 0, "thr_stat must be numeric and > 0" assert isinstance(thr_sign, int | float) and thr_sign > 0, "thr_sign must be numeric and > 0" if max_stat is None: @@ -127,8 +135,11 @@ def volcano( bp.ax.set_xlabel(x) bp.ax.set_ylabel(rf"$-\log_{{10}}({y})$") # Show top sign features - signs = df[up_msk | dw_msk].sort_values("pval", ascending=False) - signs = signs.iloc[:top] + if isinstance(top, list): + signs = df[df.index.isin(top)] + else: + signs = df[up_msk | dw_msk].sort_values("pval", ascending=False) + signs = signs.iloc[:top] texts = [] for x, y, s in zip(signs["stat"], signs["pval"], signs.index, strict=False): texts.append(bp.ax.text(x, y, s)) diff --git a/tests/pl/test_volcano.py b/tests/pl/test_volcano.py index c69efef..d8c0854 100644 --- a/tests/pl/test_volcano.py +++ b/tests/pl/test_volcano.py @@ -32,3 +32,21 @@ def test_volcano( else: with pytest.raises(AssertionError): dc.pl.volcano(data=deg, x="stat", y="padj", net=net, name=name, return_fig=True) + + +@pytest.mark.parametrize( + "top,a_err", + [ + ["G03", False], + [["G01", "G03"], False], + [["G01", "NONEXISTENT"], True], + ], +) +def test_volcano_top_genes(deg, top, a_err): + if not a_err: + fig = dc.pl.volcano(data=deg, x="stat", y="padj", top=top, return_fig=True) + assert isinstance(fig, Figure) 
+ plt.close(fig) + else: + with pytest.raises(AssertionError): + dc.pl.volcano(data=deg, x="stat", y="padj", top=top, return_fig=True)