From 592f07919894fc6ce7a02b5f07fdf206273e1ea8 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Sun, 1 Mar 2026 16:25:20 +0530
Subject: [PATCH 1/9] add Epochs.score_quality() for data-driven epoch quality
 scoring

---
 doc/changes/dev/13676.newfeature.rst | Bin 0 -> 588 bytes
 mne/epochs.py                        |  68 +++++++++++++++++++++++++++
 mne/tests/test_epochs.py             |  27 +++++++++++
 3 files changed, 95 insertions(+)
 create mode 100644 doc/changes/dev/13676.newfeature.rst

diff --git a/doc/changes/dev/13676.newfeature.rst b/doc/changes/dev/13676.newfeature.rst
new file mode 100644
index 0000000000000000000000000000000000000000..b5403c8dd0022b47b719691d5c28a1dc984155eb
GIT binary patch
literal 588
zcma)(QBuP&3`F;v8SdZ%kRG6a{on)~ph=t*O4}sFNoTk`@YbflCzG)($*YxC`TW?a
z;|tpAqF#$$@M`>lXro6>8WqVVqJ%eU=4G!pt!i}Ek(zfZ0{4hD;zHF*+!HsP32#<~
zy|D#QM!4Q@;SYD<C6OQWOnwI0=FE~&3yce|Dci_xt^w;VjZXCOB-Kys89Y&$bwzEj
z2k=+&GfWND@jGYV+c?p2&ODwZW&D(9xe|ArGM&|#&%Iq~8`(#t1L&U5$~?2#ZdKlb
zeJeWP7yaJ;FUHlcoS1V(q5qEaiF-s(m({mq3X8i%dY!Y@GU0#zYX|FyhCQ2ZV6C&>
c799b9$M(FJpeJz0ytkss8?33?=w5gF0{w|@R{#J2

literal 0
HcmV?d00001

diff --git a/mne/epochs.py b/mne/epochs.py
index 2d317caa63e..d34365fdf7f 100644
--- a/mne/epochs.py
+++ b/mne/epochs.py
@@ -1421,6 +1421,74 @@ def drop_bad(self, reject="existing", flat="existing", verbose=None):
         self._get_data(out=False, verbose=verbose)
         return self
 
+    def score_quality(self, picks=None):
+        """Score each epoch by its likelihood of containing an artifact.
+
+        Computes a per-epoch outlier score in [0, 1] using robust statistics
+        on peak-to-peak amplitude, variance, and kurtosis. No additional
+        dependencies are required. Useful for informing rejection thresholds
+        before calling :meth:`drop_bad`.
+
+        .. note:: To constrain the time period used for scoring, set
+                ``epochs.reject_tmin`` and ``epochs.reject_tmax``.
+
+        Parameters
+        ----------
+        picks : str | list | slice | None
+            Channels to include. Defaults to good data channels.
+            See :func:`mne.pick_types` for more information.
+
+        Returns
+        -------
+        scores : ndarray, shape (n_epochs,)
+            Outlier score per epoch. Values closer to 0 indicate clean
+            epochs; values closer to 1 indicate likely artifacts.
+
+        See Also
+        --------
+        drop_bad : Drop epochs that exceed rejection thresholds.
+        plot_drop_log : Plot the drop log after calling drop_bad.
+
+        Examples
+        --------
+        Score epochs and use result to inform rejection thresholds::
+
+            scores = epochs.score_quality()
+            bad_epochs = np.where(scores > 0.8)[0]
+            epochs.drop(bad_epochs, reason='quality-score')
+        """
+        from scipy.stats import kurtosis as _kurtosis
+
+        self.load_data()
+        data = self.get_data(picks=picks)  # (n_epochs, n_channels, n_times)
+
+        if data.shape[0] < 2:
+            raise ValueError(
+                "At least 2 epochs are required to compute quality scores."
+            )
+
+        # Feature 1: peak-to-peak amplitude (mean across channels)
+        ptp = np.ptp(data, axis=-1).mean(axis=-1)
+
+        # Feature 2: variance (mean across channels)
+        var = data.var(axis=-1).mean(axis=-1)
+
+        # Feature 3: kurtosis — sensitive to spike artifacts
+        kurt = np.array([_kurtosis(data[i].ravel()) for i in range(len(data))])
+
+        # Robust z-score each feature using median absolute deviation
+        features = np.column_stack([ptp, var, kurt])
+        median = np.median(features, axis=0)
+        mad = np.median(np.abs(features - median), axis=0) + 1e-10
+        z = np.abs((features - median) / mad)
+
+        # Combine and normalize to [0, 1]
+        raw_score = z.mean(axis=-1)
+        score_range = raw_score.max() - raw_score.min()
+        scores = (raw_score - raw_score.min()) / (score_range + 1e-10)
+
+        return scores
+
     def drop_log_stats(self, ignore=("IGNORED",)):
         """Compute the channel stats based on a drop_log from Epochs.
 
diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py
index 91c5f902ac8..46d4ffb1feb 100644
--- a/mne/tests/test_epochs.py
+++ b/mne/tests/test_epochs.py
@@ -3434,6 +3434,33 @@ def test_drop_epochs():
         ("a", "b"),
     ]
 
+def test_score_quality():
+    """Test epoch quality scoring."""
+    raw, events, picks = _get_data()
+    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks, preload=True)
+
+    # Basic output checks
+    scores = epochs.score_quality()
+    assert scores.shape == (len(epochs),)
+    assert scores.min() >= 0.0
+    assert scores.max() <= 1.0
+
+    # Scores should vary across epochs (not all identical)
+    assert scores.std() > 0
+
+    # Works with picks
+    scores_eeg = epochs.score_quality(picks="eeg")
+    assert scores_eeg.shape == (len(epochs),)
+
+    # Inject an obvious artifact into one epoch and check it scores highest
+    epochs_art = epochs.copy()
+    epochs_art._data[0] *= 100  # make epoch 0 a clear outlier
+    scores_art = epochs_art.score_quality()
+    assert scores_art[0] == scores_art.max()
+
+    # Too few epochs should raise
+    with pytest.raises(ValueError, match="At least 2 epochs"):
+        epochs[:1].score_quality()
 
 @pytest.mark.parametrize("preload", (True, False))
 def test_drop_epochs_mult(preload):

From db8a17687cfce12428aeed1941563963b71b66f5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 1 Mar 2026 10:58:16 +0000
Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 mne/tests/test_epochs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py
index 46d4ffb1feb..f1a11c540c1 100644
--- a/mne/tests/test_epochs.py
+++ b/mne/tests/test_epochs.py
@@ -3434,6 +3434,7 @@ def test_drop_epochs():
         ("a", "b"),
     ]
 
+
 def test_score_quality():
     """Test epoch quality scoring."""
     raw, events, picks = _get_data()
@@ -3462,6 +3463,7 @@ def test_score_quality():
     with pytest.raises(ValueError, match="At least 2 epochs"):
         epochs[:1].score_quality()
 
+
 @pytest.mark.parametrize("preload", (True, False))
 def test_drop_epochs_mult(preload):
     """Test that subselecting epochs or making fewer epochs is similar."""

From c84679329ab29c3d8d210857599591f44fa8da86 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Sun, 1 Mar 2026 16:36:23 +0530
Subject: [PATCH 3/9] DOC: Fix encoding of changelog file

---
 doc/changes/dev/13676.newfeature.rst | Bin 588 -> 292 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/doc/changes/dev/13676.newfeature.rst b/doc/changes/dev/13676.newfeature.rst
index b5403c8dd0022b47b719691d5c28a1dc984155eb..2adb5635da32d0e4aa51124ebf7cddaf7116ac41 100644
GIT binary patch
literal 292
zcmZXPJ#ND=42Ace;vK;D0kUQ239=e6Xo;zr*rG~N27LQUSv!&7`}x3ojJO@_+igA)
zPv2R?R;Civc=@fgi(fNb5%kE_8HCI>a<@UN@4|+U8?{LKf$3U{qd2(~mr$UoMd&@c
zj(viT+*)ho0VkPI3T>kl@vi1tqzuTWr^H@Ih@3e^PT?A##@w6DyOdRu<Ef3${<#>x
uIJh*RZ^mWQJr=CoEQ~<=-c$d7jiz~7Db54d3-kS;gonvVjdaqyOkaOMMr<_z

literal 588
zcma)(QBuP&3`F;v8SdZ%kRG6a{on)~ph=t*O4}sFNoTk`@YbflCzG)($*YxC`TW?a
z;|tpAqF#$$@M`>lXro6>8WqVVqJ%eU=4G!pt!i}Ek(zfZ0{4hD;zHF*+!HsP32#<~
zy|D#QM!4Q@;SYD<C6OQWOnwI0=FE~&3yce|Dci_xt^w;VjZXCOB-Kys89Y&$bwzEj
z2k=+&GfWND@jGYV+c?p2&ODwZW&D(9xe|ArGM&|#&%Iq~8`(#t1L&U5$~?2#ZdKlb
zeJeWP7yaJ;FUHlcoS1V(q5qEaiF-s(m({mq3X8i%dY!Y@GU0#zYX|FyhCQ2ZV6C&>
c799b9$M(FJpeJz0ytkss8?33?=w5gF0{w|@R{#J2


From c76aa847fe4d2aec2c24bc757dd375c147a4789e Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Tue, 3 Mar 2026 15:44:11 +0530
Subject: [PATCH 4/9] adding example for exploring epoch quality before
 rejection

---
 examples/preprocessing/plot_epoch_quality.py | 109 +++++++++++++++++++
 1 file changed, 109 insertions(+)
 create mode 100644 examples/preprocessing/plot_epoch_quality.py

diff --git a/examples/preprocessing/plot_epoch_quality.py b/examples/preprocessing/plot_epoch_quality.py
new file mode 100644
index 00000000000..de7a5da192d
--- /dev/null
+++ b/examples/preprocessing/plot_epoch_quality.py
@@ -0,0 +1,109 @@
+"""
+.. _ex-epoch-quality:
+
+=====================================
+Exploring epoch quality before rejection
+=====================================
+
+Before rejecting epochs with :meth:`mne.Epochs.drop_bad`, it can be useful
+to get a sense of which epochs are the most likely artifacts. This example
+shows how to compute simple per-epoch statistics — peak-to-peak amplitude,
+variance, and kurtosis — and use them to rank epochs by their outlier score.
+
+The approach is inspired by established methods in the EEG artifact detection
+literature, namely FASTER (Nolan et al., 2010) and Delorme et al. (2007), both
+of which use z-scored kurtosis and variance across epochs to flag bad trials.
+
+References
+----------
+.. [1] Nolan, H., Whelan, R., & Reilly, R. B. (2010). FASTER: Fully Automated
+       Statistical Thresholding for EEG artifact Rejection.
+       Journal of Neuroscience Methods, 192(1), 152-162.
+.. [2] Delorme, A., Sejnowski, T., & Makeig, S. (2007). Enhanced detection of
+       artifacts in EEG data using higher-order statistics and independent
+       component analysis. NeuroImage, 34(4), 1443-1449.
+"""
+# Authors: Aman Srivastava
+#
+# License: BSD-3-Clause
+# Copyright the MNE-Python contributors.
+
+# %%
+import matplotlib.pyplot as plt
+import numpy as np
+
+import mne
+from mne.datasets import sample
+
+print(__doc__)
+
+data_path = sample.data_path()
+
+# %%
+# Load the sample dataset and create epochs
+meg_path = data_path / "MEG" / "sample"
+raw_fname = meg_path / "sample_audvis_filt-0-40_raw.fif"
+
+raw = mne.io.read_raw_fif(raw_fname, preload=True)
+events = mne.find_events(raw, "STI 014")
+
+event_id = {"auditory/left": 1, "auditory/right": 2}
+tmin, tmax = -0.2, 0.5
+picks = mne.pick_types(raw.info, meg="grad", eeg=False)
+
+epochs = mne.Epochs(
+    raw, events, event_id, tmin, tmax, picks=picks, preload=True, baseline=(None, 0)
+)
+
+# %%
+# Compute per-epoch statistics
+# We compute three features for each epoch:
+# - Peak-to-peak amplitude (sensitive to large jumps)
+# - Variance (sensitive to sustained high-amplitude noise)
+# - Kurtosis (sensitive to spike artifacts)
+#
+# Each feature is z-scored robustly using median absolute deviation (MAD)
+# across epochs, then averaged into a single outlier score per epoch.
+
+data = epochs.get_data()  # (n_epochs, n_channels, n_times)
+
+# Feature 1: peak-to-peak
+ptp = np.ptp(data, axis=-1).mean(axis=-1)
+
+# Feature 2: variance
+var = data.var(axis=-1).mean(axis=-1)
+
+# Feature 3: kurtosis
+from scipy.stats import kurtosis  # noqa: E402
+
+kurt = np.array([kurtosis(data[i].ravel()) for i in range(len(data))])
+
+# Robust z-score using MAD
+features = np.column_stack([ptp, var, kurt])
+median = np.median(features, axis=0)
+mad = np.median(np.abs(features - median), axis=0) + 1e-10
+z = np.abs((features - median) / mad)
+
+# Normalize to [0, 1]
+raw_score = z.mean(axis=-1)
+scores = (raw_score - raw_score.min()) / (raw_score.max() - raw_score.min() + 1e-10)
+
+# %%
+# Plot the scores ranked from cleanest to noisiest
+fig, ax = plt.subplots(layout="constrained")
+sorted_idx = np.argsort(scores)
+ax.bar(np.arange(len(scores)), scores[sorted_idx], color="steelblue")
+ax.axhline(0.8, color="red", linestyle="--", label="Example threshold (0.8)")
+ax.set(
+    xlabel="Epoch (sorted by score)",
+    ylabel="Outlier score",
+    title="Epoch quality scores (0 = clean, 1 = likely artifact)",
+)
+ax.legend()
+
+# %%
+# Inspect the worst epochs
+# Epochs scoring above 0.8 are worth inspecting manually
+bad_epochs = np.where(scores > 0.8)[0]
+print(f"Epochs worth inspecting: {bad_epochs}")
+print(f"That's {len(bad_epochs)} out of {len(epochs)} total epochs")

From 518b6b13a5c3e83cc7a6bf4c932028b5d116b549 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Tue, 3 Mar 2026 15:47:43 +0530
Subject: [PATCH 5/9] updating newfeature.rst file

---
 doc/changes/dev/13676.newfeature.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/changes/dev/13676.newfeature.rst b/doc/changes/dev/13676.newfeature.rst
index 2adb5635da3..dd2611fac68 100644
--- a/doc/changes/dev/13676.newfeature.rst
+++ b/doc/changes/dev/13676.newfeature.rst
@@ -1 +1 @@
-Add :meth:\mne.Epochs.score_quality\ to compute a per-epoch outlier score using robust statistics on peak-to-peak amplitude, variance, and kurtosis, providing a dependency-free starting point for setting rejection thresholds before calling :meth:\mne.Epochs.drop_bad\, by \Aman Srivastava\_.
+Add :meth:`mne.Epochs.score_quality` to compute a per-epoch outlier score using robust statistics on peak-to-peak amplitude, variance, and kurtosis, and add a preprocessing example showing how to explore epoch quality before rejection, by `Aman Srivastava`_.
\ No newline at end of file

From d7b55819d430c80374be965502fbe412e9d038ed Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Fri, 6 Mar 2026 14:40:37 +0530
Subject: [PATCH 6/9] remove score_quality method, keep example only per review
 feedback

---
 mne/epochs.py            | 68 ----------------------------------------
 mne/tests/test_epochs.py | 29 -----------------
 2 files changed, 97 deletions(-)

diff --git a/mne/epochs.py b/mne/epochs.py
index d34365fdf7f..2d317caa63e 100644
--- a/mne/epochs.py
+++ b/mne/epochs.py
@@ -1421,74 +1421,6 @@ def drop_bad(self, reject="existing", flat="existing", verbose=None):
         self._get_data(out=False, verbose=verbose)
         return self
 
-    def score_quality(self, picks=None):
-        """Score each epoch by its likelihood of containing an artifact.
-
-        Computes a per-epoch outlier score in [0, 1] using robust statistics
-        on peak-to-peak amplitude, variance, and kurtosis. No additional
-        dependencies are required. Useful for informing rejection thresholds
-        before calling :meth:`drop_bad`.
-
-        .. note:: To constrain the time period used for scoring, set
-                ``epochs.reject_tmin`` and ``epochs.reject_tmax``.
-
-        Parameters
-        ----------
-        picks : str | list | slice | None
-            Channels to include. Defaults to good data channels.
-            See :func:`mne.pick_types` for more information.
-
-        Returns
-        -------
-        scores : ndarray, shape (n_epochs,)
-            Outlier score per epoch. Values closer to 0 indicate clean
-            epochs; values closer to 1 indicate likely artifacts.
-
-        See Also
-        --------
-        drop_bad : Drop epochs that exceed rejection thresholds.
-        plot_drop_log : Plot the drop log after calling drop_bad.
-
-        Examples
-        --------
-        Score epochs and use result to inform rejection thresholds::
-
-            scores = epochs.score_quality()
-            bad_epochs = np.where(scores > 0.8)[0]
-            epochs.drop(bad_epochs, reason='quality-score')
-        """
-        from scipy.stats import kurtosis as _kurtosis
-
-        self.load_data()
-        data = self.get_data(picks=picks)  # (n_epochs, n_channels, n_times)
-
-        if data.shape[0] < 2:
-            raise ValueError(
-                "At least 2 epochs are required to compute quality scores."
-            )
-
-        # Feature 1: peak-to-peak amplitude (mean across channels)
-        ptp = np.ptp(data, axis=-1).mean(axis=-1)
-
-        # Feature 2: variance (mean across channels)
-        var = data.var(axis=-1).mean(axis=-1)
-
-        # Feature 3: kurtosis — sensitive to spike artifacts
-        kurt = np.array([_kurtosis(data[i].ravel()) for i in range(len(data))])
-
-        # Robust z-score each feature using median absolute deviation
-        features = np.column_stack([ptp, var, kurt])
-        median = np.median(features, axis=0)
-        mad = np.median(np.abs(features - median), axis=0) + 1e-10
-        z = np.abs((features - median) / mad)
-
-        # Combine and normalize to [0, 1]
-        raw_score = z.mean(axis=-1)
-        score_range = raw_score.max() - raw_score.min()
-        scores = (raw_score - raw_score.min()) / (score_range + 1e-10)
-
-        return scores
-
     def drop_log_stats(self, ignore=("IGNORED",)):
         """Compute the channel stats based on a drop_log from Epochs.
 
diff --git a/mne/tests/test_epochs.py b/mne/tests/test_epochs.py
index f1a11c540c1..91c5f902ac8 100644
--- a/mne/tests/test_epochs.py
+++ b/mne/tests/test_epochs.py
@@ -3435,35 +3435,6 @@ def test_drop_epochs():
     ]
 
 
-def test_score_quality():
-    """Test epoch quality scoring."""
-    raw, events, picks = _get_data()
-    epochs = Epochs(raw, events, event_id, tmin, tmax, picks=picks, preload=True)
-
-    # Basic output checks
-    scores = epochs.score_quality()
-    assert scores.shape == (len(epochs),)
-    assert scores.min() >= 0.0
-    assert scores.max() <= 1.0
-
-    # Scores should vary across epochs (not all identical)
-    assert scores.std() > 0
-
-    # Works with picks
-    scores_eeg = epochs.score_quality(picks="eeg")
-    assert scores_eeg.shape == (len(epochs),)
-
-    # Inject an obvious artifact into one epoch and check it scores highest
-    epochs_art = epochs.copy()
-    epochs_art._data[0] *= 100  # make epoch 0 a clear outlier
-    scores_art = epochs_art.score_quality()
-    assert scores_art[0] == scores_art.max()
-
-    # Too few epochs should raise
-    with pytest.raises(ValueError, match="At least 2 epochs"):
-        epochs[:1].score_quality()
-
-
 @pytest.mark.parametrize("preload", (True, False))
 def test_drop_epochs_mult(preload):
     """Test that subselecting epochs or making fewer epochs is similar."""

From 926f501e534f470260260f840bff225f8563e525 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Fri, 6 Mar 2026 14:42:07 +0530
Subject: [PATCH 7/9] updating .rst file

---
 doc/changes/dev/13676.newfeature.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/changes/dev/13676.newfeature.rst b/doc/changes/dev/13676.newfeature.rst
index dd2611fac68..7c6e73283f0 100644
--- a/doc/changes/dev/13676.newfeature.rst
+++ b/doc/changes/dev/13676.newfeature.rst
@@ -1 +1 @@
-Add :meth:`mne.Epochs.score_quality` to compute a per-epoch outlier score using robust statistics on peak-to-peak amplitude, variance, and kurtosis, and add a preprocessing example showing how to explore epoch quality before rejection, by `Aman Srivastava`_.
\ No newline at end of file
+Add a preprocessing example showing how to explore epoch quality before rejection using robust statistics (peak-to-peak amplitude, variance, and kurtosis) inspired by FASTER (Nolan et al., 2010) and Delorme et al. (2007), by `Aman Srivastava`_.
\ No newline at end of file

From 3fdddece4e27193ba475b74d996522499ea58fe5 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Fri, 6 Mar 2026 14:45:01 +0530
Subject: [PATCH 8/9] rename changelog file to match PR number

---
 doc/changes/dev/{13676.newfeature.rst => 13710.newfeature.rst} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename doc/changes/dev/{13676.newfeature.rst => 13710.newfeature.rst} (100%)

diff --git a/doc/changes/dev/13676.newfeature.rst b/doc/changes/dev/13710.newfeature.rst
similarity index 100%
rename from doc/changes/dev/13676.newfeature.rst
rename to doc/changes/dev/13710.newfeature.rst

From 08d9cf8d5ad3797ded6d1f943099100161e31665 Mon Sep 17 00:00:00 2001
From: Aman Srivastava <amansri345@gmail.com>
Date: Tue, 10 Mar 2026 14:24:36 +0530
Subject: [PATCH 9/9] add footcite references and update bib

---
 doc/references.bib                           | 21 ++++++++++++++++++++
 examples/preprocessing/plot_epoch_quality.py | 19 ++++++++----------
 2 files changed, 29 insertions(+), 11 deletions(-)

diff --git a/doc/references.bib b/doc/references.bib
index 12d63458bd1..c7eefd9e808 100644
--- a/doc/references.bib
+++ b/doc/references.bib
@@ -25,6 +25,27 @@ @article{GramfortEtAl2013a
   number = {267}
 }
 % everything else
+@article{NolanEtAl2010,
+  author  = {Nolan, H. and Whelan, R. and Reilly, R. B.},
+  title   = {{FASTER}: Fully Automated Statistical Thresholding for {EEG} artifact Rejection},
+  journal = {Journal of Neuroscience Methods},
+  year    = {2010},
+  volume  = {192},
+  number  = {1},
+  pages   = {152--162},
+  doi     = {10.1016/j.jneumeth.2010.07.015},
+}
+
+@article{DelormeEtAl2007,
+  author  = {Delorme, A. and Sejnowski, T. and Makeig, S.},
+  title   = {Enhanced detection of artifacts in {EEG} data using higher-order statistics and independent component analysis},
+  journal = {NeuroImage},
+  year    = {2007},
+  volume  = {34},
+  number  = {4},
+  pages   = {1443--1449},
+  doi     = {10.1016/j.neuroimage.2006.11.004},
+}
 @article{AblinEtAl2018,
   author = {Ablin, Pierre and Cardoso, Jean-Francois and Gramfort, Alexandre},
   doi = {10.1109/TSP.2018.2844203},
diff --git a/examples/preprocessing/plot_epoch_quality.py b/examples/preprocessing/plot_epoch_quality.py
index de7a5da192d..d37644edbfd 100644
--- a/examples/preprocessing/plot_epoch_quality.py
+++ b/examples/preprocessing/plot_epoch_quality.py
@@ -11,17 +11,9 @@
 variance, and kurtosis — and use them to rank epochs by their outlier score.
 
 The approach is inspired by established methods in the EEG artifact detection
-literature, namely FASTER (Nolan et al., 2010) and Delorme et al. (2007), both
-of which use z-scored kurtosis and variance across epochs to flag bad trials.
-
-References
-----------
-.. [1] Nolan, H., Whelan, R., & Reilly, R. B. (2010). FASTER: Fully Automated
-       Statistical Thresholding for EEG artifact Rejection.
-       Journal of Neuroscience Methods, 192(1), 152-162.
-.. [2] Delorme, A., Sejnowski, T., & Makeig, S. (2007). Enhanced detection of
-       artifacts in EEG data using higher-order statistics and independent
-       component analysis. NeuroImage, 34(4), 1443-1449.
+literature, namely FASTER :footcite:t:`NolanEtAl2010` and
+:footcite:t:`DelormeEtAl2007`, both of which use z-scored kurtosis and
+variance across epochs to flag bad trials.
 """
 # Authors: Aman Srivastava
 #
@@ -107,3 +99,8 @@
 bad_epochs = np.where(scores > 0.8)[0]
 print(f"Epochs worth inspecting: {bad_epochs}")
 print(f"That's {len(bad_epochs)} out of {len(epochs)} total epochs")
+
+# %%
+# References
+# ----------
+# .. footbibliography::