From 99e38f7c7a06b7e8e14c22c98a0ee68893930cc9 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 1 May 2026 10:41:19 -0700 Subject: [PATCH] blk-mq: check for stale cached request in blk_mq_submit_bio When submitting a bio to blk-mq, if the task should sleep after peeking a cached request, but before it pops it, the plug flushes and calls blk_mq_free_plug_rqs, freeing the cached_rqs. This creates a use-after-free bug. Fix this by ensuring the cached_rqs still contains our peeked request, and retry the bio submission without it if the request had been freed. The code had already warned of this possibility, and specifically popped the request before other known blocking calls, but it didn't handle a blocking GFP_NOIO alloc. Under memory pressure, allocating the split bio or the integrity payload are two such cases that can block. The blk-mq submit_bio function continues using the peeked request that was already freed and re-initialized, so the driver receives that request with a NULL'ed mq_hctx, and inevitably panics. Relevant kernel messages if you should encounter this condition, where the "WARNING" is the harbinger of the panic about to happen: ------------[ cut here ]------------ WARNING: CPU: 4 PID: 80820 at block/blk-mq.c:3071 blk_mq_submit_bio+0x2cf/0x5b0 ... BUG: kernel NULL pointer dereference, address: 0000000000000100 #PF: supervisor read access in kernel mode #PF: error_code(0x0000) - not-present page PGD 6b367b067 P4D 6b367b067 PUD 6bb5eb067 PMD 0 Oops: Oops: 0000 [#1] SMP ... Call Trace: blk_mq_dispatch_queue_requests+0x46/0x120 blk_mq_flush_plug_list+0x38/0x130 blk_add_rq_to_plug+0xa2/0x160 blk_mq_submit_bio+0x3ab/0x5b0 __submit_bio+0x3a/0x260 submit_bio_noacct_nocheck+0xc6/0x2b0 btrfs_submit_bbio+0x14d/0x520 ? btrfs_get_extent+0x43f/0x640 submit_extent_folio+0x31f/0x340 btrfs_do_readpage+0x2d7/0xac0 btrfs_readahead+0x142/0x200 ? clear_state_bit+0x520/0x520 read_pages+0x57/0x200 ? folio_alloc_noprof+0x10c/0x310 page_cache_ra_unbounded+0x28c/0x480 ? asm_sysvec_call_function+0x16/0x20 ? blk_cgroup_congested+0xa/0x50 ? page_cache_sync_ra+0x41/0x2d0 filemap_get_pages+0x347/0xd50 filemap_read+0xd3/0x500 ? 0xffffffff81000000 __io_read+0x111/0x440 io_read+0x23/0x90 __io_issue_sqe+0x40/0x120 io_issue_sqe+0x3f/0x3a0 io_submit_sqes+0x2bd/0x790 __se_sys_io_uring_enter+0x100/0xc10 ? eventfd_read+0x100/0x1f0 ? futex_wake+0x1b9/0x260 ? syscall_trace_enter+0x34/0x1d0 do_syscall_64+0x6a/0x250 entry_SYSCALL_64_after_hwframe+0x4b/0x53 Fixes: b0077e269f6c1 ("blk-mq: make sure active queue usage is held for bio_integrity_prep()") Fixes: 7b4f36cd22a65 ("block: ensure we hold a queue reference when using queue limits") Signed-off-by: Keith Busch --- block/blk-mq.c | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 4c5c16cce4f8f..73ef3e4be5123 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -3096,22 +3096,27 @@ static struct request *blk_mq_peek_cached_request(struct blk_plug *plug, return rq; } -static void blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, +static bool blk_mq_use_cached_rq(struct request *rq, struct blk_plug *plug, struct bio *bio) { - if (rq_list_pop(&plug->cached_rqs) != rq) - WARN_ON_ONCE(1); - /* - * If any qos ->throttle() end up blocking, we will have flushed the - * plug and hence killed the cached_rq list as well. Pop this entry - * before we throttle. + * We will have flushed the plug and hence killed the cached_rq list as + * well if anything had scheduled. Pop this entry before we throttle if + * the entry is still valid. */ + struct request *popped = rq_list_pop(&plug->cached_rqs); + + if (popped != rq) { + WARN_ON_ONCE(popped); + return false; + } + rq_qos_throttle(rq->q, bio); blk_mq_rq_time_init(rq, blk_time_get_ns()); rq->cmd_flags = bio->bi_opf; INIT_LIST_HEAD(&rq->queuelist); + return true; } static bool bio_unaligned(const struct bio *bio, struct request_queue *q) @@ -3154,6 +3159,7 @@ void blk_mq_submit_bio(struct bio *bio) */ rq = blk_mq_peek_cached_request(plug, q, bio->bi_opf); +retry: /* * A BIO that was released from a zone write plug has already been * through the preparation in this function, already holds a reference @@ -3211,7 +3217,14 @@ void blk_mq_submit_bio(struct bio *bio) new_request: if (rq) { - blk_mq_use_cached_rq(rq, plug, bio); + if (!blk_mq_use_cached_rq(rq, plug, bio)) { + struct bio_integrity_payload *bip = bio_integrity(bio); + + if (bip && (bip->bip_flags & BIP_BLOCK_INTEGRITY)) + bio_integrity_free(bio); + rq = NULL; + goto retry; + } } else { rq = blk_mq_get_new_requests(q, plug, bio); if (unlikely(!rq)) {