From 8ba18b7ec084f2d303fcc1b5749091cb06a1270c Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:43 -0700 Subject: [PATCH 01/12] block: Annotate the queue limits functions Let the thread-safety checker verify whether every start of a queue limits update is followed by a call to a function that finishes a queue limits update. Reviewed-by: Christoph Hellwig Signed-off-by: Bart Van Assche --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 890128cdea1c..8651af4fe9d7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1093,15 +1093,17 @@ static inline unsigned int blk_boundary_sectors_left(sector_t offset, */ static inline struct queue_limits queue_limits_start_update(struct request_queue *q) + __acquires(&q->limits_lock) { mutex_lock(&q->limits_lock); return q->limits; } int queue_limits_commit_update_frozen(struct request_queue *q, - struct queue_limits *lim); + struct queue_limits *lim) __releases(&q->limits_lock); int queue_limits_commit_update(struct request_queue *q, - struct queue_limits *lim); -int queue_limits_set(struct request_queue *q, struct queue_limits *lim); + struct queue_limits *lim) __releases(&q->limits_lock); +int queue_limits_set(struct request_queue *q, struct queue_limits *lim) + __must_not_hold(&q->limits_lock); int blk_validate_limits(struct queue_limits *lim); /** @@ -1113,6 +1115,7 @@ int blk_validate_limits(struct queue_limits *lim); * starting update. */ static inline void queue_limits_cancel_update(struct request_queue *q) + __releases(&q->limits_lock) { mutex_unlock(&q->limits_lock); } From 47bf1ae1faa8734a7188e96171313ea3972d0297 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:44 -0700 Subject: [PATCH 02/12] block/bdev: Annotate the blk_holder_ops callback functions The four callback functions in blk_holder_ops all release the bd_holder_lock. 
Annotate these functions accordingly. Signed-off-by: Bart Van Assche --- include/linux/blkdev.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8651af4fe9d7..d0c181da2da0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1747,22 +1747,26 @@ void blkdev_show(struct seq_file *seqf, off_t offset); #endif struct blk_holder_ops { - void (*mark_dead)(struct block_device *bdev, bool surprise); + void (*mark_dead)(struct block_device *bdev, bool surprise) + __releases(&bdev->bd_holder_lock); /* * Sync the file system mounted on the block device. */ - void (*sync)(struct block_device *bdev); + void (*sync)(struct block_device *bdev) + __releases(&bdev->bd_holder_lock); /* * Freeze the file system mounted on the block device. */ - int (*freeze)(struct block_device *bdev); + int (*freeze)(struct block_device *bdev) + __releases(&bdev->bd_holder_lock); /* * Thaw the file system mounted on the block device. */ - int (*thaw)(struct block_device *bdev); + int (*thaw)(struct block_device *bdev) + __releases(&bdev->bd_holder_lock); }; /* From d98bd745f2236268ab85591cc8c433b4c8f374f8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:45 -0700 Subject: [PATCH 03/12] block/cgroup: Split blkg_conf_prep() Move the blkg_conf_open_bdev() call out of blkg_conf_prep() to make it possible to add lock context annotations to blkg_conf_prep(). Change an if-statement in blkg_conf_open_bdev() into a WARN_ON_ONCE() call. Export blkg_conf_open_bdev() because it is called by the BFQ I/O scheduler and the BFQ I/O scheduler may be built as a kernel module. 
Reviewed-by: Christoph Hellwig Cc: Tejun Heo Signed-off-by: Bart Van Assche --- block/bfq-cgroup.c | 4 ++++ block/blk-cgroup.c | 18 ++++++++---------- block/blk-iocost.c | 4 ++++ 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index ac83b0668764..38396df9dce7 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1051,6 +1051,10 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, blkg_conf_init(&ctx, buf); + ret = blkg_conf_open_bdev(&ctx); + if (ret) + goto out; + ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) goto out; diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 554c87bb4a86..a8d95d51b866 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -771,10 +771,7 @@ EXPORT_SYMBOL_GPL(blkg_conf_init); * @ctx->input and get and store the matching bdev in @ctx->bdev. @ctx->body is * set to point past the device node prefix. * - * This function may be called multiple times on @ctx and the extra calls become - * NOOPs. blkg_conf_prep() implicitly calls this function. Use this function - * explicitly if bdev access is needed without resolving the blkcg / policy part - * of @ctx->input. Returns -errno on error. + * Returns: -errno on error. */ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) { @@ -783,8 +780,8 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) struct block_device *bdev; int key_len; - if (ctx->bdev) - return 0; + if (WARN_ON_ONCE(ctx->bdev)) + return -EINVAL; if (sscanf(input, "%u:%u%n", &major, &minor, &key_len) != 2) return -EINVAL; @@ -813,6 +810,8 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) ctx->bdev = bdev; return 0; } +EXPORT_SYMBOL_GPL(blkg_conf_open_bdev); + /* * Similar to blkg_conf_open_bdev, but additionally freezes the queue, * ensures the correct locking order between freeze queue and q->rq_qos_mutex. 
@@ -857,7 +856,7 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) * following MAJ:MIN, @ctx->bdev points to the target block device and * @ctx->blkg to the blkg being configured. * - * blkg_conf_open_bdev() may be called on @ctx beforehand. On success, this + * blkg_conf_open_bdev() must be called on @ctx beforehand. On success, this * function returns with queue lock held and must be followed by * blkg_conf_exit(). */ @@ -870,9 +869,8 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkcg_gq *blkg; int ret; - ret = blkg_conf_open_bdev(ctx); - if (ret) - return ret; + if (WARN_ON_ONCE(!ctx->bdev)) + return -EINVAL; disk = ctx->bdev->bd_disk; q = disk->queue; diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 0cca88a366dc..b34f820dedcc 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3140,6 +3140,10 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, blkg_conf_init(&ctx, buf); + ret = blkg_conf_open_bdev(&ctx); + if (ret) + goto err; + ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx); if (ret) goto err; From 9070942183e5ceb33cf12ed58c7327d2a1c0fce1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:46 -0700 Subject: [PATCH 04/12] block/cgroup: Split blkg_conf_exit() Split blkg_conf_exit() into blkg_conf_unprep() and blkg_conf_close_bdev() because blkg_conf_exit() is not compatible with the Clang thread-safety annotations. Remove blkg_conf_exit(). Rename blkg_conf_exit_frozen() into blkg_conf_close_bdev_frozen(). Add thread-safety annotations to the new functions. 
Reviewed-by: Christoph Hellwig Cc: Tejun Heo Signed-off-by: Bart Van Assche --- block/bfq-cgroup.c | 9 ++++-- block/blk-cgroup.c | 57 ++++++++++++++++++------------------ block/blk-cgroup.h | 6 ++-- block/blk-iocost.c | 67 +++++++++++++++++++++---------------------- block/blk-iolatency.c | 19 ++++++------ block/blk-throttle.c | 34 +++++++++++++--------- 6 files changed, 101 insertions(+), 91 deletions(-) diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c index 38396df9dce7..5d40279d6c9d 100644 --- a/block/bfq-cgroup.c +++ b/block/bfq-cgroup.c @@ -1053,11 +1053,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto out; + return ret; ret = blkg_conf_prep(blkcg, &blkcg_policy_bfq, &ctx); if (ret) - goto out; + goto close_bdev; if (sscanf(ctx.body, "%llu", &v) == 1) { /* require "default" on dfl */ @@ -1078,8 +1078,11 @@ static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of, bfq_group_set_weight(bfqg, bfqg->entity.weight, v); ret = 0; } + out: - blkg_conf_exit(&ctx); + blkg_conf_unprep(&ctx); +close_bdev: + blkg_conf_close_bdev(&ctx); return ret ?: nbytes; } diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index a8d95d51b866..38d7bcfcbbe8 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -755,7 +755,7 @@ EXPORT_SYMBOL_GPL(__blkg_prfill_u64); * * Initialize @ctx which can be used to parse blkg config input string @input. * Once initialized, @ctx can be used with blkg_conf_open_bdev() and - * blkg_conf_prep(), and must be cleaned up with blkg_conf_exit(). + * blkg_conf_prep(). */ void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input) { @@ -817,8 +817,8 @@ EXPORT_SYMBOL_GPL(blkg_conf_open_bdev); * ensures the correct locking order between freeze queue and q->rq_qos_mutex. * * This function returns negative error on failure. On success it returns - * memflags which must be saved and later passed to blkg_conf_exit_frozen - * for restoring the memalloc scope. 
+ * memflags which must be saved and later passed to + * blkg_conf_close_bdev_frozen() for restoring the memalloc scope. */ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) { @@ -858,7 +858,7 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) * * blkg_conf_open_bdev() must be called on @ctx beforehand. On success, this * function returns with queue lock held and must be followed by - * blkg_conf_exit(). + * blkg_conf_close_bdev(). */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx) @@ -968,42 +968,41 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, EXPORT_SYMBOL_GPL(blkg_conf_prep); /** - * blkg_conf_exit - clean up per-blkg config update + * blkg_conf_unprep - counterpart of blkg_conf_prep() * @ctx: blkg_conf_ctx initialized with blkg_conf_init() - * - * Clean up after per-blkg config update. This function must be called on all - * blkg_conf_ctx's initialized with blkg_conf_init(). 
*/ -void blkg_conf_exit(struct blkg_conf_ctx *ctx) - __releases(&ctx->bdev->bd_queue->queue_lock) - __releases(&ctx->bdev->bd_queue->rq_qos_mutex) +void blkg_conf_unprep(struct blkg_conf_ctx *ctx) { - if (ctx->blkg) { - spin_unlock_irq(&bdev_get_queue(ctx->bdev)->queue_lock); - ctx->blkg = NULL; - } + WARN_ON_ONCE(!ctx->blkg); + spin_unlock_irq(&ctx->bdev->bd_disk->queue->queue_lock); + ctx->blkg = NULL; +} +EXPORT_SYMBOL_GPL(blkg_conf_unprep); - if (ctx->bdev) { - mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); - blkdev_put_no_open(ctx->bdev); - ctx->body = NULL; - ctx->bdev = NULL; - } +/** + * blkg_conf_close_bdev - counterpart of blkg_conf_open_bdev() + * @ctx: blkg_conf_ctx initialized with blkg_conf_init() + */ +void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx) +{ + mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); + blkdev_put_no_open(ctx->bdev); + ctx->body = NULL; + ctx->bdev = NULL; } -EXPORT_SYMBOL_GPL(blkg_conf_exit); +EXPORT_SYMBOL_GPL(blkg_conf_close_bdev); /* - * Similar to blkg_conf_exit, but also unfreezes the queue. Should be used + * Similar to blkg_close_bdev, but also unfreezes the queue. Should be used * when blkg_conf_open_bdev_frozen is used to open the bdev. 
*/ -void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags) +void blkg_conf_close_bdev_frozen(struct blkg_conf_ctx *ctx, + unsigned long memflags) { - if (ctx->bdev) { - struct request_queue *q = ctx->bdev->bd_queue; + struct request_queue *q = ctx->bdev->bd_queue; - blkg_conf_exit(ctx); - blk_mq_unfreeze_queue(q, memflags); - } + blkg_conf_close_bdev(ctx); + blk_mq_unfreeze_queue(q, memflags); } static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src) diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 1cce3294634d..ce90f5b60d52 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -222,8 +222,10 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx); unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx); -void blkg_conf_exit(struct blkg_conf_ctx *ctx); -void blkg_conf_exit_frozen(struct blkg_conf_ctx *ctx, unsigned long memflags); +void blkg_conf_unprep(struct blkg_conf_ctx *ctx); +void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx); +void blkg_conf_close_bdev_frozen(struct blkg_conf_ctx *ctx, + unsigned long memflags); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg diff --git a/block/blk-iocost.c b/block/blk-iocost.c index b34f820dedcc..e611dd63d712 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -3142,21 +3142,23 @@ static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto err; + return ret; ret = blkg_conf_prep(blkcg, &blkcg_policy_iocost, &ctx); if (ret) - goto err; + goto close_bdev; iocg = blkg_to_iocg(ctx.blkg); + ret = -EINVAL; + if (!strncmp(ctx.body, "default", 7)) { v = 0; } else { if (!sscanf(ctx.body, "%u", &v)) - goto einval; + goto unprep; if (v < CGROUP_WEIGHT_MIN || v > CGROUP_WEIGHT_MAX) - goto einval; + goto unprep; } spin_lock(&iocg->ioc->lock); @@ -3165,14 +3167,15 @@ 
static ssize_t ioc_weight_write(struct kernfs_open_file *of, char *buf, weight_updated(iocg, &now); spin_unlock(&iocg->ioc->lock); - blkg_conf_exit(&ctx); - return nbytes; + ret = 0; -einval: - ret = -EINVAL; -err: - blkg_conf_exit(&ctx); - return ret; +unprep: + blkg_conf_unprep(&ctx); + +close_bdev: + blkg_conf_close_bdev(&ctx); + + return ret ? ret : nbytes; } static u64 ioc_qos_prfill(struct seq_file *sf, struct blkg_policy_data *pd, @@ -3241,10 +3244,8 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, blkg_conf_init(&ctx, input); memflags = blkg_conf_open_bdev_frozen(&ctx); - if (IS_ERR_VALUE(memflags)) { - ret = memflags; - goto err; - } + if (IS_ERR_VALUE(memflags)) + return memflags; body = ctx.body; disk = ctx.bdev->bd_disk; @@ -3361,14 +3362,14 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, blk_mq_unquiesce_queue(disk->queue); - blkg_conf_exit_frozen(&ctx, memflags); + blkg_conf_close_bdev_frozen(&ctx, memflags); return nbytes; einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); ret = -EINVAL; err: - blkg_conf_exit_frozen(&ctx, memflags); + blkg_conf_close_bdev_frozen(&ctx, memflags); return ret; } @@ -3434,20 +3435,20 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto err; + return ret; body = ctx.body; q = bdev_get_queue(ctx.bdev); if (!queue_is_mq(q)) { ret = -EOPNOTSUPP; - goto err; + goto close_bdev; } ioc = q_to_ioc(q); if (!ioc) { ret = blk_iocost_init(ctx.bdev->bd_disk); if (ret) - goto err; + goto close_bdev; ioc = q_to_ioc(q); } @@ -3458,6 +3459,8 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, memcpy(u, ioc->params.i_lcoefs, sizeof(u)); user = ioc->user_cost_model; + ret = -EINVAL; + while ((p = strsep(&body, " \t\n"))) { substring_t args[MAX_OPT_ARGS]; char buf[32]; @@ -3475,20 +3478,20 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char 
*input, else if (!strcmp(buf, "user")) user = true; else - goto einval; + goto unlock; continue; case COST_MODEL: match_strlcpy(buf, &args[0], sizeof(buf)); if (strcmp(buf, "linear")) - goto einval; + goto unlock; continue; } tok = match_token(p, i_lcoef_tokens, args); if (tok == NR_I_LCOEFS) - goto einval; + goto unlock; if (match_u64(&args[0], &v)) - goto einval; + goto unlock; u[tok] = v; user = true; } @@ -3500,24 +3503,18 @@ static ssize_t ioc_cost_model_write(struct kernfs_open_file *of, char *input, ioc->user_cost_model = false; } ioc_refresh_params(ioc, true); - spin_unlock_irq(&ioc->lock); - blk_mq_unquiesce_queue(q); - blk_mq_unfreeze_queue(q, memflags); - - blkg_conf_exit(&ctx); - return nbytes; + ret = 0; -einval: +unlock: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(q); blk_mq_unfreeze_queue(q, memflags); - ret = -EINVAL; -err: - blkg_conf_exit(&ctx); - return ret; +close_bdev: + blkg_conf_close_bdev(&ctx); + return ret ? ret : nbytes; } static struct cftype ioc_files[] = { diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c index 53e8dd2dfa8a..1aaee6fb0f59 100644 --- a/block/blk-iolatency.c +++ b/block/blk-iolatency.c @@ -840,7 +840,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto out; + return ret; /* * blk_iolatency_init() may fail after rq_qos_add() succeeds which can @@ -850,11 +850,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, if (!iolat_rq_qos(ctx.bdev->bd_queue)) ret = blk_iolatency_init(ctx.bdev->bd_disk); if (ret) - goto out; + goto close_bdev; ret = blkg_conf_prep(blkcg, &blkcg_policy_iolatency, &ctx); if (ret) - goto out; + goto close_bdev; iolat = blkg_to_lat(ctx.blkg); p = ctx.body; @@ -865,7 +865,7 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, char val[21]; /* 18446744073709551616 */ if (sscanf(tok, "%15[^=]=%20s", key, val) != 2) - goto out; + goto unprep; if (!strcmp(key, 
"target")) { u64 v; @@ -875,9 +875,9 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, else if (sscanf(val, "%llu", &v) == 1) lat_val = v * NSEC_PER_USEC; else - goto out; + goto unprep; } else { - goto out; + goto unprep; } } @@ -889,8 +889,11 @@ static ssize_t iolatency_set_limit(struct kernfs_open_file *of, char *buf, if (oldval != iolat->min_lat_nsec) iolatency_clear_scaling(blkg); ret = 0; -out: - blkg_conf_exit(&ctx); + +unprep: + blkg_conf_unprep(&ctx); +close_bdev: + blkg_conf_close_bdev(&ctx); return ret ?: nbytes; } diff --git a/block/blk-throttle.c b/block/blk-throttle.c index cabf91f0d0dc..8f269310cbf4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1353,21 +1353,21 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto out_finish; + return ret; if (!blk_throtl_activated(ctx.bdev->bd_queue)) { ret = blk_throtl_init(ctx.bdev->bd_disk); if (ret) - goto out_finish; + goto close_bdev; } ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx); if (ret) - goto out_finish; + goto close_bdev; ret = -EINVAL; if (sscanf(ctx.body, "%llu", &v) != 1) - goto out_finish; + goto unprep; if (!v) v = U64_MAX; @@ -1381,8 +1381,12 @@ static ssize_t tg_set_conf(struct kernfs_open_file *of, tg_conf_updated(tg, false); ret = 0; -out_finish: - blkg_conf_exit(&ctx); + +unprep: + blkg_conf_unprep(&ctx); + +close_bdev: + blkg_conf_close_bdev(&ctx); return ret ?: nbytes; } @@ -1537,17 +1541,17 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, ret = blkg_conf_open_bdev(&ctx); if (ret) - goto out_finish; + return ret; if (!blk_throtl_activated(ctx.bdev->bd_queue)) { ret = blk_throtl_init(ctx.bdev->bd_disk); if (ret) - goto out_finish; + goto close_bdev; } ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, &ctx); if (ret) - goto out_finish; + goto close_bdev; tg = blkg_to_tg(ctx.blkg); tg_update_carryover(tg); @@ -1573,11 +1577,11 @@ static ssize_t tg_set_limit(struct 
kernfs_open_file *of, p = tok; strsep(&p, "="); if (!p || (sscanf(p, "%llu", &val) != 1 && strcmp(p, "max"))) - goto out_finish; + goto unprep; ret = -ERANGE; if (!val) - goto out_finish; + goto unprep; ret = -EINVAL; if (!strcmp(tok, "rbps")) @@ -1589,7 +1593,7 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, else if (!strcmp(tok, "wiops")) v[3] = min_t(u64, val, UINT_MAX); else - goto out_finish; + goto unprep; } tg->bps[READ] = v[0]; @@ -1599,8 +1603,10 @@ static ssize_t tg_set_limit(struct kernfs_open_file *of, tg_conf_updated(tg, false); ret = 0; -out_finish: - blkg_conf_exit(&ctx); +unprep: + blkg_conf_unprep(&ctx); +close_bdev: + blkg_conf_close_bdev(&ctx); return ret ?: nbytes; } From fd58342fed5eca7e41f8f612fac547c83535e0ff Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:47 -0700 Subject: [PATCH 05/12] block/cgroup: Improve lock context annotations Add lock context annotations where these are missing. Move the blkg_conf_prep() annotation into block/blk-cgroup.h to make it visible to all blkg_conf_prep() callers. 
Signed-off-by: Bart Van Assche --- block/blk-cgroup.c | 1 - block/blk-cgroup.h | 15 ++++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 38d7bcfcbbe8..86513c54c217 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -862,7 +862,6 @@ unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) */ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx) - __acquires(&bdev->bd_queue->queue_lock) { struct gendisk *disk; struct request_queue *q; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index ce90f5b60d52..f0a3af520c55 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -218,14 +218,19 @@ struct blkg_conf_ctx { }; void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input); -int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx); +int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) + __cond_acquires(0, &ctx->bdev->bd_queue->rq_qos_mutex); unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, - struct blkg_conf_ctx *ctx); -void blkg_conf_unprep(struct blkg_conf_ctx *ctx); -void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx); + struct blkg_conf_ctx *ctx) + __cond_acquires(0, &ctx->bdev->bd_disk->queue->queue_lock); +void blkg_conf_unprep(struct blkg_conf_ctx *ctx) + __releases(ctx->bdev->bd_disk->queue->queue_lock); +void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx) + __releases(&ctx->bdev->bd_queue->rq_qos_mutex); void blkg_conf_close_bdev_frozen(struct blkg_conf_ctx *ctx, - unsigned long memflags); + unsigned long memflags) + __releases(&ctx->bdev->bd_queue->rq_qos_mutex); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg From 10d3c256fba1e75908d15e5f89710eef34b3bbbb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:48 -0700 Subject: [PATCH 06/12] block/cgroup: Inline 
blkg_conf_{open,close}_bdev_frozen() The blkg_conf_open_bdev_frozen() calling convention is not compatible with lock context annotations. Inline both blkg_conf_open_bdev_frozen() and blkg_conf_close_bdev_frozen() because these functions only have a single caller. This patch prepares for enabling lock context analysis. The type of 'memflags' has been changed from unsigned long into unsigned int to match the type of current->flags. Cc: Tejun Heo Signed-off-by: Bart Van Assche --- block/blk-cgroup.c | 46 ---------------------------------------------- block/blk-cgroup.h | 4 ---- block/blk-iocost.c | 29 +++++++++++++++++++++------ 3 files changed, 23 insertions(+), 56 deletions(-) diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 86513c54c217..de0f753b8fe5 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -812,39 +812,6 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) } EXPORT_SYMBOL_GPL(blkg_conf_open_bdev); -/* - * Similar to blkg_conf_open_bdev, but additionally freezes the queue, - * ensures the correct locking order between freeze queue and q->rq_qos_mutex. - * - * This function returns negative error on failure. On success it returns - * memflags which must be saved and later passed to - * blkg_conf_close_bdev_frozen() for restoring the memalloc scope. - */ -unsigned long __must_check blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx) -{ - int ret; - unsigned long memflags; - - if (ctx->bdev) - return -EINVAL; - - ret = blkg_conf_open_bdev(ctx); - if (ret < 0) - return ret; - /* - * At this point, we haven’t started protecting anything related to QoS, - * so we release q->rq_qos_mutex here, which was first acquired in blkg_ - * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing - * the queue to maintain the correct locking order. 
- */ - mutex_unlock(&ctx->bdev->bd_queue->rq_qos_mutex); - - memflags = blk_mq_freeze_queue(ctx->bdev->bd_queue); - mutex_lock(&ctx->bdev->bd_queue->rq_qos_mutex); - - return memflags; -} - /** * blkg_conf_prep - parse and prepare for per-blkg config update * @blkcg: target block cgroup @@ -991,19 +958,6 @@ void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx) } EXPORT_SYMBOL_GPL(blkg_conf_close_bdev); -/* - * Similar to blkg_close_bdev, but also unfreezes the queue. Should be used - * when blkg_conf_open_bdev_frozen is used to open the bdev. - */ -void blkg_conf_close_bdev_frozen(struct blkg_conf_ctx *ctx, - unsigned long memflags) -{ - struct request_queue *q = ctx->bdev->bd_queue; - - blkg_conf_close_bdev(ctx); - blk_mq_unfreeze_queue(q, memflags); -} - static void blkg_iostat_add(struct blkg_iostat *dst, struct blkg_iostat *src) { int i; diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index f0a3af520c55..f25fecb87c43 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h @@ -220,7 +220,6 @@ struct blkg_conf_ctx { void blkg_conf_init(struct blkg_conf_ctx *ctx, char *input); int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx) __cond_acquires(0, &ctx->bdev->bd_queue->rq_qos_mutex); -unsigned long blkg_conf_open_bdev_frozen(struct blkg_conf_ctx *ctx); int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, struct blkg_conf_ctx *ctx) __cond_acquires(0, &ctx->bdev->bd_disk->queue->queue_lock); @@ -228,9 +227,6 @@ void blkg_conf_unprep(struct blkg_conf_ctx *ctx) __releases(ctx->bdev->bd_disk->queue->queue_lock); void blkg_conf_close_bdev(struct blkg_conf_ctx *ctx) __releases(&ctx->bdev->bd_queue->rq_qos_mutex); -void blkg_conf_close_bdev_frozen(struct blkg_conf_ctx *ctx, - unsigned long memflags) - __releases(&ctx->bdev->bd_queue->rq_qos_mutex); /** * bio_issue_as_root_blkg - see if this bio needs to be issued as root blkg diff --git a/block/blk-iocost.c b/block/blk-iocost.c index e611dd63d712..353c165c5cd4 100644 --- a/block/blk-iocost.c +++ 
b/block/blk-iocost.c @@ -3233,19 +3233,30 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, size_t nbytes, loff_t off) { struct blkg_conf_ctx ctx; + struct request_queue *q; struct gendisk *disk; struct ioc *ioc; u32 qos[NR_QOS_PARAMS]; bool enable, user; char *body, *p; - unsigned long memflags; + unsigned int memflags; int ret; blkg_conf_init(&ctx, input); - memflags = blkg_conf_open_bdev_frozen(&ctx); - if (IS_ERR_VALUE(memflags)) - return memflags; + ret = blkg_conf_open_bdev(&ctx); + if (ret) + return ret; + /* + * At this point, we haven’t started protecting anything related to QoS, + * so we release q->rq_qos_mutex here, which was first acquired in blkg_ + * conf_open_bdev. Later, we re-acquire q->rq_qos_mutex after freezing + * the queue to maintain the correct locking order. + */ + mutex_unlock(&ctx.bdev->bd_queue->rq_qos_mutex); + + memflags = blk_mq_freeze_queue(ctx.bdev->bd_queue); + mutex_lock(&ctx.bdev->bd_queue->rq_qos_mutex); body = ctx.body; disk = ctx.bdev->bd_disk; @@ -3362,14 +3373,20 @@ static ssize_t ioc_qos_write(struct kernfs_open_file *of, char *input, blk_mq_unquiesce_queue(disk->queue); - blkg_conf_close_bdev_frozen(&ctx, memflags); + q = ctx.bdev->bd_queue; + blkg_conf_close_bdev(&ctx); + blk_mq_unfreeze_queue(q, memflags); + return nbytes; + einval: spin_unlock_irq(&ioc->lock); blk_mq_unquiesce_queue(disk->queue); ret = -EINVAL; err: - blkg_conf_close_bdev_frozen(&ctx, memflags); + q = ctx.bdev->bd_queue; + blkg_conf_close_bdev(&ctx); + blk_mq_unfreeze_queue(q, memflags); return ret; } From a226a13217d0c7d71e4906fb19eada94c6b16964 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:49 -0700 Subject: [PATCH 07/12] block/crypto: Annotate the crypto functions Add the lock context annotations required for Clang's thread-safety analysis. 
Reviewed-by: Christoph Hellwig Cc: Eric Biggers Signed-off-by: Bart Van Assche --- block/blk-crypto-profile.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-crypto-profile.c b/block/blk-crypto-profile.c index 4ac74443687a..cf447ba4a66e 100644 --- a/block/blk-crypto-profile.c +++ b/block/blk-crypto-profile.c @@ -43,6 +43,7 @@ struct blk_crypto_keyslot { }; static inline void blk_crypto_hw_enter(struct blk_crypto_profile *profile) + __acquires(&profile->lock) { /* * Calling into the driver requires profile->lock held and the device @@ -55,6 +56,7 @@ static inline void blk_crypto_hw_enter(struct blk_crypto_profile *profile) } static inline void blk_crypto_hw_exit(struct blk_crypto_profile *profile) + __releases(&profile->lock) { up_write(&profile->lock); if (profile->dev) From 2679c4b10b1aeb58980f3e246dc997a8a6d96862 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:50 -0700 Subject: [PATCH 08/12] block/blk-iocost: Add lock context annotations Since iocg_lock() and iocg_unlock() both use conditional locking, annotate both with __context_unsafe() and use token_context_lock() to introduce a new lock context. 
Signed-off-by: Bart Van Assche --- block/blk-iocost.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/blk-iocost.c b/block/blk-iocost.c index 353c165c5cd4..3bb8ce50af42 100644 --- a/block/blk-iocost.c +++ b/block/blk-iocost.c @@ -727,7 +727,11 @@ static void iocg_commit_bio(struct ioc_gq *iocg, struct bio *bio, put_cpu_ptr(gcs); } +token_context_lock(ioc_lock); + static void iocg_lock(struct ioc_gq *iocg, bool lock_ioc, unsigned long *flags) + __acquires(ioc_lock) + __context_unsafe(conditional locking) { if (lock_ioc) { spin_lock_irqsave(&iocg->ioc->lock, *flags); @@ -738,6 +742,8 @@ static void iocg_lock(struct ioc_gq *iocg, bool lock_ioc, unsigned long *flags) } static void iocg_unlock(struct ioc_gq *iocg, bool unlock_ioc, unsigned long *flags) + __releases(ioc_lock) + __context_unsafe(conditional locking) { if (unlock_ioc) { spin_unlock(&iocg->waitq.lock); From 08becb40b21769e758d6f60a60ddc24e754588dc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:51 -0700 Subject: [PATCH 09/12] block/blk-mq-debugfs: Improve lock context annotations Make the existing lock context annotations compatible with Clang. Add the lock context annotations that are missing. 
Signed-off-by: Bart Van Assche --- block/blk-mq-debugfs.c | 12 ++++++------ block/blk.h | 4 ++++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 047ec887456b..5c168e82273e 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -20,7 +20,7 @@ static int queue_poll_stat_show(void *data, struct seq_file *m) } static void *queue_requeue_list_start(struct seq_file *m, loff_t *pos) - __acquires(&q->requeue_lock) + __acquires(&((struct request_queue *)m->private)->requeue_lock) { struct request_queue *q = m->private; @@ -36,7 +36,7 @@ static void *queue_requeue_list_next(struct seq_file *m, void *v, loff_t *pos) } static void queue_requeue_list_stop(struct seq_file *m, void *v) - __releases(&q->requeue_lock) + __releases(&((struct request_queue *)m->private)->requeue_lock) { struct request_queue *q = m->private; @@ -298,7 +298,7 @@ int blk_mq_debugfs_rq_show(struct seq_file *m, void *v) EXPORT_SYMBOL_GPL(blk_mq_debugfs_rq_show); static void *hctx_dispatch_start(struct seq_file *m, loff_t *pos) - __acquires(&hctx->lock) + __acquires(&((struct blk_mq_hw_ctx *)m->private)->lock) { struct blk_mq_hw_ctx *hctx = m->private; @@ -314,7 +314,7 @@ static void *hctx_dispatch_next(struct seq_file *m, void *v, loff_t *pos) } static void hctx_dispatch_stop(struct seq_file *m, void *v) - __releases(&hctx->lock) + __releases(&((struct blk_mq_hw_ctx *)m->private)->lock) { struct blk_mq_hw_ctx *hctx = m->private; @@ -486,7 +486,7 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m) #define CTX_RQ_SEQ_OPS(name, type) \ static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \ - __acquires(&ctx->lock) \ + __acquires(&((struct blk_mq_ctx *)m->private)->lock) \ { \ struct blk_mq_ctx *ctx = m->private; \ \ @@ -503,7 +503,7 @@ static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v, \ } \ \ static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v) \ - 
__releases(&ctx->lock) \ + __releases(&((struct blk_mq_ctx *)m->private)->lock) \ { \ struct blk_mq_ctx *ctx = m->private; \ \ diff --git a/block/blk.h b/block/blk.h index b998a7761faf..70b794e89a67 100644 --- a/block/blk.h +++ b/block/blk.h @@ -754,16 +754,19 @@ static inline void blk_unfreeze_release_lock(struct request_queue *q) * reclaim from triggering block I/O. */ static inline void blk_debugfs_lock_nomemsave(struct request_queue *q) + __acquires(&q->debugfs_mutex) { mutex_lock(&q->debugfs_mutex); } static inline void blk_debugfs_unlock_nomemrestore(struct request_queue *q) + __releases(&q->debugfs_mutex) { mutex_unlock(&q->debugfs_mutex); } static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q) + __acquires(&q->debugfs_mutex) { unsigned int memflags = memalloc_noio_save(); @@ -773,6 +776,7 @@ static inline unsigned int __must_check blk_debugfs_lock(struct request_queue *q static inline void blk_debugfs_unlock(struct request_queue *q, unsigned int memflags) + __releases(&q->debugfs_mutex) { blk_debugfs_unlock_nomemrestore(q); memalloc_noio_restore(memflags); From 14e37ea03663795e27fe5cddd64d937813a22d52 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:52 -0700 Subject: [PATCH 10/12] block/kyber: Make the lock context annotations compatible with Clang While sparse ignores the __acquires() and __releases() arguments, Clang verifies these. Make the arguments of __acquires() and __releases() acceptable for Clang. 
Signed-off-by: Bart Van Assche --- block/kyber-iosched.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c index b84163d1f851..971818bcdc9d 100644 --- a/block/kyber-iosched.c +++ b/block/kyber-iosched.c @@ -882,6 +882,9 @@ static const struct elv_fs_entry kyber_sched_attrs[] = { }; #undef KYBER_LAT_ATTR +#define HCTX_FROM_SEQ_FILE(m) ((struct blk_mq_hw_ctx *)(m)->private) +#define KYBER_HCTX_DATA(hctx) ((struct kyber_hctx_data *)(hctx)->sched_data) + #ifdef CONFIG_BLK_DEBUG_FS #define KYBER_DEBUGFS_DOMAIN_ATTRS(domain, name) \ static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \ @@ -894,7 +897,7 @@ static int kyber_##name##_tokens_show(void *data, struct seq_file *m) \ } \ \ static void *kyber_##name##_rqs_start(struct seq_file *m, loff_t *pos) \ - __acquires(&khd->lock) \ + __acquires(&KYBER_HCTX_DATA(HCTX_FROM_SEQ_FILE(m))->lock) \ { \ struct blk_mq_hw_ctx *hctx = m->private; \ struct kyber_hctx_data *khd = hctx->sched_data; \ @@ -913,7 +916,7 @@ static void *kyber_##name##_rqs_next(struct seq_file *m, void *v, \ } \ \ static void kyber_##name##_rqs_stop(struct seq_file *m, void *v) \ - __releases(&khd->lock) \ + __releases(&KYBER_HCTX_DATA(HCTX_FROM_SEQ_FILE(m))->lock) \ { \ struct blk_mq_hw_ctx *hctx = m->private; \ struct kyber_hctx_data *khd = hctx->sched_data; \ From cbd84000d8f470aaf1d7a1dabf6e772cc6d6b745 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 11 May 2026 09:30:53 -0700 Subject: [PATCH 11/12] block/mq-deadline: Make the lock context annotations compatible with Clang While sparse ignores the __acquires() and __releases() arguments, Clang verifies these. Make the arguments of __acquires() and __releases() acceptable for Clang. 
Signed-off-by: Bart Van Assche --- block/mq-deadline.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/block/mq-deadline.c b/block/mq-deadline.c index 95917a88976f..824bfc17b2c6 100644 --- a/block/mq-deadline.c +++ b/block/mq-deadline.c @@ -794,11 +794,15 @@ static const struct elv_fs_entry deadline_attrs[] = { __ATTR_NULL }; +#define RQ_FROM_SEQ_FILE(m) ((struct request_queue *)(m)->private) +#define DD_DATA_FROM_RQ(rq) \ + ((struct deadline_data *)(rq)->elevator->elevator_data) + #ifdef CONFIG_BLK_DEBUG_FS #define DEADLINE_DEBUGFS_DDIR_ATTRS(prio, data_dir, name) \ static void *deadline_##name##_fifo_start(struct seq_file *m, \ loff_t *pos) \ - __acquires(&dd->lock) \ + __acquires(&DD_DATA_FROM_RQ(RQ_FROM_SEQ_FILE(m))->lock) \ { \ struct request_queue *q = m->private; \ struct deadline_data *dd = q->elevator->elevator_data; \ @@ -819,7 +823,7 @@ static void *deadline_##name##_fifo_next(struct seq_file *m, void *v, \ } \ \ static void deadline_##name##_fifo_stop(struct seq_file *m, void *v) \ - __releases(&dd->lock) \ + __releases(&DD_DATA_FROM_RQ(RQ_FROM_SEQ_FILE(m))->lock) \ { \ struct request_queue *q = m->private; \ struct deadline_data *dd = q->elevator->elevator_data; \ @@ -921,7 +925,7 @@ static int dd_owned_by_driver_show(void *data, struct seq_file *m) } static void *deadline_dispatch_start(struct seq_file *m, loff_t *pos) - __acquires(&dd->lock) + __acquires(&DD_DATA_FROM_RQ(RQ_FROM_SEQ_FILE(m))->lock) { struct request_queue *q = m->private; struct deadline_data *dd = q->elevator->elevator_data; @@ -939,7 +943,7 @@ static void *deadline_dispatch_next(struct seq_file *m, void *v, loff_t *pos) } static void deadline_dispatch_stop(struct seq_file *m, void *v) - __releases(&dd->lock) + __releases(&DD_DATA_FROM_RQ(RQ_FROM_SEQ_FILE(m))->lock) { struct request_queue *q = m->private; struct deadline_data *dd = q->elevator->elevator_data; From 51958ccc6a52ce546814fe8468280f1d915a954d Mon Sep 17 00:00:00 2001 From: Bart Van Assche 
Date: Mon, 11 May 2026 09:30:54 -0700 Subject: [PATCH 12/12] block: Enable lock context analysis Now that all block/*.c files have been annotated, enable lock context analysis for all these source files. Signed-off-by: Bart Van Assche --- block/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/Makefile b/block/Makefile index 7dce2e44276c..54130faacc21 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,6 +3,8 @@ # Makefile for the kernel block layer # +CONTEXT_ANALYSIS := y + obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-merge.o blk-timeout.o blk-lib.o blk-mq.o \