Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions block/Kconfig.iosched
Original file line number Diff line number Diff line change
Expand Up @@ -44,4 +44,12 @@ config BFQ_CGROUP_DEBUG
Enable some debugging help. Currently it exports additional stat
files in a cgroup which can be useful for debugging.

config IOSCHED_UFQ
	tristate "UFQ I/O scheduler"
	# The struct_ops registration in ufq-bpfops.c needs the BPF syscall core.
	depends on BPF_SYSCALL
	# New optional schedulers should not be enabled by default.
	default n
	help
	  The UFQ I/O scheduler is a programmable I/O scheduler. When
	  enabled, an out-of-kernel I/O scheduler based on eBPF can be
	  designed to interact with it, leveraging its customizable
	  hooks to redefine I/O scheduling policies.
endmenu
1 change: 1 addition & 0 deletions block/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ obj-$(CONFIG_MQ_IOSCHED_DEADLINE) += mq-deadline.o
obj-$(CONFIG_MQ_IOSCHED_KYBER) += kyber-iosched.o
bfq-y := bfq-iosched.o bfq-wf2q.o bfq-cgroup.o
obj-$(CONFIG_IOSCHED_BFQ) += bfq.o
obj-$(CONFIG_IOSCHED_UFQ) += ufq-iosched.o ufq-bpfops.o ufq-kfunc.o

obj-$(CONFIG_BLK_DEV_INTEGRITY) += bio-integrity.o blk-integrity.o t10-pi.o \
bio-integrity-auto.o bio-integrity-fs.o
Expand Down
28 changes: 17 additions & 11 deletions block/blk-merge.c
Original file line number Diff line number Diff line change
Expand Up @@ -774,8 +774,8 @@ u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
* For non-mq, this has to be called with the request spinlock acquired.
* For mq with scheduling, the appropriate queue wide lock should be held.
*/
static struct request *attempt_merge(struct request_queue *q,
struct request *req, struct request *next)
static struct request *attempt_merge(struct request_queue *q, struct request *req,
struct request *next, bool nohash)
{
if (!rq_mergeable(req) || !rq_mergeable(next))
return NULL;
Expand Down Expand Up @@ -842,7 +842,7 @@ static struct request *attempt_merge(struct request_queue *q,

req->__data_len += blk_rq_bytes(next);

if (!blk_discard_mergable(req))
if (!nohash && !blk_discard_mergable(req))
elv_merge_requests(q, req, next);

blk_crypto_rq_put_keyslot(next);
Expand All @@ -868,7 +868,7 @@ static struct request *attempt_back_merge(struct request_queue *q,
struct request *next = elv_latter_request(q, rq);

if (next)
return attempt_merge(q, rq, next);
return attempt_merge(q, rq, next, false);

return NULL;
}
Expand All @@ -879,11 +879,17 @@ static struct request *attempt_front_merge(struct request_queue *q,
struct request *prev = elv_former_request(q, rq);

if (prev)
return attempt_merge(q, prev, rq);
return attempt_merge(q, prev, rq, false);

return NULL;
}

/*
 * Merge 'next' into 'rq' on behalf of a BPF scheduler.  Passes nohash=true
 * so attempt_merge() skips the elevator merge notification
 * (elv_merge_requests()); everything else follows the normal merge path.
 */
struct request *bpf_attempt_merge(struct request_queue *q, struct request *rq,
				  struct request *next)
{
	return attempt_merge(q, rq, next, true);
}

/*
* Try to merge 'next' into 'rq'. Return true if the merge happened, false
* otherwise. The caller is responsible for freeing 'next' if the merge
Expand All @@ -892,7 +898,7 @@ static struct request *attempt_front_merge(struct request_queue *q,
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
			   struct request *next)
{
	/* nohash=false: the elevator is notified via elv_merge_requests(). */
	return attempt_merge(q, rq, next, false);
}

bool blk_rq_merge_ok(struct request *rq, struct bio *bio)
Expand Down Expand Up @@ -1035,11 +1041,11 @@ static enum bio_merge_status bio_attempt_discard_merge(struct request_queue *q,
return BIO_MERGE_FAILED;
}

static enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
struct request *rq,
struct bio *bio,
unsigned int nr_segs,
bool sched_allow_merge)
enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
struct request *rq,
struct bio *bio,
unsigned int nr_segs,
bool sched_allow_merge)
{
if (!blk_rq_merge_ok(rq, bio))
return BIO_MERGE_NONE;
Expand Down
8 changes: 7 additions & 1 deletion block/blk-mq.c
Original file line number Diff line number Diff line change
Expand Up @@ -796,7 +796,7 @@ static void blk_mq_finish_request(struct request *rq)
}
}

static void __blk_mq_free_request(struct request *rq)
void __blk_mq_free_request(struct request *rq)
{
struct request_queue *q = rq->q;
struct blk_mq_ctx *ctx = rq->mq_ctx;
Expand Down Expand Up @@ -1844,6 +1844,12 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
if (list_empty(&ctx->rq_lists[type]))
sbitmap_clear_bit(sb, bitnr);
}

if (dispatch_data->rq) {
dispatch_data->rq->rq_flags |= RQF_STARTED;
if (hctx->queue->last_merge == dispatch_data->rq)
hctx->queue->last_merge = NULL;
}
spin_unlock(&ctx->lock);

return !dispatch_data->rq;
Expand Down
2 changes: 1 addition & 1 deletion block/blk-mq.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *start);
void blk_mq_put_rq_ref(struct request *rq);

void __blk_mq_free_request(struct request *rq);
/*
* Internal helpers for allocating/freeing the request map
*/
Expand Down
5 changes: 5 additions & 0 deletions block/blk.h
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,9 @@ enum bio_merge_status {

enum bio_merge_status bio_attempt_back_merge(struct request *req,
struct bio *bio, unsigned int nr_segs);
enum bio_merge_status blk_attempt_bio_merge(struct request_queue *q,
struct request *rq, struct bio *bio, unsigned int nr_segs,
bool sched_allow_merge);
bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
unsigned int nr_segs);
bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
Expand Down Expand Up @@ -465,6 +468,8 @@ static inline unsigned get_max_segment_size(const struct queue_limits *lim,

int ll_back_merge_fn(struct request *req, struct bio *bio,
unsigned int nr_segs);
struct request *bpf_attempt_merge(struct request_queue *q, struct request *rq,
struct request *next);
bool blk_attempt_req_merge(struct request_queue *q, struct request *rq,
struct request *next);
unsigned int blk_recalc_rq_segments(struct request *rq);
Expand Down
241 changes: 241 additions & 0 deletions block/ufq-bpfops.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,241 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2026 KylinSoft Corporation.
* Copyright (c) 2026 Kaitao Cheng <chengkaitao@kylinos.cn>
*/
#include <linux/init.h>
#include <linux/types.h>
#include <linux/bpf_verifier.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/string.h>
#include <linux/wait.h>
#include <linux/rcupdate.h>
#include "ufq-iosched.h"

/* Callback table published by the currently attached BPF struct_ops program. */
struct ufq_iosched_ops ufq_ops;
/* Non-zero while a BPF scheduler is attached and ufq_ops is valid. */
static atomic_t ufq_bpfops_enabled;
/* In-flight callback users; teardown waits for this to reach zero. */
static atomic_t ufq_bpfops_users;
/* Woken by the last user drop so a waiting disable can proceed. */
static DECLARE_WAIT_QUEUE_HEAD(ufq_bpfops_wq);

/*
 * Take a reference on the attached BPF callback table.
 *
 * Returns &ufq_ops with the user count raised, or NULL when no scheduler
 * is attached (or teardown has started).  A non-NULL return must be paired
 * with ufq_bpfops_put().
 */
const struct ufq_iosched_ops *ufq_bpfops_tryget(void)
{
	if (!atomic_read(&ufq_bpfops_enabled))
		return NULL;

	atomic_inc(&ufq_bpfops_users);
	/*
	 * Pairs with disable path flipping ufq_bpfops_enabled to make sure no
	 * callback runs after teardown starts.
	 */
	smp_mb__after_atomic();

	/* Re-check: disable may have run between the first check and the inc. */
	if (unlikely(!atomic_read(&ufq_bpfops_enabled))) {
		/* Back out; wake a waiting disable if we were the last user. */
		if (atomic_dec_and_test(&ufq_bpfops_users))
			wake_up_all(&ufq_bpfops_wq);
		return NULL;
	}

	return &ufq_ops;
}

void ufq_bpfops_put(void)
{
if (atomic_dec_and_test(&ufq_bpfops_users))
wake_up_all(&ufq_bpfops_wq);
}

/* UFQ programs get only the base (always-available) BPF helper set. */
static const struct bpf_func_proto *
bpf_ufq_get_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
{
	return bpf_base_func_proto(func_id, prog);
}

/*
 * Validate a BPF program's read of the struct_ops callback context.
 *
 * The context is an array of up to MAX_BPF_FUNC_ARGS __u64 slots, one per
 * callback argument, so only aligned reads inside that window are allowed.
 * Returns true when the access is permitted; may set info->reg_type.
 */
static bool bpf_ufq_is_valid_access(int off, int size,
				    enum bpf_access_type type,
				    const struct bpf_prog *prog,
				    struct bpf_insn_access_aux *info)
{
	const char *func;

	if (type != BPF_READ)
		return false;
	if (off < 0 || off >= sizeof(__u64) * MAX_BPF_FUNC_ARGS)
		return false;
	if (off % size != 0)
		return false;

	if (!btf_ctx_access(off, size, type, prog, info))
		return false;

	/*
	 * btf_ctx_access() treats pointers that are not "pointer to struct"
	 * as scalars (no reg_type), so loading pointers like merge_req()'s
	 * int *type (arg 2) or merge_bio()'s bool *merged (arg 3) from ctx
	 * leaves a SCALAR and stores through them fail verification.  Model
	 * both as writable buffers.
	 */
	func = prog->aux->attach_func_name;
	if (size == sizeof(__u64) && func &&
	    ((!strcmp(func, "merge_req") && off == 2 * sizeof(__u64)) ||
	     (!strcmp(func, "merge_bio") && off == 3 * sizeof(__u64))))
		info->reg_type = PTR_TO_BUF;

	return true;
}

/* Verifier hooks for programs attaching to ufq_iosched_ops. */
static const struct bpf_verifier_ops bpf_ufq_verifier_ops = {
	.get_func_proto = bpf_ufq_get_func_proto,
	.is_valid_access = bpf_ufq_is_valid_access,
};

/*
 * Copy one non-function member from the userspace struct_ops image into the
 * kernel copy.  Only "name" needs special handling; returning 1 tells the
 * core the member was consumed here, 0 lets generic init handle it.
 */
static int bpf_ufq_init_member(const struct btf_type *t,
			       const struct btf_member *member,
			       void *kdata, const void *udata)
{
	const struct ufq_iosched_ops *from = udata;
	struct ufq_iosched_ops *to = kdata;
	u32 offset = __btf_member_bit_offset(t, member) / 8;
	int len;

	if (offset == offsetof(struct ufq_iosched_ops, name)) {
		len = bpf_obj_name_cpy(to->name, from->name, sizeof(to->name));
		if (len < 0)
			return len;
		if (!len)
			return -EINVAL;	/* an empty name is not allowed */
		return 1;
	}
	/* other var adding .... */

	return 0;
}

/* No per-member attach restrictions: every callback may be implemented. */
static int bpf_ufq_check_member(const struct btf_type *t,
				const struct btf_member *member,
				const struct bpf_prog *prog)
{
	return 0;
}

/*
 * Publish the attached struct_ops callback table.
 *
 * Copy the table into the global ufq_ops first and only then flip
 * ufq_bpfops_enabled, so a reader that observes the enabled flag in
 * ufq_bpfops_tryget() sees a fully initialized ufq_ops.
 */
static int bpf_ufq_enable(void *ops)
{
	ufq_ops = *(struct ufq_iosched_ops *)ops;
	/*
	 * Order the ops copy before publishing the flag; atomic_set() alone
	 * provides no ordering.  Pairs with the barrier in tryget.
	 */
	smp_wmb();
	atomic_set(&ufq_bpfops_enabled, 1);
	return 0;
}

/*
 * Tear down the attached callback table: clear the enabled flag, wait for
 * every in-flight ufq_bpfops_tryget() user to drop its reference, then wipe
 * ufq_ops so no callback can run on a stale table.
 */
static void bpf_ufq_disable(struct ufq_iosched_ops *ops)
{
	atomic_set(&ufq_bpfops_enabled, 0);
	/*
	 * atomic_set() is not a barrier: make the cleared flag visible before
	 * sampling the user count, pairing with smp_mb__after_atomic() in
	 * ufq_bpfops_tryget().  A concurrent tryget either sees the flag and
	 * backs out, or its increment is observed by wait_event() below.
	 */
	smp_mb();
	wait_event(ufq_bpfops_wq, !atomic_read(&ufq_bpfops_users));
	memset(&ufq_ops, 0, sizeof(ufq_ops));
}

/*
 * struct_ops "reg" hook (attach): hand the new callback table to the
 * scheduler core, which publishes it through bpf_ufq_enable().
 */
static int bpf_ufq_reg(void *kdata, struct bpf_link *link)
{
	return ufq_prepare_bpf_attach(bpf_ufq_enable, kdata);
}

static void bpf_ufq_unreg(void *kdata, struct bpf_link *link)
{
bpf_ufq_disable(kdata);
ufq_kick_all_hw_queues();
}

static int bpf_ufq_init(struct btf *btf)
{
return 0;
}

/* struct_ops "update" hook: live replacement is deliberately rejected. */
static int bpf_ufq_update(void *kdata, void *old_kdata, struct bpf_link *link)
{
	/*
	 * UFQ does not support live-updating an already-attached BPF scheduler:
	 * partial failure during callback setup (e.g. init_sched) would be hard
	 * to reason about, and update can race with unregister/teardown.
	 */
	return -EOPNOTSUPP;
}

static int bpf_ufq_validate(void *kdata)
{
return 0;
}

/* CFI stub for ufq_iosched_ops::init_sched: reject use with -EPERM. */
static int init_sched_stub(struct request_queue *q)
{
	return -EPERM;
}

/* CFI stub for ufq_iosched_ops::exit_sched: reject use with -EPERM. */
static int exit_sched_stub(struct request_queue *q)
{
	return -EPERM;
}

/* CFI stub for ufq_iosched_ops::insert_req: no-op, report success. */
static int insert_req_stub(struct request_queue *q, struct request *rq,
			   blk_insert_t flags)
{
	return 0;
}

/* CFI stub for ufq_iosched_ops::dispatch_req: nothing to dispatch. */
static struct request *dispatch_req_stub(struct request_queue *q)
{
	return NULL;
}

/* CFI stub for ufq_iosched_ops::has_req: pending iff the count is positive. */
static bool has_req_stub(struct request_queue *q, int rqs_count)
{
	return rqs_count > 0;
}

/* CFI stub for ufq_iosched_ops::finish_req: no-op. */
static void finish_req_stub(struct request *rq)
{
}

/*
 * CFI stub for ufq_iosched_ops::merge_req: report "no merge candidate".
 * NULL-check the out parameter for robustness and for symmetry with
 * merge_bio_stub(), which already guards its out parameter.
 */
static struct request *merge_req_stub(struct request_queue *q, struct request *rq,
				      int *type)
{
	if (type)
		*type = ELEVATOR_NO_MERGE;
	return NULL;
}

/* CFI stub for ufq_iosched_ops::merge_bio: report that nothing was merged. */
static struct request *merge_bio_stub(struct request_queue *q, struct bio *bio,
				      unsigned int nr_segs, bool *merged)
{
	if (merged)
		*merged = false;

	return NULL;
}

/* CFI stub table: default targets for the struct_ops indirect calls. */
static struct ufq_iosched_ops __bpf_ops_ufq_ops = {
	.init_sched = init_sched_stub,
	.exit_sched = exit_sched_stub,
	.insert_req = insert_req_stub,
	.dispatch_req = dispatch_req_stub,
	.has_req = has_req_stub,
	.merge_req = merge_req_stub,
	.finish_req = finish_req_stub,
	.merge_bio = merge_bio_stub,
};

/* Registration descriptor for the "ufq_iosched_ops" struct_ops type. */
static struct bpf_struct_ops bpf_iosched_ufq_ops = {
	.verifier_ops = &bpf_ufq_verifier_ops,
	.reg = bpf_ufq_reg,
	.unreg = bpf_ufq_unreg,
	.check_member = bpf_ufq_check_member,
	.init_member = bpf_ufq_init_member,
	.init = bpf_ufq_init,
	.update = bpf_ufq_update,
	.validate = bpf_ufq_validate,
	.name = "ufq_iosched_ops",
	.owner = THIS_MODULE,
	.cfi_stubs = &__bpf_ops_ufq_ops
};

/* Register the ufq_iosched_ops struct_ops type with the BPF core. */
int bpf_ufq_ops_init(void)
{
	return register_bpf_struct_ops(&bpf_iosched_ufq_ops, ufq_iosched_ops);
}
Loading