From bf8d57c32ba31e905552a6885e06bc988d71ee9a Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 13 May 2026 11:51:51 -0700 Subject: [PATCH 1/3] block: clear BLK_FEAT_PCI_P2PDMA in blk_stack_limits() for non-supporting devices BLK_FEAT_NOWAIT and BLK_FEAT_POLL are cleared in blk_stack_limits() when an underlying device does not support them. Apply the same treatment to BLK_FEAT_PCI_P2PDMA: stacking drivers set it unconditionally and rely on the core to clear it whenever a non-supporting member device is stacked. Tested-by: Pranjal Shrivastava Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Nitesh Shetty Signed-off-by: Chaitanya Kulkarni --- block/blk-settings.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/block/blk-settings.c b/block/blk-settings.c index 78c83817b9d36..8274631290dbf 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -795,6 +795,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->features &= ~BLK_FEAT_NOWAIT; if (!(b->features & BLK_FEAT_POLL)) t->features &= ~BLK_FEAT_POLL; + if (!(b->features & BLK_FEAT_PCI_P2PDMA)) + t->features &= ~BLK_FEAT_PCI_P2PDMA; t->flags |= (b->flags & BLK_FLAG_MISALIGNED); From 81d41f49e6d7af68f2500a58f1e190f614c33090 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Wed, 13 May 2026 11:51:52 -0700 Subject: [PATCH 2/3] md: propagate BLK_FEAT_PCI_P2PDMA from member devices to RAID device MD RAID does not propagate BLK_FEAT_PCI_P2PDMA from member devices to the RAID device, preventing peer-to-peer DMA through the RAID layer even when all underlying devices support it. Enable BLK_FEAT_PCI_P2PDMA unconditionally in raid0, raid1 and raid10 personalities during queue limits setup. blk_stack_limits() clears it automatically if any member device lacks support, consistent with how BLK_FEAT_NOWAIT and BLK_FEAT_POLL are handled in the block core. Parity RAID personalities (raid4/5/6) are excluded because they require CPU access to data pages for parity computation, which is incompatible with P2P mappings. Tested with RAID0/1/10 arrays containing multiple NVMe devices with P2PDMA support, confirming that peer-to-peer transfers work correctly through the RAID layer. Tested-by: Pranjal Shrivastava Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Xiao Ni Signed-off-by: Kiran Kumar Modukuri Signed-off-by: Chaitanya Kulkarni --- drivers/md/raid0.c | 1 + drivers/md/raid1.c | 1 + drivers/md/raid10.c | 1 + 3 files changed, 3 insertions(+) diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c index 5e38a51e349ad..2cdaf7495d92e 100644 --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -392,6 +392,7 @@ static int raid0_set_limits(struct mddev *mddev) lim.io_opt = lim.io_min * mddev->raid_disks; lim.chunk_sectors = mddev->chunk_sectors; lim.features |= BLK_FEAT_ATOMIC_WRITES; + lim.features |= BLK_FEAT_PCI_P2PDMA; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) return err; diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 64d970e2ef50f..cc628a1be52c7 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -3208,6 +3208,7 @@ static int raid1_set_limits(struct mddev *mddev) lim.max_hw_wzeroes_unmap_sectors = 0; lim.logical_block_size = mddev->logical_block_size; lim.features |= BLK_FEAT_ATOMIC_WRITES; + lim.features |= BLK_FEAT_PCI_P2PDMA; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) return err; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 39085e7dd6d26..f905dc391b745 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -3941,6 +3941,7 @@ static int raid10_set_queue_limits(struct mddev *mddev) lim.chunk_sectors = mddev->chunk_sectors; lim.io_opt = lim.io_min * raid10_nr_stripes(conf); lim.features |= BLK_FEAT_ATOMIC_WRITES; + lim.features |= BLK_FEAT_PCI_P2PDMA; err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY); if (err) return err; From 19489440d60fd72463a79eb6a86ac43f63e67184 Mon Sep 17 00:00:00 2001 From: Kiran Kumar Modukuri Date: Wed, 13 May 2026 11:51:53 -0700 Subject: [PATCH 3/3] nvme-multipath: enable PCI P2PDMA for multipath devices NVMe multipath does not expose BLK_FEAT_PCI_P2PDMA on the head disk even when all underlying controllers support it. Set BLK_FEAT_PCI_P2PDMA unconditionally in nvme_mpath_alloc_disk() alongside the other features. nvme_update_ns_info_block() already calls queue_limits_stack_bdev() to stack each path's limits onto the head disk, which routes through blk_stack_limits(). The core now clears BLK_FEAT_PCI_P2PDMA automatically if any path (e.g., FC) does not support it, consistent with how BLK_FEAT_NOWAIT and BLK_FEAT_POLL are handled. Tested-by: Pranjal Shrivastava Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Nitesh Shetty Signed-off-by: Kiran Kumar Modukuri Signed-off-by: Chaitanya Kulkarni --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 263161cb8ac06..ff442bbf2937a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -730,7 +730,7 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) blk_set_stacking_limits(&lim); lim.dma_alignment = 3; lim.features |= BLK_FEAT_IO_STAT | BLK_FEAT_NOWAIT | - BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES; + BLK_FEAT_POLL | BLK_FEAT_ATOMIC_WRITES | BLK_FEAT_PCI_P2PDMA; if (head->ids.csi == NVME_CSI_ZNS) lim.features |= BLK_FEAT_ZONED;