From 5341a011d012b2b7572987c85f3b4c7c1357b69e Mon Sep 17 00:00:00 2001
From: Natalia Wochtman
Date: Fri, 19 Sep 2025 15:30:15 +0200
Subject: [PATCH] ixgbevf: Add pseudo header split

Introduce pseudo header split support in the ixgbevf driver,
targeting ixgbe_mac_82599_vf.

On this older hardware, the Rx DMA write size can only be limited in
1K increments. This causes issues when attempting to fit multiple
packets per page, as a DMA write may overwrite the headroom of the
next packet.

To address this, add pseudo header split support, where the driver
copies the full L2 header into a dedicated header buffer. This avoids
the need for HR/TR alignment and allows safe skb construction from
the header buffer without risking overwrites.

Signed-off-by: Natalia Wochtman
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf.h  |   6 +
 .../net/ethernet/intel/ixgbevf/ixgbevf_main.c | 150 ++++++++++++++++--
 2 files changed, 141 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
index 8b3c8c33f00bfa..f4f792da308d7c 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h
@@ -89,6 +89,7 @@ struct ixgbevf_ring {
 	dma_addr_t dma;			/* phys. address of descriptor ring */
 	unsigned int size;		/* length in bytes */
 	u32 truesize;			/* Rx buffer full size */
+	u32 hdr_truesize;		/* Rx header buffer full size */
 	u16 count;			/* amount of descriptors */
 	u16 next_to_use;
 	u16 next_to_clean;
@@ -104,6 +105,8 @@ struct ixgbevf_ring {
 		struct ixgbevf_tx_queue_stats tx_stats;
 		struct ixgbevf_rx_queue_stats rx_stats;
 	};
+	struct libeth_fqe *hdr_fqes;
+	struct page_pool *hdr_pp;
 	struct xdp_rxq_info xdp_rxq;
 	u64 hw_csum_rx_error;
 	u8 __iomem *tail;
@@ -113,6 +116,7 @@ struct ixgbevf_ring {
 	 */
 	u16 reg_idx;
 	int queue_index; /* needed for multiqueue queue management */
+	u32 hdr_buf_len;
 	u32 rx_buf_len;
 	struct libeth_xdp_buff_stash xdp_stash;
 } ____cacheline_internodealigned_in_smp;
@@ -147,6 +151,8 @@ struct ixgbevf_ring {
 #define IXGBEVF_RX_PAGE_LEN(hr)	(ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \
 					    IXGBE_SRRCTL_BSIZEPKT_STEP))
 
+#define IXGBEVF_FLAG_HSPLIT	BIT(0)
+
 #define IXGBE_TX_FLAGS_CSUM	BIT(0)
 #define IXGBE_TX_FLAGS_VLAN	BIT(1)
 #define IXGBE_TX_FLAGS_TSO	BIT(2)
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index a9c5d82f03d85b..b0a2fccc6b5c39 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -568,6 +568,12 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 		.truesize	= rx_ring->truesize,
 		.count		= rx_ring->count,
 	};
+	const struct libeth_fq_fp hdr_fq = {
+		.pp		= rx_ring->hdr_pp,
+		.fqes		= rx_ring->hdr_fqes,
+		.truesize	= rx_ring->hdr_truesize,
+		.count		= rx_ring->count,
+	};
 	u16 ntu = rx_ring->next_to_use;
 
 	/* nothing to do or no valid netdev defined */
@@ -584,6 +590,14 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring,
 
 		rx_desc->read.pkt_addr = cpu_to_le64(addr);
 
+		if (!hdr_fq.pp)
+			goto next;
+
+		addr = libeth_rx_alloc(&hdr_fq, ntu);
+		if (addr == DMA_MAPPING_ERROR)
+			return;
+
+next:
 		rx_desc++;
 		ntu++;
 		if (unlikely(ntu == fq.count)) {
@@ -781,6 +795,32 @@ static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter,
 	return result;
 }
 
+static u32 ixgbevf_rx_hsplit_wa(const struct libeth_fqe *hdr,
+				struct libeth_fqe *buf, u32 data_len)
+{
+	u32 copy = data_len <= L1_CACHE_BYTES ? data_len : ETH_HLEN;
+	struct page *hdr_page, *buf_page;
+	const void *src;
+	void *dst;
+
+	if (unlikely(netmem_is_net_iov(buf->netmem)) ||
+	    !libeth_rx_sync_for_cpu(buf, copy))
+		return 0;
+
+	hdr_page = __netmem_to_page(hdr->netmem);
+	buf_page = __netmem_to_page(buf->netmem);
+
+	dst = page_address(hdr_page) + hdr->offset +
+	      pp_page_to_nmdesc(hdr_page)->pp->p.offset;
+	src = page_address(buf_page) + buf->offset +
+	      pp_page_to_nmdesc(buf_page)->pp->p.offset;
+
+	memcpy(dst, src, LARGEST_ALIGN(copy));
+	buf->offset += copy;
+
+	return copy;
+}
+
 static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 				struct ixgbevf_ring *rx_ring,
 				int budget)
@@ -818,6 +858,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 		rmb();
 
 		rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean];
+
+		if (unlikely(rx_ring->hdr_pp)) {
+			struct libeth_fqe *hdr_buff;
+			unsigned int hdr_size = 0;
+
+			hdr_buff = &rx_ring->hdr_fqes[rx_ring->next_to_clean];
+
+			if (!xdp->data) {
+				hdr_size = ixgbevf_rx_hsplit_wa(hdr_buff,
+								rx_buffer,
+								size);
+				size -= hdr_size ? : size;
+			}
+
+			libeth_xdp_process_buff(xdp, hdr_buff, hdr_size);
+		}
+
 		libeth_xdp_process_buff(xdp, rx_buffer, size);
 		cleaned_count++;
@@ -3054,19 +3111,38 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter)
 	return err;
 }
 
-/**
- * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors)
- * @adapter: board private structure
- * @rx_ring: Rx descriptor ring (for a specific queue) to setup
- *
- * Returns 0 on success, negative on failure
- **/
-int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
-			       struct ixgbevf_ring *rx_ring)
+static void ixgbevf_rx_destroy_pp(struct ixgbevf_ring *rx_ring)
 {
+	struct libeth_fq fq = {
+		.pp	= rx_ring->pp,
+		.fqes	= rx_ring->rx_fqes,
+	};
+
+	libeth_rx_fq_destroy(&fq);
+	rx_ring->rx_fqes = NULL;
+	rx_ring->pp = NULL;
+
+	if (!rx_ring->hdr_pp)
+		return;
+
+	fq = (struct libeth_fq) {
+		.pp	= rx_ring->hdr_pp,
+		.fqes	= rx_ring->hdr_fqes,
+	};
+
+	libeth_rx_fq_destroy(&fq);
+	rx_ring->hdr_fqes = NULL;
+	rx_ring->hdr_pp = NULL;
+}
+
+static int ixgbevf_rx_create_pp(struct ixgbevf_ring *rx_ring)
+{
+	u32 adapter_flags = rx_ring->q_vector->adapter->flags;
+
 	struct libeth_fq fq = {
 		.count		= rx_ring->count,
 		.nid		= NUMA_NO_NODE,
+		.hsplit		= adapter_flags & IXGBEVF_FLAG_HSPLIT,
 		.type		= LIBETH_FQE_MTU,
 		.xdp		= !!rx_ring->xdp_prog,
 		.buf_len	= IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ?
@@ -3084,13 +3160,55 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
 	rx_ring->truesize = fq.truesize;
 	rx_ring->rx_buf_len = fq.buf_len;
 
+	if (!fq.hsplit)
+		return 0;
+
+	fq = (struct libeth_fq) {
+		.count		= rx_ring->count,
+		.nid		= NUMA_NO_NODE,
+		.type		= LIBETH_FQE_HDR,
+		.xdp		= !!rx_ring->xdp_prog,
+	};
+
+	ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi);
+	if (ret)
+		goto err;
+
+	rx_ring->hdr_pp = fq.pp;
+	rx_ring->hdr_fqes = fq.fqes;
+	rx_ring->hdr_truesize = fq.truesize;
+	rx_ring->hdr_buf_len = fq.buf_len;
+
+	return 0;
+
+err:
+	ixgbevf_rx_destroy_pp(rx_ring);
+	return ret;
+}
+
+/**
+ * ixgbevf_setup_rx_resources - allocate Rx resources
+ * @adapter: board private structure
+ * @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns: 0 on success, negative on failure.
+ **/
+int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
+			       struct ixgbevf_ring *rx_ring)
+{
+	int ret;
+
+	ret = ixgbevf_rx_create_pp(rx_ring);
+	if (ret)
+		return ret;
+
 	u64_stats_init(&rx_ring->syncp);
 
 	/* Round up to nearest 4K */
 	rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc);
 	rx_ring->size = ALIGN(rx_ring->size, 4096);
 
-	rx_ring->desc = dma_alloc_coherent(fq.pp->p.dev, rx_ring->size,
+	rx_ring->desc = dma_alloc_coherent(rx_ring->pp->p.dev, rx_ring->size,
 					   &rx_ring->dma, GFP_KERNEL);
 
 	if (!rx_ring->desc)
@@ -3098,20 +3216,19 @@ int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter,
 
 	/* XDP RX-queue info */
 	ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
-				 rx_ring->queue_index, 0, fq.buf_len);
+				 rx_ring->queue_index, 0, rx_ring->rx_buf_len);
 	if (ret)
 		goto err;
 
-	xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp);
+	xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, rx_ring->pp);
 	rx_ring->xdp_prog = adapter->xdp_prog;
 
 	return 0;
 err:
-	libeth_rx_fq_destroy(&fq);
-	rx_ring->rx_fqes = NULL;
-	rx_ring->pp = NULL;
+	ixgbevf_rx_destroy_pp(rx_ring);
 	dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n");
+
 	return ret;
 }
 
@@ -4222,6 +4339,9 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	netdev->priv_flags |= IFF_UNICAST_FLT;
 	netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_RX_SG;
 
+	if (adapter->hw.mac.type == ixgbe_mac_82599_vf)
+		adapter->flags |= IXGBEVF_FLAG_HSPLIT;
+
 	/* MTU range: 68 - 1504 or 9710 */
 	netdev->min_mtu = ETH_MIN_MTU;
 	switch (adapter->hw.api_version) {
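
For readers unfamiliar with the constraint the commit message describes, below is a small standalone sketch (plain userspace C, not part of the patch). The page split, headroom, and step values are illustrative assumptions rather than values taken from the driver; it only shows the arithmetic of why capping the Rx DMA write size in 1K steps (IXGBE_SRRCTL_BSIZEPKT_STEP granularity) can let a full-size write run into the next buffer's headroom, which is what the pseudo header split renders harmless.

	/* Standalone illustration (userspace C, not driver code): why a 1K
	 * Rx DMA size granularity breaks multiple-buffers-per-page layouts.
	 * The page/headroom numbers below are illustrative assumptions.
	 */
	#include <stdio.h>

	#define PAGE_SZ		4096u
	#define BSIZEPKT_STEP	1024u	/* Rx DMA size limit granularity */
	#define ALIGN_DOWN(x, a)	((x) / (a) * (a))

	int main(void)
	{
		unsigned int truesize = PAGE_SZ / 2;	/* two buffers per page */
		unsigned int headroom = 192;		/* assumed per-buffer headroom */
		unsigned int usable = truesize - headroom;
		unsigned int end;

		/* The largest 1K-aligned cap that still fits the buffer wastes
		 * most of the usable space...
		 */
		printf("usable bytes per buffer: %u\n", usable);	/* 1856 */
		printf("largest 1K cap that fits: %u\n",
		       ALIGN_DOWN(usable, BSIZEPKT_STEP));	/* 1024 */

		/* ...while the next step up (2048) lets a max-size DMA write,
		 * starting right after buffer 0's headroom, end 192 bytes into
		 * buffer 1 -- i.e. inside its headroom. With the pseudo header
		 * split, the headers the skb is built from live in a separate
		 * buffer, so such an overrun no longer corrupts anything the
		 * driver relies on.
		 */
		end = headroom + 2 * BSIZEPKT_STEP;
		printf("write end %u vs buffer end %u: overrun %u\n",
		       end, truesize, end - truesize);

		return 0;
	}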