diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig
index b05cc0d7a15dcc..a1ee0a3ccd3839 100644
--- a/drivers/net/ethernet/intel/Kconfig
+++ b/drivers/net/ethernet/intel/Kconfig
@@ -202,6 +202,7 @@ config IXGBE_IPSEC
 config IXGBEVF
 	tristate "Intel(R) 10GbE PCI Express Virtual Function Ethernet support"
 	depends on PCI_MSI
+	select LIBETH_XDP
 	help
 	  This driver supports Intel(R) PCI Express virtual functions for the
 	  Intel(R) ixgbe driver. For more information on how to identify your
diff --git a/drivers/net/ethernet/intel/ixgbevf/Makefile b/drivers/net/ethernet/intel/ixgbevf/Makefile
index 01d3e892f3fa7b..cdae62f25fd926 100644
--- a/drivers/net/ethernet/intel/ixgbevf/Makefile
+++ b/drivers/net/ethernet/intel/ixgbevf/Makefile
@@ -6,5 +6,5 @@
 
 obj-$(CONFIG_IXGBEVF) += ixgbevf.o
 
-ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o
+ixgbevf-y := vf.o mbx.o ethtool.o ixgbevf_main.o ixgbevf_xsk.o
 ixgbevf-$(CONFIG_IXGBEVF_IPSEC) += ipsec.o
diff --git a/drivers/net/ethernet/intel/ixgbevf/defines.h b/drivers/net/ethernet/intel/ixgbevf/defines.h
index a9bc96f6399dc0..4a656f72db0d78 100644
--- a/drivers/net/ethernet/intel/ixgbevf/defines.h
+++ b/drivers/net/ethernet/intel/ixgbevf/defines.h
@@ -70,7 +70,7 @@ typedef u32 ixgbe_link_speed;
 #define IXGBE_PSRTYPE_L2HDR	0x00001000
 
 /* SRRCTL bit definitions */
-#define IXGBE_SRRCTL_BSIZEPKT_SHIFT	10	/* so many KBs */
+#define IXGBE_SRRCTL_BSIZEPKT_STEP	1024	/* specified in KB */
 #define IXGBE_SRRCTL_RDMTS_SHIFT	22
 #define IXGBE_SRRCTL_RDMTS_MASK		0x01C00000
 #define IXGBE_SRRCTL_DROP_EN		0x10000000
diff --git a/drivers/net/ethernet/intel/ixgbevf/ethtool.c b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
index 7ac53171b0410c..a8404cdff27725 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ethtool.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ethtool.c
@@ -72,13 +72,6 @@ static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = {
 
 #define IXGBEVF_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN)
 
-static const char ixgbevf_priv_flags_strings[][ETH_GSTRING_LEN] = {
-#define IXGBEVF_PRIV_FLAGS_LEGACY_RX	BIT(0)
-	"legacy-rx",
-};
-
-#define IXGBEVF_PRIV_FLAGS_STR_LEN ARRAY_SIZE(ixgbevf_priv_flags_strings)
-
 static int ixgbevf_get_link_ksettings(struct net_device *netdev,
				       struct ethtool_link_ksettings *cmd)
 {
@@ -216,8 +209,6 @@ static void ixgbevf_get_drvinfo(struct net_device *netdev,
 	strscpy(drvinfo->driver, ixgbevf_driver_name, sizeof(drvinfo->driver));
 	strscpy(drvinfo->bus_info, pci_name(adapter->pdev),
 		sizeof(drvinfo->bus_info));
-
-	drvinfo->n_priv_flags = IXGBEVF_PRIV_FLAGS_STR_LEN;
 }
 
 static void ixgbevf_get_ringparam(struct net_device *netdev,
@@ -409,8 +400,6 @@ static int ixgbevf_get_sset_count(struct net_device *netdev, int stringset)
 		return IXGBEVF_TEST_LEN;
 	case ETH_SS_STATS:
 		return IXGBEVF_STATS_LEN;
-	case ETH_SS_PRIV_FLAGS:
-		return IXGBEVF_PRIV_FLAGS_STR_LEN;
 	default:
 		return -EINVAL;
 	}
@@ -538,10 +527,6 @@ static void ixgbevf_get_strings(struct net_device *netdev, u32 stringset,
 			p += ETH_GSTRING_LEN;
 		}
 		break;
-	case ETH_SS_PRIV_FLAGS:
-		memcpy(data, ixgbevf_priv_flags_strings,
-		       IXGBEVF_PRIV_FLAGS_STR_LEN * ETH_GSTRING_LEN);
-		break;
 	}
 }
 
@@ -939,37 +924,6 @@ static int ixgbevf_get_rxfh(struct net_device *netdev,
 	return err;
 }
 
-static u32 ixgbevf_get_priv_flags(struct net_device *netdev)
-{
-	struct ixgbevf_adapter *adapter = netdev_priv(netdev);
-	u32 priv_flags = 0;
-
-	if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX)
-		priv_flags |= IXGBEVF_PRIV_FLAGS_LEGACY_RX;
-
-	return priv_flags;
-}
-
-static int
ixgbevf_set_priv_flags(struct net_device *netdev, u32 priv_flags) -{ - struct ixgbevf_adapter *adapter = netdev_priv(netdev); - unsigned int flags = adapter->flags; - - flags &= ~IXGBEVF_FLAGS_LEGACY_RX; - if (priv_flags & IXGBEVF_PRIV_FLAGS_LEGACY_RX) - flags |= IXGBEVF_FLAGS_LEGACY_RX; - - if (flags != adapter->flags) { - adapter->flags = flags; - - /* reset interface to repopulate queues */ - if (netif_running(netdev)) - ixgbevf_reinit_locked(adapter); - } - - return 0; -} - static const struct ethtool_ops ixgbevf_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS, .get_drvinfo = ixgbevf_get_drvinfo, @@ -992,8 +946,6 @@ static const struct ethtool_ops ixgbevf_ethtool_ops = { .get_rxfh_key_size = ixgbevf_get_rxfh_key_size, .get_rxfh = ixgbevf_get_rxfh, .get_link_ksettings = ixgbevf_get_link_ksettings, - .get_priv_flags = ixgbevf_get_priv_flags, - .set_priv_flags = ixgbevf_set_priv_flags, }; void ixgbevf_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h index 3a379e6a3a2ab2..d4d5d622082987 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include "vf.h" @@ -42,17 +43,6 @@ struct ixgbevf_tx_buffer { u32 tx_flags; }; -struct ixgbevf_rx_buffer { - dma_addr_t dma; - struct page *page; -#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536) - __u32 page_offset; -#else - __u16 page_offset; -#endif - __u16 pagecnt_bias; -}; - struct ixgbevf_stats { u64 packets; u64 bytes; @@ -73,11 +63,11 @@ struct ixgbevf_rx_queue_stats { enum ixgbevf_ring_state_t { __IXGBEVF_RX_3K_BUFFER, - __IXGBEVF_RX_BUILD_SKB_ENABLED, __IXGBEVF_TX_DETECT_HANG, __IXGBEVF_HANG_CHECK_ARMED, __IXGBEVF_TX_XDP_RING, __IXGBEVF_TX_XDP_RING_PRIMED, + __IXGBEVF_RXTX_XSK_RING, }; #define ring_is_xdp(ring) \ @@ -87,24 +77,38 @@ enum ixgbevf_ring_state_t { #define clear_ring_xdp(ring) \ clear_bit(__IXGBEVF_TX_XDP_RING, &(ring)->state) +#define ring_is_xsk(ring) \ + test_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define set_ring_xsk(ring) \ + set_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) +#define clear_ring_xsk(ring) \ + clear_bit(__IXGBEVF_RXTX_XSK_RING, &(ring)->state) + struct ixgbevf_ring { struct ixgbevf_ring *next; struct ixgbevf_q_vector *q_vector; /* backpointer to q_vector */ struct net_device *netdev; struct bpf_prog *xdp_prog; - struct device *dev; + union { + struct page_pool *pp; /* Rx and XDP rings */ + struct device *dev; /* Tx ring */ + }; void *desc; /* descriptor ring memory */ - dma_addr_t dma; /* phys. 
address of descriptor ring */ - unsigned int size; /* length in bytes */ + u32 truesize; /* Rx buffer full size */ u16 count; /* amount of descriptors */ - u16 next_to_use; u16 next_to_clean; - u16 next_to_alloc; + u32 next_to_use; + u32 pending; /* Sent-not-completed descriptors */ union { struct ixgbevf_tx_buffer *tx_buffer_info; - struct ixgbevf_rx_buffer *rx_buffer_info; + struct libeth_sqe *xdp_sqes; + struct libeth_fqe *rx_fqes; + struct libeth_xdp_buff **xsk_fqes; }; + struct libeth_xdpsq_lock xdpq_lock; + u32 cached_ntu; + u32 thresh; unsigned long state; struct ixgbevf_stats stats; struct u64_stats_sync syncp; @@ -115,13 +119,18 @@ struct ixgbevf_ring { struct xdp_rxq_info xdp_rxq; u64 hw_csum_rx_error; u8 __iomem *tail; - struct sk_buff *skb; /* holds the special value that gets the hardware register offset * associated with this ring, which is different for DCB and RSS modes */ u16 reg_idx; int queue_index; /* needed for multiqueue queue management */ + u32 rx_buf_len; + struct libeth_xdp_buff_stash xdp_stash; + struct libeth_xdp_buff *xsk_xdp_head; + unsigned int dma_size; /* length in bytes */ + dma_addr_t dma; /* phys. address of descriptor ring */ + struct xsk_buff_pool *xsk_pool; /* AF_XDP ZC rings */ } ____cacheline_internodealigned_in_smp; /* How many Rx Buffers do we bundle into one write to the hardware ? */ @@ -145,20 +154,14 @@ struct ixgbevf_ring { /* Supported Rx Buffer Sizes */ #define IXGBEVF_RXBUFFER_256 256 /* Used for packet split */ -#define IXGBEVF_RXBUFFER_2048 2048 #define IXGBEVF_RXBUFFER_3072 3072 #define IXGBEVF_RX_HDR_SIZE IXGBEVF_RXBUFFER_256 #define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) -#define IXGBEVF_SKB_PAD (NET_SKB_PAD + NET_IP_ALIGN) -#if (PAGE_SIZE < 8192) -#define IXGBEVF_MAX_FRAME_BUILD_SKB \ - (SKB_WITH_OVERHEAD(IXGBEVF_RXBUFFER_2048) - IXGBEVF_SKB_PAD) -#else -#define IXGBEVF_MAX_FRAME_BUILD_SKB IXGBEVF_RXBUFFER_2048 -#endif +#define IXGBEVF_RX_PAGE_LEN(hr) (ALIGN_DOWN(LIBETH_RX_PAGE_LEN(hr), \ + IXGBE_SRRCTL_BSIZEPKT_STEP)) #define IXGBE_TX_FLAGS_CSUM BIT(0) #define IXGBE_TX_FLAGS_VLAN BIT(1) @@ -169,43 +172,6 @@ struct ixgbevf_ring { #define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0x0000e000 #define IXGBE_TX_FLAGS_VLAN_SHIFT 16 -#define ring_uses_large_buffer(ring) \ - test_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define set_ring_uses_large_buffer(ring) \ - set_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) -#define clear_ring_uses_large_buffer(ring) \ - clear_bit(__IXGBEVF_RX_3K_BUFFER, &(ring)->state) - -#define ring_uses_build_skb(ring) \ - test_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define set_ring_build_skb_enabled(ring) \ - set_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) -#define clear_ring_build_skb_enabled(ring) \ - clear_bit(__IXGBEVF_RX_BUILD_SKB_ENABLED, &(ring)->state) - -static inline unsigned int ixgbevf_rx_bufsz(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return IXGBEVF_RXBUFFER_3072; - - if (ring_uses_build_skb(ring)) - return IXGBEVF_MAX_FRAME_BUILD_SKB; -#endif - return IXGBEVF_RXBUFFER_2048; -} - -static inline unsigned int ixgbevf_rx_pg_order(struct ixgbevf_ring *ring) -{ -#if (PAGE_SIZE < 8192) - if (ring_uses_large_buffer(ring)) - return 1; -#endif - return 0; -} - -#define ixgbevf_rx_pg_size(_ring) (PAGE_SIZE << ixgbevf_rx_pg_order(_ring)) - #define check_for_tx_hang(ring) \ test_bit(__IXGBEVF_TX_DETECT_HANG, &(ring)->state) #define set_check_for_tx_hang(ring) \ @@ -258,6 +224,7 @@ struct ixgbevf_q_vector { 
IXGBEVF_QV_STATE_POLL_YIELD) spinlock_t lock; #endif /* CONFIG_NET_RX_BUSY_POLL */ + call_single_data_t xsk_csd; /* trigger xsk-related napi */ }; /* microsecond values for various ITR rates shifted by 2 to fit itr register @@ -386,8 +353,6 @@ struct ixgbevf_adapter { u32 flags; bool link_state; -#define IXGBEVF_FLAGS_LEGACY_RX BIT(1) - #ifdef CONFIG_XFRM struct ixgbevf_ipsec *ipsec; #endif /* CONFIG_XFRM */ @@ -448,14 +413,29 @@ int ixgbevf_open(struct net_device *netdev); int ixgbevf_close(struct net_device *netdev); void ixgbevf_up(struct ixgbevf_adapter *adapter); void ixgbevf_down(struct ixgbevf_adapter *adapter); +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring); +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_reinit_locked(struct ixgbevf_adapter *adapter); void ixgbevf_reset(struct ixgbevf_adapter *adapter); void ixgbevf_set_ethtool_ops(struct net_device *netdev); int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring); +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter); +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); +int ixgbevf_setup_fq(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring); int ixgbevf_setup_tx_resources(struct ixgbevf_ring *); +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring); void ixgbevf_free_rx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring); +void ixgbevf_rx_fq_destroy(struct ixgbevf_ring *rx_ring); void ixgbevf_free_tx_resources(struct ixgbevf_ring *); +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring); void ixgbevf_update_stats(struct ixgbevf_adapter *adapter); int ethtool_ioctl(struct ifreq *ifr); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 535d0f71f52149..2123f848140d39 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -31,8 +31,10 @@ #include #include #include +#include -#include "ixgbevf.h" +#include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xsk.h" const char ixgbevf_driver_name[] = "ixgbevf"; static const char ixgbevf_driver_string[] = @@ -82,6 +84,8 @@ static const struct pci_device_id ixgbevf_pci_tbl[] = { MODULE_DEVICE_TABLE(pci, ixgbevf_pci_tbl); MODULE_DESCRIPTION("Intel(R) 10 Gigabit Virtual Function Network Driver"); +MODULE_IMPORT_NS("LIBETH"); +MODULE_IMPORT_NS("LIBETH_XDP"); MODULE_LICENSE("GPL v2"); #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) @@ -112,9 +116,6 @@ static void ixgbevf_service_event_complete(struct ixgbevf_adapter *adapter) static void ixgbevf_queue_reset_subtask(struct ixgbevf_adapter *adapter); static void ixgbevf_set_itr(struct ixgbevf_q_vector *q_vector); static void ixgbevf_free_all_rx_resources(struct ixgbevf_adapter *adapter); -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer); -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff); static void ixgbevf_remove_adapter(struct ixgbe_hw *hw) { @@ -306,10 +307,7 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, total_ipsec++; /* free the skb */ - if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); - else - napi_consume_skb(tx_buffer->skb, 
napi_budget); + napi_consume_skb(tx_buffer->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -392,9 +390,8 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, eop_desc, (eop_desc ? eop_desc->wb.status : 0), tx_ring->tx_buffer_info[i].time_stamp, jiffies); - if (!ring_is_xdp(tx_ring)) - netif_stop_subqueue(tx_ring->netdev, - tx_ring->queue_index); + netif_stop_subqueue(tx_ring->netdev, + tx_ring->queue_index); /* schedule immediate reset if we believe we hung */ ixgbevf_tx_timeout_reset(adapter); @@ -402,9 +399,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return true; } - if (ring_is_xdp(tx_ring)) - return !!budget; - #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) && (ixgbevf_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD))) { @@ -425,227 +419,6 @@ static bool ixgbevf_clean_tx_irq(struct ixgbevf_q_vector *q_vector, return !!budget; } -/** - * ixgbevf_rx_skb - Helper function to determine proper Rx method - * @q_vector: structure containing interrupt and ring information - * @skb: packet to send up - **/ -static void ixgbevf_rx_skb(struct ixgbevf_q_vector *q_vector, - struct sk_buff *skb) -{ - napi_gro_receive(&q_vector->napi, skb); -} - -#define IXGBE_RSS_L4_TYPES_MASK \ - ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ - (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) - -static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - u16 rss_type; - - if (!(ring->netdev->features & NETIF_F_RXHASH)) - return; - - rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & - IXGBE_RXDADV_RSSTYPE_MASK; - - if (!rss_type) - return; - - skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), - (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? - PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); -} - -/** - * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum - * @ring: structure containig ring specific data - * @rx_desc: current Rx descriptor being processed - * @skb: skb currently being received and modified - **/ -static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - skb_checksum_none_assert(skb); - - /* Rx csum disabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - - /* if IP and error */ - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && - ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { - ring->rx_stats.csum_err++; - return; - } - - if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) - return; - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { - ring->rx_stats.csum_err++; - return; - } - - /* It must be a TCP or UDP packet with a valid checksum */ - skb->ip_summed = CHECKSUM_UNNECESSARY; -} - -/** - * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being populated - * - * This function checks the ring, descriptor, and packet information in - * order to populate the checksum, VLAN, protocol, and other fields within - * the skb. 
- **/ -static void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - ixgbevf_rx_hash(rx_ring, rx_desc, skb); - ixgbevf_rx_checksum(rx_ring, rx_desc, skb); - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { - u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); - unsigned long *active_vlans = netdev_priv(rx_ring->netdev); - - if (test_bit(vid & VLAN_VID_MASK, active_vlans)) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); - } - - if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) - ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); - - skb->protocol = eth_type_trans(skb, rx_ring->netdev); -} - -static -struct ixgbevf_rx_buffer *ixgbevf_get_rx_buffer(struct ixgbevf_ring *rx_ring, - const unsigned int size) -{ - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; - prefetchw(rx_buffer->page); - - /* we are reusing so sync this buffer for CPU use */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - size, - DMA_FROM_DEVICE); - - rx_buffer->pagecnt_bias--; - - return rx_buffer; -} - -static void ixgbevf_put_rx_buffer(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb) -{ - if (ixgbevf_can_reuse_rx_page(rx_buffer)) { - /* hand second half of page back to the ring */ - ixgbevf_reuse_rx_page(rx_ring, rx_buffer); - } else { - if (IS_ERR(skb)) - /* We are not reusing the buffer so unmap it and free - * any references we are holding to it - */ - dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); - } - - /* clear contents of rx_buffer */ - rx_buffer->page = NULL; -} - -/** - * ixgbevf_is_non_eop - process handling of non-EOP buffers - * @rx_ring: Rx ring being processed - * @rx_desc: Rx descriptor for current buffer - * - * This function updates next to clean. If the buffer is an EOP buffer - * this function exits returning false, otherwise it will place the - * sk_buff in the next buffer to be chained and return true indicating - * that this is in fact a non-EOP buffer. - **/ -static bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc) -{ - u32 ntc = rx_ring->next_to_clean + 1; - - /* fetch, update, and store next to clean */ - ntc = (ntc < rx_ring->count) ? ntc : 0; - rx_ring->next_to_clean = ntc; - - prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); - - if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) - return false; - - return true; -} - -static inline unsigned int ixgbevf_rx_offset(struct ixgbevf_ring *rx_ring) -{ - return ring_uses_build_skb(rx_ring) ? 
IXGBEVF_SKB_PAD : 0; -} - -static bool ixgbevf_alloc_mapped_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *bi) -{ - struct page *page = bi->page; - dma_addr_t dma; - - /* since we are recycling buffers we should seldom need to alloc */ - if (likely(page)) - return true; - - /* alloc new page for storage */ - page = dev_alloc_pages(ixgbevf_rx_pg_order(rx_ring)); - if (unlikely(!page)) { - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - /* map page for use */ - dma = dma_map_page_attrs(rx_ring->dev, page, 0, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, IXGBEVF_RX_DMA_ATTR); - - /* if mapping failed free memory back to system since - * there isn't much point in holding memory we can't use - */ - if (dma_mapping_error(rx_ring->dev, dma)) { - __free_pages(page, ixgbevf_rx_pg_order(rx_ring)); - - rx_ring->rx_stats.alloc_rx_page_failed++; - return false; - } - - bi->dma = dma; - bi->page = page; - bi->page_offset = ixgbevf_rx_offset(rx_ring); - bi->pagecnt_bias = 1; - rx_ring->rx_stats.alloc_rx_page++; - - return true; -} - /** * ixgbevf_alloc_rx_buffers - Replace used receive buffers; packet split * @rx_ring: rx descriptor ring (for a specific queue) to setup buffers on @@ -655,55 +428,44 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, u16 cleaned_count) { union ixgbe_adv_rx_desc *rx_desc; - struct ixgbevf_rx_buffer *bi; - unsigned int i = rx_ring->next_to_use; + const struct libeth_fq_fp fq = { + .pp = rx_ring->pp, + .fqes = rx_ring->rx_fqes, + .truesize = rx_ring->truesize, + .count = rx_ring->count, + }; + u16 ntu = rx_ring->next_to_use; /* nothing to do or no valid netdev defined */ - if (!cleaned_count || !rx_ring->netdev) + if (unlikely(!cleaned_count || !rx_ring->netdev)) return; - rx_desc = IXGBEVF_RX_DESC(rx_ring, i); - bi = &rx_ring->rx_buffer_info[i]; - i -= rx_ring->count; + rx_desc = IXGBEVF_RX_DESC(rx_ring, ntu); do { - if (!ixgbevf_alloc_mapped_page(rx_ring, bi)) - break; + dma_addr_t addr = libeth_rx_alloc(&fq, ntu); - /* sync the buffer for use by the device */ - dma_sync_single_range_for_device(rx_ring->dev, bi->dma, - bi->page_offset, - ixgbevf_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); + if (unlikely(addr == DMA_MAPPING_ERROR)) + return; - /* Refresh the desc even if pkt_addr didn't change - * because each write-back erases this info. - */ - rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); + rx_desc->read.pkt_addr = cpu_to_le64(addr); rx_desc++; - bi++; - i++; - if (unlikely(!i)) { + ntu++; + if (unlikely(ntu == fq.count)) { rx_desc = IXGBEVF_RX_DESC(rx_ring, 0); - bi = rx_ring->rx_buffer_info; - i -= rx_ring->count; + ntu = 0; } /* clear the length for the next_to_use descriptor */ rx_desc->wb.upper.length = 0; cleaned_count--; - } while (cleaned_count); - - i += rx_ring->count; + } while (likely(cleaned_count)); - if (rx_ring->next_to_use != i) { + if (likely(rx_ring->next_to_use != ntu)) { /* record the next descriptor to use */ - rx_ring->next_to_use = i; - - /* update next to alloc since we have filled the ring */ - rx_ring->next_to_alloc = i; + rx_ring->next_to_use = ntu; /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. (Only @@ -711,208 +473,10 @@ static void ixgbevf_alloc_rx_buffers(struct ixgbevf_ring *rx_ring, * such as IA-64). 
*/ wmb(); - ixgbevf_write_tail(rx_ring, i); + ixgbevf_write_tail(rx_ring, ntu); } } -/** - * ixgbevf_cleanup_headers - Correct corrupted or empty headers - * @rx_ring: rx descriptor ring packet is being transacted on - * @rx_desc: pointer to the EOP Rx descriptor - * @skb: pointer to current skb being fixed - * - * Check for corrupted packet headers caused by senders on the local L2 - * embedded NIC switch not setting up their Tx Descriptors right. These - * should be very rare. - * - * Also address the case where we are pulling data in on pages only - * and as such no data is present in the skb header. - * - * In addition if skb is not at least 60 bytes we need to pad it so that - * it is large enough to qualify as a valid Ethernet frame. - * - * Returns true if an error was encountered and skb was freed. - **/ -static bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, - union ixgbe_adv_rx_desc *rx_desc, - struct sk_buff *skb) -{ - /* verify that the packet does not have any known errors */ - if (unlikely(ixgbevf_test_staterr(rx_desc, - IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { - struct net_device *netdev = rx_ring->netdev; - - if (!(netdev->features & NETIF_F_RXALL)) { - dev_kfree_skb_any(skb); - return true; - } - } - - /* if eth_skb_pad returns an error the skb was freed */ - if (eth_skb_pad(skb)) - return true; - - return false; -} - -/** - * ixgbevf_reuse_rx_page - page flip buffer and store it back on the ring - * @rx_ring: rx descriptor ring to store buffers on - * @old_buff: donor buffer to have page reused - * - * Synchronizes page for reuse by the adapter - **/ -static void ixgbevf_reuse_rx_page(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *old_buff) -{ - struct ixgbevf_rx_buffer *new_buff; - u16 nta = rx_ring->next_to_alloc; - - new_buff = &rx_ring->rx_buffer_info[nta]; - - /* update, and store next to alloc */ - nta++; - rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; - - /* transfer page from old buffer to new buffer */ - new_buff->page = old_buff->page; - new_buff->dma = old_buff->dma; - new_buff->page_offset = old_buff->page_offset; - new_buff->pagecnt_bias = old_buff->pagecnt_bias; -} - -static bool ixgbevf_can_reuse_rx_page(struct ixgbevf_rx_buffer *rx_buffer) -{ - unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; - struct page *page = rx_buffer->page; - - /* avoid re-using remote and pfmemalloc pages */ - if (!dev_page_is_reusable(page)) - return false; - -#if (PAGE_SIZE < 8192) - /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) - return false; -#else -#define IXGBEVF_LAST_OFFSET \ - (SKB_WITH_OVERHEAD(PAGE_SIZE) - IXGBEVF_RXBUFFER_2048) - - if (rx_buffer->page_offset > IXGBEVF_LAST_OFFSET) - return false; - -#endif - - /* If we have drained the page fragment pool we need to update - * the pagecnt_bias and page count so that we fully restock the - * number of references the driver holds. - */ - if (unlikely(!pagecnt_bias)) { - page_ref_add(page, USHRT_MAX); - rx_buffer->pagecnt_bias = USHRT_MAX; - } - - return true; -} - -/** - * ixgbevf_add_rx_frag - Add contents of Rx buffer to sk_buff - * @rx_ring: rx descriptor ring to transact packets on - * @rx_buffer: buffer containing page to add - * @skb: sk_buff to place the data into - * @size: size of buffer to be added - * - * This function will add the data contained in rx_buffer->page to the skb. 
- **/ -static void ixgbevf_add_rx_frag(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct sk_buff *skb, - unsigned int size) -{ -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = ring_uses_build_skb(rx_ring) ? - SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) : - SKB_DATA_ALIGN(size); -#endif - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page, - rx_buffer->page_offset, size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} - -static -struct sk_buff *ixgbevf_construct_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) -{ - unsigned int size = xdp->data_end - xdp->data; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - unsigned int headlen; - struct sk_buff *skb; - - /* prefetch first cache line of first page */ - net_prefetch(xdp->data); - - /* Note, we get here by enabling legacy-rx via: - * - * ethtool --set-priv-flags legacy-rx on - * - * In this mode, we currently get 0 extra XDP headroom as - * opposed to having legacy-rx off, where we process XDP - * packets going to stack via ixgbevf_build_skb(). - * - * For ixgbevf_construct_skb() mode it means that the - * xdp->data_meta will always point to xdp->data, since - * the helper cannot expand the head. Should this ever - * changed in future for legacy-rx mode on, then lets also - * add xdp->data_meta handling here. - */ - - /* allocate a skb to store the frags */ - skb = napi_alloc_skb(&rx_ring->q_vector->napi, IXGBEVF_RX_HDR_SIZE); - if (unlikely(!skb)) - return NULL; - - /* Determine available headroom for copy */ - headlen = size; - if (headlen > IXGBEVF_RX_HDR_SIZE) - headlen = eth_get_headlen(skb->dev, xdp->data, - IXGBEVF_RX_HDR_SIZE); - - /* align pull length to size of long to optimize memcpy performance */ - memcpy(__skb_put(skb, headlen), xdp->data, - ALIGN(headlen, sizeof(long))); - - /* update all of the pointers */ - size -= headlen; - if (size) { - skb_add_rx_frag(skb, 0, rx_buffer->page, - (xdp->data + headlen) - - page_address(rx_buffer->page), - size, truesize); -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - } else { - rx_buffer->pagecnt_bias++; - } - - return skb; -} - static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, u32 qmask) { @@ -921,230 +485,116 @@ static inline void ixgbevf_irq_enable_queues(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VTEIMS, qmask); } -static struct sk_buff *ixgbevf_build_skb(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - struct xdp_buff *xdp, - union ixgbe_adv_rx_desc *rx_desc) +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean) { - unsigned int metasize = xdp->data - xdp->data_meta; -#if (PAGE_SIZE < 8192) - unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2; -#else - unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) + - SKB_DATA_ALIGN(xdp->data_end - - xdp->data_hard_start); -#endif - struct sk_buff *skb; - - /* Prefetch first cache line of first page. 
If xdp->data_meta - * is unused, this points to xdp->data, otherwise, we likely - * have a consumer accessing first few bytes of meta data, - * and then actual data. - */ - net_prefetch(xdp->data_meta); - - /* build an skb around the page buffer */ - skb = napi_build_skb(xdp->data_hard_start, truesize); - if (unlikely(!skb)) - return NULL; - - /* update pointers within the skb to store the data */ - skb_reserve(skb, xdp->data - xdp->data_hard_start); - __skb_put(skb, xdp->data_end - xdp->data); - if (metasize) - skb_metadata_set(skb, metasize); - - /* update buffer offset */ -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif - - return skb; -} - -#define IXGBEVF_XDP_PASS 0 -#define IXGBEVF_XDP_CONSUMED 1 -#define IXGBEVF_XDP_TX 2 - -static int ixgbevf_xmit_xdp_ring(struct ixgbevf_ring *ring, - struct xdp_buff *xdp) -{ - struct ixgbevf_tx_buffer *tx_buffer; - union ixgbe_adv_tx_desc *tx_desc; - u32 len, cmd_type; - dma_addr_t dma; - u16 i; - - len = xdp->data_end - xdp->data; - - if (unlikely(!ixgbevf_desc_unused(ring))) - return IXGBEVF_XDP_CONSUMED; - - dma = dma_map_single(ring->dev, xdp->data, len, DMA_TO_DEVICE); - if (dma_mapping_error(ring->dev, dma)) - return IXGBEVF_XDP_CONSUMED; - - /* record the location of the first descriptor for this packet */ - i = ring->next_to_use; - tx_buffer = &ring->tx_buffer_info[i]; - - dma_unmap_len_set(tx_buffer, len, len); - dma_unmap_addr_set(tx_buffer, dma, dma); - tx_buffer->data = xdp->data; - tx_buffer->bytecount = len; - tx_buffer->gso_segs = 1; - tx_buffer->protocol = 0; - - /* Populate minimal context descriptor that will provide for the - * fact that we are expected to process Ethernet frames. - */ - if (!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state)) { - struct ixgbe_adv_tx_context_desc *context_desc; - - set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); + bool xsk_ring = ring_is_xsk(xdp_ring); + struct libeth_xdpsq_napi_stats stats; + u32 ntc = xdp_ring->next_to_clean; + struct xdp_frame_bulk cbulk; + struct libeth_cq_pp cp = { + .bq = &cbulk, + .dev = xdp_ring->dev, + .xss = &stats, + .napi = in_napi, + }; + u32 xsk_frames = 0; - context_desc = IXGBEVF_TX_CTXTDESC(ring, 0); - context_desc->vlan_macip_lens = - cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); - context_desc->fceof_saidx = 0; - context_desc->type_tucmd_mlhl = - cpu_to_le32(IXGBE_TXD_CMD_DEXT | - IXGBE_ADVTXD_DTYP_CTXT); - context_desc->mss_l4len_idx = 0; + xdp_frame_bulk_init(&cbulk); + xdp_ring->pending -= to_clean; - i = 1; + while (likely(to_clean--)) { + xsk_frames += xsk_ring && + likely(!xdp_ring->xdp_sqes[ntc].type) ? 1 : 0; + libeth_xdp_complete_tx(&xdp_ring->xdp_sqes[ntc], &cp); + ntc++; + ntc = unlikely(ntc == xdp_ring->count) ? 
0 : ntc; } - /* put descriptor type bits */ - cmd_type = IXGBE_ADVTXD_DTYP_DATA | - IXGBE_ADVTXD_DCMD_DEXT | - IXGBE_ADVTXD_DCMD_IFCS; - cmd_type |= len | IXGBE_TXD_CMD; - - tx_desc = IXGBEVF_TX_DESC(ring, i); - tx_desc->read.buffer_addr = cpu_to_le64(dma); - - tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); - tx_desc->read.olinfo_status = - cpu_to_le32((len << IXGBE_ADVTXD_PAYLEN_SHIFT) | - IXGBE_ADVTXD_CC); - - /* Avoid any potential race with cleanup */ - smp_wmb(); - - /* set next_to_watch value indicating a packet is present */ - i++; - if (i == ring->count) - i = 0; - - tx_buffer->next_to_watch = tx_desc; - ring->next_to_use = i; + xdp_ring->next_to_clean = ntc; + xdp_flush_frame_bulk(&cbulk); + if (xsk_frames) + xsk_tx_completed(xdp_ring->xsk_pool, xsk_frames); +} - return IXGBEVF_XDP_TX; +static void ixgbevf_clean_xdp_ring(struct ixgbevf_ring *xdp_ring) +{ + ixgbevf_clean_xdp_num(xdp_ring, false, xdp_ring->pending); + libeth_xdpsq_put(&xdp_ring->xdpq_lock, xdp_ring->netdev); } -static int ixgbevf_run_xdp(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring, - struct xdp_buff *xdp) +static void ixgbevf_xdp_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, + u64 priv) { - int result = IXGBEVF_XDP_PASS; - struct ixgbevf_ring *xdp_ring; - struct bpf_prog *xdp_prog; - u32 act; + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; - xdp_prog = READ_ONCE(rx_ring->xdp_prog); + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + desc.len; - if (!xdp_prog) - goto xdp_out; + if (desc.flags & LIBETH_XDP_TX_LAST) + cmd_type |= IXGBE_TXD_CMD_EOP; - act = bpf_prog_run_xdp(xdp_prog, xdp); - switch (act) { - case XDP_PASS: - break; - case XDP_TX: - xdp_ring = adapter->xdp_ring[rx_ring->queue_index]; - result = ixgbevf_xmit_xdp_ring(xdp_ring, xdp); - if (result == IXGBEVF_XDP_CONSUMED) - goto out_failure; - break; - default: - bpf_warn_invalid_xdp_action(rx_ring->netdev, xdp_prog, act); - fallthrough; - case XDP_ABORTED: -out_failure: - trace_xdp_exception(rx_ring->netdev, xdp_prog, act); - fallthrough; /* handle aborts by dropping packet */ - case XDP_DROP: - result = IXGBEVF_XDP_CONSUMED; - break; - } -xdp_out: - return result; -} + if (desc.flags & LIBETH_XDP_TX_FIRST) { + struct libeth_sqe *sqe = &sq->sqes[i]; + struct skb_shared_info *sinfo; + u16 full_len; -static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring, - unsigned int size) -{ - unsigned int truesize; + sinfo = sqe->type == LIBETH_SQE_XDP_TX ? sqe->sinfo : + xdp_get_shared_info_from_frame(sqe->xdpf); + full_len = desc.len + sinfo->xdp_frags_size; + tx_desc->read.olinfo_status = + cpu_to_le32((full_len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + } -#if (PAGE_SIZE < 8192) - truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */ -#else - truesize = ring_uses_build_skb(rx_ring) ? 
- SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) : - SKB_DATA_ALIGN(size); -#endif - return truesize; + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); } -static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring, - struct ixgbevf_rx_buffer *rx_buffer, - unsigned int size) -{ - unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size); - -#if (PAGE_SIZE < 8192) - rx_buffer->page_offset ^= truesize; -#else - rx_buffer->page_offset += truesize; -#endif -} +LIBETH_XDP_DEFINE_START(); +LIBETH_XDP_DEFINE_FLUSH_TX(static ixgbevf_xdp_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_FLUSH_XMIT(static ixgbevf_xdp_flush_xmit, ixgbevf_prep_xdp_sq, + ixgbevf_xdp_xmit_desc); +LIBETH_XDP_DEFINE_RUN_PROG(static ixgbevf_xdp_run_prog, ixgbevf_xdp_flush_tx); +LIBETH_XDP_DEFINE_FINALIZE(static ixgbevf_xdp_finalize_xdp_napi, + ixgbevf_xdp_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, struct ixgbevf_ring *rx_ring, int budget) { - unsigned int total_rx_bytes = 0, total_rx_packets = 0, frame_sz = 0; + unsigned int total_rx_bytes = 0, total_rx_packets = 0; struct ixgbevf_adapter *adapter = q_vector->adapter; u16 cleaned_count = ixgbevf_desc_unused(rx_ring); - struct sk_buff *skb = rx_ring->skb; - bool xdp_xmit = false; - struct xdp_buff xdp; - int xdp_res = 0; - - /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */ -#if (PAGE_SIZE < 8192) - frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0); -#endif - xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + LIBETH_XDP_ONSTACK_BUFF(xdp); + struct sk_buff *skb; + + libeth_xdp_init_buff(xdp, &rx_ring->xdp_stash, &rx_ring->xdp_rxq); + libeth_xdp_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); while (likely(total_rx_packets < budget)) { - struct ixgbevf_rx_buffer *rx_buffer; union ixgbe_adv_rx_desc *rx_desc; + struct libeth_fqe *rx_buffer; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ - if (cleaned_count >= IXGBEVF_RX_BUFFER_WRITE) { + if (unlikely(cleaned_count >= IXGBEVF_RX_BUFFER_WRITE)) { ixgbevf_alloc_rx_buffers(rx_ring, cleaned_count); cleaned_count = 0; } rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); size = le16_to_cpu(rx_desc->wb.upper.length); - if (!size) + if (unlikely(!size)) break; /* This memory barrier is needed to keep us from reading @@ -1153,73 +603,44 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, */ rmb(); - rx_buffer = ixgbevf_get_rx_buffer(rx_ring, size); - - /* retrieve a buffer from the ring */ - if (!skb) { - unsigned int offset = ixgbevf_rx_offset(rx_ring); - unsigned char *hard_start; + rx_buffer = &rx_ring->rx_fqes[rx_ring->next_to_clean]; + libeth_xdp_process_buff(xdp, rx_buffer, size); - hard_start = page_address(rx_buffer->page) + - rx_buffer->page_offset - offset; - xdp_prepare_buff(&xdp, hard_start, offset, size, true); -#if (PAGE_SIZE > 4096) - /* At larger PAGE_SIZE, frame_sz depend on len size */ - xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size); -#endif - xdp_res = ixgbevf_run_xdp(adapter, rx_ring, &xdp); - } + cleaned_count++; + /* fetch next buffer in frame if non-eop */ + if (ixgbevf_is_non_eop(rx_ring, rx_desc)) + continue; - if (xdp_res) { - if (xdp_res == IXGBEVF_XDP_TX) { - xdp_xmit = true; - 
ixgbevf_rx_buffer_flip(rx_ring, rx_buffer, - size); - } else { - rx_buffer->pagecnt_bias++; - } + if (xdp_tx_bulk.prog && + !ixgbevf_xdp_run_prog(xdp, &xdp_tx_bulk)) { + xdp->data = NULL; total_rx_packets++; total_rx_bytes += size; - } else if (skb) { - ixgbevf_add_rx_frag(rx_ring, rx_buffer, skb, size); - } else if (ring_uses_build_skb(rx_ring)) { - skb = ixgbevf_build_skb(rx_ring, rx_buffer, - &xdp, rx_desc); - } else { - skb = ixgbevf_construct_skb(rx_ring, rx_buffer, - &xdp, rx_desc); + continue; } + skb = xdp_build_skb_from_buff(&xdp->base); + xdp->data = NULL; + /* exit if we failed to retrieve a buffer */ - if (!xdp_res && !skb) { + if (unlikely(!skb)) { rx_ring->rx_stats.alloc_rx_buff_failed++; - rx_buffer->pagecnt_bias++; break; } - ixgbevf_put_rx_buffer(rx_ring, rx_buffer, skb); - cleaned_count++; - - /* fetch next buffer in frame if non-eop */ - if (ixgbevf_is_non_eop(rx_ring, rx_desc)) - continue; - /* verify the packet layout is correct */ - if (xdp_res || ixgbevf_cleanup_headers(rx_ring, rx_desc, skb)) { + if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) { skb = NULL; continue; } - /* probably a little skewed due to removing CRC */ - total_rx_bytes += skb->len; - /* Workaround hardware that can't do proper VEPA multicast * source pruning. */ - if ((skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) && - ether_addr_equal(rx_ring->netdev->dev_addr, - eth_hdr(skb)->h_source)) { + if (unlikely((skb->pkt_type == PACKET_BROADCAST || + skb->pkt_type == PACKET_MULTICAST) && + ether_addr_equal(rx_ring->netdev->dev_addr, + eth_hdr(skb)->h_source))) { dev_kfree_skb_irq(skb); continue; } @@ -1227,28 +648,17 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, /* populate checksum, VLAN, and protocol */ ixgbevf_process_skb_fields(rx_ring, rx_desc, skb); - ixgbevf_rx_skb(q_vector, skb); - - /* reset skb pointer */ - skb = NULL; - - /* update budget accounting */ + /* probably a little skewed due to removing CRC */ + total_rx_bytes += skb->len; total_rx_packets++; + + napi_gro_receive(&q_vector->napi, skb); } /* place incomplete frames back on ring for completion */ - rx_ring->skb = skb; + libeth_xdp_save_buff(&rx_ring->xdp_stash, xdp); - if (xdp_xmit) { - struct ixgbevf_ring *xdp_ring = - adapter->xdp_ring[rx_ring->queue_index]; - - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. 
- */ - wmb(); - ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); - } + ixgbevf_xdp_finalize_xdp_napi(&xdp_tx_bulk); u64_stats_update_begin(&rx_ring->syncp); rx_ring->stats.packets += total_rx_packets; @@ -1260,6 +670,23 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector, return total_rx_packets; } +static int ixgbevf_xdp_xmit(struct net_device *dev, int n, + struct xdp_frame **frames, u32 flags) +{ + struct ixgbevf_adapter *adapter = netdev_priv(dev); + + if (test_bit(__IXGBEVF_DOWN, &adapter->state)) + return -ENETDOWN; + + if (!adapter->num_xdp_queues) + return -ENXIO; + + return libeth_xdp_xmit_do_bulk(dev, n, frames, flags, adapter->xdp_ring, + adapter->num_xdp_queues, + ixgbevf_xdp_flush_xmit, + ixgbevf_xdp_rs_and_bump); +} + /** * ixgbevf_poll - NAPI polling calback * @napi: napi struct with our devices info in it @@ -1278,8 +705,12 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) bool clean_complete = true; ixgbevf_for_each_ring(ring, q_vector->tx) { - if (!ixgbevf_clean_tx_irq(q_vector, ring, budget)) - clean_complete = false; + if (ring_is_xsk(ring)) + clean_complete = ixgbevf_clean_xsk_tx_irq(q_vector, + ring, budget); + else if (!ring_is_xdp(ring)) + clean_complete = ixgbevf_clean_tx_irq(q_vector, + ring, budget); } if (budget <= 0) @@ -1294,7 +725,9 @@ static int ixgbevf_poll(struct napi_struct *napi, int budget) per_ring_budget = budget; ixgbevf_for_each_ring(ring, q_vector->rx) { - int cleaned = ixgbevf_clean_rx_irq(q_vector, ring, + int cleaned = ring_is_xsk(ring) ? + ixgbevf_clean_xsk_rx_irq(q_vector, ring, budget) : + ixgbevf_clean_rx_irq(q_vector, ring, per_ring_budget); work_done += cleaned; if (cleaned >= per_ring_budget) @@ -1666,7 +1099,7 @@ static inline void ixgbevf_irq_disable(struct ixgbevf_adapter *adapter) * ixgbevf_irq_enable - Enable default interrupt generation settings * @adapter: board private structure **/ -static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) +void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) { struct ixgbe_hw *hw = &adapter->hw; @@ -1682,8 +1115,8 @@ static inline void ixgbevf_irq_enable(struct ixgbevf_adapter *adapter) * * Configure the Tx descriptor ring after a reset. 
**/ -static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; u64 tdba = ring->dma; @@ -1717,6 +1150,8 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; + ring->pending = 0; + ring->cached_ntu = 0; /* In order to avoid issues WTHRESH + PTHRESH should always be equal * to or less than the number of on chip descriptors, which is @@ -1729,8 +1164,26 @@ static void ixgbevf_configure_tx_ring(struct ixgbevf_adapter *adapter, 32; /* PTHRESH = 32 */ /* reinitialize tx_buffer_info */ - memset(ring->tx_buffer_info, 0, - sizeof(struct ixgbevf_tx_buffer) * ring->count); + if (!ring_is_xdp(ring)) { + memset(ring->tx_buffer_info, 0, + sizeof(struct ixgbevf_tx_buffer) * ring->count); + } else { + memset(ring->xdp_sqes, 0, + sizeof(struct libeth_sqe) * ring->count); + libeth_xdpsq_get(&ring->xdpq_lock, ring->netdev, + num_possible_cpus() > adapter->num_xdp_queues); + } + + ring->xsk_pool = + xsk_get_pool_from_qid(adapter->netdev, ring->queue_index); + if (ring_is_xdp(ring) && ring->xsk_pool) + set_ring_xsk(ring); + else + clear_ring_xsk(ring); + + ring->thresh = ring_is_xsk(ring) ? + libeth_xdp_queue_threshold(ring->count) : + XDP_BULK_QUEUE_SIZE; clear_bit(__IXGBEVF_HANG_CHECK_ARMED, &ring->state); clear_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &ring->state); @@ -1774,10 +1227,7 @@ static void ixgbevf_configure_srrctl(struct ixgbevf_adapter *adapter, srrctl = IXGBE_SRRCTL_DROP_EN; srrctl |= IXGBEVF_RX_HDR_SIZE << IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT; - if (ring_uses_large_buffer(ring)) - srrctl |= IXGBEVF_RXBUFFER_3072 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; - else - srrctl |= IXGBEVF_RXBUFFER_2048 >> IXGBE_SRRCTL_BSIZEPKT_SHIFT; + srrctl |= ring->rx_buf_len / IXGBE_SRRCTL_BSIZEPKT_STEP; srrctl |= IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF; IXGBE_WRITE_REG(hw, IXGBE_VFSRRCTL(index), srrctl); @@ -1799,8 +1249,8 @@ static void ixgbevf_setup_psrtype(struct ixgbevf_adapter *adapter) } #define IXGBEVF_MAX_RX_DESC_POLL 10 -static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1826,8 +1276,8 @@ static void ixgbevf_disable_rx_queue(struct ixgbevf_adapter *adapter, reg_idx); } -static void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_rx_desc_queue_enable(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; int wait_loop = IXGBEVF_MAX_RX_DESC_POLL; @@ -1903,8 +1353,8 @@ static void ixgbevf_setup_vfmrqc(struct ixgbevf_adapter *adapter) IXGBE_WRITE_REG(hw, IXGBE_VFMRQC, vfmrqc); } -static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *ring) +void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *ring) { struct ixgbe_hw *hw = &adapter->hw; union ixgbe_adv_rx_desc *rx_desc; @@ -1936,10 +1386,6 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, IXGBE_WRITE_REG(hw, IXGBE_VFRDT(reg_idx), 0); ring->tail = adapter->io_addr + IXGBE_VFRDT(reg_idx); - /* initialize rx_buffer_info */ - memset(ring->rx_buffer_info, 0, - sizeof(struct 
ixgbevf_rx_buffer) * ring->count); - /* initialize Rx descriptor 0 */ rx_desc = IXGBEVF_RX_DESC(ring, 0); rx_desc->wb.upper.length = 0; @@ -1947,53 +1393,29 @@ static void ixgbevf_configure_rx_ring(struct ixgbevf_adapter *adapter, /* reset ntu and ntc to place SW in sync with hardwdare */ ring->next_to_clean = 0; ring->next_to_use = 0; - ring->next_to_alloc = 0; + ring->pending = ixgbevf_desc_unused(ring); ixgbevf_configure_srrctl(adapter, ring, reg_idx); /* RXDCTL.RLPML does not work on 82599 */ if (adapter->hw.mac.type != ixgbe_mac_82599_vf) { - rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | - IXGBE_RXDCTL_RLPML_EN); - -#if (PAGE_SIZE < 8192) - /* Limit the maximum frame size so we don't overrun the skb */ - if (ring_uses_build_skb(ring) && - !ring_uses_large_buffer(ring)) - rxdctl |= IXGBEVF_MAX_FRAME_BUILD_SKB | - IXGBE_RXDCTL_RLPML_EN; -#endif + u32 pkt_len = + READ_ONCE(adapter->netdev->mtu) + LIBETH_RX_LL_LEN; + + rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK | IXGBE_RXDCTL_RLPML_EN); + if (pkt_len <= IXGBE_RXDCTL_RLPMLMASK) + rxdctl |= pkt_len | IXGBE_RXDCTL_RLPML_EN; } rxdctl |= IXGBE_RXDCTL_ENABLE | IXGBE_RXDCTL_VME; IXGBE_WRITE_REG(hw, IXGBE_VFRXDCTL(reg_idx), rxdctl); ixgbevf_rx_desc_queue_enable(adapter, ring); - ixgbevf_alloc_rx_buffers(ring, ixgbevf_desc_unused(ring)); -} -static void ixgbevf_set_rx_buffer_len(struct ixgbevf_adapter *adapter, - struct ixgbevf_ring *rx_ring) -{ - struct net_device *netdev = adapter->netdev; - unsigned int max_frame = netdev->mtu + ETH_HLEN + ETH_FCS_LEN; - - /* set build_skb and buffer size flags */ - clear_ring_build_skb_enabled(rx_ring); - clear_ring_uses_large_buffer(rx_ring); - - if (adapter->flags & IXGBEVF_FLAGS_LEGACY_RX) - return; - - if (PAGE_SIZE < 8192) - if (max_frame > IXGBEVF_MAX_FRAME_BUILD_SKB) - set_ring_uses_large_buffer(rx_ring); - - /* 82599 can't rely on RXDCTL.RLPML to restrict the size of the frame */ - if (adapter->hw.mac.type == ixgbe_mac_82599_vf && !ring_uses_large_buffer(rx_ring)) - return; - - set_ring_build_skb_enabled(rx_ring); + if (ring_is_xsk(ring)) + ixgbevf_xsk_alloc_rx_bufs(ring, ring->pending); + else + ixgbevf_alloc_rx_buffers(ring, ring->pending); } /** @@ -2026,7 +1448,6 @@ static void ixgbevf_configure_rx(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_rx_queues; i++) { struct ixgbevf_ring *rx_ring = adapter->rx_ring[i]; - ixgbevf_set_rx_buffer_len(adapter, rx_ring); ixgbevf_configure_rx_ring(adapter, rx_ring); } } @@ -2355,56 +1776,36 @@ void ixgbevf_up(struct ixgbevf_adapter *adapter) * ixgbevf_clean_rx_ring - Free Rx Buffers per Queue * @rx_ring: ring to free buffers from **/ -static void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) +void ixgbevf_clean_rx_ring(struct ixgbevf_ring *rx_ring) { - u16 i = rx_ring->next_to_clean; + if (ring_is_xsk(rx_ring)) { + ixgbevf_rx_xsk_ring_free_buffs(rx_ring); + goto reset; + } /* Free Rx ring sk_buff */ - if (rx_ring->skb) { - dev_kfree_skb(rx_ring->skb); - rx_ring->skb = NULL; - } + libeth_xdp_return_stash(&rx_ring->xdp_stash); /* Free all the Rx ring pages */ - while (i != rx_ring->next_to_alloc) { - struct ixgbevf_rx_buffer *rx_buffer; - - rx_buffer = &rx_ring->rx_buffer_info[i]; - - /* Invalidate cache lines that may have been written to by - * device so that we avoid corrupting memory. 
- */ - dma_sync_single_range_for_cpu(rx_ring->dev, - rx_buffer->dma, - rx_buffer->page_offset, - ixgbevf_rx_bufsz(rx_ring), - DMA_FROM_DEVICE); - - /* free resources associated with mapping */ - dma_unmap_page_attrs(rx_ring->dev, - rx_buffer->dma, - ixgbevf_rx_pg_size(rx_ring), - DMA_FROM_DEVICE, - IXGBEVF_RX_DMA_ATTR); - - __page_frag_cache_drain(rx_buffer->page, - rx_buffer->pagecnt_bias); + for (u32 i = rx_ring->next_to_clean; i != rx_ring->next_to_use; ) { + const struct libeth_fqe *rx_fqe = &rx_ring->rx_fqes[i]; - i++; - if (i == rx_ring->count) + libeth_rx_recycle_slow(rx_fqe->netmem); + if (unlikely(++i == rx_ring->count)) i = 0; } - rx_ring->next_to_alloc = 0; +reset: rx_ring->next_to_clean = 0; rx_ring->next_to_use = 0; + rx_ring->pending = 0; } /** * ixgbevf_clean_tx_ring - Free Tx Buffers * @tx_ring: ring to be cleaned **/ -static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) +void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) { u16 i = tx_ring->next_to_clean; struct ixgbevf_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i]; @@ -2413,10 +1814,7 @@ static void ixgbevf_clean_tx_ring(struct ixgbevf_ring *tx_ring) union ixgbe_adv_tx_desc *eop_desc, *tx_desc; /* Free all the Tx ring sk_buffs */ - if (ring_is_xdp(tx_ring)) - page_frag_free(tx_buffer->data); - else - dev_kfree_skb_any(tx_buffer->skb); + dev_kfree_skb_any(tx_buffer->skb); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -2485,13 +1883,20 @@ static void ixgbevf_clean_all_tx_rings(struct ixgbevf_adapter *adapter) for (i = 0; i < adapter->num_tx_queues; i++) ixgbevf_clean_tx_ring(adapter->tx_ring[i]); for (i = 0; i < adapter->num_xdp_queues; i++) - ixgbevf_clean_tx_ring(adapter->xdp_ring[i]); + ixgbevf_clean_xdp_ring(adapter->xdp_ring[i]); +} + +void ixgbevf_flush_tx_queue(struct ixgbevf_ring *ring) +{ + u8 reg_idx = ring->reg_idx; + + IXGBE_WRITE_REG(&ring->q_vector->adapter->hw, IXGBE_VFTXDCTL(reg_idx), + IXGBE_TXDCTL_SWFLSH); } void ixgbevf_down(struct ixgbevf_adapter *adapter) { struct net_device *netdev = adapter->netdev; - struct ixgbe_hw *hw = &adapter->hw; int i; /* signal that we are down to the interrupt handler */ @@ -2517,19 +1922,11 @@ void ixgbevf_down(struct ixgbevf_adapter *adapter) timer_delete_sync(&adapter->service_timer); /* disable transmits in the hardware now that interrupts are off */ - for (i = 0; i < adapter->num_tx_queues; i++) { - u8 reg_idx = adapter->tx_ring[i]->reg_idx; - - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } - - for (i = 0; i < adapter->num_xdp_queues; i++) { - u8 reg_idx = adapter->xdp_ring[i]->reg_idx; + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbevf_flush_tx_queue(adapter->tx_ring[i]); - IXGBE_WRITE_REG(hw, IXGBE_VFTXDCTL(reg_idx), - IXGBE_TXDCTL_SWFLSH); - } + for (i = 0; i < adapter->num_xdp_queues; i++) + ixgbevf_flush_tx_queue(adapter->xdp_ring[i]); if (!pci_channel_offline(adapter->pdev)) ixgbevf_reset(adapter); @@ -2742,6 +2139,7 @@ static int ixgbevf_alloc_q_vector(struct ixgbevf_adapter *adapter, int v_idx, /* initialize NAPI */ netif_napi_add(adapter->netdev, &q_vector->napi, ixgbevf_poll); + libeth_xsk_init_wakeup(&q_vector->xsk_csd, &q_vector->napi); /* tie q_vector and adapter together */ adapter->q_vector[v_idx] = q_vector; @@ -3374,7 +2772,10 @@ static void ixgbevf_service_task(struct work_struct *work) **/ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) { - ixgbevf_clean_tx_ring(tx_ring); + if (!ring_is_xdp(tx_ring)) + ixgbevf_clean_tx_ring(tx_ring); + else + 
ixgbevf_clean_xdp_ring(tx_ring); vfree(tx_ring->tx_buffer_info); tx_ring->tx_buffer_info = NULL; @@ -3383,7 +2784,7 @@ void ixgbevf_free_tx_resources(struct ixgbevf_ring *tx_ring) if (!tx_ring->desc) return; - dma_free_coherent(tx_ring->dev, tx_ring->size, tx_ring->desc, + dma_free_coherent(tx_ring->dev, tx_ring->dma_size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; @@ -3418,7 +2819,9 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) struct ixgbevf_adapter *adapter = netdev_priv(tx_ring->netdev); int size; - size = sizeof(struct ixgbevf_tx_buffer) * tx_ring->count; + size = (!ring_is_xdp(tx_ring) ? sizeof(struct ixgbevf_tx_buffer) : + sizeof(struct libeth_sqe)) * tx_ring->count; + tx_ring->tx_buffer_info = vmalloc(size); if (!tx_ring->tx_buffer_info) goto err; @@ -3426,10 +2829,10 @@ int ixgbevf_setup_tx_resources(struct ixgbevf_ring *tx_ring) u64_stats_init(&tx_ring->syncp); /* round up to nearest 4K */ - tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); - tx_ring->size = ALIGN(tx_ring->size, 4096); + tx_ring->dma_size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->dma_size = ALIGN(tx_ring->dma_size, 4096); - tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc = dma_alloc_coherent(tx_ring->dev, tx_ring->dma_size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) goto err; @@ -3484,6 +2887,105 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) return err; } +void ixgbevf_rx_fq_destroy(struct ixgbevf_ring *rx_ring) +{ + xdp_rxq_info_detach_mem_model(&rx_ring->xdp_rxq); + xdp_rxq_info_unreg(&rx_ring->xdp_rxq); + + if (test_and_clear_bit(__IXGBEVF_RXTX_XSK_RING, &rx_ring->state)) { + struct libeth_xskfq xskfq = { + .fqes = rx_ring->xsk_fqes, + }; + + libeth_xskfq_destroy(&xskfq); + rx_ring->xsk_fqes = NULL; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + rx_ring->xsk_pool = NULL; + } else { + struct libeth_fq fq = { + .fqes = rx_ring->rx_fqes, + .pp = rx_ring->pp, + }; + + libeth_rx_fq_destroy(&fq); + rx_ring->rx_fqes = NULL; + rx_ring->pp = NULL; + } +} + +int ixgbevf_setup_fq(struct ixgbevf_adapter *adapter, + struct ixgbevf_ring *rx_ring) +{ + struct libeth_fq fq = { + .count = rx_ring->count, + .nid = NUMA_NO_NODE, + .type = LIBETH_FQE_MTU, + .xdp = !!rx_ring->xdp_prog, + .buf_len = IXGBEVF_RX_PAGE_LEN(rx_ring->xdp_prog ? 
+ LIBETH_XDP_HEADROOM : + LIBETH_SKB_HEADROOM), + }; + struct xsk_buff_pool *pool; + int ret; + + pool = xsk_get_pool_from_qid(rx_ring->netdev, rx_ring->queue_index); + if (adapter->xdp_prog && pool) { + struct libeth_xskfq xskfq = { + .nid = numa_node_id(), + .count = rx_ring->count, + .pool = pool, + }; + + ret = libeth_xskfq_create(&xskfq); + if (ret) + return ret; + + rx_ring->xsk_pool = xskfq.pool; + rx_ring->xsk_fqes = xskfq.fqes; + rx_ring->pending = xskfq.pending; + rx_ring->thresh = xskfq.thresh; + rx_ring->rx_buf_len = xskfq.buf_len; + set_ring_xsk(rx_ring); + + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, fq.buf_len); + if (ret) + goto fq_destroy; + + ret = xdp_rxq_info_reg_mem_model(&rx_ring->xdp_rxq, + MEM_TYPE_XSK_BUFF_POOL, + rx_ring->xsk_pool); + if (ret) + goto fq_destroy; + + return 0; + } + + ret = libeth_rx_fq_create(&fq, &rx_ring->q_vector->napi); + if (ret) + return ret; + + rx_ring->pp = fq.pp; + rx_ring->rx_fqes = fq.fqes; + rx_ring->truesize = fq.truesize; + rx_ring->rx_buf_len = fq.buf_len; + + /* XDP RX-queue info */ + ret = __xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, + rx_ring->queue_index, 0, fq.buf_len); + if (ret) + goto fq_destroy; + + xdp_rxq_info_attach_page_pool(&rx_ring->xdp_rxq, fq.pp); + + return 0; +fq_destroy: + ixgbevf_rx_fq_destroy(rx_ring); + return ret; +} + /** * ixgbevf_setup_rx_resources - allocate Rx resources (Descriptors) * @adapter: board private structure @@ -3494,38 +2996,32 @@ static int ixgbevf_setup_all_tx_resources(struct ixgbevf_adapter *adapter) int ixgbevf_setup_rx_resources(struct ixgbevf_adapter *adapter, struct ixgbevf_ring *rx_ring) { - int size; + int ret; - size = sizeof(struct ixgbevf_rx_buffer) * rx_ring->count; - rx_ring->rx_buffer_info = vmalloc(size); - if (!rx_ring->rx_buffer_info) - goto err; + ret = ixgbevf_setup_fq(adapter, rx_ring); + if (ret) + return ret; u64_stats_init(&rx_ring->syncp); /* Round up to nearest 4K */ - rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); - rx_ring->size = ALIGN(rx_ring->size, 4096); + rx_ring->dma_size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->dma_size = ALIGN(rx_ring->dma_size, 4096); - rx_ring->desc = dma_alloc_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc = dma_alloc_coherent(&adapter->pdev->dev, + rx_ring->dma_size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) goto err; - /* XDP RX-queue info */ - if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev, - rx_ring->queue_index, 0) < 0) - goto err; - rx_ring->xdp_prog = adapter->xdp_prog; return 0; err: - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; + ixgbevf_rx_fq_destroy(rx_ring); dev_err(rx_ring->dev, "Unable to allocate memory for the Rx descriptor ring\n"); - return -ENOMEM; + return ret; } /** @@ -3569,14 +3065,12 @@ void ixgbevf_free_rx_resources(struct ixgbevf_ring *rx_ring) ixgbevf_clean_rx_ring(rx_ring); rx_ring->xdp_prog = NULL; - xdp_rxq_info_unreg(&rx_ring->xdp_rxq); - vfree(rx_ring->rx_buffer_info); - rx_ring->rx_buffer_info = NULL; - dma_free_coherent(rx_ring->dev, rx_ring->size, rx_ring->desc, + dma_free_coherent(rx_ring->pp->p.dev, rx_ring->dma_size, rx_ring->desc, rx_ring->dma); - rx_ring->desc = NULL; + + ixgbevf_rx_fq_destroy(rx_ring); } /** @@ -4440,19 +3934,11 @@ ixgbevf_features_check(struct sk_buff *skb, struct net_device *dev, static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; struct 
ixgbevf_adapter *adapter = netdev_priv(dev); struct bpf_prog *old_prog; - /* verify ixgbevf ring attributes are sufficient for XDP */ - for (i = 0; i < adapter->num_rx_queues; i++) { - struct ixgbevf_ring *ring = adapter->rx_ring[i]; - - if (frame_size > ixgbevf_rx_bufsz(ring)) - return -EINVAL; - } - old_prog = xchg(&adapter->xdp_prog, prog); + xdp_features_clear_redirect_target(dev); /* If transitioning XDP modes reconfigure rings */ if (!!prog != !!old_prog) { @@ -4469,13 +3955,16 @@ static int ixgbevf_xdp_setup(struct net_device *dev, struct bpf_prog *prog) if (netif_running(dev)) ixgbevf_open(dev); } else { - for (i = 0; i < adapter->num_rx_queues; i++) + for (int i = 0; i < adapter->num_rx_queues; i++) xchg(&adapter->rx_ring[i]->xdp_prog, adapter->xdp_prog); } if (old_prog) bpf_prog_put(old_prog); + if (prog) + xdp_features_set_redirect_target(dev, true); + return 0; } @@ -4484,6 +3973,9 @@ static int ixgbevf_xdp(struct net_device *dev, struct netdev_bpf *xdp) switch (xdp->command) { case XDP_SETUP_PROG: return ixgbevf_xdp_setup(dev, xdp->prog); + case XDP_SETUP_XSK_POOL: + return ixgbevf_setup_xsk_pool(netdev_priv(dev), xdp->xsk.pool, + xdp->xsk.queue_id); default: return -EINVAL; } @@ -4503,6 +3995,8 @@ static const struct net_device_ops ixgbevf_netdev_ops = { .ndo_vlan_rx_kill_vid = ixgbevf_vlan_rx_kill_vid, .ndo_features_check = ixgbevf_features_check, .ndo_bpf = ixgbevf_xdp, + .ndo_xdp_xmit = ixgbevf_xdp_xmit, + .ndo_xsk_wakeup = ixgbevf_xsk_wakeup, }; static void ixgbevf_assign_netdev_ops(struct net_device *dev) @@ -4635,7 +4129,7 @@ static int ixgbevf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) NETIF_F_HW_VLAN_CTAG_TX; netdev->priv_flags |= IFF_UNICAST_FLT; - netdev->xdp_features = NETDEV_XDP_ACT_BASIC; + libeth_xdp_set_features_noredir(netdev, NULL, 1, NULL); /* MTU range: 68 - 1504 or 9710 */ netdev->min_mtu = ETH_MIN_MTU; diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h new file mode 100644 index 00000000000000..acbf74d9ae5377 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_txrx_lib.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright(c) 1999 - 2025 Intel Corporation. */ + +#ifndef _IXGBEVF_TXRX_LIB_H_ +#define _IXGBEVF_TXRX_LIB_H_ + +#include + +#include "ixgbevf.h" + +static inline void ixgbevf_xdp_rs_and_bump(void *xdpsq, bool sent, bool flush) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + union ixgbe_adv_tx_desc *desc; + u32 ltu; + + if ((!flush && xdp_ring->pending < xdp_ring->count - 1) || + xdp_ring->cached_ntu == xdp_ring->next_to_use) + return; + + ltu = (xdp_ring->next_to_use ? : xdp_ring->count) - 1; + + /* We will not get DD on a context descriptor */ + if (unlikely(xdp_ring->xdp_sqes[ltu].type == LIBETH_SQE_CTX)) + return; + + desc = IXGBEVF_TX_DESC(xdp_ring, ltu); + desc->read.cmd_type_len |= cpu_to_le32(IXGBE_TXD_CMD); + + xdp_ring->xdp_sqes[xdp_ring->cached_ntu].rs_idx = ltu + 1; + xdp_ring->cached_ntu = xdp_ring->next_to_use; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, xdp_ring->next_to_use); +} + +/** + * ixgbevf_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. 
If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static inline bool ixgbevf_is_non_eop(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + rx_ring->pending++; + + prefetch(IXGBEVF_RX_DESC(rx_ring, ntc)); + + if (likely(ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_EOP))) + return false; + + return true; +} + +/** + * ixgbevf_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Check for corrupted packet headers caused by senders on the local L2 + * embedded NIC switch not setting up their Tx Descriptors right. These + * should be very rare. + * + * Also address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static inline bool ixgbevf_cleanup_headers(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + /* verify that the packet does not have any known errors */ + if (unlikely(ixgbevf_test_staterr(rx_desc, + IXGBE_RXDADV_ERR_FRAME_ERR_MASK))) { + struct net_device *netdev = rx_ring->netdev; + + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* if eth_skb_pad returns an error the skb was freed */ + if (eth_skb_pad(skb)) + return true; + + return false; +} + +#define IXGBE_RSS_L4_TYPES_MASK \ + ((1ul << IXGBE_RXDADV_RSSTYPE_IPV4_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV4_UDP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_TCP) | \ + (1ul << IXGBE_RXDADV_RSSTYPE_IPV6_UDP)) + +static inline void ixgbevf_rx_hash(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + u16 rss_type; + + if (!(ring->netdev->features & NETIF_F_RXHASH)) + return; + + rss_type = le16_to_cpu(rx_desc->wb.lower.lo_dword.hs_rss.pkt_info) & + IXGBE_RXDADV_RSSTYPE_MASK; + + if (!rss_type) + return; + + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + (IXGBE_RSS_L4_TYPES_MASK & (1ul << rss_type)) ? 
+ PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3); +} + +/** + * ixgbevf_rx_checksum - indicate in skb if hw indicated a good cksum + * @ring: structure containing ring specific data + * @rx_desc: current Rx descriptor being processed + * @skb: skb currently being received and modified + **/ +static inline void ixgbevf_rx_checksum(struct ixgbevf_ring *ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Rx csum disabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + /* if IP and error */ + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_IPCS) && + ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_IPE)) { + ring->rx_stats.csum_err++; + return; + } + + if (!ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_L4CS)) + return; + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_ERR_TCPE)) { + ring->rx_stats.csum_err++; + return; + } + + /* It must be a TCP or UDP packet with a valid checksum */ + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +/** + * ixgbevf_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the checksum, VLAN, protocol, and other fields within + * the skb. + **/ +static inline void ixgbevf_process_skb_fields(struct ixgbevf_ring *rx_ring, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + ixgbevf_rx_hash(rx_ring, rx_desc, skb); + ixgbevf_rx_checksum(rx_ring, rx_desc, skb); + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXD_STAT_VP)) { + u16 vid = le16_to_cpu(rx_desc->wb.upper.vlan); + unsigned long *active_vlans = netdev_priv(rx_ring->netdev); + + if (test_bit(vid & VLAN_VID_MASK, active_vlans)) + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); + } + + if (ixgbevf_test_staterr(rx_desc, IXGBE_RXDADV_STAT_SECP)) + ixgbevf_ipsec_rx(rx_ring, rx_desc, skb); +} + +static inline u16 ixgbevf_tx_get_num_sent(struct ixgbevf_ring *xdp_ring) +{ + u16 ntc = xdp_ring->next_to_clean; + u16 to_clean = 0; + + while (likely(to_clean < xdp_ring->pending)) { + u32 idx = xdp_ring->xdp_sqes[ntc].rs_idx; + union ixgbe_adv_tx_desc *rs_desc; + + if (!idx--) + break; + + rs_desc = IXGBEVF_TX_DESC(xdp_ring, idx); + + if (!(rs_desc->wb.status & cpu_to_le32(IXGBE_TXD_STAT_DD))) + break; + + xdp_ring->xdp_sqes[ntc].rs_idx = 0; + + to_clean += + (idx >= ntc ? idx : idx + xdp_ring->count) - ntc + 1; + + ntc = (idx + 1 == xdp_ring->count) ? 
0 : idx + 1; + } + + return to_clean; +} + +void ixgbevf_clean_xdp_num(struct ixgbevf_ring *xdp_ring, bool in_napi, + u16 to_clean); + +static inline u32 ixgbevf_prep_xdp_sq(void *xdpsq, struct libeth_xdpsq *sq) +{ + struct ixgbevf_ring *xdp_ring = xdpsq; + + libeth_xdpsq_lock(&xdp_ring->xdpq_lock); + if (unlikely(ixgbevf_desc_unused(xdp_ring) < xdp_ring->thresh)) { + u16 to_clean = ixgbevf_tx_get_num_sent(xdpsq); + + if (likely(to_clean)) + ixgbevf_clean_xdp_num(xdp_ring, true, to_clean); + } + + if (unlikely(!test_bit(__IXGBEVF_TX_XDP_RING_PRIMED, + &xdp_ring->state))) { + struct ixgbe_adv_tx_context_desc *context_desc; + + set_bit(__IXGBEVF_TX_XDP_RING_PRIMED, &xdp_ring->state); + + context_desc = IXGBEVF_TX_CTXTDESC(xdp_ring, 0); + context_desc->vlan_macip_lens = + cpu_to_le32(ETH_HLEN << IXGBE_ADVTXD_MACLEN_SHIFT); + context_desc->fceof_saidx = 0; + context_desc->type_tucmd_mlhl = + cpu_to_le32(IXGBE_TXD_CMD_DEXT | + IXGBE_ADVTXD_DTYP_CTXT); + context_desc->mss_l4len_idx = 0; + + xdp_ring->next_to_use = 1; + xdp_ring->pending = 1; + xdp_ring->xdp_sqes[0].type = LIBETH_SQE_CTX; + + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(xdp_ring, 1); + } + + *sq = (struct libeth_xdpsq) { + .count = xdp_ring->count, + .descs = xdp_ring->desc, + .lock = &xdp_ring->xdpq_lock, + .ntu = &xdp_ring->next_to_use, + .pending = &xdp_ring->pending, + .pool = xdp_ring->xsk_pool, + .sqes = xdp_ring->xdp_sqes, + }; + + return ixgbevf_desc_unused(xdp_ring); +} + +#endif /* _IXGBEVF_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c new file mode 100644 index 00000000000000..666dcb78727db1 --- /dev/null +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.c @@ -0,0 +1,281 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (C) 2025 Intel Corporation */ + +#include + +#include "ixgbevf_txrx_lib.h" +#include "ixgbevf_xsk.h" + +/** + * ixgbevf_single_irq_disable - Mask off interrupt generation on a single vector + * @adapter: board private structure + * @vidx: vector id + **/ +static void ixgbevf_single_irq_disable(struct ixgbevf_adapter *adapter, + u16 vidx) +{ + struct ixgbe_hw *hw = &adapter->hw; + + IXGBE_WRITE_REG(hw, IXGBE_VTEIAM, + adapter->eims_enable_mask & ~BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIMC, BIT(vidx)); + IXGBE_WRITE_REG(hw, IXGBE_VTEIAC, + adapter->eims_enable_mask & ~BIT(vidx)); + + IXGBE_WRITE_FLUSH(hw); + + synchronize_irq(adapter->msix_entries[vidx].vector); +} + +static void ixgbevf_qp_dis(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + struct ixgbevf_ring *ring; + + netif_stop_subqueue(adapter->netdev, qid); + ixgbevf_single_irq_disable(adapter, q_vector->v_idx); + napi_disable(&q_vector->napi); + + ixgbevf_disable_rx_queue(adapter, adapter->rx_ring[qid]); + ixgbevf_clean_rx_ring(rx_ring); + ixgbevf_rx_fq_destroy(rx_ring); + + /* Clean both XDP and normal Tx queue */ + ixgbevf_for_each_ring(ring, q_vector->tx) { + ixgbevf_flush_tx_queue(ring); + ixgbevf_clean_tx_ring(ring); + } +} + +static void ixgbevf_qp_ena(struct ixgbevf_adapter *adapter, u16 qid) +{ + struct ixgbevf_ring *rx_ring = adapter->rx_ring[qid]; + struct ixgbevf_q_vector *q_vector = rx_ring->q_vector; + struct ixgbevf_ring *ring; + + ixgbevf_setup_fq(adapter, rx_ring); + ixgbevf_configure_rx_ring(adapter, rx_ring); + ixgbevf_for_each_ring(ring, q_vector->tx) + ixgbevf_configure_tx_ring(adapter, 
ring); + + napi_enable(&q_vector->napi); + ixgbevf_irq_enable(adapter); + netif_start_subqueue(adapter->netdev, qid); +} + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid) +{ + bool running = !test_bit(__IXGBEVF_DOWN, &adapter->state) && + adapter->xdp_prog; + int err; + + if (running) + ixgbevf_qp_dis(adapter, qid); + + err = libeth_xsk_setup_pool(adapter->netdev, qid, !!pool); + + if (running) + ixgbevf_qp_ena(adapter, qid); + + return err; +} + +static void ixgbevf_fill_rx_xsk_desc(const struct libeth_xskfq_fp *fq, u32 i) +{ + union ixgbe_adv_rx_desc *rx_desc = + &((union ixgbe_adv_rx_desc *)fq->descs)[i]; + + rx_desc->read.pkt_addr = + cpu_to_le64(libeth_xsk_buff_xdp_get_dma(fq->fqes[i])); + rx_desc->wb.upper.length = 0; +} + +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num) +{ + struct libeth_xskfq_fp fq = { + .count = rx_ring->count, + .descs = rx_ring->desc, + .fqes = rx_ring->xsk_fqes, + .ntu = rx_ring->next_to_use, + .pool = rx_ring->xsk_pool, + }; + u32 done; + + done = libeth_xskfqe_alloc(&fq, num, ixgbevf_fill_rx_xsk_desc); + if (likely(done)) { + /* Finish descriptor writes before bumping tail */ + wmb(); + ixgbevf_write_tail(rx_ring, fq.ntu); + } + + rx_ring->next_to_use = fq.ntu; + rx_ring->pending -= done; +} + +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring) +{ + u32 ntc = rx_ring->next_to_clean; + + if (rx_ring->xsk_xdp_head) + xsk_buff_free(&rx_ring->xsk_xdp_head->base); + + while (ntc != rx_ring->next_to_use) { + xsk_buff_free(&rx_ring->xsk_fqes[ntc]->base); + ntc++; + ntc = ntc == rx_ring->count ? 0 : ntc; + } +} + +static void ixgbevf_xsk_xmit_desc(struct libeth_xdp_tx_desc desc, u32 i, + const struct libeth_xdpsq *sq, u64 priv) +{ + union ixgbe_adv_tx_desc *tx_desc = + &((union ixgbe_adv_tx_desc *)sq->descs)[i]; + + u32 cmd_type = IXGBE_ADVTXD_DTYP_DATA | + IXGBE_ADVTXD_DCMD_DEXT | + IXGBE_ADVTXD_DCMD_IFCS | + IXGBE_TXD_CMD_EOP | + desc.len; + + tx_desc->read.olinfo_status = + cpu_to_le32((desc.len << IXGBE_ADVTXD_PAYLEN_SHIFT) | + IXGBE_ADVTXD_CC); + + tx_desc->read.buffer_addr = cpu_to_le64(desc.addr); + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); +} + +LIBETH_XDP_DEFINE_START(); +LIBETH_XSK_DEFINE_FLUSH_TX(static ixgbevf_xsk_flush_tx, ixgbevf_prep_xdp_sq, + ixgbevf_xsk_xmit_desc); +LIBETH_XSK_DEFINE_RUN_PROG(static ixgbevf_xsk_run_prog, ixgbevf_xsk_flush_tx); +LIBETH_XSK_DEFINE_FINALIZE(static ixgbevf_xsk_finalize_xdp_napi, + ixgbevf_xsk_flush_tx, ixgbevf_xdp_rs_and_bump); +LIBETH_XDP_DEFINE_END(); + +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget) +{ + struct ixgbevf_adapter *adapter = q_vector->adapter; + u32 total_rx_bytes = 0, total_rx_packets = 0; + LIBETH_XDP_ONSTACK_BULK(xdp_tx_bulk); + struct libeth_xdp_buff *head_xdp; + struct sk_buff *skb; + + head_xdp = rx_ring->xsk_xdp_head; + libeth_xsk_tx_init_bulk(&xdp_tx_bulk, rx_ring->xdp_prog, + adapter->netdev, adapter->xdp_ring, + adapter->num_xdp_queues); + + while (likely(total_rx_packets < budget)) { + union ixgbe_adv_rx_desc *rx_desc; + struct libeth_xdp_buff *rx_buffer; + unsigned int size; + + if (unlikely(rx_ring->pending >= rx_ring->thresh)) + ixgbevf_xsk_alloc_rx_bufs(rx_ring, rx_ring->thresh); + + rx_desc = IXGBEVF_RX_DESC(rx_ring, rx_ring->next_to_clean); + size = le16_to_cpu(rx_desc->wb.upper.length); + if (unlikely(!size)) + break; + + /* Avoid reading other descriptor fields before checking size */ + rmb(); + + rx_buffer = 
rx_ring->xsk_fqes[rx_ring->next_to_clean];
+		head_xdp = libeth_xsk_process_buff(head_xdp, rx_buffer, size);
+		if (unlikely(!head_xdp) || ixgbevf_is_non_eop(rx_ring, rx_desc))
+			continue;
+
+		if (ixgbevf_xsk_run_prog(head_xdp, &xdp_tx_bulk)) {
+			head_xdp = NULL;
+			total_rx_packets++;
+			total_rx_bytes += size;
+			continue;
+		}
+
+		skb = xdp_build_skb_from_zc(&head_xdp->base);
+		if (unlikely(!skb)) {
+			libeth_xdp_return_buff_slow(head_xdp);
+			head_xdp = NULL;
+			rx_ring->rx_stats.alloc_rx_buff_failed++;
+			break;
+		}
+		head_xdp = NULL;
+
+		if (unlikely(ixgbevf_cleanup_headers(rx_ring, rx_desc, skb))) {
+			skb = NULL;
+			continue;
+		}
+
+		if (unlikely((skb->pkt_type == PACKET_BROADCAST ||
+			      skb->pkt_type == PACKET_MULTICAST) &&
+			     ether_addr_equal(rx_ring->netdev->dev_addr,
+					      eth_hdr(skb)->h_source))) {
+			dev_kfree_skb_irq(skb);
+			continue;
+		}
+
+		/* populate checksum, VLAN, and protocol */
+		ixgbevf_process_skb_fields(rx_ring, rx_desc, skb);
+
+		total_rx_bytes += skb->len;
+		total_rx_packets++;
+
+		napi_gro_receive(&q_vector->napi, skb);
+	}
+
+	/* place incomplete frames back on ring for completion */
+	rx_ring->xsk_xdp_head = head_xdp;
+
+	ixgbevf_xsk_finalize_xdp_napi(&xdp_tx_bulk);
+
+	u64_stats_update_begin(&rx_ring->syncp);
+	rx_ring->stats.packets += total_rx_packets;
+	rx_ring->stats.bytes += total_rx_bytes;
+	u64_stats_update_end(&rx_ring->syncp);
+	q_vector->rx.total_packets += total_rx_packets;
+	q_vector->rx.total_bytes += total_rx_bytes;
+
+	return total_rx_packets;
+}
+
+bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector,
+			      struct ixgbevf_ring *tx_ring, int napi_budget)
+{
+	u32 budget = min_t(u32, napi_budget, tx_ring->thresh);
+
+	return libeth_xsk_xmit_do_bulk(tx_ring->xsk_pool, tx_ring, budget,
+				       NULL, ixgbevf_prep_xdp_sq,
+				       ixgbevf_xsk_xmit_desc,
+				       ixgbevf_xdp_rs_and_bump);
+}
+
+int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
+{
+	struct ixgbevf_adapter *adapter = netdev_priv(dev);
+	struct ixgbevf_q_vector *q_vector;
+	struct ixgbevf_ring *rx_ring;
+
+	if (unlikely(test_bit(__IXGBEVF_DOWN, &adapter->state)))
+		return -ENETDOWN;
+
+	if (unlikely(queue_id >= adapter->num_xdp_queues))
+		return -EINVAL;
+
+	rx_ring = adapter->rx_ring[queue_id];
+	if (unlikely(!ring_is_xsk(rx_ring)))
+		return -EINVAL;
+
+	q_vector = rx_ring->q_vector;
+	if (!napi_if_scheduled_mark_missed(&q_vector->napi))
+		IXGBE_WRITE_REG(&adapter->hw, IXGBE_VTEICS,
+				BIT(q_vector->v_idx));
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h
new file mode 100644
index 00000000000000..59a7323fda0846
--- /dev/null
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_xsk.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2025 Intel Corporation.
*/ + +#ifndef _IXGBEVF_XSK_H_ +#define _IXGBEVF_XSK_H_ + +int ixgbevf_setup_xsk_pool(struct ixgbevf_adapter *adapter, + struct xsk_buff_pool *pool, u16 qid); +void ixgbevf_xsk_alloc_rx_bufs(struct ixgbevf_ring *rx_ring, u32 num); +void ixgbevf_rx_xsk_ring_free_buffs(struct ixgbevf_ring *rx_ring); +u32 ixgbevf_clean_xsk_rx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *rx_ring, int budget); +bool ixgbevf_clean_xsk_tx_irq(struct ixgbevf_q_vector *q_vector, + struct ixgbevf_ring *tx_ring, int napi_budget); +int ixgbevf_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags); + +#endif /* _IXGBEVF_XSK_H_ */ diff --git a/include/net/libeth/xdp.h b/include/net/libeth/xdp.h index f4880b50e804c2..add05f351207d3 100644 --- a/include/net/libeth/xdp.h +++ b/include/net/libeth/xdp.h @@ -1094,7 +1094,7 @@ __libeth_xdp_xmit_do_bulk(struct libeth_xdp_tx_bulk *bq, * @xqs: array of XDPSQs driver structs * @nqs: number of active XDPSQs, the above array length * @fl: driver callback to flush an XDP xmit bulk - * @fin: driver cabback to finalize the queue + * @fin: driver callback to finalize the queue * * If the driver has active XDPSQs, perform common checks and send the frames. * Finalize the queue, if requested.
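
The deferred-completion scheme introduced in ixgbevf_txrx_lib.h is easiest to follow as plain index arithmetic: ixgbevf_xdp_rs_and_bump() sets the RS bit only on the last descriptor of a batch and records that slot (plus one, with zero meaning "no marker") in rs_idx, while ixgbevf_tx_get_num_sent() later converts a completed RS slot back into a count of cleanable descriptors, wrapping around the ring. The stand-alone sketch below reproduces just that arithmetic; RING_COUNT, entries_completed() and the sample values are illustrative and not part of the driver.

/* Stand-alone sketch of the "RS marker" arithmetic used by
 * ixgbevf_tx_get_num_sent(): the cleanable span runs from next_to_clean
 * up to and including the descriptor that carried RS, even when that
 * marker has wrapped past the end of the ring.
 */
#include <stdio.h>

#define RING_COUNT 16

/* Descriptors between ntc and the RS slot 'idx', inclusive, with wrap. */
static unsigned int entries_completed(unsigned int ntc, unsigned int idx)
{
	return (idx >= ntc ? idx : idx + RING_COUNT) - ntc + 1;
}

int main(void)
{
	/* No wrap: slots 4..9 complete -> 6 descriptors */
	printf("%u\n", entries_completed(4, 9));
	/* Wrap: slots 14, 15, 0, 1 complete -> 4 descriptors */
	printf("%u\n", entries_completed(14, 1));
	return 0;
}

The "+ 1" accounts for the RS descriptor itself, which is also why the driver encodes rs_idx as ltu + 1 and treats zero as "nothing outstanding".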
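
ixgbevf_prep_xdp_sq() primes each XDP/XSK Tx ring exactly once: it writes a context descriptor into slot 0, marks the matching SQE as LIBETH_SQE_CTX and starts next_to_use at 1. Because the hardware never writes DD back for a context descriptor, the completion bookkeeping must refuse to treat that slot as an RS marker, which is what the LIBETH_SQE_CTX check in ixgbevf_xdp_rs_and_bump() does. A minimal model of that invariant follows; the enum, struct and function names are illustrative, not the libeth ones.

/* Illustrative model: the primed context slot can never be polled for
 * DD, so it must be skipped when picking an RS/completion marker.
 */
#include <assert.h>
#include <stdbool.h>

enum sqe_type { SQE_CTX, SQE_FRAME };

struct sqe { enum sqe_type type; };

/* True if slot 'ltu' may carry the RS bit and later report DD. */
static bool slot_can_complete(const struct sqe *sqes, unsigned int ltu)
{
	return sqes[ltu].type != SQE_CTX;
}

int main(void)
{
	struct sqe sqes[8] = { [0] = { .type = SQE_CTX } };

	for (unsigned int i = 1; i < 8; i++)
		sqes[i].type = SQE_FRAME;

	assert(!slot_can_complete(sqes, 0));	/* one-time context slot */
	assert(slot_can_complete(sqes, 1));	/* first data slot */
	return 0;
}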
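
On the zero-copy Rx side, the buffer accounting is split across three places: ixgbevf_is_non_eop() bumps the ring's pending counter for every descriptor handed back, ixgbevf_clean_xsk_rx_irq() asks for a refill once pending reaches thresh, and ixgbevf_xsk_alloc_rx_bufs() subtracts what was actually placed. The simplified user-space model below assumes every refill request is fully satisfied, which libeth_xskfqe_alloc() does not guarantee; all names in it are illustrative.

/* Simplified model of the pending/thresh refill accounting in the
 * XSK Rx path. Assumes full refills; the real allocator may place
 * fewer buffers than requested.
 */
#include <stdio.h>

struct rx_model {
	unsigned int thresh;	/* refill batch size */
	unsigned int pending;	/* cleaned but not yet refilled */
};

/* One descriptor consumed (ixgbevf_is_non_eop() in the driver). */
static void clean_one(struct rx_model *rx)
{
	rx->pending++;
}

/* Batch refill trigger (ixgbevf_clean_xsk_rx_irq() in the driver). */
static void maybe_refill(struct rx_model *rx)
{
	if (rx->pending < rx->thresh)
		return;

	printf("refill %u buffers, %u still pending\n",
	       rx->thresh, rx->pending - rx->thresh);
	rx->pending -= rx->thresh;	/* ixgbevf_xsk_alloc_rx_bufs() */
}

int main(void)
{
	struct rx_model rx = { .thresh = 32 };

	for (int i = 0; i < 100; i++) {
		clean_one(&rx);
		maybe_refill(&rx);
	}
	return 0;
}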
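
Finally, the new .ndo_xsk_wakeup hook is driven from user space: an AF_XDP socket bound with XDP_USE_NEED_WAKEUP checks the ring's need-wakeup flag and issues a dummy syscall, which lands in ixgbevf_xsk_wakeup() and either marks the already-scheduled NAPI as missed or kicks the queue's interrupt through VTEICS. The sketch below shows the user-space side; xsk_ring_prod__needs_wakeup() and the <xdp/xsk.h> header come from libxdp rather than from this series, and socket/umem setup is assumed to have been done elsewhere.

/* User-space kick that ends up in the driver's .ndo_xsk_wakeup
 * (ixgbevf_xsk_wakeup() in this series). Assumes libxdp and an AF_XDP
 * socket already bound with XDP_USE_NEED_WAKEUP; error handling omitted.
 */
#include <poll.h>
#include <sys/socket.h>
#include <xdp/xsk.h>	/* libxdp ring helpers, assumed available */

/* Tx side: only issue a syscall when the kernel asked for a wakeup. */
static void kick_tx(int xsk_fd, struct xsk_ring_prod *tx)
{
	if (xsk_ring_prod__needs_wakeup(tx))
		sendto(xsk_fd, NULL, 0, MSG_DONTWAIT, NULL, 0);
}

/* Rx side: a zero-timeout poll() restarts fill-ring processing. */
static void kick_rx(int xsk_fd, struct xsk_ring_prod *fill)
{
	struct pollfd pfd = { .fd = xsk_fd, .events = POLLIN };

	if (xsk_ring_prod__needs_wakeup(fill))
		poll(&pfd, 1, 0);
}

These two helpers are meant to sit inside an existing AF_XDP Tx/Rx loop; they are not part of the driver change itself.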