diff --git a/Documentation/networking/xdp-rx-metadata.rst b/Documentation/networking/xdp-rx-metadata.rst index 25ce72af81c216..4ec6ddfd2a524b 100644 --- a/Documentation/networking/xdp-rx-metadata.rst +++ b/Documentation/networking/xdp-rx-metadata.rst @@ -18,7 +18,16 @@ Currently, the following kfuncs are supported. In the future, as more metadata is supported, this set will grow: .. kernel-doc:: net/core/xdp.c - :identifiers: bpf_xdp_metadata_rx_timestamp bpf_xdp_metadata_rx_hash + :identifiers: bpf_xdp_metadata_rx_timestamp + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_hash + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_vlan_tag + +.. kernel-doc:: net/core/xdp.c + :identifiers: bpf_xdp_metadata_rx_csum_lvl An XDP program can use these kfuncs to read the metadata into stack variables for its own consumption. Or, to pass the metadata on to other diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 4ba3d99439a0ab..7a973a2229f128 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -943,4 +943,6 @@ static inline void ice_clear_rdma_cap(struct ice_pf *pf) set_bit(ICE_FLAG_UNPLUG_AUX_DEV, pf->flags); clear_bit(ICE_FLAG_RDMA_ENA, pf->flags); } + +extern const struct xdp_metadata_ops ice_xdp_md_ops; #endif /* _ICE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index 8d5cbbd0b3d5ab..3c3b9cbfbcd3df 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -2837,7 +2837,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring, /* clone ring and setup updated count */ rx_rings[i] = *vsi->rx_rings[i]; rx_rings[i].count = new_rx_cnt; - rx_rings[i].cached_phctime = pf->ptp.cached_phc_time; + rx_rings[i].pkt_ctx.cached_phctime = pf->ptp.cached_phc_time; rx_rings[i].desc = NULL; rx_rings[i].rx_buf = NULL; /* 
this is to allow wr32 to have something to write to diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 89f986a75cc855..d384ddfcb83e97 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -673,6 +673,212 @@ struct ice_tlan_ctx { * Use the enum ice_rx_l2_ptype to decode the packet type * ENDIF */ +#define ICE_PTYPES \ + /* L2 Packet types */ \ + ICE_PTT_UNUSED_ENTRY(0), \ + ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), \ + ICE_PTT_UNUSED_ENTRY(2), \ + ICE_PTT_UNUSED_ENTRY(3), \ + ICE_PTT_UNUSED_ENTRY(4), \ + ICE_PTT_UNUSED_ENTRY(5), \ + ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ + ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ + ICE_PTT_UNUSED_ENTRY(8), \ + ICE_PTT_UNUSED_ENTRY(9), \ + ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ + ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), \ + ICE_PTT_UNUSED_ENTRY(12), \ + ICE_PTT_UNUSED_ENTRY(13), \ + ICE_PTT_UNUSED_ENTRY(14), \ + ICE_PTT_UNUSED_ENTRY(15), \ + ICE_PTT_UNUSED_ENTRY(16), \ + ICE_PTT_UNUSED_ENTRY(17), \ + ICE_PTT_UNUSED_ENTRY(18), \ + ICE_PTT_UNUSED_ENTRY(19), \ + ICE_PTT_UNUSED_ENTRY(20), \ + ICE_PTT_UNUSED_ENTRY(21), \ + \ + /* Non Tunneled IPv4 */ \ + ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), \ + ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), \ + ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(25), \ + ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), \ + ICE_PTT(27, IP, IPV4, NOF, NONE, NONE, NOF, SCTP, PAY4), \ + ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> IPv4 */ \ + ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(32), \ + ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, 
PAY4), \ + ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> IPv6 */ \ + ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(39), \ + ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> GRE/NAT */ \ + ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ + \ + /* IPv4 --> GRE/NAT --> IPv4 */ \ + ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(47), \ + ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> GRE/NAT --> IPv6 */ \ + ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(54), \ + ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> GRE/NAT --> MAC */ \ + ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ + \ + /* IPv4 --> GRE/NAT --> MAC --> IPv4 */ \ + ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(62), \ + ICE_PTT(63, IP, 
IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ \ + ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(69), \ + ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv4 --> GRE/NAT --> MAC/VLAN */ \ + ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ + \ + /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ \ + ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(77), \ + ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ \ + ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(84), \ + ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), \ + \ + /* Non Tunneled IPv6 */ \ + ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), \ + ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, 
NOF, NONE, PAY3), \ + ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(91), \ + ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), \ + ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), \ + ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> IPv4 */ \ + ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(98), \ + ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> IPv6 */ \ + ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(105), \ + ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT */ \ + ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), \ + \ + /* IPv6 --> GRE/NAT -> IPv4 */ \ + ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(113), \ + ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT -> IPv6 */ \ + ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(118, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(120), \ + 
ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT -> MAC */ \ + ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), \ + \ + /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ \ + ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(128), \ + ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ \ + ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(135), \ + ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT -> MAC/VLAN */ \ + ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), \ + \ + /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ \ + ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), \ + ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), \ + ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(143), \ + ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), \ + ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), \ + ICE_PTT(146, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), \ + \ + /* IPv6 --> GRE/NAT -> 
MAC/VLAN --> IPv6 */ \ + ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), \ + ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), \ + ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), \ + ICE_PTT_UNUSED_ENTRY(150), \ + ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), \ + ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), \ + ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), + +#define ICE_NUM_DEFINED_PTYPES 154 /* macro to make the table lines short, use explicit indexing with [PTYPE] */ #define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ @@ -695,212 +901,10 @@ struct ice_tlan_ctx { /* Lookup table mapping in the 10-bit HW PTYPE to the bit field for decoding */ static const struct ice_rx_ptype_decoded ice_ptype_lkup[BIT(10)] = { - /* L2 Packet types */ - ICE_PTT_UNUSED_ENTRY(0), - ICE_PTT(1, L2, NONE, NOF, NONE, NONE, NOF, NONE, PAY2), - ICE_PTT_UNUSED_ENTRY(2), - ICE_PTT_UNUSED_ENTRY(3), - ICE_PTT_UNUSED_ENTRY(4), - ICE_PTT_UNUSED_ENTRY(5), - ICE_PTT(6, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(7, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(8), - ICE_PTT_UNUSED_ENTRY(9), - ICE_PTT(10, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT(11, L2, NONE, NOF, NONE, NONE, NOF, NONE, NONE), - ICE_PTT_UNUSED_ENTRY(12), - ICE_PTT_UNUSED_ENTRY(13), - ICE_PTT_UNUSED_ENTRY(14), - ICE_PTT_UNUSED_ENTRY(15), - ICE_PTT_UNUSED_ENTRY(16), - ICE_PTT_UNUSED_ENTRY(17), - ICE_PTT_UNUSED_ENTRY(18), - ICE_PTT_UNUSED_ENTRY(19), - ICE_PTT_UNUSED_ENTRY(20), - ICE_PTT_UNUSED_ENTRY(21), - - /* Non Tunneled IPv4 */ - ICE_PTT(22, IP, IPV4, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(23, IP, IPV4, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(24, IP, IPV4, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(25), - ICE_PTT(26, IP, IPV4, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(27, IP, IPV4, NOF, 
NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(28, IP, IPV4, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv4 --> IPv4 */ - ICE_PTT(29, IP, IPV4, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(30, IP, IPV4, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(31, IP, IPV4, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(32), - ICE_PTT(33, IP, IPV4, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(34, IP, IPV4, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - ICE_PTT(35, IP, IPV4, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> IPv6 */ - ICE_PTT(36, IP, IPV4, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(37, IP, IPV4, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(38, IP, IPV4, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(39), - ICE_PTT(40, IP, IPV4, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(41, IP, IPV4, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(42, IP, IPV4, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT */ - ICE_PTT(43, IP, IPV4, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv4 --> GRE/NAT --> IPv4 */ - ICE_PTT(44, IP, IPV4, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(45, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(46, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(47), - ICE_PTT(48, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - ICE_PTT(49, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(50, IP, IPV4, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> IPv6 */ - ICE_PTT(51, IP, IPV4, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(52, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(53, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(54), - ICE_PTT(55, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(56, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(57, IP, IPV4, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC */ - ICE_PTT(58, IP, IPV4, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - 
/* IPv4 --> GRE/NAT --> MAC --> IPv4 */ - ICE_PTT(59, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(60, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(61, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(62), - ICE_PTT(63, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(64, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(65, IP, IPV4, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT -> MAC --> IPv6 */ - ICE_PTT(66, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(67, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(68, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(69), - ICE_PTT(70, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(71, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(72, IP, IPV4, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv4 --> GRE/NAT --> MAC/VLAN */ - ICE_PTT(73, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv4 ---> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(74, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(75, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(76, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(77), - ICE_PTT(78, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(79, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(80, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv4 -> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(81, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(82, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(83, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(84), - ICE_PTT(85, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - ICE_PTT(86, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, 
NOF, SCTP, PAY4), - ICE_PTT(87, IP, IPV4, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), - - /* Non Tunneled IPv6 */ - ICE_PTT(88, IP, IPV6, FRG, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(89, IP, IPV6, NOF, NONE, NONE, NOF, NONE, PAY3), - ICE_PTT(90, IP, IPV6, NOF, NONE, NONE, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(91), - ICE_PTT(92, IP, IPV6, NOF, NONE, NONE, NOF, TCP, PAY4), - ICE_PTT(93, IP, IPV6, NOF, NONE, NONE, NOF, SCTP, PAY4), - ICE_PTT(94, IP, IPV6, NOF, NONE, NONE, NOF, ICMP, PAY4), - - /* IPv6 --> IPv4 */ - ICE_PTT(95, IP, IPV6, NOF, IP_IP, IPV4, FRG, NONE, PAY3), - ICE_PTT(96, IP, IPV6, NOF, IP_IP, IPV4, NOF, NONE, PAY3), - ICE_PTT(97, IP, IPV6, NOF, IP_IP, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(98), - ICE_PTT(99, IP, IPV6, NOF, IP_IP, IPV4, NOF, TCP, PAY4), - ICE_PTT(100, IP, IPV6, NOF, IP_IP, IPV4, NOF, SCTP, PAY4), - ICE_PTT(101, IP, IPV6, NOF, IP_IP, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> IPv6 */ - ICE_PTT(102, IP, IPV6, NOF, IP_IP, IPV6, FRG, NONE, PAY3), - ICE_PTT(103, IP, IPV6, NOF, IP_IP, IPV6, NOF, NONE, PAY3), - ICE_PTT(104, IP, IPV6, NOF, IP_IP, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(105), - ICE_PTT(106, IP, IPV6, NOF, IP_IP, IPV6, NOF, TCP, PAY4), - ICE_PTT(107, IP, IPV6, NOF, IP_IP, IPV6, NOF, SCTP, PAY4), - ICE_PTT(108, IP, IPV6, NOF, IP_IP, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT */ - ICE_PTT(109, IP, IPV6, NOF, IP_GRENAT, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> IPv4 */ - ICE_PTT(110, IP, IPV6, NOF, IP_GRENAT, IPV4, FRG, NONE, PAY3), - ICE_PTT(111, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, NONE, PAY3), - ICE_PTT(112, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(113), - ICE_PTT(114, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, TCP, PAY4), - ICE_PTT(115, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, SCTP, PAY4), - ICE_PTT(116, IP, IPV6, NOF, IP_GRENAT, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> IPv6 */ - ICE_PTT(117, IP, IPV6, NOF, IP_GRENAT, IPV6, FRG, NONE, PAY3), - ICE_PTT(118, IP, IPV6, 
NOF, IP_GRENAT, IPV6, NOF, NONE, PAY3), - ICE_PTT(119, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(120), - ICE_PTT(121, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, TCP, PAY4), - ICE_PTT(122, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, SCTP, PAY4), - ICE_PTT(123, IP, IPV6, NOF, IP_GRENAT, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC */ - ICE_PTT(124, IP, IPV6, NOF, IP_GRENAT_MAC, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC -> IPv4 */ - ICE_PTT(125, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, FRG, NONE, PAY3), - ICE_PTT(126, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, NONE, PAY3), - ICE_PTT(127, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(128), - ICE_PTT(129, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, TCP, PAY4), - ICE_PTT(130, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, SCTP, PAY4), - ICE_PTT(131, IP, IPV6, NOF, IP_GRENAT_MAC, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC -> IPv6 */ - ICE_PTT(132, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, FRG, NONE, PAY3), - ICE_PTT(133, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, NONE, PAY3), - ICE_PTT(134, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(135), - ICE_PTT(136, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, TCP, PAY4), - ICE_PTT(137, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, SCTP, PAY4), - ICE_PTT(138, IP, IPV6, NOF, IP_GRENAT_MAC, IPV6, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN */ - ICE_PTT(139, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, NONE, NOF, NONE, PAY3), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv4 */ - ICE_PTT(140, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, FRG, NONE, PAY3), - ICE_PTT(141, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, NONE, PAY3), - ICE_PTT(142, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(143), - ICE_PTT(144, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, TCP, PAY4), - ICE_PTT(145, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV4, NOF, SCTP, PAY4), - ICE_PTT(146, IP, IPV6, NOF, 
IP_GRENAT_MAC_VLAN, IPV4, NOF, ICMP, PAY4), - - /* IPv6 --> GRE/NAT -> MAC/VLAN --> IPv6 */ - ICE_PTT(147, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, FRG, NONE, PAY3), - ICE_PTT(148, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, NONE, PAY3), - ICE_PTT(149, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, UDP, PAY4), - ICE_PTT_UNUSED_ENTRY(150), - ICE_PTT(151, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, TCP, PAY4), - ICE_PTT(152, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, SCTP, PAY4), - ICE_PTT(153, IP, IPV6, NOF, IP_GRENAT_MAC_VLAN, IPV6, NOF, ICMP, PAY4), + ICE_PTYPES /* unused entries */ - [154 ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } + [ICE_NUM_DEFINED_PTYPES ... 1023] = { 0, 0, 0, 0, 0, 0, 0, 0, 0 } }; static inline struct ice_rx_ptype_decoded ice_decode_rx_desc_ptype(u16 ptype) diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 00e3afd507a408..eb69b0ac795687 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1445,7 +1445,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->netdev = vsi->netdev; ring->dev = dev; ring->count = vsi->num_rx_desc; - ring->cached_phctime = pf->ptp.cached_phc_time; + ring->pkt_ctx.cached_phctime = pf->ptp.cached_phc_time; WRITE_ONCE(vsi->rx_rings[i], ring); } diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 93979ab18bc1d6..ab7129b0dc6733 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -3384,6 +3384,7 @@ static void ice_set_ops(struct ice_vsi *vsi) netdev->netdev_ops = &ice_netdev_ops; netdev->udp_tunnel_nic_info = &pf->hw.udp_tunnel_nic; + netdev->xdp_metadata_ops = &ice_xdp_md_ops; ice_set_ethtool_ops(netdev); if (vsi->type != ICE_VSI_PF) @@ -5929,6 +5930,23 @@ ice_fix_features(struct net_device *netdev, netdev_features_t features) return features; } +/** + * ice_set_rx_rings_vlan_proto - update rings with 
new stripped VLAN proto + * @vsi: PF's VSI + * @vlan_ethertype: VLAN ethertype (802.1Q or 802.1ad) in network byte order + * + * Store current stripped VLAN proto in ring packet context, + * so it can be accessed more efficiently by packet processing code. + */ +static void +ice_set_rx_rings_vlan_proto(struct ice_vsi *vsi, __be16 vlan_ethertype) +{ + u16 i; + + ice_for_each_alloc_rxq(vsi, i) + vsi->rx_rings[i]->pkt_ctx.vlan_proto = vlan_ethertype; +} + /** * ice_set_vlan_offload_features - set VLAN offload features for the PF VSI * @vsi: PF's VSI @@ -5971,6 +5989,11 @@ ice_set_vlan_offload_features(struct ice_vsi *vsi, netdev_features_t features) if (strip_err || insert_err) return -EIO; + if (enable_stripping) + ice_set_rx_rings_vlan_proto(vsi, htons(vlan_ethertype)); + else + ice_set_rx_rings_vlan_proto(vsi, 0); + return 0; } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index 81d96a40d5a743..70697e4829dd7c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -1038,7 +1038,7 @@ static int ice_ptp_update_cached_phctime(struct ice_pf *pf) ice_for_each_rxq(vsi, j) { if (!vsi->rx_rings[j]) continue; - WRITE_ONCE(vsi->rx_rings[j]->cached_phctime, systime); + WRITE_ONCE(vsi->rx_rings[j]->pkt_ctx.cached_phctime, systime); } } clear_bit(ICE_CFG_BUSY, pf->state); @@ -2147,30 +2147,24 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) } /** - * ice_ptp_rx_hwtstamp - Check for an Rx timestamp - * @rx_ring: Ring to get the VSI info + * ice_ptp_get_rx_hwts - Get packet Rx timestamp * @rx_desc: Receive descriptor - * @skb: Particular skb to send timestamp with + * @cached_time: Cached PHC time * * The driver receives a notification in the receive descriptor with timestamp. - * The timestamp is in ns, so we must convert the result first. 
*/ -void -ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) +u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, + u64 cached_time) { - struct skb_shared_hwtstamps *hwtstamps; - u64 ts_ns, cached_time; u32 ts_high; + u64 ts_ns; /* timestamp extended against cached_time; this is the value returned */ if (!(rx_desc->wb.time_stamp_low & ICE_PTP_TS_VALID)) - return; - - cached_time = READ_ONCE(rx_ring->cached_phctime); + return 0; /* descriptor carries no valid Rx timestamp: report 0 */ /* Do not report a timestamp if we don't have a cached PHC time */ if (!cached_time) - return; + return 0; /* Use ice_ptp_extend_32b_ts directly, using the ring-specific cached * PHC value, rather than accessing the PF. This also allows us to @@ -2181,9 +2175,7 @@ ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, ts_high = le32_to_cpu(rx_desc->wb.flex_ts.ts_high); ts_ns = ice_ptp_extend_32b_ts(cached_time, ts_high); - hwtstamps = skb_hwtstamps(skb); - memset(hwtstamps, 0, sizeof(*hwtstamps)); - hwtstamps->hwtstamp = ns_to_ktime(ts_ns); + return ts_ns; } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index 995a57019ba767..523eefbfdf95cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -268,9 +268,8 @@ void ice_ptp_extts_event(struct ice_pf *pf); s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); enum ice_tx_tstamp_work ice_ptp_process_ts(struct ice_pf *pf); -void -ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb); +u64 ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, + u64 cached_time); void ice_ptp_reset(struct ice_pf *pf); void ice_ptp_prepare_for_reset(struct ice_pf *pf); void ice_ptp_init(struct ice_pf *pf); @@ -304,9 +303,13 @@ static inline bool ice_ptp_process_ts(struct ice_pf *pf) { return true; } -static inline void -ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, - union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } + +static
inline u64 +ice_ptp_get_rx_hwts(const union ice_32b_rx_flex_desc *rx_desc, u64 cached_time) +{ + return 0; +} + static inline void ice_ptp_reset(struct ice_pf *pf) { } static inline void ice_ptp_prepare_for_reset(struct ice_pf *pf) { } static inline void ice_ptp_init(struct ice_pf *pf) { } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 52d0a126eb6161..1eee7d98f92c8f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -557,13 +557,14 @@ ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, const unsigned int size) * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action * @rx_buf: Rx buffer to store the XDP action + * @eop_desc: Last descriptor in packet to read metadata from * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static void ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, - struct ice_rx_buf *rx_buf) + struct ice_rx_buf *rx_buf, union ice_32b_rx_flex_desc *eop_desc) { unsigned int ret = ICE_XDP_PASS; u32 act; @@ -571,6 +572,8 @@ ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, if (!xdp_prog) goto exit; + ice_xdp_meta_set_desc(xdp, eop_desc); + act = bpf_prog_run_xdp(xdp_prog, xdp); switch (act) { case XDP_PASS: @@ -1181,7 +1184,6 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) unsigned int size; u16 stat_err_bits; u16 vlan_tag = 0; - u16 rx_ptype; /* get the Rx desc from Rx ring based on 'next_to_clean' */ rx_desc = ICE_RX_DESC(rx_ring, ntc); @@ -1241,7 +1243,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) if (ice_is_non_eop(rx_ring, rx_desc)) continue; - ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf); + ice_run_xdp(rx_ring, xdp, xdp_prog, xdp_ring, rx_buf, rx_desc); if (rx_buf->act == ICE_XDP_PASS) goto construct_skb; total_rx_bytes += xdp_get_buff_len(xdp); @@ -1276,7 +1278,7 @@ int 
ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) continue; } - vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); + vlan_tag = ice_get_vlan_tag(rx_desc); /* pad the skb if needed, to make a valid ethernet frame */ if (eth_skb_pad(skb)) @@ -1286,10 +1288,7 @@ int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) total_rx_bytes += skb->len; /* populate checksum, VLAN, and protocol */ - rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & - ICE_RX_FLEX_DESC_PTYPE_M; - - ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + ice_process_skb_fields(rx_ring, rx_desc, skb); ice_trace(clean_rx_irq_indicate, rx_ring, rx_desc, skb); /* send completed skb up the stack */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 166413fc33f48f..41e0b14e6643c0 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -257,6 +257,20 @@ enum ice_rx_dtype { ICE_RX_DTYPE_SPLIT_ALWAYS = 2, }; +struct ice_pkt_ctx { /* Rx metadata context; placed right after the xdp_buff in struct ice_xdp_buff */ + const union ice_32b_rx_flex_desc *eop_desc; /* presumably the packet's last descriptor, set via ice_xdp_meta_set_desc() - TODO confirm */ + u64 cached_phctime; /* cached PHC time; refreshed by ice_ptp_update_cached_phctime() */ + __be16 vlan_proto; /* stripped VLAN ethertype in network byte order, 0 when stripping is disabled */ +}; + +struct ice_xdp_buff { /* xdp_buff extended with the driver's packet context */ + struct xdp_buff xdp_buff; /* must remain the first member, see static_assert below */ + struct ice_pkt_ctx pkt_ctx; +}; + +/* Required for compatibility with xdp_buffs from xsk_pool */ +static_assert(offsetof(struct ice_xdp_buff, xdp_buff) == 0); + /* indices into GLINT_ITR registers */ #define ICE_RX_ITR ICE_IDX_ITR0 #define ICE_TX_ITR ICE_IDX_ITR1 @@ -298,7 +312,6 @@ enum ice_dynamic_itr { /* descriptor ring, associated with a VSI */ struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ - struct ice_rx_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ @@ -310,12 +323,19 @@ struct ice_rx_ring { u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ u16 next_to_alloc; - /* CL2 - 2nd cacheline starts here */ + union {
struct ice_rx_buf *rx_buf; struct xdp_buff **xdp_buf; }; - struct xdp_buff xdp; + /* CL2 - 2nd cacheline starts here */ + union { + struct ice_xdp_buff xdp_ext; + struct { + struct xdp_buff xdp; + struct ice_pkt_ctx pkt_ctx; + }; + }; /* CL3 - 3rd cacheline starts here */ struct bpf_prog *xdp_prog; u16 rx_offset; @@ -325,6 +345,8 @@ struct ice_rx_ring { u16 next_to_clean; u16 first_desc; + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ + /* stats structs */ struct ice_ring_stats *ring_stats; @@ -334,7 +356,6 @@ struct ice_rx_ring { struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; dma_addr_t dma; /* physical address of ring */ - u64 cached_phctime; u16 rx_buf_len; u8 dcb_tc; /* Traffic class of ring */ u8 ptp_rx; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index c8322fb6f2b37f..41ab52b6990d77 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -63,42 +63,80 @@ static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) } /** - * ice_rx_hash - set the hash value in the skb + * ice_get_rx_hash - get RX hash value from descriptor + * @rx_desc: specific descriptor + * + * Returns hash, if present, 0 otherwise. 
+ */ +static u32 +ice_get_rx_hash(const union ice_32b_rx_flex_desc *rx_desc) +{ + const struct ice_32b_rx_flex_desc_nic *nic_mdid; + + if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) + return 0; + + nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; + return le32_to_cpu(nic_mdid->rss_hash); +} + +/** + * ice_rx_hash_to_skb - set the hash value in the skb * @rx_ring: descriptor ring * @rx_desc: specific descriptor * @skb: pointer to current skb * @rx_ptype: the ptype value from the descriptor */ static void -ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u16 rx_ptype) +ice_rx_hash_to_skb(const struct ice_rx_ring *rx_ring, + const union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb, u16 rx_ptype) { - struct ice_32b_rx_flex_desc_nic *nic_mdid; u32 hash; if (!(rx_ring->netdev->features & NETIF_F_RXHASH)) return; - if (rx_desc->wb.rxdid != ICE_RXDID_FLEX_NIC) - return; + hash = ice_get_rx_hash(rx_desc); + if (likely(hash)) + skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +} - nic_mdid = (struct ice_32b_rx_flex_desc_nic *)rx_desc; - hash = le32_to_cpu(nic_mdid->rss_hash); - skb_set_hash(skb, hash, ice_ptype_to_htype(rx_ptype)); +enum ice_rx_csum_status { + ICE_RX_CSUM_LVL_0 = 0, + ICE_RX_CSUM_LVL_1 = BIT(0), + ICE_RX_CSUM_NONE = BIT(1), + ICE_RX_CSUM_ERROR = BIT(2), + ICE_RX_CSUM_FAIL = ICE_RX_CSUM_NONE | ICE_RX_CSUM_ERROR, +}; + +/** + * ice_rx_csum_lvl - Get checksum level from status + * @status: driver-specific checksum status + */ +static u8 ice_rx_csum_lvl(enum ice_rx_csum_status status) +{ + return status & ICE_RX_CSUM_LVL_1; } /** - * ice_rx_csum - Indicate in skb if checksum is good - * @ring: the ring we care about - * @skb: skb currently being received and modified + * ice_rx_csum_ip_summed - Checksum status from driver-specific to generic + * @status: driver-specific checksum status + */ +static u8 ice_rx_csum_ip_summed(enum ice_rx_csum_status status) +{ + return status & ICE_RX_CSUM_NONE ? 
CHECKSUM_NONE : CHECKSUM_UNNECESSARY; +} + +/** + * ice_get_rx_csum_status - Deduce checksum status from descriptor * @rx_desc: the receive descriptor * @ptype: the packet type decoded by hardware * - * skb->protocol must be set before this function is called + * Returns driver-specific checksum status */ -static void -ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, - union ice_32b_rx_flex_desc *rx_desc, u16 ptype) +static enum ice_rx_csum_status +ice_get_rx_csum_status(const union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { struct ice_rx_ptype_decoded decoded; u16 rx_status0, rx_status1; @@ -109,20 +147,12 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, decoded = ice_decode_rx_desc_ptype(ptype); - /* Start with CHECKSUM_NONE and by default csum_level = 0 */ - skb->ip_summed = CHECKSUM_NONE; - skb_checksum_none_assert(skb); - - /* check if Rx checksum is enabled */ - if (!(ring->netdev->features & NETIF_F_RXCSUM)) - return; - /* check if HW has decoded the packet and checksum */ if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S))) - return; + return ICE_RX_CSUM_NONE; if (!(decoded.known && decoded.outer_ip)) - return; + return ICE_RX_CSUM_NONE; ipv4 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) && (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV4); @@ -131,43 +161,94 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) | BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S)))) - goto checksum_fail; + return ICE_RX_CSUM_FAIL; if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S)))) - goto checksum_fail; + return ICE_RX_CSUM_FAIL; /* check for L4 errors and handle packets that were not able to be * checksummed due to arrival speed */ if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S)) - goto checksum_fail; + return ICE_RX_CSUM_FAIL; /* check for outer UDP checksum error in tunneled packets */ if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) && 
(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S))) - goto checksum_fail; - - /* If there is an outer header present that might contain a checksum - * we need to bump the checksum level by 1 to reflect the fact that - * we are indicating we validated the inner checksum. - */ - if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT) - skb->csum_level = 1; + return ICE_RX_CSUM_FAIL; /* Only report checksum unnecessary for TCP, UDP, or SCTP */ switch (decoded.inner_prot) { case ICE_RX_PTYPE_INNER_PROT_TCP: case ICE_RX_PTYPE_INNER_PROT_UDP: case ICE_RX_PTYPE_INNER_PROT_SCTP: - skb->ip_summed = CHECKSUM_UNNECESSARY; - break; - default: - break; + /* If there is an outer header present that might contain + * a checksum we need to bump the checksum level by 1 to reflect + * the fact that we have validated the inner checksum. + */ + return decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT ? + ICE_RX_CSUM_LVL_1 : ICE_RX_CSUM_LVL_0; } - return; -checksum_fail: - ring->vsi->back->hw_csum_rx_error++; + return ICE_RX_CSUM_NONE; +} + +/** + * ice_rx_csum_into_skb - Indicate in skb if checksum is good + * @ring: the ring we care about + * @skb: skb currently being received and modified + * @rx_desc: the receive descriptor + * @ptype: the packet type decoded by hardware + */ +static void +ice_rx_csum_into_skb(struct ice_rx_ring *ring, struct sk_buff *skb, + const union ice_32b_rx_flex_desc *rx_desc, u16 ptype) +{ + enum ice_rx_csum_status csum_status; + + /* check if Rx checksum is enabled */ + if (!(ring->netdev->features & NETIF_F_RXCSUM)) + return; + + csum_status = ice_get_rx_csum_status(rx_desc, ptype); + if (csum_status & ICE_RX_CSUM_ERROR) + ring->vsi->back->hw_csum_rx_error++; + + skb->ip_summed = ice_rx_csum_ip_summed(csum_status); + skb->csum_level = ice_rx_csum_lvl(csum_status); +} + +/** + * ice_ptp_rx_hwts_to_skb - Put RX timestamp into skb + * @rx_ring: Ring to get the VSI info + * @rx_desc: Receive descriptor + * @skb: Particular skb to send timestamp 
with + * + * The timestamp is in ns, so we must convert the result first. + */ +static void +ice_ptp_rx_hwts_to_skb(struct ice_rx_ring *rx_ring, + const union ice_32b_rx_flex_desc *rx_desc, + struct sk_buff *skb) +{ + u64 ts_ns, cached_time; + + cached_time = READ_ONCE(rx_ring->pkt_ctx.cached_phctime); + ts_ns = ice_ptp_get_rx_hwts(rx_desc, cached_time); + + *skb_hwtstamps(skb) = (struct skb_shared_hwtstamps){ + .hwtstamp = ns_to_ktime(ts_ns), + }; +} + +/** + * ice_get_ptype - Read HW packet type from the descriptor + * @rx_desc: RX descriptor + */ +static u16 ice_get_ptype(const union ice_32b_rx_flex_desc *rx_desc) +{ + return le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & + ICE_RX_FLEX_DESC_PTYPE_M; } /** @@ -175,7 +256,6 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, * @rx_ring: Rx descriptor ring packet is being transacted on * @rx_desc: pointer to the EOP Rx descriptor * @skb: pointer to current skb being populated - * @ptype: the packet type decoded by hardware * * This function checks the ring, descriptor, and packet information in * order to populate the hash, checksum, VLAN, protocol, and @@ -184,17 +264,19 @@ ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, void ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u16 ptype) + struct sk_buff *skb) { - ice_rx_hash(rx_ring, rx_desc, skb, ptype); + u16 ptype = ice_get_ptype(rx_desc); + + ice_rx_hash_to_skb(rx_ring, rx_desc, skb, ptype); /* modifies the skb - consumes the enet header */ skb->protocol = eth_type_trans(skb, rx_ring->netdev); - ice_rx_csum(rx_ring, skb, rx_desc, ptype); + ice_rx_csum_into_skb(rx_ring, skb, rx_desc, ptype); if (rx_ring->ptp_rx) - ice_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); + ice_ptp_rx_hwts_to_skb(rx_ring, rx_desc, skb); } /** @@ -209,13 +291,9 @@ ice_process_skb_fields(struct ice_rx_ring *rx_ring, void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { - 
netdev_features_t features = rx_ring->netdev->features; - bool non_zero_vlan = !!(vlan_tag & VLAN_VID_MASK); - - if ((features & NETIF_F_HW_VLAN_CTAG_RX) && non_zero_vlan) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag); - else if ((features & NETIF_F_HW_VLAN_STAG_RX) && non_zero_vlan) - __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021AD), vlan_tag); + if (vlan_tag & VLAN_VID_MASK && rx_ring->pkt_ctx.vlan_proto) + __vlan_hwaccel_put_tag(skb, rx_ring->pkt_ctx.vlan_proto, + vlan_tag); napi_gro_receive(&rx_ring->q_vector->napi, skb); } @@ -464,3 +542,152 @@ void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res, spin_unlock(&xdp_ring->tx_lock); } } + +/** + * ice_xdp_rx_hw_ts - HW timestamp XDP hint handler + * @ctx: XDP buff pointer + * @ts_ns: destination address + * + * Copy HW timestamp (if available) to the destination address. + */ +static int ice_xdp_rx_hw_ts(const struct xdp_md *ctx, u64 *ts_ns) +{ + const struct ice_xdp_buff *xdp_ext = (void *)ctx; + u64 cached_time; + + cached_time = READ_ONCE(xdp_ext->pkt_ctx.cached_phctime); + *ts_ns = ice_ptp_get_rx_hwts(xdp_ext->pkt_ctx.eop_desc, cached_time); + if (!*ts_ns) + return -ENODATA; + + return 0; +} + +/* Define a ptype index -> XDP hash type lookup table. + * It uses the same ptype definitions as ice_decode_rx_desc_ptype[], + * avoiding possible copy-paste errors. + */ +#undef ICE_PTT +#undef ICE_PTT_UNUSED_ENTRY + +#define ICE_PTT(PTYPE, OUTER_IP, OUTER_IP_VER, OUTER_FRAG, T, TE, TEF, I, PL)\ + [PTYPE] = XDP_RSS_L3_##OUTER_IP_VER | XDP_RSS_L4_##I | XDP_RSS_TYPE_##PL + +#define ICE_PTT_UNUSED_ENTRY(PTYPE) [PTYPE] = 0 + +/* A few supplementary definitions for when XDP hash types do not coincide + * with what can be generated from ptype definitions + * by means of preprocessor concatenation. 
+ */ +#define XDP_RSS_L3_NONE XDP_RSS_TYPE_NONE +#define XDP_RSS_L4_NONE XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_PAY2 XDP_RSS_TYPE_L2 +#define XDP_RSS_TYPE_PAY3 XDP_RSS_TYPE_NONE +#define XDP_RSS_TYPE_PAY4 XDP_RSS_L4 + +static const enum xdp_rss_hash_type +ice_ptype_to_xdp_hash[ICE_NUM_DEFINED_PTYPES] = { + ICE_PTYPES +}; + +#undef XDP_RSS_L3_NONE +#undef XDP_RSS_L4_NONE +#undef XDP_RSS_TYPE_PAY2 +#undef XDP_RSS_TYPE_PAY3 +#undef XDP_RSS_TYPE_PAY4 + +#undef ICE_PTT +#undef ICE_PTT_UNUSED_ENTRY + +/** + * ice_xdp_rx_hash_type - Get XDP-specific hash type from the RX descriptor + * @eop_desc: End of Packet descriptor + */ +static enum xdp_rss_hash_type +ice_xdp_rx_hash_type(const union ice_32b_rx_flex_desc *eop_desc) +{ + u16 ptype = ice_get_ptype(eop_desc); + + if (unlikely(ptype >= ICE_NUM_DEFINED_PTYPES)) + return 0; + + return ice_ptype_to_xdp_hash[ptype]; +} + +/** + * ice_xdp_rx_hash - RX hash XDP hint handler + * @ctx: XDP buff pointer + * @hash: hash destination address + * @rss_type: XDP hash type destination address + * + * Copy RX hash (if available) and its type to the destination address. + */ +static int ice_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, + enum xdp_rss_hash_type *rss_type) +{ + const struct ice_xdp_buff *xdp_ext = (void *)ctx; + + *hash = ice_get_rx_hash(xdp_ext->pkt_ctx.eop_desc); + *rss_type = ice_xdp_rx_hash_type(xdp_ext->pkt_ctx.eop_desc); + if (!likely(*hash)) + return -ENODATA; + + return 0; +} + +/** + * ice_xdp_rx_vlan_tag - VLAN tag XDP hint handler + * @ctx: XDP buff pointer + * @vlan_tag: destination address for VLAN tag + * @vlan_proto: destination address for VLAN protocol + * + * Copy VLAN tag (if was stripped) and corresponding protocol + * to the destination address. 
+ */ +static int ice_xdp_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag, + __be16 *vlan_proto) +{ + const struct ice_xdp_buff *xdp_ext = (void *)ctx; + + *vlan_proto = xdp_ext->pkt_ctx.vlan_proto; + if (!*vlan_proto) + return -ENODATA; + + *vlan_tag = ice_get_vlan_tag(xdp_ext->pkt_ctx.eop_desc); + if (!*vlan_tag) + return -ENODATA; + + return 0; +} + +/** + * ice_xdp_rx_csum_lvl - Get level, at which HW has checked the checksum + * @ctx: XDP buff pointer + * @csum_lvl: destination address + * + * Copy HW checksum level (if was checked) to the destination address. + */ +static int ice_xdp_rx_csum_lvl(const struct xdp_md *ctx, u8 *csum_lvl) +{ + const struct ice_xdp_buff *xdp_ext = (void *)ctx; + const union ice_32b_rx_flex_desc *eop_desc; + enum ice_rx_csum_status status; + u16 ptype; + + eop_desc = xdp_ext->pkt_ctx.eop_desc; + ptype = ice_get_ptype(eop_desc); + + status = ice_get_rx_csum_status(eop_desc, ptype); + if (status & ICE_RX_CSUM_NONE) + return -ENODATA; + + *csum_lvl = ice_rx_csum_lvl(status); + return 0; +} + +const struct xdp_metadata_ops ice_xdp_md_ops = { + .xmo_rx_timestamp = ice_xdp_rx_hw_ts, + .xmo_rx_hash = ice_xdp_rx_hash, + .xmo_rx_vlan_tag = ice_xdp_rx_vlan_tag, + .xmo_rx_csum_lvl = ice_xdp_rx_csum_lvl, +}; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 115969ecdf7b97..d0af716c149798 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -84,7 +84,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) } /** - * ice_get_vlan_tag_from_rx_desc - get VLAN from Rx flex descriptor + * ice_get_vlan_tag - get VLAN from Rx flex descriptor * @rx_desc: Rx 32b flex descriptor with RXDID=2 * * The OS and current PF implementation only support stripping a single VLAN tag @@ -92,7 +92,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) * one is found return the tag, else return 0 to 
mean no VLAN tag was found. */ static inline u16 -ice_get_vlan_tag_from_rx_desc(union ice_32b_rx_flex_desc *rx_desc) +ice_get_vlan_tag(const union ice_32b_rx_flex_desc *rx_desc) { u16 stat_err_bits; @@ -148,7 +148,17 @@ void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); void ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, - struct sk_buff *skb, u16 ptype); + struct sk_buff *skb); void ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); + +static inline void +ice_xdp_meta_set_desc(struct xdp_buff *xdp, + union ice_32b_rx_flex_desc *eop_desc) +{ + struct ice_xdp_buff *xdp_ext = container_of(xdp, struct ice_xdp_buff, + xdp_buff); + + xdp_ext->pkt_ctx.eop_desc = eop_desc; +} #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index a7fe2b4ce6552f..cf205ea177fb1d 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -705,16 +705,25 @@ static int ice_xmit_xdp_tx_zc(struct xdp_buff *xdp, * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run * @xdp_ring: ring to be used for XDP_TX action + * @rx_desc: packet descriptor * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring, + union ice_32b_rx_flex_desc *rx_desc) { int err, result = ICE_XDP_PASS; u32 act; + /* We can safely convert xdp_buff_xsk to ice_xdp_buff, + * because there are XSK_PRIV_MAX bytes reserved in xdp_buff_xsk + * right after xdp_buff, for our private use. + * Macro ensures we do not go above the limit.
+ */ + XSK_CHECK_PRIV_TYPE(struct ice_xdp_buff); + ice_xdp_meta_set_desc(xdp, rx_desc); act = bpf_prog_run_xdp(xdp_prog, xdp); if (likely(act == XDP_REDIRECT)) { @@ -813,7 +822,8 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) xsk_buff_set_size(xdp, size); xsk_buff_dma_sync_for_cpu(xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring); + xdp_res = ice_run_xdp_zc(rx_ring, xdp, xdp_prog, xdp_ring, + rx_desc); if (likely(xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))) { xdp_xmit |= xdp_res; } else if (xdp_res == ICE_XDP_EXIT) { @@ -849,12 +859,12 @@ int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) total_rx_bytes += skb->len; total_rx_packets++; - vlan_tag = ice_get_vlan_tag_from_rx_desc(rx_desc); + vlan_tag = ice_get_vlan_tag(rx_desc); rx_ptype = le16_to_cpu(rx_desc->wb.ptype_flex_flags0) & ICE_RX_FLEX_DESC_PTYPE_M; - ice_process_skb_fields(rx_ring, rx_desc, skb, rx_ptype); + ice_process_skb_fields(rx_ring, rx_desc, skb); ice_receive_skb(rx_ring, skb, vlan_tag); } diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 614f3e3efab095..a7f2b679551d6a 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -1732,6 +1732,44 @@ static int veth_xdp_rx_hash(const struct xdp_md *ctx, u32 *hash, return 0; } +static int veth_xdp_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag, + __be16 *vlan_proto) +{ + struct veth_xdp_buff *_ctx = (void *)ctx; + struct sk_buff *skb = _ctx->skb; + int err; + + if (!skb) + return -ENODATA; + + err = __vlan_hwaccel_get_tag(skb, vlan_tag); + if (err) + return err; + + *vlan_proto = skb->vlan_proto; + return err; +} + +static int veth_xdp_rx_csum_lvl(const struct xdp_md *ctx, u8 *csum_level) +{ + struct veth_xdp_buff *_ctx = (void *)ctx; + struct sk_buff *skb = _ctx->skb; + + if (!skb) + return -ENODATA; + + if (skb->ip_summed == CHECKSUM_UNNECESSARY) + *csum_level = skb->csum_level; + else if (skb->ip_summed == CHECKSUM_PARTIAL && + skb_checksum_start_offset(skb) == 
skb_transport_offset(skb) || + skb->csum_valid) + *csum_level = 0; + else + return -ENODATA; + + return 0; +} + static const struct net_device_ops veth_netdev_ops = { .ndo_init = veth_dev_init, .ndo_open = veth_open, @@ -1756,6 +1794,8 @@ static const struct net_device_ops veth_netdev_ops = { static const struct xdp_metadata_ops veth_xdp_metadata_ops = { .xmo_rx_timestamp = veth_xdp_rx_timestamp, .xmo_rx_hash = veth_xdp_rx_hash, + .xmo_rx_vlan_tag = veth_xdp_rx_vlan_tag, + .xmo_rx_csum_lvl = veth_xdp_rx_csum_lvl, }; #define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b828c7a75be20b..56956368717238 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1658,6 +1658,9 @@ struct xdp_metadata_ops { int (*xmo_rx_timestamp)(const struct xdp_md *ctx, u64 *timestamp); int (*xmo_rx_hash)(const struct xdp_md *ctx, u32 *hash, enum xdp_rss_hash_type *rss_type); + int (*xmo_rx_vlan_tag)(const struct xdp_md *ctx, u16 *vlan_tag, + __be16 *vlan_proto); + int (*xmo_rx_csum_lvl)(const struct xdp_md *ctx, u8 *csum_level); }; /** diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 91ed66952580a0..cd49cdd71019dc 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4209,10 +4209,13 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, { const void *a = skb_metadata_end(skb_a); const void *b = skb_metadata_end(skb_b); - /* Using more efficient varaiant than plain call to memcmp(). */ -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 u64 diffs = 0; + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || + BITS_PER_LONG != 64) + goto slow; + + /* Using more efficient variant than plain call to memcmp(). 
*/ switch (meta_len) { #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) @@ -4232,11 +4235,11 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; + default: +slow: + return memcmp(a - meta_len, b - meta_len, meta_len); } return diffs; -#else - return memcmp(a - meta_len, b - meta_len, meta_len); -#endif } static inline bool skb_metadata_differs(const struct sk_buff *skb_a, diff --git a/include/net/xdp.h b/include/net/xdp.h index d1c5381fc95f24..3008042a00e362 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -370,7 +370,12 @@ xdp_data_meta_unsupported(const struct xdp_buff *xdp) static inline bool xdp_metalen_invalid(unsigned long metalen) { - return (metalen & (sizeof(__u32) - 1)) || (metalen > 32); + typeof(metalen) meta_max; + + meta_max = type_max(typeof_member(struct skb_shared_info, meta_len)); + BUILD_BUG_ON(!__builtin_constant_p(meta_max)); + + return !IS_ALIGNED(metalen, sizeof(u32)) || metalen > meta_max; } struct xdp_attachment_info { @@ -389,6 +394,10 @@ void xdp_attachment_setup(struct xdp_attachment_info *info, bpf_xdp_metadata_rx_timestamp) \ XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_HASH, \ bpf_xdp_metadata_rx_hash) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_VLAN_TAG, \ + bpf_xdp_metadata_rx_vlan_tag) \ + XDP_METADATA_KFUNC(XDP_METADATA_KFUNC_RX_CSUM_LVL, \ + bpf_xdp_metadata_rx_csum_lvl) \ enum { #define XDP_METADATA_KFUNC(name, _) name, @@ -417,6 +426,7 @@ enum xdp_rss_hash_type { XDP_RSS_L4_UDP = BIT(5), XDP_RSS_L4_SCTP = BIT(6), XDP_RSS_L4_IPSEC = BIT(7), /* L4 based hash include IPSEC SPI */ + XDP_RSS_L4_ICMP = BIT(8), /* Second part: RSS hash type combinations used for driver HW mapping */ XDP_RSS_TYPE_NONE = 0, @@ -432,11 +442,13 @@ enum xdp_rss_hash_type { XDP_RSS_TYPE_L4_IPV4_UDP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV4_SCTP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | 
XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV4_IPSEC = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV4_ICMP = XDP_RSS_L3_IPV4 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_TCP, XDP_RSS_TYPE_L4_IPV6_UDP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_UDP, XDP_RSS_TYPE_L4_IPV6_SCTP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_SCTP, XDP_RSS_TYPE_L4_IPV6_IPSEC = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_IPSEC, + XDP_RSS_TYPE_L4_IPV6_ICMP = XDP_RSS_L3_IPV6 | XDP_RSS_L4 | XDP_RSS_L4_ICMP, XDP_RSS_TYPE_L4_IPV6_TCP_EX = XDP_RSS_TYPE_L4_IPV6_TCP | XDP_RSS_L3_DYNHDR, XDP_RSS_TYPE_L4_IPV6_UDP_EX = XDP_RSS_TYPE_L4_IPV6_UDP | XDP_RSS_L3_DYNHDR, diff --git a/kernel/bpf/offload.c b/kernel/bpf/offload.c index 8a26cd8814c1b6..a133fb775f4937 100644 --- a/kernel/bpf/offload.c +++ b/kernel/bpf/offload.c @@ -848,6 +848,10 @@ void *bpf_dev_bound_resolve_kfunc(struct bpf_prog *prog, u32 func_id) p = ops->xmo_rx_timestamp; else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_HASH)) p = ops->xmo_rx_hash; + else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_VLAN_TAG)) + p = ops->xmo_rx_vlan_tag; + else if (func_id == bpf_xdp_metadata_kfunc_id(XDP_METADATA_KFUNC_RX_CSUM_LVL)) + p = ops->xmo_rx_csum_lvl; out: up_read(&bpf_devs_lock); diff --git a/net/core/xdp.c b/net/core/xdp.c index 41e5ca8643ec93..c666d3e0a26c92 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -738,6 +738,47 @@ __bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash, return -EOPNOTSUPP; } +/** + * bpf_xdp_metadata_rx_vlan_tag - Get XDP packet outermost VLAN tag with protocol + * @ctx: XDP context pointer. + * @vlan_tag: Destination pointer for VLAN tag + * @vlan_proto: Destination pointer for VLAN protocol identifier in network byte order. + * + * In case of success, vlan_tag contains VLAN tag, including 12 least significant bits + * containing VLAN ID, vlan_proto contains protocol identifier.
+ * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : VLAN tag was not stripped or is not available + */ +__bpf_kfunc int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, u16 *vlan_tag, + __be16 *vlan_proto) +{ + return -EOPNOTSUPP; +} + +/** + * bpf_xdp_metadata_rx_csum_lvl - Get depth at which HW has checked the checksum. + * @ctx: XDP context pointer. + * @csum_level: Return value pointer. + * + * In case of success, csum_level contains depth of the last verified checksum. + * If only the outermost checksum was verified, csum_level is 0, if both + * encapsulation and inner transport checksums were verified, csum_level is 1, + * and so on. + * For more details, refer to csum_level field in sk_buff. + * + * Return: + * * Returns 0 on success or ``-errno`` on error. + * * ``-EOPNOTSUPP`` : device driver doesn't implement kfunc + * * ``-ENODATA`` : Checksum was not validated + */ +__bpf_kfunc int bpf_xdp_metadata_rx_csum_lvl(const struct xdp_md *ctx, u8 *csum_level) +{ + return -EOPNOTSUPP; +} + __diag_pop(); BTF_SET8_START(xdp_metadata_kfunc_ids) diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index a105c0cd008a48..19463230ece5e9 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -386,28 +386,51 @@ char *ping_command(int family) return "ping"; } +int get_cur_netns(void) +{ + int nsfd; + + nsfd = open("/proc/self/ns/net", O_RDONLY); + ASSERT_GE(nsfd, 0, "open /proc/self/ns/net"); + return nsfd; +} + +int get_netns(const char *name) +{ + char nspath[PATH_MAX]; + int nsfd; + + snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); + nsfd = open(nspath, O_RDONLY | O_CLOEXEC); + ASSERT_GE(nsfd, 0, "open /proc/self/ns/net"); + return nsfd; +} + +int set_netns(int netns_fd) +{ + return setns(netns_fd, CLONE_NEWNET); +} + struct nstoken { 
int orig_netns_fd; }; struct nstoken *open_netns(const char *name) { + struct nstoken *token; int nsfd; - char nspath[PATH_MAX]; int err; - struct nstoken *token; token = calloc(1, sizeof(struct nstoken)); if (!ASSERT_OK_PTR(token, "malloc token")) return NULL; - token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY); - if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net")) + token->orig_netns_fd = get_cur_netns(); + if (token->orig_netns_fd < 0) goto fail; - snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name); - nsfd = open(nspath, O_RDONLY | O_CLOEXEC); - if (!ASSERT_GE(nsfd, 0, "open netns fd")) + nsfd = get_netns(name); + if (nsfd < 0) goto fail; err = setns(nsfd, CLONE_NEWNET); diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 694185644da6b2..b18b9619595c01 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -58,6 +58,8 @@ int make_sockaddr(int family, const char *addr_str, __u16 port, char *ping_command(int family); int get_socket_local_port(int sock_fd); +int get_cur_netns(void); +int get_netns(const char *name); struct nstoken; /** * open_netns() - Switch to specified network namespace by name. 
@@ -67,4 +69,5 @@ struct nstoken; */ struct nstoken *open_netns(const char *name); void close_netns(struct nstoken *token); +int set_netns(int netns_fd); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 626c461fa34d8a..6c71d712932e55 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -20,7 +20,7 @@ #define UDP_PAYLOAD_BYTES 4 -#define AF_XDP_SOURCE_PORT 1234 +#define UDP_SOURCE_PORT 1234 #define AF_XDP_CONSUMER_PORT 8080 #define UMEM_NUM 16 @@ -33,6 +33,19 @@ #define RX_ADDR "10.0.0.2" #define PREFIX_LEN "8" #define FAMILY AF_INET +#define TX_NETNS_NAME "xdp_metadata_tx" +#define RX_NETNS_NAME "xdp_metadata_rx" +#define TX_MAC "00:00:00:00:00:01" +#define RX_MAC "00:00:00:00:00:02" + +#define VLAN_ID 59 +#define VLAN_ID_STR "59" +#define VLAN_PROTO "802.1Q" +#define VLAN_PID htons(ETH_P_8021Q) +#define TX_NAME_VLAN TX_NAME "." VLAN_ID_STR +#define RX_NAME_VLAN RX_NAME "." 
VLAN_ID_STR + +#define XDP_RSS_TYPE_L4 BIT(3) struct xsk { void *umem_area; @@ -119,90 +132,28 @@ static void close_xsk(struct xsk *xsk) munmap(xsk->umem_area, UMEM_SIZE); } -static void ip_csum(struct iphdr *iph) -{ - __u32 sum = 0; - __u16 *p; - int i; - - iph->check = 0; - p = (void *)iph; - for (i = 0; i < sizeof(*iph) / sizeof(*p); i++) - sum += p[i]; - - while (sum >> 16) - sum = (sum & 0xffff) + (sum >> 16); - - iph->check = ~sum; -} - -static int generate_packet(struct xsk *xsk, __u16 dst_port) +static int generate_packet_udp(void) { - struct xdp_desc *tx_desc; - struct udphdr *udph; - struct ethhdr *eth; - struct iphdr *iph; - void *data; - __u32 idx; - int ret; - - ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx); - if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve")) - return -1; - - tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx); - tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE; - printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr); - data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr); - - eth = data; - iph = (void *)(eth + 1); - udph = (void *)(iph + 1); - - memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN); - memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN); - eth->h_proto = htons(ETH_P_IP); - - iph->version = 0x4; - iph->ihl = 0x5; - iph->tos = 0x9; - iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES); - iph->id = 0; - iph->frag_off = 0; - iph->ttl = 0; - iph->protocol = IPPROTO_UDP; - ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)"); - ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); - ip_csum(iph); - - udph->source = htons(AF_XDP_SOURCE_PORT); - udph->dest = htons(dst_port); - udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); - udph->check = 0; - - memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES); - - tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES; - xsk_ring_prod__submit(&xsk->tx, 1); - - ret = 
sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0); - if (!ASSERT_GE(ret, 0, "sendto")) - return ret; - - return 0; -} - -static void complete_tx(struct xsk *xsk) -{ - __u32 idx; - __u64 addr; - - if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) { - addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx); - - printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr); - xsk_ring_cons__release(&xsk->comp, 1); - } + char udp_payload[UDP_PAYLOAD_BYTES]; + struct sockaddr_in rx_addr; + int sock_fd, err = 0; + + /* Build a packet */ + memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); + rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); + rx_addr.sin_family = AF_INET; + rx_addr.sin_port = htons(UDP_SOURCE_PORT); + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) + return sock_fd; + + err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, + (void *)&rx_addr, sizeof(rx_addr)); + ASSERT_GE(err, 0, "sendto"); + + close(sock_fd); + return err; } static void refill_rx(struct xsk *xsk, __u64 addr) @@ -268,7 +219,17 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; - ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); + if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_tag, VLAN_ID, "rx_vlan_tag")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) + return -1; + + if (!ASSERT_NEQ(meta->rx_csum_lvl, 0, "rx_csum_lvl")) + return -1; xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); @@ -281,40 +242,50 @@ void test_xdp_metadata(void) struct xdp_metadata2 *bpf_obj2 = NULL; struct xdp_metadata *bpf_obj = NULL; struct bpf_program *new_prog, *prog; - struct nstoken *tok = NULL; + int prev_netns, rx_netns, tx_netns; __u32 queue_id = QUEUE_ID; struct bpf_map *prog_arr; - struct xsk tx_xsk = {}; 
struct xsk rx_xsk = {}; __u32 val, key = 0; int retries = 10; int rx_ifindex; - int tx_ifindex; int sock_fd; int ret; - /* Setup new networking namespace, with a veth pair. */ + /* Setup new networking namespaces, with a veth pair. */ + + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); + prev_netns = get_cur_netns(); + tx_netns = get_netns(TX_NETNS_NAME); + rx_netns = get_netns(RX_NETNS_NAME); + if (prev_netns < 0 || tx_netns < 0 || rx_netns < 0) + goto close_ns; - SYS(out, "ip netns add xdp_metadata"); - tok = open_netns("xdp_metadata"); + set_netns(tx_netns); SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); + + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); + + SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN + " type vlan proto " VLAN_PROTO " id " VLAN_ID_STR); + SYS(out, "ip link set dev " TX_NAME_VLAN " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); + + /* Avoid ARP calls */ + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); + + set_netns(rx_netns); + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); SYS(out, "ip link set dev " RX_NAME " up"); - SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); - rx_ifindex = if_nametoindex(RX_NAME); - tx_ifindex = if_nametoindex(TX_NAME); /* Setup separate AF_XDP for TX and RX interfaces. 
*/ - ret = open_xsk(tx_ifindex, &tx_xsk); - if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) - goto out; - ret = open_xsk(rx_ifindex, &rx_xsk); if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) goto out; @@ -355,17 +326,16 @@ void test_xdp_metadata(void) goto out; /* Send packet destined to RX AF_XDP socket. */ - if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, - "generate AF_XDP_CONSUMER_PORT")) + set_netns(tx_netns); + if (!ASSERT_GE(generate_packet_udp(), 0, "generate UDP packet")) goto out; /* Verify AF_XDP RX packet has proper metadata. */ + set_netns(rx_netns); if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, "verify_xsk_metadata")) goto out; - complete_tx(&tx_xsk); - /* Make sure freplace correctly picks up original bound device * and doesn't crash. */ @@ -384,10 +354,11 @@ void test_xdp_metadata(void) goto out; /* Send packet to trigger . */ - if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, - "generate freplace packet")) + set_netns(tx_netns); + if (!ASSERT_GE(generate_packet_udp(), 0, "generate freplace packet")) goto out; + set_netns(rx_netns); while (!retries--) { if (bpf_obj2->bss->called) break; @@ -397,10 +368,14 @@ void test_xdp_metadata(void) out: close_xsk(&rx_xsk); - close_xsk(&tx_xsk); xdp_metadata2__destroy(bpf_obj2); xdp_metadata__destroy(bpf_obj); - if (tok) - close_netns(tok); - SYS_NOFAIL("ip netns del xdp_metadata"); + set_netns(prev_netns); +close_ns: + close(prev_netns); + close(tx_netns); + close(rx_netns); + + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); } diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index b2dfd7066c6e4f..f46f75db21b44c 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -20,21 +20,34 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int 
bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __u16 *vlan_tag, + __be16 *vlan_proto) __ksym; +extern int bpf_xdp_metadata_rx_csum_lvl(const struct xdp_md *ctx, + __u8 *csum_level) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; struct ipv6hdr *ip6h = NULL; - struct ethhdr *eth = NULL; struct udphdr *udp = NULL; struct iphdr *iph = NULL; struct xdp_meta *meta; + struct ethhdr *eth; int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; eth = data; + + if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) || + eth->h_proto == bpf_htons(ETH_P_8021Q))) + eth = (void *)eth + sizeof(struct vlan_hdr); + + if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q)) + eth = (void *)eth + sizeof(struct vlan_hdr); + if (eth + 1 < data_end) { if (eth->h_proto == bpf_htons(ETH_P_IP)) { iph = (void *)(eth + 1); @@ -76,15 +89,35 @@ int rx(struct xdp_md *ctx) return XDP_PASS; } + meta->hint_valid = 0; + err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (!err) + if (err) { + meta->rx_timestamp_err = err; + } else { + meta->hint_valid |= XDP_META_FIELD_TS; meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + } + + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, + &meta->rx_hash_type); + if (err) + meta->rx_hash_err = err; else - meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + meta->hint_valid |= XDP_META_FIELD_RSS; - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); - if (err < 0) - meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ + err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_tag, + &meta->rx_vlan_proto); + if (err) + meta->rx_vlan_tag_err = err; + else + meta->hint_valid |= XDP_META_FIELD_VLAN_TAG; + + err = bpf_xdp_metadata_rx_csum_lvl(ctx, &meta->rx_csum_lvl); + if (err) + meta->rx_csum_err 
= err; + else + meta->hint_valid |= XDP_META_FIELD_CSUM_LVL; __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index d151d406a123ef..6f7223d581b7c1 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -23,6 +23,11 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __u16 *vlan_tag, + __be16 *vlan_proto) __ksym; +extern int bpf_xdp_metadata_rx_csum_lvl(const struct xdp_md *ctx, + __u8 *csum_level) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) @@ -57,6 +62,12 @@ int rx(struct xdp_md *ctx) meta->rx_timestamp = 1; bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_tag, &meta->rx_vlan_proto); + + /* Same as with timestamp, zero is expected */ + ret = bpf_xdp_metadata_rx_csum_lvl(ctx, &meta->rx_csum_lvl); + if (!ret && meta->rx_csum_lvl == 0) + meta->rx_csum_lvl = 1; return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 613321eb84c190..d234cbcc9103ac 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -19,6 +19,9 @@ #include "xsk.h" #include +#include +#include +#include #include #include #include @@ -150,21 +153,34 @@ static __u64 gettime(clockid_t clock_id) return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec; } +#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */ +#define VLAN_CFI_MASK GENMASK(12, 12) /* Canonical Format / Drop Eligible Indicator */ 
+#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ +static void print_vlan_tag(__u16 tag) +{ + __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); + __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); + bool cfi = FIELD_GET(VLAN_CFI_MASK, tag); + + printf("PCP=%u, CFI=%d, VID=0x%X\n", pcp, cfi, vlan_id); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - if (meta->rx_hash_err < 0) - printf("No rx_hash err=%d\n", meta->rx_hash_err); - else + if (meta->hint_valid & XDP_META_FIELD_RSS) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + else + printf("No rx_hash, err=%d\n", meta->rx_hash_err); + + if (meta->hint_valid & XDP_META_FIELD_TS) { + printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, + (double)meta->rx_timestamp / NANOSEC_PER_SEC); - printf("rx_timestamp: %llu (sec:%0.4f)\n", meta->rx_timestamp, - (double)meta->rx_timestamp / NANOSEC_PER_SEC); - if (meta->rx_timestamp) { __u64 usr_clock = gettime(clock_id); __u64 xdp_clock = meta->xdp_timestamp; __s64 delta_X = xdp_clock - meta->rx_timestamp; @@ -179,8 +195,22 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) usr_clock, (double)usr_clock / NANOSEC_PER_SEC, (double)delta_X2U / NANOSEC_PER_SEC, (double)delta_X2U / 1000); + } else { + printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); } + if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { + printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); + printf("rx_vlan_tag: "); + print_vlan_tag(meta->rx_vlan_tag); + } else { + printf("No rx_vlan_tag or rx_vlan_proto, err=%d\n", meta->rx_vlan_tag_err); + } + + if (meta->hint_valid & XDP_META_FIELD_CSUM_LVL) + printf("Checksum was checked at level %u\n", meta->rx_csum_lvl); + else + printf("Checksum was not checked, err=%d\n", meta->rx_csum_err); } static void verify_skb_metadata(int fd) diff --git a/tools/testing/selftests/bpf/xdp_metadata.h 
b/tools/testing/selftests/bpf/xdp_metadata.h index 938a729bd3072f..ff1372244d345e 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -9,12 +9,46 @@ #define ETH_P_IPV6 0x86DD #endif +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef ETH_P_8021AD +#define ETH_P_8021AD 0x88A8 +#endif + +#ifndef BIT +#define BIT(nr) (1 << (nr)) +#endif + +enum xdp_meta_field { + XDP_META_FIELD_TS = BIT(0), + XDP_META_FIELD_RSS = BIT(1), + XDP_META_FIELD_VLAN_TAG = BIT(2), + XDP_META_FIELD_CSUM_LVL = BIT(3), +}; + struct xdp_meta { - __u64 rx_timestamp; + union { + __u64 rx_timestamp; + __s32 rx_timestamp_err; + }; __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; __s32 rx_hash_err; }; + union { + struct { + __u16 rx_vlan_tag; + __be16 rx_vlan_proto; + }; + __s32 rx_vlan_tag_err; + }; + union { + __u8 rx_csum_lvl; + __s32 rx_csum_err; + }; + enum xdp_meta_field hint_valid; };